使用python爬虫模拟12306登录方法

登录时试了很久,总是提示“系统忙,请刷新”,非常折磨人;折腾了半天才发现是请求头部的问题。

验证码仍然需要人工识图。

#!/bin/env python
# -*- coding=utf-8 -*-
import getpass
import httplib
import json
import random
import re
import ssl
import sys
import urllib
import urllib2
from cookielib import LWPCookieJar
# Python 2 only: re-expose sys.setdefaultencoding() and switch the implicit
# str <-> unicode codec to UTF-8 (presumably so the Chinese messages below can
# be %-formatted and printed without UnicodeDecodeError).
reload(sys)
sys.setdefaultencoding('utf8')

# One cookie jar shared by every request. Installing the opener globally makes
# plain urllib2.urlopen() calls send and store the session cookies.
cookiejar = LWPCookieJar()
cookiesuppor = urllib2.HTTPCookieProcessor(cookiejar)
opener = urllib2.build_opener(cookiesuppor, urllib2.HTTPHandler)
urllib2.install_opener(opener)

# SECURITY: this replaces the default HTTPS context factory with the
# unverified one, i.e. TLS certificates are NOT checked for any HTTPS request
# made through the default context in this process.
ssl._create_default_https_context = ssl._create_unverified_context

# Captcha image URL; a random float is appended as a trailing query parameter.
# NOTE(review): this copy of the script appears to have been lower-cased as a
# whole; URL paths are case-sensitive, so verify the path casing against the
# live service before relying on it.
codeimg = 'https://kyfw.12306.cn/otn/passcodenew/getpasscodenew?module=login&rand=sjrand&%s' % random.random()

# Banner printed at start-up (user-facing text, kept as written).
baner = """
##################################
12306登录脚本,作者mr rjl
python版本:2.7,适用于linux
验证码输入方式:
输入问题对应的图片序号,1-8;
多个以’,’分隔.如:1,2,3
##################################
"""
# Headers attached to every request made by get() and post(). The write-up
# accompanying this script attributes the "system busy" login error to the
# request headers, so the full set is always sent.
_REQUEST_HEADERS = (
    ("content-type", "application/x-www-form-urlencoded; charset=utf-8"),
    ('x-requested-with', 'xmlhttprequest'),
    ('user-agent',
     'mozilla/5.0 (windows nt 6.1) applewebkit/537.36 (khtml, like gecko) '
     'chrome/33.0.1750.154 safari/537.36'),
    ('referer', 'https://kyfw.12306.cn/otn/login/init'),
    ('accept', '*/*'),
)


def _fetch(url, data=None):
    """Send one request with the shared headers and return the raw body.

    :param url: absolute URL to request.
    :param data: already url-encoded body; ``None`` makes it a GET request,
        anything else a POST.
    :return: response body as returned by ``read()``.
    :raises urllib2.URLError, httplib.HTTPException: propagated to the caller.
    """
    request = urllib2.Request(url=url, data=data)
    for header, value in _REQUEST_HEADERS:
        request.add_header(header, value)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()


def get(url):
    """Fetch *url* with a GET request.

    :param url: absolute URL to request.
    :return: the response body, or ``None`` when the request failed (the
        error is printed).
    """
    try:
        return _fetch(url)
    # urllib2.HTTPError is a subclass of URLError, so one clause covers both.
    except (httplib.HTTPException, urllib2.URLError) as e:
        print(e)
        return None


def post(url, data):
    """Send *data* as a url-encoded form POST to *url*.

    :param url: absolute URL to request.
    :param data: mapping (or sequence of pairs) accepted by
        ``urllib.urlencode``.
    :return: the response body on success. On failure the caught exception
        object itself is returned (not raised), so callers must check the
        type of the result before parsing it.
    """
    try:
        return _fetch(url, urllib.urlencode(data))
    # urllib2.HTTPError is a subclass of URLError, so one clause covers both.
    except (httplib.HTTPException, urllib2.URLError) as e:
        return e
def cookietp():
    """Load the login page to obtain session cookies, then list them.

    The response body is discarded; the request is made only so that the
    globally installed cookie processor fills ``cookiejar``.

    :return: None
    """
    stoidinput("获取cookie")
    get("https://kyfw.12306.cn/otn/login/init")
    for cookie in cookiejar:
        stoidinput(cookie)
def getimg():
    """Download the captcha image to ``/tmp/tkcode`` and open it in a viewer.

    :return: ``True`` when the image was downloaded and saved, ``False`` when
        the download or the write failed. Failure to start the viewer is
        reported but does not count as failure, since the file can still be
        opened by hand.
    """
    import subprocess

    stoidinput("下载验证码…")
    image = get(codeimg)
    if not image:
        # get() returns None on a network error; there is nothing to save.
        return False
    try:
        # file.write() returns None, so its result cannot be used as a
        # success flag; a failure surfaces as an exception instead.
        with open('/tmp/tkcode', 'wb') as handle:
            handle.write(image)
    except EnvironmentError as e:  # IOError and OSError on Python 2
        print(e)
        return False
    try:
        # Popen does not wait, so the viewer runs alongside the prompt.
        # NOTE(review): the original command was "oeg", presumably a typo for
        # the GNOME image viewer "eog" - confirm.
        subprocess.Popen(['eog', '/tmp/tkcode'])
    except OSError as e:
        print(e)
    return True
def stoidinput(text):
    """Print an informational message prefixed with a blue ``[*]`` tag.

    :param text: value to show; it is formatted with ``%s``, so any object
        (including a tuple) is accepted.
    :return: None
    """
    # Wrap in a 1-tuple so a tuple argument is shown as-is instead of being
    # unpacked as multiple format arguments.
    print("\033[34m[*]\033[0m %s " % (text,))
def errorinput(text):
    """Print an error message prefixed with a ``[!]`` tag.

    The tag uses ANSI colour code 32 (green), as in the original script.

    :param text: value to show; it is formatted with ``%s``, so any object
        (including a tuple or an exception) is accepted.
    :return: always ``False``, so callers can ``return errorinput(...)``.
    """
    # Wrap in a 1-tuple so a tuple argument is shown as-is instead of being
    # unpacked as multiple format arguments.
    print("\033[32m[!]\033[0m %s " % (text,))
    return False
def codexy():
    """Ask which captcha tiles were picked and convert them to coordinates.

    The user types tile numbers 1-8 separated by commas (e.g. ``1,2,3``).
    Each number is mapped to the (x, y) position of that tile and the result
    is stored in the module-level ``randcode`` as ``"x1,y1,x2,y2,..."``.
    Surrounding whitespace is ignored; entries that are not 1-8 are skipped.

    :return: the coordinate string that was stored in ``randcode``.
    """
    global randcode
    # (left, top) of a point inside each tile; per the original author these
    # were measured by clicking the tile centres in a browser. Tiles 1-4 form
    # the first row, 5-8 the second.
    # NOTE(review): tile 5 was (117, 36) in the original, which falls on
    # tile 2; the values look swapped and are stored here as (36, 117) to
    # match the rest of the second row - confirm against the real image.
    tile_points = {
        '1': (42, 46),
        '2': (105, 46),
        '3': (184, 45),
        '4': (256, 48),
        '5': (36, 117),
        '6': (115, 112),
        '7': (181, 114),
        '8': (252, 111),
    }
    answer = raw_input("[*] 请输入验证码: ")
    coords = []
    for token in answer.split(','):
        point = tile_points.get(token.strip())
        if point is None:
            # Unknown entry: skip it rather than repeat the previous point.
            continue
        coords.extend(point)
    randcode = ",".join(str(value) for value in coords)
    return randcode
def login(user, passwd):
    """Verify the captcha answer, then submit the credentials.

    Reads the module-level ``randcode`` set by ``codexy()``. Progress and
    failures are reported through ``stoidinput`` / ``errorinput``; after a
    successful login the user-login endpoint is posted to and
    ``getuserinfo()`` is called.

    :param user: account name (user name / e-mail / phone number).
    :param passwd: account password.
    :return: None
    """
    # NOTE(review): this copy of the script appears to have been lower-cased
    # as a whole; URL paths, form keys and the 'false' sentinel below are
    # case-sensitive, so verify them against the live service.
    randurl = 'https://kyfw.12306.cn/otn/passcodenew/checkrandcodeansyn'
    logurl = 'https://kyfw.12306.cn/otn/login/loginaysnsuggest'
    surl = 'https://kyfw.12306.cn/otn/login/userlogin'
    randdata = {
        "randcode": randcode,
        "rand": "sjrand",
    }
    logdata = {
        "loginuserdto.user_name": user,
        "userdto.password": passwd,
        "randcode": randcode,
    }
    ldata = {
        "_json_att": None,
    }

    # Step 1: captcha check. post() returns an exception object on network
    # failure, which makes json.loads raise TypeError; a non-JSON body raises
    # ValueError; an unexpected JSON layout raises KeyError.
    try:
        fresult = json.loads(post(randurl, randdata), encoding='utf8')
        checkcode = fresult['data']['msg']
    except (TypeError, ValueError, KeyError) as e:
        errorinput(e)
        return
    if checkcode == 'false':
        errorinput("验证码有误,请重试")
        return

    # Step 2: credentials. An empty 'data' member means the login was
    # rejected and the first entry of 'messages' carries the reason.
    stoidinput("验证码通过,开始登录..")
    try:
        tresult = json.loads(post(logurl, logdata), encoding='utf8')
        logged_in = bool(tresult['data'])
        reason = None if logged_in else tresult['messages'][0]
    except (TypeError, ValueError, KeyError, IndexError) as e:
        errorinput(e)
        return
    if not logged_in:
        errorinput("登录失败: %s" % reason)
        return

    stoidinput("登录成功")
    post(surl, ldata)  # response body is not used
    getuserinfo()
def getuserinfo():
    """After a successful login, fetch the profile page and greet the user.

    Best effort: if the page cannot be fetched or the name cannot be
    extracted, nothing is printed.

    :return: None
    """
    url = 'https://kyfw.12306.cn/otn/modifyuser/initqueryuserinfo'
    result = post(url, dict(_json_att=None))
    # NOTE(review): the pattern is empty in this copy of the script (it looks
    # like it was lost when the code was published). It must contain one
    # capturing group for the user name; until it is restored no greeting is
    # shown.
    name = r''
    try:
        stoidinput("欢迎 %s 登录" % re.search(name, result).group(1))
    except (AttributeError, IndexError, TypeError):
        # AttributeError: no match; IndexError: pattern has no group 1;
        # TypeError: post() returned an exception object instead of text.
        pass
def main():
    """Prompt for credentials and run the login sequence.

    Order matters: the cookies are fetched first, then the captcha image is
    downloaded, then the answer is read, and only then is the login posted.

    :return: None
    """
    user = raw_input("[+] 用户名(用户名/邮箱/手机): ")
    passwd = getpass.getpass("[+] 密码: ")
    if not user or not passwd:
        errorinput("用户名或密码不能为空!")
        return
    cookietp()
    getimg()
    codexy()
    login(user, passwd)
def logout():
    """Request the logout URL and report whether a response came back.

    Only the presence of a response body is checked; its content is not
    inspected.

    :return: None
    """
    # NOTE(review): this copy of the script appears to have been lower-cased;
    # verify the path casing against the live service.
    url = 'https://kyfw.12306.cn/otn/login/loginout'
    if get(url):
        stoidinput("已退出")
    else:
        errorinput("退出失败")
# Script entry point: show the banner, run the interactive login, then log
# out again (logout is attempted whether or not the login succeeded).
if __name__ == "__main__":
    print(baner)
    main()
    logout()

以上就是使用python爬虫模拟12306登录方法的详细内容,更多请关注 第一php社区 其它相关文章!

Posted in 未分类