代码如下。
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
import urllib
import urllib2
import cookielib
import re
# Question script: fetch the home page, pull the _xsrf token out of the HTML,
# then POST the login form and print whatever the server answers.
# Account credentials (left blank here); sent as the 'email' and 'password'
# form fields below.
username = ''
password = ''
# Fetch the home page and decode it so the hidden _xsrf input can be searched.
url = 'http://www.zhihu.com'
request = urllib2.Request(url)
res = urllib2.urlopen(request)
contents = res.read().decode('utf-8')
# Earlier, greedy variant of the token pattern, left disabled:
#pattern = re.compile( r'name="_xsrf" value="(.*)"/>')
pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
result = re.findall(pattern,contents)
# Takes the first match; raises IndexError if the page has no such input.
xsrf = result[0]
# NOTE(review): everything after '#' is a URL fragment, so this presumably
# posts to the home page rather than to a login endpoint -- confirm.
lgurl = 'http://www.zhihu.com/#sgin'
# NOTE(review): the cookie jar is only created here, after the home page was
# already fetched above with the default opener, so cookies set by that first
# response are not stored in this jar.
cookie = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cookie)
# Browser-like User-Agent, attached to the login request only.
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
# Login form fields, URL-encoded into the POST body.
post_data = {'_xsrf':xsrf,'email':username,'password':password,'rememberme':'y'}
dt = urllib.urlencode(post_data)
# Passing a data argument makes urllib2 issue a POST.
req = urllib2.Request(lgurl,dt,hdr)
# Build a cookie-aware opener and make it the default for later urlopen calls.
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)
# Submit the login form and print the raw response body.
response = opener.open(req)
page = response.read()
print page
P.S. 很多人的 lgurl 写的是 'http://www.zhihu.com/login'，但是这个地址现在返回 404。
麻烦各位有空指点一下，谢谢了。
请参考这个: http://segmentfault.com/q/1010000003855057
和你一样的问题.
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
import urllib
import urllib2
import cookielib
import re
#设置账号(Email类型)
username = ''
password = ''
#Cookie
cookie = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)
#请求首页
url = 'http://www.zhihu.com/'
request = urllib2.Request(url)
res = urllib2.urlopen(request)
contents = res.read().decode('utf-8')
#得到 token
pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
result = re.findall(pattern,contents)
xsrf = result[0]
#请求验证码
Captcha_URL= 'http://www.zhihu.com/captcha.gif?r=11111111'
r = urllib2.urlopen(urllib2.Request(Captcha_URL))
#写入文件
with open('code.gif','wb') as f:
f.write(r.read())
#输入 4位的 验证码
captcha =raw_input('captcha: ')
#构造登陆时的数据
lgurl = 'http://www.zhihu.com/login/email'
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
post_data = {
'_xsrf':xsrf,
'email':username,
'password':password,
'rememberme':'true',
'captcha':captcha
}
#提交登陆
dt = urllib.urlencode(post_data)
req = urllib2.Request(lgurl, dt, hdr)
response = opener.open(req)
page = response.read()
print page
# 登陆成功, 输出的是下面这样的数据
#{"r":0,
# "msg": "\u767b\u9646\u6210\u529f"
#}
#
为啥同样的代码，我这里出现的一直是 10030 报错，说我登录过于频繁，但是在网页上却可以登录？
还有大兄弟，你的 lgurl 错了：登录地址有 http://www.zhihu.com/login/email 和 http://www.zhihu.com/login/phone_num 两个，如果是 email 账户就用前者，手机号账户就用后者。