代码如下
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
import urllib
import urllib2
import cookielib
import re
import webbrowser
class ZhiHu:
def __init__(self,):
self.username = ''
self.password = ''
self.filename = 'cookie.txt'
self.lgurl = 'http://www.zhihu.com/login/email'
self.cookie = cookielib.MozillaCookieJar(self.filename)
self.cookie_handler = urllib2.HTTPCookieProcessor(self.cookie)
self.opener = urllib2.build_opener(self.cookie_handler)
#第一次登陆得到cookie
def firstlogin(self):
zhihu = 'http://www.zhihu.com'
request = urllib2.Request(zhihu)
res = urllib2.urlopen(request)
contents = res.read().decode('utf-8')
#self.cookie.save(ignore_discard=True,ignore_expires=True)
return contents
#第二次登陆,返回登陆的getcode()
def seclogin(self,contents):
loginURL = self.lgurl
pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
result = re.findall(pattern,contents)
xsrf = result[0]
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
post_data = {'_xsrf':xsrf,'email':self.username,'password':self.password,'rememberme':'y'}
dt = urllib.urlencode(post_data)
req = urllib2.Request(loginURL,dt,hdr)
response = self.opener.open(req)
page = response.read().decode('gbk')
return response.getcode()
#判断是否需要验证码
def needIdenCode(self):
home ='http://www.zhihu.com/#signin'
sign = urllib2.urlopen(urllib2.Request(home))
details= sign.read().decode('utf-8')
contents = self.firstlogin()
status = self.seclogin(contents)
if status==200:
print "make it"
#正则验证码,可能是这里出现问题
pattern = re.compile(u'\u9a8c\u8bc1\u7081',re.S)
result = re.search(pattern,sign)
if result:
print u'需要验证码'
return sign
else:
print 'ok'
#得到验证码图片。
def getpic(self,sgin):
pattern = re.compile(r'<img class="js-captcha-img" width.*?src="(.*?)"></imhg>',re.S)
items = re.search(patttern,sign)
if items and items.goup(1):
print items.group(1)
return items.group(1)
else:
print 'false'
return False
def main(self):
needResult = self.needIdenCode()
if needResult == True:
print 'input'
idencode = self.getpic(needResult)
if idencode ==True:
print u"在浏览器中获取验证码"
webbrowser.open_new_tab(idencode)
else:
print "shibai"
else:
print u"直接登陆"
start = ZhiHu()
start.main()
代码比较繁琐,刚开始写。出现的错误是
make it
Traceback (most recent call last):
File "D:\pythonIDE\py\jiu.py", line 91, in <module>
start.main()
File "D:\pythonIDE\py\jiu.py", line 74, in main
needResult = self.needIdenCode()
File "D:\pythonIDE\py\jiu.py", line 56, in needIdenCode
result = re.search(pattern,sign)
File "C:\Python27\lib\re.py", line 146, in search
return _compile(pattern, flags).search(string)
TypeError: expected string or buffer
[Finished in 1.2s with exit code 1]
请大家帮忙看一下,就快登陆成功了,谢谢。
是否需要验证码是在seclogin
中给出的, 你检测你获取的 首页的内容有什么用嘛?
还有为什么不按我说的方法做呢? 不管要不要, 直接获取验证码, 然后一块提交就行了.
你目前这样做, 要先提交一次登陆, 然后再判断要不要验证码, 然后再获取验证码, 然后再提交.
直接把前面两步省掉, 直接获取验证码, 然后提交登陆, 不可以吗?
还有, 知乎 登陆的API返回的内容是JSON格式
的, 所以你可以使用 python
的 json
模块来处理.
使用方法:
import json