首页 > 登陆知乎验证码问题?

登陆知乎验证码问题?

代码如下

# -*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

import urllib
import urllib2
import cookielib
import re
import webbrowser

class ZhiHu:

    def __init__(self,):
        self.username = ''
        self.password = ''
        self.filename = 'cookie.txt'
        
        self.lgurl = 'http://www.zhihu.com/login/email'
        self.cookie = cookielib.MozillaCookieJar(self.filename)
        self.cookie_handler = urllib2.HTTPCookieProcessor(self.cookie)
        self.opener = urllib2.build_opener(self.cookie_handler)
        
    #第一次登陆得到cookie
    def firstlogin(self):
        zhihu = 'http://www.zhihu.com'
        request = urllib2.Request(zhihu)
        res = urllib2.urlopen(request)
        contents = res.read().decode('utf-8')
        #self.cookie.save(ignore_discard=True,ignore_expires=True)
        return contents
    #第二次登陆,返回登陆的getcode()
    def seclogin(self,contents):
        loginURL = self.lgurl
        pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
        result = re.findall(pattern,contents)
        xsrf = result[0]
        hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
        post_data = {'_xsrf':xsrf,'email':self.username,'password':self.password,'rememberme':'y'}
        dt = urllib.urlencode(post_data)
        req = urllib2.Request(loginURL,dt,hdr)
        response = self.opener.open(req)
        page = response.read().decode('gbk')
        return response.getcode()
    #判断是否需要验证码
    def needIdenCode(self):
        home ='http://www.zhihu.com/#signin'
        sign = urllib2.urlopen(urllib2.Request(home))
        details= sign.read().decode('utf-8')
        contents = self.firstlogin()        
        status = self.seclogin(contents)
        if status==200:
            print "make it"
            #正则验证码,可能是这里出现问题
            pattern = re.compile(u'\u9a8c\u8bc1\u7081',re.S)
            result = re.search(pattern,sign)
            if result:
                print u'需要验证码'
                return sign
            else:
                print 'ok'
    #得到验证码图片。            
    def getpic(self,sgin):
        pattern = re.compile(r'<img class="js-captcha-img" width.*?src="(.*?)"></imhg>',re.S)
        items = re.search(patttern,sign)
        if items and items.goup(1):
            print items.group(1)
            return items.group(1)
        else:
            print 'false'
            return False


    def main(self):
        needResult = self.needIdenCode()
        if needResult == True:
            print 'input'
            idencode = self.getpic(needResult)

            if idencode ==True:
                print u"在浏览器中获取验证码"
                webbrowser.open_new_tab(idencode)
            else:
                print "shibai"

        else:
            print u"直接登陆"



start = ZhiHu()
start.main()

代码比较繁琐,刚开始写。出现的错误是

make it
Traceback (most recent call last):
  File "D:\pythonIDE\py\jiu.py", line 91, in <module>
    start.main()
  File "D:\pythonIDE\py\jiu.py", line 74, in main
    needResult = self.needIdenCode()
  File "D:\pythonIDE\py\jiu.py", line 56, in needIdenCode
    result = re.search(pattern,sign)
  File "C:\Python27\lib\re.py", line 146, in search
    return _compile(pattern, flags).search(string)
TypeError: expected string or buffer
[Finished in 1.2s with exit code 1]

请大家帮忙看一下,就快登陆成功了,谢谢。


是否需要验证码是在seclogin中给出的, 你检测你获取的 首页的内容有什么用嘛?


还有为什么不按我说的方法做呢? 不管要不要, 直接获取验证码, 然后一块提交就行了.

你目前这样做, 要先提交一次登陆, 然后再判断要不要验证码, 然后再获取验证码, 然后再提交.

直接把前面两步省掉, 直接获取验证码, 然后提交登陆, 不可以吗?


还有, 知乎 登陆的API返回的内容是JSON格式的, 所以你可以使用 pythonjson 模块来处理.

使用方法:

import json

【热门文章】
【热门文章】