首页 > python模拟登陆知乎遇到的forbidden问题?

python模拟登陆知乎遇到的forbidden问题?

代码如下。

# -*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

import urllib
import urllib2
import cookielib
import re

# Account credentials; left blank in the post, fill in before running.
username = ''
password = ''

# Fetch the home page (no custom headers on this request) and decode the
# body as UTF-8 so the hidden form token can be searched for in it.
url = 'http://www.zhihu.com'
request = urllib2.Request(url)
res = urllib2.urlopen(request)
contents = res.read().decode('utf-8')

# Earlier, greedy version of the token pattern, kept for reference:
#pattern = re.compile( r'name="_xsrf" value="(.*)"/>')
# Capture the value of the hidden "_xsrf" input; re.S lets "." span newlines.
pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
result = re.findall(pattern,contents)
# Takes the first match; raises IndexError if the page has no such field.
xsrf = result[0]


# NOTE(review): everything after '#' is a URL fragment, so this POST
# presumably targets the home page rather than a login endpoint -- confirm.
lgurl = 'http://www.zhihu.com/#sgin'
# NOTE(review): the cookie jar is created only here, after the home page was
# already fetched above, so cookies set by that first response are not stored
# in it and are not sent with the login request.
cookie = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cookie)
# Browser-like User-Agent sent with the login request only.
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
# Form fields for the login POST, including the token scraped above.
post_data = {'_xsrf':xsrf,'email':username,'password':password,'rememberme':'y'}
dt = urllib.urlencode(post_data)
# Passing a data argument makes urllib2 issue a POST instead of a GET.
req = urllib2.Request(lgurl,dt,hdr)
# Build the cookie-aware opener and make it the default for urllib2.urlopen.
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)
# Send the login request and print the raw response body.
response = opener.open(req)
page = response.read()
print page

PS:很多人用的 lgurl 是 'http://www.zhihu.com/login',但是这个地址现在返回 404。
麻烦各位有空指点一下,谢谢了。


请参考这个: http://segmentfault.com/q/1010000003855057
和你一样的问题.


# -*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

import urllib
import urllib2
import cookielib
import re

#设置账号(Email类型)
username = ''
password = ''


#Cookie
cookie = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)


#请求首页
url = 'http://www.zhihu.com/'
request = urllib2.Request(url)
res = urllib2.urlopen(request)
contents = res.read().decode('utf-8')

#得到 token
pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
result = re.findall(pattern,contents)
xsrf = result[0]

#请求验证码
Captcha_URL= 'http://www.zhihu.com/captcha.gif?r=11111111'
r = urllib2.urlopen(urllib2.Request(Captcha_URL))

#写入文件
with open('code.gif','wb') as f:
    f.write(r.read())

#输入 4位的 验证码
captcha =raw_input('captcha: ')


#构造登陆时的数据
lgurl = 'http://www.zhihu.com/login/email'
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
post_data = {
  '_xsrf':xsrf,
  'email':username,
  'password':password,
  'rememberme':'true',
  'captcha':captcha
}

#提交登陆
dt = urllib.urlencode(post_data)
req = urllib2.Request(lgurl, dt, hdr)

response = opener.open(req)
page = response.read()
print page

# 登陆成功, 输出的是下面这样的数据
#{"r":0,
# "msg": "\u767b\u9646\u6210\u529f"
#}
#

为啥同样的代码,我这里一直出现 10030 报错,说我登录过于频繁,但是在网页上却可以正常登录?

还有,大兄弟,你的 lgurl 错了:登录地址有 http://www.zhihu.com/login/email 和 http://www.zhihu.com/login/phone_num 两个,如果是 email 账户就用前者,手机号账户就用后者。

【热门文章】
【热门文章】