问题同上
import gzip
import http
import http.cookiejar
import io
import json
import re
import time
import urllib
from urllib import request, parse

import requests
# Log in to zhihu.com with an e-mail address using only urllib.
#
# Steps: install a cookie-aware opener, read the _xsrf token from the home
# page, download the captcha image for the user to read, then POST the login
# form and print the JSON reply.

# Set up cookies: every request made through `opener` (and, once installed,
# through urllib.request.urlopen) shares this cookie jar.
cookie = http.cookiejar.CookieJar()
cookieProc = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(cookieProc)
urllib.request.install_opener(opener)

# Get the xsrf token from the home page HTML.
with urllib.request.urlopen('https://www.zhihu.com') as home_page:
    home_html = home_page.read().decode("utf8")
xsrf_match = re.search(r'name="_xsrf" value="(.*?)"', home_html)
if xsrf_match is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError('_xsrf token not found in the home page HTML')

# Get the captcha. The millisecond timestamp is sent as the "r" query
# parameter; it is kept in its own name so the `time` module is not shadowed.
timestamp = str(int(time.time() * 1000))
captchaUrl = 'https://www.zhihu.com/captcha.gif?r=' + timestamp + '&type=login'
req = urllib.request.Request(captchaUrl)
with urllib.request.urlopen(req) as w:
    print("Status: %s %s" % (w.status, w.reason))
    # Save the image so the user can open it and read the captcha.
    with open('d:/captcha.gif', 'wb') as o:
        o.write(w.read())
    print(w.getheaders())
captcha = input('请输入captcha')

# Request headers for the login POST.
# - No Content-Length entry: urllib computes it from the actual body, and a
#   hard-coded value does not match once the form fields change length.
# - Only gzip is advertised because that is the only encoding decoded below.
# - Origin/Referer use https to match the scheme of the login request.
headers = {
    'Host': 'www.zhihu.com',
    'Connection': 'keep-alive',
    'Accept': '*/*',
    'Accept-Encoding': 'gzip',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Origin': 'https://www.zhihu.com',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://www.zhihu.com/',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36',
}
postData = {
    'username': 'xxxxx@gmail.com',
    'password': 'xxxxx',
    '_xsrf': xsrf_match.group(1),
    'captcha': captcha,
}
print(postData['_xsrf'])

# Login. The form is sent over HTTPS, the same scheme used to fetch the
# token, so the credentials are not transmitted in cleartext.
post_body = urllib.parse.urlencode(postData).encode(encoding='UTF8')
request1 = urllib.request.Request('https://www.zhihu.com/login/email', post_body, headers)
with opener.open(request1) as response:
    print(response.getheaders())
    raw_body = response.read()
    # Decompress only when the server reports a gzip-encoded body; an
    # uncompressed reply would otherwise make the gzip reader fail.
    if (response.getheader('Content-Encoding') or '').lower() == 'gzip':
        raw_body = gzip.decompress(raw_body)
print(json.loads(raw_body.decode('utf-8')))
不是吧，要是IP被封了，网页也会打不开的。
import requests, pyquery, urllib
# Log in to zhihu.com with a phone number through a requests session.
#
# The session keeps cookies between the GET that loads the sign-in page and
# the POST that submits the login form.
url = 'https://www.zhihu.com'
info_url = '{0}/#signin'.format(url)
login_url = '{0}/login/phone_num'.format(url)

session = requests.Session()
session.headers = {
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}

# Parse the sign-in page and read the value of the input named "_xsrf".
Q = pyquery.PyQuery(session.get(info_url).text)
xsrf_token = Q('input[name="_xsrf"]').val()
if xsrf_token is None:
    # Without the token the form below would be submitted incomplete.
    raise RuntimeError('_xsrf token not found in the sign-in page')

data = {
    '_xsrf': xsrf_token,
    'phone_num': '13311111111',
    'password': '111'
}

# requests form-encodes a dict passed as data=, so no explicit urlencode call
# is needed (urllib.urlencode exists only on Python 2).
r = session.post(login_url, data=data)
# Parenthesised form works as a call on Python 3 and as a statement on Python 2.
print(r.text)