首页 > python爬虫模拟登录网站,总是不成功

python爬虫模拟登录网站,总是不成功

#!/usr/bin/python  
# -*- coding=utf-8 -*-  
# Python 2 script that attempts to log in to www.iiyi.com: it fetches the home
# page through a cookie-aware urllib2 opener, then POSTs a username/password
# form to `posturl` with a set of browser-captured request headers.
import HTMLParser  
import urlparse  
import urllib  
import urllib2  
import cookielib  
import string
import mechanize  
import re
from bs4 import BeautifulSoup  
import sys
# Python 2 only: reload sys to get setdefaultencoding back, then make UTF-8
# the interpreter-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')
# A mechanize browser is created and given the cookie jar, but `br` is not
# referenced again in this script; all requests below go through urllib2.
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)  
### Main page of the site to log in to
hosturl = 'http://www.iiyi.com/'  
#### Page that receives and processes the POST data (the constructed POST data is sent to this page)
posturl = 'http://auth.iiyi.com/?referer=http%3A%2F%2Fwww.iiyi.com%2F' ### URL that handles the POST request, taken from the captured traffic
### Set up a cookie handler: it stores cookies received from the server in `cj` and attaches them to outgoing requests
cookie_support = urllib2.HTTPCookieProcessor(cj)  
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)  
# Install the opener globally so every urllib2.urlopen() call below uses it.
urllib2.install_opener(opener)   
### Open the main page (the purpose is to collect its cookies so the later POST carries them; per the author the POST fails otherwise)
# The returned handle `h` is not used afterwards.
h = urllib2.urlopen(hosturl)    
### Build the request headers. Author's note: the headers generally need at least the following two items, which were taken from the captured traffic.
headers = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    # NOTE(review): advertising gzip/deflate means the server may compress the
    # body; nothing in this script decompresses it -- confirm this is intended.
    'Accept-Encoding':'gzip, deflate, sdch',
    'Accept-Language':'zh-CN,zh;q=0.8',
    # Cookie string hard-coded from a captured browser session.
    # NOTE(review): a manually supplied Cookie header may take precedence over
    # the cookies held in `cj` -- verify against the cookielib documentation.
    'Cookie':'cnaday_auth_visit=f92747b31d48d18d76a6740d45da1020; CNZZDATA30049871=cnzz_eid%3D969359211-1439954392-http%253A%252F%252Fiapi.iiyi.com%252F%26ntime%3D1440377229; CNZZDATA30080017=cnzz_eid%3D1280405677-1439955491-http%253A%252F%252Fiapi.iiyi.com%252F%26ntime%3D1440378214; __utma=1.881348118.1439959673.1440148394.1440379536.10; __utmb=1.1.10.1440379536; __utmc=1; __utmz=1.1440379536.10.10.utmcsr=iapi.iiyi.com|utmccn=(referral)|utmcmd=referral|utmcct=/v1/tnav; Hm_lvt_f6462c453d025b6235ffc49efab63e1e=1439965661,1440051563,1440142571,1440341373; Hm_lpvt_f6462c453d025b6235ffc49efab63e1e=1440379536; auth_auto=eb91D55fosxgVGs0%2BK22Se4qfMiWh4AN3QD6%2FzcLxwiSQpjexJmBomJmN7I6EseUqIoxoM4',
    'Host':'auth.iiyi.com',
    'If-Modified-Since':'Mon, 24 Aug 2015 01:25:32 GMT',
    'Proxy-Connection':'keep-alive',
    'Referer':'http://iapi.iiyi.com/v1/tnav?site=mh&0.7480784514918923&op=logout&referer=http%3A%2F%2Fwww.iiyi.com%2F',
    'Upgrade-Insecure-Requests':'1',
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36',
    }  
### Build the POST data; it was also taken from the captured traffic.
postData = {'username' : 'zzdaye@163.com', ### your username
            'password' : 'xxxx', ### your password
            'auto' : '0',
            'vcode':'',            
           }   
### The POST data has to be URL-encoded
post_Data = urllib.urlencode(postData)    
### Use urllib2.Request to send the constructed data to the given URL and complete the login
request = urllib2.Request(posturl, post_Data, headers)  
response = urllib2.urlopen(request)  
# Prints the response object itself; the body is never read (.read() is not called).
print response  

#!/usr/bin/python
# -*- coding=utf-8 -*-
import urllib, urllib2, cookielib, sys

reload(sys)
sys.setdefaultencoding('utf-8')

__cookie = cookielib.CookieJar()
__req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie))
__req.addheaders = [
  ('Accept', 'application/javascript, */*;q=0.8'),
  ('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
]

urllib2.install_opener(__req)

headers = {
  'Content-Type': 'application/x-www-form-urlencoded',
  'X-Requested-With': 'XMLHttpRequest'
}

postData = {
  'username' : 'zzdaye@163.com', ###你的用户名
  'password' : 'xxx', ###你的密码
  'auto' : '1',
  'vcode':'',
}

request = urllib2.Request('http://auth.iiyi.com/login/do', urllib.urlencode(postData), headers)
response = urllib2.urlopen(request).read()
#输出登陆结果
print response

#请求登陆后可以访问的页面
request = urllib2.Request('http://service.iiyi.com/tnav/info?11111')
response = urllib2.urlopen(request).read()
#输出结果
print response

楼主代码中,登录时提交的地址不正确。
另外,请求头也不需要那么多内容。


cookie_support = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.buildopener(cookiesupport
这里有个拼写错误,你没看见吗?上面的 cookie_support 有下划线,下面的 cookiesupport 却没有下划线了。

【热门文章】
【热门文章】