首页 > Python 爬取京东数据时无法登录

Python 爬取京东数据时无法登录

我想用 Python 爬取京东的快递信息,首先要解决的问题是用 Python 模拟浏览器登录,但登录请求返回了下面的结果(其中 username 字段解码后为“请您再次登录”):

_t _ntNBMNX
({"username":"\u8bf7\u60a8\u518d\u6b21\u767b\u5f55","_t":"_ntaLJJS"})

所有代码如下。

import urllib
import urllib2
import cookielib
import re
import socket
from bs4 import BeautifulSoup


def Navigate(url, data=None):
    """Fetch *url* and return the response body, retrying on socket errors.

    Args:
        url: Address to request.
        data: Optional mapping of form fields. When it is ``None`` or an
            empty dict a plain request is sent; otherwise the fields are
            url-encoded and sent as the request body.

    Returns:
        The raw response body, or ``None`` when every attempt failed with
        ``socket.error``.
    """
    max_retries = 20

    if data is None or data == {}:
        body = None
    else:
        body = urllib.urlencode(data)

    # Count *failed* attempts so the loop is guaranteed to terminate; the
    # counter used to advance only on success, which made the limit
    # unreachable and retried forever on a dead connection.
    failures = 0
    while failures <= max_retries:
        try:
            response = urllib2.urlopen(urllib2.Request(url, body))
            try:
                return response.read()
            finally:
                # Release the connection even if read() raises.
                response.close()
        except socket.error:
            # NOTE(review): urllib2 usually reports connection-level failures
            # as urllib2.URLError, which this handler does not catch --
            # confirm whether those should be retried as well.
            failures += 1
            print('connection failure')

    print('try many time ..')
    return None

def func():
    """Install a global urllib2 opener that keeps cookies and sends a browser User-Agent.

    After this call, every ``urllib2.urlopen`` in the process goes through an
    opener backed by a fresh ``cookielib.CookieJar``. Any exception raised
    while building the opener propagates to the caller unchanged.
    """
    jar = cookielib.CookieJar()
    browser_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    browser_opener.addheaders = [(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    )]
    urllib2.install_opener(browser_opener)
    
# Install the cookie-aware opener before the first request. func() was defined
# but never called, so the cookies set by the login page were not sent back
# with the credential POST -- a likely reason the server answered with the
# "please log in again" message.
func()

url = "https://passport.jd.com/uc/login"
login = Navigate(url)
loginSoup = BeautifulSoup(login, "html.parser")

# Parse the first form's <input> elements once and reuse them.
formInputs = loginSoup.find_all("form")[0].find_all("input")

# The first input holds the uuid value that is posted back with the login.
uuid = formInputs[0]['value']

# The input at index 6 supplies an extra name/value pair that is posted back
# as-is (the original author calls it a "rand" parameter).
# NOTE(review): the fixed index depends on the page layout -- confirm it still
# points at the intended field.
clrName = formInputs[6]['name']
clrValue = formInputs[6]['value']
print('%s %s' % (clrName, clrValue))

# NOTE(review): the login page is fetched over https but the credentials are
# posted over http; cookies marked Secure would not be sent -- confirm
# whether this endpoint should also be https.
url = "http://passport.jd.com/uc/loginService"
# Not referenced anywhere in the visible script.
myurl = 'http://127.0.0.1:5000'

postData = {
    'loginname': 'my-username',
    'nloginpwd': 'my-password',
    'loginpwd': 'my-password',
    str(clrName): str(clrValue),
    'uuid': uuid,
    'authcode': '',
}
passport = Navigate(url, postData)
print(passport)

希望各位高手能够帮帮我,先谢了!

【热门文章】
【热门文章】