首页 > [scrapy selenium] socket.error: [Errno 111] Connection refused

[scrapy selenium] socket.error: [Errno 111] Connection refused

利用Scrapy去爬淘宝的数据,出现了如下的错误:

Traceback (most recent call last):
  File "/usr/local/bin/scrapy", line 4, in <module>
    execute()
  File "/usr/local/lib/python2.7/dist-packages/scrapy/cmdline.py", line 143, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/cmdline.py", line 89, in _run_print_help
    func(*a, **kw)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/cmdline.py", line 150, in _run_command
    cmd.run(args, opts)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/commands/crawl.py", line 48, in run
    spider = crawler.spiders.create(spname, **opts.spargs)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/spidermanager.py", line 48, in create
    return spcls(**spider_kwargs)
  File "/srv/sxrapy_test/shiyifang/shiyifang/spiders/shiyifang_spider.py", line 32, in __init__
    self.selenium.start()
  File "/usr/local/lib/python2.7/dist-packages/selenium/selenium.py", line 197, in start
    result = self.get_string("getNewBrowserSession", start_args)
  File "/usr/local/lib/python2.7/dist-packages/selenium/selenium.py", line 231, in get_string
    result = self.do_command(verb, args)
  File "/usr/local/lib/python2.7/dist-packages/selenium/selenium.py", line 220, in do_command
    conn.request("POST", "/selenium-server/driver/", body, headers)
  File "/usr/lib/python2.7/httplib.py", line 962, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python2.7/httplib.py", line 996, in _send_request
    self.endheaders(body)
  File "/usr/lib/python2.7/httplib.py", line 958, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python2.7/httplib.py", line 818, in _send_output
    self.send(msg)
  File "/usr/lib/python2.7/httplib.py", line 780, in send
    self.connect()
  File "/usr/lib/python2.7/httplib.py", line 761, in connect
    self.timeout, self.source_address)
  File "/usr/lib/python2.7/socket.py", line 571, in create_connection
    raise err
socket.error: [Errno 111] Connection refused

源码如下:

# -*- coding: utf-8 -*-
from scrapy.spider import BaseSpider
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule

from selenium import selenium

from shiyifang.items import ShiyifangItem

class ShiyifangSpider(CrawlSpider):
    """Crawl taobao.com and drive matched pages through a Selenium RC browser.

    NOTE(review): this uses the legacy Selenium RC client
    (``from selenium import selenium``), which requires a standalone
    Selenium server listening on localhost:4444.  The traceback quoted
    above (``socket.error: [Errno 111] Connection refused`` raised from
    ``self.selenium.start()``) is exactly what happens when no such
    server is running — start ``selenium-server`` first.
    """
    name = "shiyifang"
    allowed_domains = ["taobao.com"]
    start_urls = [
        "http://www.taobao.com"
    ]

    # Follow links matching the `allow` pattern and pass the responses
    # to parse_page.
    # NOTE(review): SgmlLinkExtractor `allow` entries are regular
    # expressions, so the literal `?` and `.` in this URL are regex
    # metacharacters — presumably a literal URL was intended; verify the
    # pattern actually matches the target links (re.escape it if not).
    rules = (
        Rule(SgmlLinkExtractor(allow=('/market/nvzhuang/index.php?spm=a217f.7297021.a214d5w.2.tvAive', )),
             callback='parse_page', follow=True),
    )

    def __init__(self):
        # Opens the Selenium RC browser session at spider-construction
        # time; this is the call that raises socket.error [Errno 111]
        # when the Selenium server at localhost:4444 is unreachable.
        CrawlSpider.__init__(self)
        self.verificationErrors = []  # collected failures; never appended to in this snippet
        self.selenium = selenium("localhost", 4444, "*firefox", "http://www.taobao.com")
        self.selenium.start()

    def __del__(self):
        # Tear down the browser session when the spider is collected.
        # NOTE(review): CrawlSpider does not appear to define __del__,
        # so the last line would raise AttributeError — confirm against
        # the installed Scrapy version.
        self.selenium.stop()
        print self.verificationErrors
        CrawlSpider.__del__(self)


    def parse_page(self, response):
        # Re-open the already-downloaded URL inside the Selenium browser
        # so JavaScript-rendered content becomes available.
        sel = Selector(response)  # NOTE(review): immediately shadowed below; never used
        from webproxy.items import WebproxyItem  # NOTE(review): imported but unused in the visible code

        sel = self.selenium
        sel.open(response.url)
        sel.wait_for_page_to_load("30000")
        import time

        time.sleep(2.5)
        # NOTE(review): nothing is yielded/returned here — the quoted
        # snippet appears truncated at this point, so Scrapy would
        # extract no items even once the connection error is fixed.

是被禁止爬了,还是需要添加其它的一些方法?


我也遇到了同样的问题,之前只单独做过scrapy和selenium,现在将两者结合在一起倒还是头一次,有点摸不着门道,不知道楼主现在解决这个问题了没有呢?

【热门文章】