Example #1
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.utils.project import get_project_settings
from twisted.internet import defer


def setup_crawler(spider, stop=False):
    '''
    Takes a spider class object and returns a Deferred that fires
    when the spider closes.
    '''
    # A Deferred lets other functions wait on this crawl finishing;
    # its callback is triggered by the spider_closed signal.
    # See the Twisted docs.
    d = defer.Deferred()

    def on_spider_closed(*args, **kwargs):
        # Fire the Deferred; its result is passed to any attached callbacks
        # (we don't use it, so True could just as well be False, None, etc.)
        d.callback(True)

    settings = get_project_settings()
    crawler = Crawler(settings)
    crawler.configure()
    # Keep a reference to the handler, otherwise the dispatcher's weak
    # reference lets it get garbage collected
    crawler._tempref = on_spider_closed
    # on_spider_closed handles the spider_closed signal from this spider.
    # N.B. the dispatcher passes spider and reason (e.g. 'finished') to it.
    crawler.signals.connect(on_spider_closed, signal=signals.spider_closed)
    crawler.crawl(spider)
    # N.B. log is the scrapy log; log2 is a Python color logger.
    # The crawler arg is necessary for the log_count/{ERROR, DEBUG, INFO, ...}
    # stats, which you will want for the stats mailer extension.
    # Starting this on every call would cause a torrent of ESMTP errors:
    # log.start(crawler=crawler)
    crawler.start()
    return d
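
A minimal usage sketch under the same pre-1.0 Scrapy API: gather the Deferreds returned by setup_crawler into a DeferredList and stop the Twisted reactor once every spider has closed. MySpider and OtherSpider are hypothetical spider classes, not part of the code above.

from twisted.internet import defer, reactor

def run_all():
    # One Deferred per spider; each fires on that spider's spider_closed.
    # MySpider / OtherSpider are placeholder spider classes for illustration.
    deferreds = [setup_crawler(MySpider()), setup_crawler(OtherSpider())]
    # DeferredList fires once all of its Deferreds have fired
    dl = defer.DeferredList(deferreds)
    dl.addCallback(lambda _: reactor.stop())

run_all()
reactor.run()  # blocks until reactor.stop() is called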