Пример #1
0
    def crawl(self, async_mode, url, timeout, request_header, robotstxt_enabled, meta):
        """Preprocess ``url``, fetch it if preprocessing succeeded, then postprocess.

        ``Downloader.preprocess`` yields a ``(success, result)`` pair. On
        success the URL is fetched with ``_crawl_async`` or ``_crawl_sync``
        depending on ``async_mode``. On failure the preprocess result is
        wrapped with ``defer_fail`` in async mode and passed through
        unchanged in sync mode.

        Returns whatever ``misc.postprocess`` returns for the outcome.
        """
        ok, outcome = Downloader.preprocess(url, robotstxt_enabled)

        if ok:
            # Pick the fetch routine that matches the requested mode.
            fetch = self._crawl_async if async_mode else self._crawl_sync
            outcome = fetch(url, timeout, request_header, meta)
        elif async_mode:
            # Preprocessing failed: async callers get the failure via defer_fail.
            outcome = defer_fail(outcome)

        return misc.postprocess(async_mode, outcome, Downloader.postprocess)
Пример #2
0
 def call_spider(self, result: Union[Response, Failure], request: Request, spider: Spider) -> Deferred:
     """Hand a download outcome to the request's callback or errback.

     A ``Response`` is sent to ``result.request.callback`` (or
     ``spider._parse`` when that callback is falsy) together with the
     request's ``cb_kwargs``. Any other result is treated as a failure and
     sent to ``request.errback``. In both cases ``iterate_spider_output``
     is chained last and the resulting deferred is returned.
     """
     if not isinstance(result, Response):
         # Failure path: attach the originating request before firing the errback.
         result.request = request
         warn_on_generator_with_return_value(spider, request.errback)
         deferred = defer_fail(result)
         deferred.addErrback(request.errback)
         return deferred.addCallback(iterate_spider_output)

     # Success path: make sure the response knows which request produced it.
     if getattr(result, "request", None) is None:
         result.request = request
     handler = result.request.callback or spider._parse
     warn_on_generator_with_return_value(spider, handler)
     deferred = defer_succeed(result)
     deferred.addCallbacks(
         callback=handler,
         callbackKeywords=result.request.cb_kwargs,
     )
     return deferred.addCallback(iterate_spider_output)
Пример #3
0
 def call_spider(self, result, request, spider):
     """Hand a download outcome to the request's callback or errback.

     A ``Response`` is sent to ``result.request.callback`` (or
     ``spider._parse`` when that callback is falsy) together with the
     request's ``cb_kwargs``. Any other result is treated as a failure and
     sent to ``request.errback``. In both cases ``iterate_spider_output``
     is chained last and the resulting deferred is returned.
     """
     if isinstance(result, Response):  # the result is a Response object
         if getattr(result, "request", None) is None:
             result.request = request
         # Use the callback attached to the request, otherwise fall back to
         # the spider's _parse method.
         callback = result.request.callback or spider._parse
         warn_on_generator_with_return_value(spider, callback)
         dfd = defer_succeed(result)
         # Append the spider callback to the deferred's callback chain.
         # cb_kwargs is passed as a mapping instead of being splatted with
         # ``**`` so that a user key such as "callback" cannot collide with
         # addCallback's own parameter name and raise TypeError.
         dfd.addCallbacks(callback=callback, callbackKeywords=result.request.cb_kwargs)
     else:  # result is a Failure
         result.request = request
         warn_on_generator_with_return_value(spider, request.errback)
         dfd = defer_fail(result)
         dfd.addErrback(request.errback)
     return dfd.addCallback(iterate_spider_output)