def crawl(self, async_mode, url, timeout, request_header, robotstxt_enabled, meta):
    """Fetch *url*, honouring robots.txt preprocessing and the sync/async mode.

    The URL is first vetted by ``Downloader.preprocess``; on rejection the
    failure is either wrapped for the async pipeline (``defer_fail``) or
    passed through as-is.  On success the fetch is delegated to the
    matching ``_crawl_async`` / ``_crawl_sync`` implementation.  The raw
    outcome is always funnelled through ``misc.postprocess`` with
    ``Downloader.postprocess`` as the finaliser.
    """
    ok, result = Downloader.preprocess(url, robotstxt_enabled)
    if ok:
        # Pick the fetch implementation that matches the requested mode.
        fetch = self._crawl_async if async_mode else self._crawl_sync
        result = fetch(url, timeout, request_header, meta)
    elif async_mode:
        # Preprocessing rejected the URL: async callers expect a failed
        # deferred; sync callers receive the failure object unchanged.
        result = defer_fail(result)
    return misc.postprocess(async_mode, result, Downloader.postprocess)
def call_spider(self, result: Union[Response, Failure], request: Request, spider: Spider) -> Deferred:
    """Route a downloader *result* into the spider's callback/errback chain.

    A ``Response`` is handed to its request's callback (falling back to
    ``spider._parse`` when none is set); a ``Failure`` is tagged with the
    originating request and routed to ``request.errback``.  In both cases
    the spider's output is flattened through ``iterate_spider_output``.
    """
    if isinstance(result, Response):
        # Make sure the response carries the request that produced it.
        if getattr(result, "request", None) is None:
            result.request = request
        cb = result.request.callback or spider._parse
        warn_on_generator_with_return_value(spider, cb)
        # Start an already-fired deferred and chain the spider callback,
        # forwarding any per-request cb_kwargs.
        dfd = defer_succeed(result).addCallbacks(
            callback=cb,
            callbackKeywords=result.request.cb_kwargs,
        )
    else:
        # result is a Failure: annotate it and route to the errback.
        result.request = request
        warn_on_generator_with_return_value(spider, request.errback)
        dfd = defer_fail(result).addErrback(request.errback)
    return dfd.addCallback(iterate_spider_output)
def call_spider(self, result, request, spider):
    """Dispatch a download result into the spider's deferred chain.

    If *result* is a ``Response``, it is delivered to the request's
    callback (or ``spider._parse`` when the request declares none); if it
    is a ``Failure``, it is tagged with the request and delivered to
    ``request.errback``.  Either way the spider output is normalised via
    ``iterate_spider_output``.
    """
    if isinstance(result, Response):
        # The value received from the downloader is a Response object:
        # attach the originating request so callbacks can reference it.
        if getattr(result, "request", None) is None:
            result.request = request
        # Use the request's callback, falling back to the spider's
        # default _parse entry point.
        callback = result.request.callback or spider._parse
        warn_on_generator_with_return_value(spider, callback)
        dfd = defer_succeed(result)
        # Chain the spider callback onto the deferred.  Use
        # addCallbacks/callbackKeywords (consistent with the sibling
        # implementation) instead of **-expanding cb_kwargs into
        # addCallback, which would raise TypeError if cb_kwargs ever
        # contained a key colliding with addCallback's own parameters
        # (e.g. "callback").
        dfd.addCallbacks(callback=callback, callbackKeywords=result.request.cb_kwargs)
    else:  # result is a Failure
        result.request = request
        warn_on_generator_with_return_value(spider, request.errback)
        dfd = defer_fail(result)
        dfd.addErrback(request.errback)
    return dfd.addCallback(iterate_spider_output)