def handle_spider_output(self, result, request, response, spider):
    """Dispatch the output of a spider callback for processing.

    When ``result`` is falsy the value of ``defer_succeed(None)`` is returned
    and nothing else happens. Otherwise ``result`` is wrapped by
    ``iter_errback`` (with ``self.handle_spider_error`` as the handler) and
    the wrapped iterable is passed to ``parallel`` together with
    ``self.concurrent_items`` and ``self._process_spidermw_output``.

    Returns whatever ``defer_succeed`` / ``parallel`` return (presumably a
    Deferred in both cases).
    """
    if result:
        # NOTE(review): only ``request`` and ``spider`` are forwarded to the
        # error handler here (``response`` is not) -- confirm this matches
        # the signature of ``handle_spider_error``.
        guarded = iter_errback(result, self.handle_spider_error, request, spider)
        return parallel(
            guarded,
            self.concurrent_items,
            self._process_spidermw_output,
            request,
            response,
            spider,
        )
    return defer_succeed(None)
def handle_spider_output(self, result, request, response, spider):
    """Dispatch the output of a spider callback for processing.

    A falsy ``result`` short-circuits to ``defer_succeed(None)``. Any other
    ``result`` is wrapped by ``iter_errback`` so that
    ``self.handle_spider_error`` is registered with ``request``,
    ``response`` and ``spider`` as extra arguments; the wrapped iterable is
    then handed to ``parallel`` along with ``self.concurrent_items`` and
    ``self._process_spidermw_output``.

    Returns whatever ``defer_succeed`` / ``parallel`` return (presumably a
    Deferred in both cases).
    """
    if result:
        # The same context triple is forwarded to both the error handler and
        # the per-element processor.
        guarded = iter_errback(
            result, self.handle_spider_error, request, response, spider
        )
        return parallel(
            guarded,
            self.concurrent_items,
            self._process_spidermw_output,
            request,
            response,
            spider,
        )
    return defer_succeed(None)
def handle_spider_output(self, result, request, response, spider):
    """Dispatch the output of a spider callback for processing.

    A falsy ``result`` short-circuits to ``defer_succeed(None)``. Otherwise
    ``result`` is wrapped by ``iter_errback`` (handler:
    ``self.handle_spider_error``) and passed to ``parallel`` with
    ``self.concurrent_items`` and ``self._process_spidermw_output``.

    Returns whatever ``defer_succeed`` / ``parallel`` return (presumably a
    Deferred in both cases).
    """
    if result:
        # Original author's note: this call is what keeps the whole flow
        # moving forward -- continuing onward is strongly favoured here.
        guarded = iter_errback(
            result, self.handle_spider_error, request, response, spider
        )
        # Original author's note: it seems to be because of this call that
        # the whole flow has to run a given function through to completion.
        return parallel(
            guarded,
            self.concurrent_items,
            self._process_spidermw_output,
            request,
            response,
            spider,
        )
    return defer_succeed(None)
def handle_spider_output(
    self,
    result: Union[Iterable, AsyncIterable],
    request: Request,
    response: Response,
    spider: Spider,
) -> Deferred:
    """Dispatch sync or async spider output for processing.

    A falsy ``result`` short-circuits to ``defer_succeed(None)``. Otherwise
    the helper pair is chosen by the kind of ``result``:

    * ``AsyncIterable`` -> ``aiter_errback`` + ``parallel_async``
    * anything else     -> ``iter_errback`` + ``parallel``

    In both cases ``self.handle_spider_error`` is the error handler,
    ``self._process_spidermw_output`` is the per-element callable, and
    ``request``, ``response`` and ``spider`` are forwarded to both.
    """
    if not result:
        return defer_succeed(None)

    context = (request, response, spider)
    # Only the helpers for the selected branch are looked up, exactly as in
    # an explicit if/else with the calls written out.
    if isinstance(result, AsyncIterable):
        wrap, run = aiter_errback, parallel_async
    else:
        wrap, run = iter_errback, parallel

    guarded: Union[Generator, AsyncGenerator] = wrap(
        result, self.handle_spider_error, *context
    )
    return run(
        guarded, self.concurrent_items, self._process_spidermw_output, *context
    )
def call_spider(
    self,
    result: Union[Response, Failure],
    request: Request,
    spider: Spider,
) -> Deferred:
    """Route a download result to the request's callback or errback.

    For a ``Response``: ``result.request`` is set to ``request`` if it is
    missing or ``None``; the callback is ``result.request.callback`` or,
    when that is falsy, ``spider._parse``; it is attached to a deferred
    built by ``defer_succeed(result)`` and receives
    ``result.request.cb_kwargs`` as keyword arguments.

    For anything else (treated as a ``Failure``): ``result.request`` is
    always overwritten with ``request`` and ``request.errback`` is attached
    to a deferred built by ``defer_fail(result)``.

    In both cases ``iterate_spider_output`` is chained last and the value of
    that ``addCallback`` call is returned.
    """
    if not isinstance(result, Response):
        # result is a Failure
        result.request = request
        errback = request.errback
        warn_on_generator_with_return_value(spider, errback)
        dfd = defer_fail(result)
        dfd.addErrback(errback)
        return dfd.addCallback(iterate_spider_output)

    if getattr(result, "request", None) is None:
        result.request = request
    origin = result.request
    callback = origin.callback or spider._parse
    warn_on_generator_with_return_value(spider, callback)
    dfd = defer_succeed(result)
    # Keyword arguments go through ``callbackKeywords`` rather than being
    # unpacked into the call.
    dfd.addCallbacks(callback=callback, callbackKeywords=origin.cb_kwargs)
    return dfd.addCallback(iterate_spider_output)
def call_spider(self, result, request, spider):
    """Route a download result to the request's callback or errback.

    For a ``Response``: ``result.request`` is set to ``request`` if it is
    missing or ``None``; the callback is ``result.request.callback`` or,
    when that is falsy, ``spider._parse``; it is attached to a deferred
    built by ``defer_succeed(result)`` and receives
    ``result.request.cb_kwargs`` as keyword arguments.

    For anything else (treated as a ``Failure``): ``result.request`` is
    always overwritten with ``request`` and ``request.errback`` is attached
    to a deferred built by ``defer_fail(result)``.

    In both cases ``iterate_spider_output`` is chained last and the value of
    that ``addCallback`` call is returned.
    """
    if isinstance(result, Response):
        # What we were handed is a Response object.
        if getattr(result, "request", None) is None:
            result.request = request
        # Take the callback from the request object; otherwise fall back to
        # the spider's ``_parse`` method.
        callback = result.request.callback or spider._parse
        warn_on_generator_with_return_value(spider, callback)
        dfd = defer_succeed(result)
        # Add the spider callback to the deferred's callback chain. The
        # keyword arguments are passed via ``callbackKeywords`` instead of
        # being unpacked with ``**``: unpacking would make a cb_kwargs key
        # named "callback" collide with ``addCallback``'s own ``callback``
        # parameter and raise TypeError.
        dfd.addCallbacks(
            callback=callback,
            callbackKeywords=result.request.cb_kwargs,
        )
    else:
        # result is a Failure
        result.request = request
        warn_on_generator_with_return_value(spider, request.errback)
        dfd = defer_fail(result)
        dfd.addErrback(request.errback)
    return dfd.addCallback(iterate_spider_output)
def process_item(self, item, spider):
    """Send ``item`` through every stage of ``self.pipeline``, in order.

    With an empty (falsy) ``self.pipeline`` the value of
    ``defer_succeed(item)`` is returned directly. Otherwise the stages are
    run one after another on a copy of ``self.pipeline``: each stage's
    ``process_item`` is invoked through ``mustbe_deferred`` with
    ``(spider, item)`` -- spider first -- and its result becomes the input of
    the next stage. Every value entering a stage, including the final one,
    is asserted to be a ``BaseItem``.

    Returns the value of the outer ``mustbe_deferred`` call (presumably a
    Deferred that fires with the final item).
    """
    if not self.pipeline:
        return defer_succeed(item)

    def run_remaining(current, pending):
        # Validate what the previous stage (or the caller) handed over.
        assert isinstance(current, BaseItem), (
            'Item pipelines must return a BaseItem, got %s' % type(current).__name__
        )
        if pending:
            # Consume the next stage from the shared working list and chain
            # this function again on its result.
            stage = pending.pop(0)
            stage_dfd = mustbe_deferred(stage.process_item, spider, current)
            stage_dfd.addCallback(run_remaining, pending)
            return stage_dfd
        return current

    # Work on a copy so that popping stages never mutates self.pipeline.
    return mustbe_deferred(run_remaining, item, self.pipeline[:])