Пример #1
0
 def handle_spider_output(self, result, request, response, spider):
     """Dispatch the spider output ``result`` for per-element processing.

     A falsy ``result`` short-circuits to ``defer_succeed(None)``.
     Otherwise ``result`` is wrapped by ``iter_errback`` (given
     ``self.handle_spider_error``, ``request`` and ``spider``) and the
     wrapped iterable is handed to ``parallel`` together with
     ``self.concurrent_items`` and ``self._process_spidermw_output``.

     Returns whatever ``defer_succeed`` / ``parallel`` return.
     """
     if result:
         guarded = iter_errback(
             result, self.handle_spider_error, request, spider)
         return parallel(
             guarded,
             self.concurrent_items,
             self._process_spidermw_output,
             request,
             response,
             spider,
         )
     # Nothing to process.
     return defer_succeed(None)
Пример #2
0
 def handle_spider_output(self, result, request, response, spider):
     """Dispatch the spider output ``result`` for per-element processing.

     A falsy ``result`` short-circuits to ``defer_succeed(None)``.
     Otherwise ``result`` is wrapped by ``iter_errback`` (given
     ``self.handle_spider_error``, ``request``, ``response`` and
     ``spider``) and the wrapped iterable is handed to ``parallel``
     together with ``self.concurrent_items`` and
     ``self._process_spidermw_output``.

     Returns whatever ``defer_succeed`` / ``parallel`` return.
     """
     if result:
         guarded = iter_errback(
             result, self.handle_spider_error, request, response, spider)
         return parallel(
             guarded,
             self.concurrent_items,
             self._process_spidermw_output,
             request,
             response,
             spider,
         )
     # Nothing to process.
     return defer_succeed(None)
Пример #3
0
 def handle_spider_output(self, result, request, response, spider):
     """Dispatch the spider output ``result`` for per-element processing.

     A falsy ``result`` short-circuits to ``defer_succeed(None)``.
     Otherwise ``result`` is wrapped by ``iter_errback`` (given
     ``self.handle_spider_error``, ``request``, ``response`` and
     ``spider``) and the wrapped iterable is handed to ``parallel``
     together with ``self.concurrent_items`` and
     ``self._process_spidermw_output``.

     Returns whatever ``defer_succeed`` / ``parallel`` return.
     """
     if result:
         safe_output = iter_errback(
             result, self.handle_spider_error, request, response, spider)
         # NOTE(review): the original author's notes (translated) remark
         # that this ``parallel`` call is what pushes the flow forward, and
         # that it seems to be the reason a given function has to run to
         # completion. That is an observation, not verified here.
         return parallel(
             safe_output,
             self.concurrent_items,
             self._process_spidermw_output,
             request,
             response,
             spider,
         )
     # Nothing to process.
     return defer_succeed(None)
Пример #4
0
 def handle_spider_output(self, result: Union[Iterable, AsyncIterable], request: Request,
                          response: Response, spider: Spider) -> Deferred:
     """Dispatch the spider output ``result`` for per-element processing.

     A falsy ``result`` short-circuits to ``defer_succeed(None)``.
     Otherwise the helpers are chosen by the kind of iterable:

     * ``AsyncIterable`` -> ``aiter_errback`` + ``parallel_async``
     * anything else     -> ``iter_errback`` + ``parallel``

     In both cases the error-wrapping helper receives
     ``self.handle_spider_error``, and the parallel helper receives
     ``self.concurrent_items`` and ``self._process_spidermw_output``.
     """
     if not result:
         return defer_succeed(None)

     if isinstance(result, AsyncIterable):
         async_guarded = aiter_errback(
             result, self.handle_spider_error, request, response, spider)
         return parallel_async(
             async_guarded,
             self.concurrent_items,
             self._process_spidermw_output,
             request,
             response,
             spider,
         )

     sync_guarded = iter_errback(
         result, self.handle_spider_error, request, response, spider)
     return parallel(
         sync_guarded,
         self.concurrent_items,
         self._process_spidermw_output,
         request,
         response,
         spider,
     )
Пример #5
0
 def call_spider(self, result: Union[Response, Failure], request: Request, spider: Spider) -> Deferred:
     """Build a deferred that runs the matching spider callback or errback.

     For a ``Response``: make sure ``result.request`` is set (falling back
     to ``request``), pick ``result.request.callback`` or ``spider._parse``
     and attach it with ``result.request.cb_kwargs`` as keyword arguments.
     For anything else (a failure): set ``result.request`` to ``request``
     and attach ``request.errback``.

     In both cases ``iterate_spider_output`` is appended as the final
     callback, and the result of that ``addCallback`` call is returned.
     """
     if not isinstance(result, Response):
         # result is a Failure
         result.request = request
         errback = request.errback
         warn_on_generator_with_return_value(spider, errback)
         dfd = defer_fail(result)
         dfd.addErrback(errback)
     else:
         if getattr(result, "request", None) is None:
             result.request = request
         origin = result.request
         callback = origin.callback or spider._parse
         warn_on_generator_with_return_value(spider, callback)
         dfd = defer_succeed(result)
         dfd.addCallbacks(
             callback=callback,
             callbackKeywords=origin.cb_kwargs,
         )
     return dfd.addCallback(iterate_spider_output)
Пример #6
0
 def call_spider(self, result, request, spider):
     """Build a deferred that runs the matching spider callback or errback.

     For a ``Response``: make sure ``result.request`` is set (falling back
     to ``request``), pick ``result.request.callback`` or ``spider._parse``
     and attach it with ``result.request.cb_kwargs`` as keyword arguments.
     For anything else (a failure): set ``result.request`` to ``request``
     and attach ``request.errback``.

     In both cases ``iterate_spider_output`` is appended as the final
     callback, and the result of that ``addCallback`` call is returned.
     """
     if isinstance(result, Response):  # we were handed a Response object
         if getattr(result, "request", None) is None:
             result.request = request
         # Use the callback stored on the request, falling back to the
         # spider's ``_parse`` when the request has none.
         callback = result.request.callback or spider._parse
         warn_on_generator_with_return_value(spider, callback)
         dfd = defer_succeed(result)
         # Add the spider callback to the deferred's callback chain.
         # ``addCallbacks`` with ``callbackKeywords`` is used instead of
         # ``addCallback(callback, **cb_kwargs)``: the latter raises
         # TypeError when cb_kwargs contains a key named "callback",
         # because it collides with addCallback's own first parameter.
         dfd.addCallbacks(callback=callback,
                          callbackKeywords=result.request.cb_kwargs)
     else:  # result is a Failure
         result.request = request
         warn_on_generator_with_return_value(spider, request.errback)
         dfd = defer_fail(result)
         dfd.addErrback(request.errback)
     return dfd.addCallback(iterate_spider_output)
Пример #7
0
    def process_item(self, item, spider):
        """Pass ``item`` through every stage in ``self.pipeline``, in order.

        With an empty pipeline the item is returned via
        ``defer_succeed(item)``. Otherwise a copy of the stage list is
        consumed front to back: each stage's ``process_item`` is invoked
        through ``mustbe_deferred`` with ``(spider, item)``, and its result
        is fed to the next stage as a deferred callback.

        Every value entering a stage (and the final value) must be a
        ``BaseItem``; this is enforced with an ``assert``.
        """
        def run_remaining(current, pending):
            # Validate what the previous stage (or the caller) handed over.
            assert isinstance(current, BaseItem), \
                'Item pipelines must return a BaseItem, got %s' % type(current).__name__
            if pending:
                stage = pending.pop(0)
                chained = mustbe_deferred(stage.process_item, spider, current)
                # Continue with the rest once this stage has produced a result.
                chained.addCallback(run_remaining, pending)
                return chained
            # No stages left: the current item is the final result.
            return current

        if self.pipeline:
            # Work on a copy so the pops above never touch self.pipeline.
            return mustbe_deferred(run_remaining, item, self.pipeline[:])
        return defer_succeed(item)