Exemplo n.º 1
0
 def _handle_pipeline_result(self, result):
     if result is None:
         pass
     elif isinstance(result, Request):
         self.download(result)
     else:
         assert isinstance(result, (Response, Failure))
         request = result.request
         if isinstance(result, Response):
             flags = ' %s' % result.flags if result.flags else ''
             log.msg(format='Crawled %(url)s [%(status)s]%(flags)s',
                     level=log.DEBUG, url=result.url, status=result.status,
                     flags=flags)
             self.signals.send(signal=signals.response_received,
                               response=result)
         else:
             self.signals.send(signal=signals.failure_received,
                               failure=result)
         dfd = defer_result(result, clock=self.clock)
         dfd.addCallbacks(request.callback or self.spider.parse,
                          request.errback)
         dfd.addCallbacks(
             self._handle_spider_output,
             self._handle_spider_error,
             callbackKeywords={'request': request},
             errbackKeywords={'request': request})
         return dfd
Exemplo n.º 2
0
 def _handle_pipeline_result(self, result):
     if result is None:
         pass
     elif isinstance(result, Request):
         self.download(result)
     else:
         assert isinstance(result, (Response, Failure))
         request = result.request
         if isinstance(result, Response):
             flags = ' %s' % result.flags if result.flags else ''
             log.msg(format='Crawled %(url)s [%(status)s]%(flags)s',
                     level=log.DEBUG,
                     url=result.url,
                     status=result.status,
                     flags=flags)
             self.signals.send(signal=signals.response_received,
                               response=result)
         else:
             self.signals.send(signal=signals.failure_received,
                               failure=result)
         dfd = defer_result(result, clock=self.clock)
         dfd.addCallbacks(request.callback or self.spider.parse,
                          request.errback)
         dfd.addCallbacks(self._handle_spider_output,
                          self._handle_spider_error,
                          callbackKeywords={'request': request},
                          errbackKeywords={'request': request})
         return dfd
Exemplo n.º 3
0
 def _process_queue(self):
     if not self.running:
         return
     elif self.paused:
         self.processing.schedule(self.PAUSED_CHECK_FREQUENCY)
     elif self.response_queue:
         response = self.response_queue.pop()
         if isinstance(response, Response):
             self.signals.send(signal=signals.response_downloaded,
                               response=response)
         dfd = defer_result(response, clock=self.clock)
         dfd.addBoth(self.pipeline.process_response)
         dfd.addBoth(self._handle_pipeline_result)
         dfd.addBoth(self._finalize_download)
         dfd.addBoth(lambda _: self.processing.schedule(0))
     elif self.is_idle():
         # send `spider_idle` signal
         res = self.signals.send(signal=signals.spider_idle,
                                 dont_log=DontStopEngine)
         dont_stop = any(isinstance(x, Failure) and
                         isinstance(x.value, DontStopEngine)
                         for _, x in res)
         # more requests have been scheduled
         if not self.is_idle():
             self.processing.schedule(0)
         # slow down a little, but still run
         elif dont_stop or not self.stop_if_idle:
             self.processing.schedule(self.IDLE_CHECK_FREQUENCY)
         else:
             self.stop('finished')
     else:
         self.processing.schedule(self.QUEUE_CHECK_FREQUENCY)
Exemplo n.º 4
0
 def _process_queue(self):
     if not self.running:
         return
     elif self.paused:
         self.processing.schedule(self.PAUSED_CHECK_FREQUENCY)
     elif self.response_queue:
         response = self.response_queue.pop()
         if isinstance(response, Response):
             self.signals.send(signal=signals.response_downloaded,
                               response=response)
         dfd = defer_result(response, clock=self.clock)
         dfd.addBoth(self.pipeline.process_response)
         dfd.addBoth(self._handle_pipeline_result)
         dfd.addBoth(self._finalize_download)
         dfd.addBoth(lambda _: self.processing.schedule(0))
     elif self.is_idle():
         # send `spider_idle` signal
         res = self.signals.send(signal=signals.spider_idle,
                                 dont_log=DontStopEngine)
         dont_stop = any(
             isinstance(x, Failure) and isinstance(x.value, DontStopEngine)
             for _, x in res)
         # more requests have been scheduled
         if not self.is_idle():
             self.processing.schedule(0)
         # slow down a little, but still run
         elif dont_stop or not self.stop_if_idle:
             self.processing.schedule(self.IDLE_CHECK_FREQUENCY)
         else:
             self.stop('finished')
     else:
         self.processing.schedule(self.QUEUE_CHECK_FREQUENCY)