Exemplo n.º 1
0
 def setUp(self):
     '''Create the fixture: a clock, both queues and a downloader.

     The downloader is built from ``self.default_settings`` and is given a
     ``MockDownloaderHandler`` as its download handler, which is also
     exposed as ``self.handler``.
     '''
     self.clock = Clock()
     self.request_queue = MemoryQueue()
     self.response_queue = ResponseQueue()

     # Built in the same order as the arguments are passed below.
     downloader_settings = Settings(self.default_settings)
     mock_handler = MockDownloaderHandler(Settings())

     self.dwn = Downloader(
         downloader_settings,
         self.request_queue,
         self.response_queue,
         download_handler=mock_handler,
         clock=self.clock,
     )
     self.handler = self.dwn.download_handler
Exemplo n.º 2
0
 def _update_dwn(self, **kwargs):
     '''Replace the downloader with one built from overridden settings.

     The keyword arguments are applied on top of a copy of
     ``self.default_settings``. The current downloader's ``processing``
     is cancelled before the replacement is constructed, and
     ``self.handler`` is re-pointed at the new download handler.
     '''
     overridden = self.default_settings.copy()
     overridden.update(**kwargs)

     # Cancel the old downloader's processing before replacing it.
     self.dwn.processing.cancel()

     downloader_settings = Settings(overridden)
     mock_handler = MockDownloaderHandler(Settings())
     self.dwn = Downloader(
         downloader_settings,
         self.request_queue,
         self.response_queue,
         download_handler=mock_handler,
         clock=self.clock,
     )
     self.handler = self.dwn.download_handler
Exemplo n.º 3
0
    def setup(self):
        '''Build the engine's components and attach the engine to the spider.

        Requires ``self.spider`` to be set (checked with an ``assert``).
        In order, this optionally starts logging, then creates the signal
        manager, the stats object, the request/response queues, the
        downloader, the extension manager and the pipeline manager. Finally
        it sets ``self.initialized`` to True and calls
        ``self.spider.set_engine(self)``.
        '''
        assert self.spider is not None, 'Spider is not set in Engine.'

        # IMPORTANT: order of the following initializations is very important
        # so please, think twice about any changes to it

        # initialize logging (only when the LOG_ENABLED setting is true)
        if self.settings.get_bool('LOG_ENABLED'):
            log.start(self.settings['LOG_FILE'], self.settings['LOG_LEVEL'],
                      self.settings['LOG_STDOUT'],
                      self.settings['LOG_ENCODING'])

        # initialize signals
        self.signals = SignalManager(self)

        # initialize stats; the class is resolved from the STATS_CLASS setting
        stats_cls = load_object(self.settings.get('STATS_CLASS'))
        self.stats = stats_cls(self)

        # initialize downloader
        # The factory ignores its argument and returns a new MemoryQueue on
        # every call (presumably one queue per priority -- confirm against
        # PriorityQueue).
        self.request_queue = PriorityQueue(lambda _: MemoryQueue())
        self.response_queue = ResponseQueue(
            self.settings.get_int('RESPONSE_ACTIVE_SIZE_LIMIT'))
        self.downloader = Downloader(self.settings,
                                     self.request_queue,
                                     self.response_queue,
                                     clock=self.clock)

        # initialize extensions
        self.extensions = ExtensionManager(self)
        # initialize the pipeline manager
        self.pipeline = PipelineManager(self)

        self.initialized = True

        # now that everything is ready, set the spider's engine
        self.spider.set_engine(self)