def build_postfetch_chain(self, inq):
    """
    Populate ``self._postfetch_chain`` and link its members in sequence.

    Processors are added in this order:

    1. the dedup db's loader, only when ``Factory.dedup_db`` returns
       something truthy
    2. the warc writer processor
    3. the dedup db's storer, under the same condition as the loader
    4. ``self.stats_processor``, when truthy
    5. a listener around the playback index db, when
       ``self.playback_proxy`` is truthy
    6. a listener around the crawl logger, when the factory supplies one
    7. each plugin named in ``self.options.plugins``
    8. a listener around ``self.proxy.running_stats``

    A plugin that has a ``notify`` attribute is wrapped as a listener and
    appended. Otherwise a plugin whose ``CHAIN_POSITION`` equals
    ``'early'`` is inserted at the very front of the chain (so several
    such plugins end up in reverse order of configuration), and any other
    plugin is appended unwrapped.

    As side effects this sets ``self.dedup_db`` and
    ``self.warc_writer_processor``.

    Args:
        inq: assigned to the ``inq`` attribute of the first processor in
            the finished chain.
    """
    def as_listener(target):
        # Adapt a plain listener object so that it can sit in the chain.
        return warcprox.ListenerPostfetchProcessor(target, self.options)

    self._postfetch_chain = []
    # Local alias for the very same list object stored on the instance.
    pipeline = self._postfetch_chain

    self.dedup_db = Factory.dedup_db(self.options)
    if self.dedup_db:
        pipeline.append(self.dedup_db.loader())

    self.warc_writer_processor = Factory.warc_writer_processor(self.options)
    pipeline.append(self.warc_writer_processor)

    if self.dedup_db:
        pipeline.append(self.dedup_db.storer())
    if self.stats_processor:
        pipeline.append(self.stats_processor)
    if self.playback_proxy:
        pipeline.append(as_listener(self.playback_proxy.playback_index_db))

    logger_for_crawl = Factory.crawl_logger(self.options)
    if logger_for_crawl:
        pipeline.append(as_listener(logger_for_crawl))

    for plugin_name in self.options.plugins or []:
        loaded = Factory.plugin(plugin_name, self.options)
        if hasattr(loaded, 'notify'):
            pipeline.append(as_listener(loaded))
            continue
        if getattr(loaded, 'CHAIN_POSITION', None) == 'early':
            pipeline.insert(0, loaded)
        else:
            pipeline.append(loaded)

    pipeline.append(as_listener(self.proxy.running_stats))

    # Feed the head of the chain, then join every adjacent pair.
    pipeline[0].inq = inq
    for upstream, downstream in zip(pipeline, pipeline[1:]):
        self.chain(upstream, downstream)
def storer(self, *args, **kwargs):
    """
    Return a ``warcprox.ListenerPostfetchProcessor`` built around this
    object and its ``options``.

    Any positional or keyword arguments are accepted but not used.
    """
    processor = warcprox.ListenerPostfetchProcessor(self, self.options)
    return processor