Exemplo n.º 1
0
 def open(self, spider):
     """Bind this object to ``spider`` and prepare its supporting state.

     In order: stores the spider, hands it this object's logger and Redis
     connection, asks it to set up its stats, creates the queues, sets up
     ZooKeeper, and finally builds the ``RFPDupeFilter``.

     :param spider: the spider being opened; must provide ``set_logger``,
         ``set_redis``, ``setup_stats`` and a ``name`` attribute.
     """
     self.spider = spider

     # Wire the spider up before any queue / ZooKeeper work happens.
     spider.set_logger(self.logger)
     spider.set_redis(self.redis_conn)
     spider.setup_stats()

     self.create_queues()
     self.setup_zookeeper()

     # The duplicate-filter key is "<spider name>:dupefilter".
     conn = self.redis_conn
     filter_key = self.spider.name + ':dupefilter'
     timeout = self.rfp_timeout
     self.dupefilter = RFPDupeFilter(conn, filter_key, timeout)
Exemplo n.º 2
0
 def open(self, spider):
     """Bind this object to ``spider`` and build its queue and dupefilter.

     Stores the spider, instantiates the request queue through
     ``queue_cls``, creates the ``RFPDupeFilter``, clamps a negative
     ``idle_before_close`` to zero, and logs a resume message when the
     queue already holds requests.

     :param spider: the spider being opened; its ``name`` fills the
         ``spider`` placeholder of ``dupefilter_key`` and its ``log``
         method receives the resume message.
     """
     self.spider = spider
     self.queue = self.queue_cls(self.server, spider, self.queue_key)
     # NOTE(review): the literal 10 is RFPDupeFilter's third positional
     # argument -- presumably a timeout; confirm against its signature.
     self.df = RFPDupeFilter(self.server,
                             self.dupefilter_key % {'spider': spider.name},
                             10)
     if self.idle_before_close < 0:
         self.idle_before_close = 0
     # notice if there are requests already in the queue to resume the crawl
     # Measure the queue once so the count that is logged is the same one
     # that was tested, instead of calling len() a second time.
     pending = len(self.queue)
     if pending:
         spider.log("Resuming crawl (%d requests scheduled)" % pending)
Exemplo n.º 3
0
 def open(self, spider):
     """Store ``spider``, run ``setup()``, then create the duplicate filter.

     The filter is an ``RFPDupeFilter`` built from ``redis_conn``, a key of
     the form ``<spider name>:dupefilter`` and ``rfp_timeout``.

     :param spider: the spider being opened; must expose a ``name``.
     """
     self.spider = spider
     self.setup()

     # Attributes are read only after setup() has run, as in the call order
     # this method has always used.
     conn = self.redis_conn
     filter_key = self.spider.name + ':dupefilter'
     timeout = self.rfp_timeout
     self.dupefilter = RFPDupeFilter(conn, filter_key, timeout)