def start(self):
    """Start this worker.

    On start, the spider's ``start_tasks`` are moved into the pending
    crawl queue and the first fetch iteration is scheduled on the
    IOLoop. Calling ``start`` on an already-started worker is a no-op
    (only a warning is logged), so the worker is never started twice.
    """
    if self.is_started:
        self.logger.warn("duplicate start")
    else:
        self.is_started = True
        self.worker_statistic.start_time = datetime.datetime.now()
        # Recording is best-effort: a persistence failure must not
        # prevent the worker from starting.
        try:
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:  # fixed Py2-only `except Exception, e` syntax
            self.logger.warn("record worker failed:%s" % e)
        _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                            self.spider.crawl_schedule)
        # Schedule the first crawl iteration after the schedule's
        # configured interval (milliseconds).
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("start worker")
def start(self):
    """Start this worker.

    Moves the spider's ``start_tasks`` into the pending crawl queue and
    schedules the first fetch via the IOLoop. A repeated ``start`` call
    only logs a warning and does nothing else.

    NOTE(review): this is a duplicate definition of ``start`` — the later
    definition in the class body shadows the earlier one. Consider
    removing one of them.
    """
    if self.is_started:
        self.logger.warn("duplicate start")
    else:
        self.is_started = True
        self.worker_statistic.start_time = datetime.datetime.now()
        # Best-effort recording: failures are logged, never fatal.
        try:
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:  # fixed Py2-only `except Exception, e` syntax
            self.logger.warn("record worker failed:%s" % e)
        _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                            self.spider.crawl_schedule)
        # First iteration fires after the schedule interval (ms).
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("start worker")
def recover(self):
    """Start this worker in recovery mode.

    Unlike ``start``, the spider's ``start_tasks`` are NOT re-queued:
    recovery resumes from whatever is already in the crawl schedule.
    Calling ``recover`` on an already-started worker only logs a
    warning.
    """
    if self.is_started:
        self.logger.warn("duplicate start")
    else:
        self.worker_statistic.start_time = datetime.datetime.now()
        # Guard the recording just like start() does: a persistence
        # failure must not abort recovery (previously unguarded, so a
        # record_doing error crashed recover()).
        try:
            RecorderManager.instance().record_doing(
                record(self._worker_name,
                       self.worker_statistic.start_time.strftime(
                           "%Y-%m-%d %H:%M:%S"),
                       get_class_path(self.spider.crawl_schedule.__class__),
                       self.spider.crawl_schedule.schedule_kwargs,
                       get_class_path(self.spider.__class__),
                       self.spider.spider_kwargs))
        except Exception as e:
            self.logger.warn("record worker failed:%s" % e)
        self.is_started = True
        # Schedule the first crawl iteration after the configured
        # interval (milliseconds).
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("recover worker")
def recover(self):
    """Start this worker in recovery mode.

    Resumes from the tasks already present in the crawl schedule
    (``start_tasks`` are not re-queued). A repeated call only logs a
    warning.

    NOTE(review): this is a duplicate definition of ``recover`` — the
    later definition shadows the earlier one. Consider removing one.
    """
    if self.is_started:
        self.logger.warn("duplicate start")
    else:
        self.worker_statistic.start_time = datetime.datetime.now()
        # Best-effort recording, consistent with start(): previously
        # unguarded here, so a record_doing failure crashed recovery.
        try:
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:
            self.logger.warn("record worker failed:%s" % e)
        self.is_started = True
        # Kick off the first crawl iteration after the schedule
        # interval (ms).
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("recover worker")