Example #1
0
    def start(self):
        """Start this worker.

        On start, the spider's ``start_tasks`` are moved into the pending
        crawl queue and the first fetch cycle is scheduled on the IOLoop.
        Calling start() on an already-started worker is a no-op (only a
        warning is logged).
        """
        if self.is_started:
            self.logger.warning("duplicate start")
        else:
            self.is_started = True
            self.worker_statistic.start_time = datetime.datetime.now()
            try:
                # Best-effort bookkeeping: persist what this worker is doing
                # (schedule/spider class paths and kwargs) so it can be
                # recovered later.  A recording failure must not stop startup.
                RecorderManager.instance().record_doing(
                    record(
                        self._worker_name,
                        self.worker_statistic.start_time.strftime(
                            "%Y-%m-%d %H:%M:%S"),
                        get_class_path(self.spider.crawl_schedule.__class__),
                        self.spider.crawl_schedule.schedule_kwargs,
                        get_class_path(self.spider.__class__),
                        self.spider.spider_kwargs))
            except Exception as e:  # `as` form: valid on Py2.6+ and Py3
                # Lazy %-style args: formatting is skipped if the level is off.
                self.logger.warning("record worker failed:%s", e)

            _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                                self.spider.crawl_schedule)

            # Kick off the periodic fetch loop after one schedule interval
            # (interval is in milliseconds).
            ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(
                    milliseconds=self.spider.crawl_schedule.interval),
                self.loop_get_and_execute)
            self.logger.info("start worker")
Example #2
0
    def start(self):
        """Start this worker.

        On start, the spider's ``start_tasks`` are moved into the pending
        crawl queue and the first fetch cycle is scheduled on the IOLoop.
        Calling start() on an already-started worker is a no-op (only a
        warning is logged).
        """
        if self.is_started:
            self.logger.warning("duplicate start")
        else:
            self.is_started = True
            self.worker_statistic.start_time = datetime.datetime.now()
            try:
                # Best-effort bookkeeping: persist what this worker is doing
                # (schedule/spider class paths and kwargs) so it can be
                # recovered later.  A recording failure must not stop startup.
                RecorderManager.instance().record_doing(
                    record(
                        self._worker_name,
                        self.worker_statistic.start_time.strftime(
                            "%Y-%m-%d %H:%M:%S"),
                        get_class_path(self.spider.crawl_schedule.__class__),
                        self.spider.crawl_schedule.schedule_kwargs,
                        get_class_path(self.spider.__class__),
                        self.spider.spider_kwargs))
            except Exception as e:  # `as` form: valid on Py2.6+ and Py3
                # Lazy %-style args: formatting is skipped if the level is off.
                self.logger.warning("record worker failed:%s", e)

            _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                                self.spider.crawl_schedule)

            # Kick off the periodic fetch loop after one schedule interval
            # (interval is in milliseconds).
            ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(
                    milliseconds=self.spider.crawl_schedule.interval),
                self.loop_get_and_execute)
            self.logger.info("start worker")
Example #3
0
    def recover(self):
        """Start this worker in recovery mode.

        Calling recover() on an already-started worker is a no-op (only a
        warning is logged).
        """
        if self.is_started:
            self.logger.warning("duplicate start")
        else:
            self.worker_statistic.start_time = datetime.datetime.now()
            try:
                # Best-effort bookkeeping, consistent with start(): a failure
                # to record must not abort recovery (previously an exception
                # here propagated and left the worker unstarted).
                RecorderManager.instance().record_doing(
                    record(self._worker_name,
                           self.worker_statistic.start_time.strftime(
                               "%Y-%m-%d %H:%M:%S"),
                           get_class_path(self.spider.crawl_schedule.__class__),
                           self.spider.crawl_schedule.schedule_kwargs,
                           get_class_path(self.spider.__class__),
                           self.spider.spider_kwargs))
            except Exception as e:  # `as` form: valid on Py2.6+ and Py3
                self.logger.warning("record worker failed:%s", e)

            self.is_started = True
            # Kick off the periodic fetch loop after one schedule interval
            # (interval is in milliseconds).
            ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(
                    milliseconds=self.spider.crawl_schedule.interval),
                self.loop_get_and_execute)
            self.logger.info("recover worker")
Example #4
0
    def recover(self):
        """Start this worker in recovery mode.

        Calling recover() on an already-started worker is a no-op (only a
        warning is logged).
        """
        if self.is_started:
            self.logger.warning("duplicate start")
        else:
            self.worker_statistic.start_time = datetime.datetime.now()
            try:
                # Best-effort bookkeeping, consistent with start(): a failure
                # to record must not abort recovery (previously an exception
                # here propagated and left the worker unstarted).
                RecorderManager.instance().record_doing(
                    record(
                        self._worker_name,
                        self.worker_statistic.start_time.strftime(
                            "%Y-%m-%d %H:%M:%S"),
                        get_class_path(self.spider.crawl_schedule.__class__),
                        self.spider.crawl_schedule.schedule_kwargs,
                        get_class_path(self.spider.__class__),
                        self.spider.spider_kwargs))
            except Exception as e:  # `as` form: valid on Py2.6+ and Py3
                self.logger.warning("record worker failed:%s", e)

            self.is_started = True
            # Kick off the periodic fetch loop after one schedule interval
            # (interval is in milliseconds).
            ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(
                    milliseconds=self.spider.crawl_schedule.interval),
                self.loop_get_and_execute)
            self.logger.info("recover worker")