示例#1
0
    def spider_end(self):
        self.record_end_time()

        if self._end_callback:
            self._end_callback()

        for parser in self._parsers:
            if not self._keep_alive:
                parser.close()
            parser.end_callback()

        if not self._keep_alive:
            # 关闭webdirver
            if Request.webdriver_pool:
                Request.webdriver_pool.close()

            # 关闭打点
            metrics.close()
        else:
            metrics.flush()

        # 计算抓取时长
        data = self._redisdb.hget(self._tab_spider_time,
                                  SPIDER_START_TIME_KEY,
                                  is_pop=True)
        if data:
            begin_timestamp = int(data)

            spand_time = tools.get_current_timestamp() - begin_timestamp

            msg = "《%s》爬虫结束,耗时 %s" % (
                self._spider_name,
                tools.format_seconds(spand_time),
            )
            log.info(msg)

            self.send_msg(msg)

        if self._keep_alive:
            log.info("爬虫不自动结束, 等待下一轮任务...")
        else:
            self.delete_tables(self._tab_spider_status)
示例#2
0
    def run(self):
        self.start_callback()

        for i in range(self._thread_count):
            parser_control = AirSpiderParserControl(self._memory_db,
                                                    self._item_buffer)
            parser_control.add_parser(self)
            parser_control.start()
            self._parser_controls.append(parser_control)

        self._item_buffer.start()

        self.distribute_task()

        while True:
            try:
                if self.all_thread_is_done():
                    # 停止 parser_controls
                    for parser_control in self._parser_controls:
                        parser_control.stop()

                    # 关闭item_buffer
                    self._item_buffer.stop()

                    # 关闭webdirver
                    if Request.webdriver_pool:
                        Request.webdriver_pool.close()

                    log.info("无任务,爬虫结束")
                    break

            except Exception as e:
                log.exception(e)

            tools.delay_time(1)  # 1秒钟检查一次爬虫状态

        self.end_callback()
        # 为了线程可重复start
        self._started.clear()
        # 关闭打点
        metrics.close()
示例#3
0
from feapder.utils import metrics

# 初始化打点系统
metrics.init()

metrics.emit_counter("key", count=1, classify="test")

metrics.close()