Пример #1
0
    def __init__(self, master='127.0.0.1:2181', type='spider'):
        """
        Set up a processing executor.

        Delegates base initialisation to ``Worker.__init__`` and then
        attaches the processor task queue and the storage pipeline.

        :param master: address of the master node
        :param type: executor type
        :return: None
        """
        Worker.__init__(self, master, type)

        # NOTE(review): self.redis, self.mongodb and self.config are presumably
        # provided by Worker.__init__ -- confirm against the base class.

        # Task queue this executor works with.
        queue_backend = self.redis
        queue_name = self.config.get("processor_queue")
        self.processer_queue = FIFOQueue(queue_backend, queue_name)

        # Storage database used by the pipeline.
        storage_backend = self.mongodb
        storage_name = self.config.get("storage_db")
        self.storage_pipline = MongodbStorage(storage_backend, storage_name)
Пример #2
0
 def __init__(self, master='127.0.0.1:2181', type='spider', concurrency=5, **kwargs):
     """
     Set up an asynchronous spider executor.

     Delegates base initialisation to ``Worker.__init__`` and then
     attaches the task queues, the duplicate filter, the storage
     pipeline, the concurrency setting and an internal queue.

     :param master: address of the master node
     :param type: executor type
     :param concurrency: concurrency level, stored on ``self.concurrency``
     :param kwargs: accepted but not used inside this constructor
     :return: None
     """
     Worker.__init__(self, master, type)

     # NOTE(review): self.redis, self.mongodb and self.config are presumably
     # provided by Worker.__init__ -- confirm against the base class.

     # Task queues: one for spider tasks, one for processor tasks.
     self.spider_queue = FIFOQueue(
         self.redis,
         self.config.get("spider_queue"),
     )
     self.processer_queue = FIFOQueue(
         self.redis,
         self.config.get("processor_queue"),
     )

     # Duplicate filter.
     self.duplicate_filter = DuplicateFilter(
         self.redis,
         self.config.get("duplicate_set"),
     )

     # Storage database used by the pipeline.
     self.storage_pipline = MongodbStorage(
         self.mongodb,
         self.config.get("storage_db"),
     )

     # Requested concurrency level, kept as given by the caller.
     self.concurrency = concurrency

     # Internal queue private to this executor instance.
     self._queue = queues.Queue()