def __init__(self, master='127.0.0.1:2181', type='spider'):
    """
    Initialise a processor worker.

    Runs the base ``Worker`` initialiser and then attaches the processor
    task queue and the storage pipeline to the instance.

    :param master: address of the master node
    :param type: worker type, forwarded unchanged to ``Worker.__init__``
        (the name shadows the builtin but is kept for existing callers)
    :return:
    """
    # NOTE(review): self.redis, self.config and self.mongodb are presumably
    # populated by Worker.__init__ -- confirm against the base class.
    Worker.__init__(self, master, type)

    # Task queue, built from self.redis and the "processor_queue"
    # config value.
    self.processer_queue = FIFOQueue(
        self.redis,
        self.config.get("processor_queue")
    )

    # Storage backend, built from self.mongodb and the "storage_db"
    # config value.
    self.storage_pipline = MongodbStorage(
        self.mongodb,
        self.config.get("storage_db")
    )
def __init__(self, master='127.0.0.1:2181', type='spider', concurrency=5, **kwargs):
    """
    Initialise an asynchronous spider worker.

    Runs the base ``Worker`` initialiser and then attaches the task queues,
    the duplicate filter, the storage pipeline, the concurrency setting and
    an internal queue to the instance.

    :param master: address of the master node
    :param type: worker type, forwarded unchanged to ``Worker.__init__``
        (the name shadows the builtin but is kept for existing callers)
    :param concurrency: concurrency level, stored on ``self.concurrency``
    :param kwargs: accepted for compatibility; not used by this initialiser
    :return:
    """
    # NOTE(review): self.redis, self.config and self.mongodb are presumably
    # populated by Worker.__init__ -- confirm against the base class.
    Worker.__init__(self, master, type)

    # Task queues, each built from self.redis and its config value.
    self.spider_queue = FIFOQueue(
        self.redis,
        self.config.get("spider_queue")
    )
    self.processer_queue = FIFOQueue(
        self.redis,
        self.config.get("processor_queue")
    )

    # Duplicate filter, built from self.redis and the "duplicate_set"
    # config value.
    self.duplicate_filter = DuplicateFilter(
        self.redis,
        self.config.get("duplicate_set")
    )

    # Storage backend, built from self.mongodb and the "storage_db"
    # config value.
    self.storage_pipline = MongodbStorage(
        self.mongodb,
        self.config.get("storage_db")
    )

    # Requested concurrency (described as a thread count in the original
    # comment).
    self.concurrency = concurrency

    # Internal queue private to this worker instance.
    self._queue = queues.Queue()