def run(self):
    """Worker loop: fetch tasks from the queue and crawl them until stopped.

    The loop keeps going for as long as ``self.runValue.value`` is truthy.
    Every pass calls ``self.signalget()``, takes one task from
    ``self.taskqueue`` and stores it in ``self.ctask``.  If the task's
    ``empty`` attribute is truthy the worker sleeps 10 seconds and polls
    again.  Otherwise the time spent waiting is reported through
    ``log_wait_task`` / ``log_get_task``, a crawler is created from the
    task's ``type``, ``key`` and ``data``, and the outcome is reported to
    ``Scheduler.finish``.
    """
    while self.runValue.value:
        self.signalget()

        wait_began = time.time()
        self.ctask = self.taskqueue.get()
        if self.ctask.empty:
            # Presumably a placeholder meaning "no task available";
            # back off before asking the queue again.
            time.sleep(10)
            continue
        self.log_wait_task(time.time() - wait_began)
        self.log_get_task()

        crawl_began = time.time()
        crawler = Crawler().create(
            self.ctask.type, self.ctask.key, self.ctask.data)
        if not crawler:
            # No usable crawler came back for this task: report it as failed.
            logger.error("CRAWL FAILED - <%s> %s"
                         % (self.taskqueue.queueid, self.ctask))
            success = False
        else:
            try:
                crawler.crawl()
                success = True
                logger.info("CRAWL SUCCEED - <%s> %s"
                            % (self.taskqueue.queueid, crawler))
                # Reached only when crawl() and the log call above did not
                # raise, so the duration is recorded for successes only.
                self.log_done_task(time.time() - crawl_began)
            except Exception:
                msg = get_exception_info()
                success = False
                logger.error("CRAWL FAILED - <%s> %s, %s"
                             % (self.taskqueue.queueid, crawler, msg))

        # Hand the result back; an absent crawler contributes an empty dict.
        if crawler:
            crawled_data = crawler.data
        else:
            crawled_data = {}
        Scheduler.finish(
            self.ctask.type, self.ctask.key, crawled_data, success)

    # Only reached once the run flag has been cleared.
    print("%s stops" % self.name)
if __name__ == "__main__":
    # Manual smoke test: build a single crawler and run it directly.
    #
    # Experiments that used to live here as disabled code:
    #   * a WorkProcess started on a Queue filled with ten Task objects
    #   * Crawler().create('ecommerce.gome.goodslist', 'cat10000054', ...)
    #   * Crawler().create('ecommerce.jd.goodscomment', '272765', ...)
    #   * Crawler().create('ecommerce.yhd.goodslist',
    #                      'http://list.yhd.com/c32159-0-0/', ...)
    keys = '7天无理由退货正式写入新《消费者权益保护法》'
    c = Crawler().create('zjld.baidu.newstitle', keys, data={})
    if not c:
        # run() treats a falsy create() result as a failed task; do the same
        # here instead of dying with an AttributeError on c.crawl().
        raise SystemExit("no crawler created for 'zjld.baidu.newstitle'")
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(c.crawl())
self.msgqueue.put(msg)


if __name__ == "__main__":
    # Manual smoke test: build a single crawler and run it directly.
    #
    # Experiments that used to live here as disabled code:
    #   * a WorkProcess started on a Queue filled with ten Task objects
    #   * Crawler().create('ecommerce.gome.goodslist', 'cat10000054', ...)
    #   * Crawler().create('ecommerce.jd.goodscomment', '272765', ...)
    #   * Crawler().create('ecommerce.yhd.goodslist',
    #                      'http://list.yhd.com/c32159-0-0/', ...)
    keys = '7天无理由退货正式写入新《消费者权益保护法》'
    c = Crawler().create('zjld.baidu.newstitle', keys, data={})
    if not c:
        # run() treats a falsy create() result as a failed task; do the same
        # here instead of dying with an AttributeError on c.crawl().
        raise SystemExit("no crawler created for 'zjld.baidu.newstitle'")
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(c.crawl())