Пример #1
0
    def run(self):
        """Worker main loop.

        Repeatedly pulls a task from ``self.taskqueue``, builds the matching
        crawler via ``Crawler().create`` and runs it, then reports the
        outcome to ``Scheduler.finish``.  Loops until the shared
        ``self.runValue.value`` flag is cleared by the controller.

        Side effects: logs wait/get/done timings via the ``log_*`` helpers
        and writes success/failure lines to ``logger``.
        """
        while True:
            # Cooperative shutdown: an external controller clears runValue.
            # (Routed through logger for consistency with the other events
            # in this loop instead of a bare print.)
            if not self.runValue.value:
                logger.info("%s stops", self.name)
                break
            self.signalget()
            start_t = time.time()
            self.ctask = self.taskqueue.get()
            if self.ctask.empty:
                # No work available: back off before polling again.
                time.sleep(10)
                continue
            end_t = time.time()
            self.log_wait_task(end_t - start_t)
            self.log_get_task()
            start_t = time.time()

            c = Crawler().create(self.ctask.type, self.ctask.key, self.ctask.data)
            if c:
                try:
                    c.crawl()
                    success = True
                    # Lazy %-args: formatting only happens if the level is enabled.
                    logger.info("CRAWL SUCCEED - <%s> %s", self.taskqueue.queueid, c)
                    end_t = time.time()
                    self.log_done_task(end_t - start_t)
                except Exception:
                    msg = get_exception_info()
                    success = False
                    logger.error("CRAWL FAILED - <%s> %s, %s",
                                 self.taskqueue.queueid, c, msg)
            else:
                # Crawler factory returned nothing for this task type/key.
                logger.error("CRAWL FAILED - <%s> %s",
                             self.taskqueue.queueid, self.ctask)
                success = False

            # Always report the result so the scheduler can ack/reschedule;
            # fall back to an empty payload when no crawler was created.
            Scheduler.finish(self.ctask.type, self.ctask.key, c.data if c else {}, success)
Пример #2
0
    def run(self):
        """Worker main loop: pull tasks, run the matching crawler, and
        report the outcome to ``Scheduler.finish`` until the shared
        ``runValue`` flag is cleared.
        """
        while True:
            # Cooperative shutdown — presumably runValue is a shared flag
            # (e.g. multiprocessing.Value) cleared by a controller; verify.
            if not self.runValue.value:
                print "%s stops" % self.name
                break
            self.signalget()
            start_t = time.time()
            self.ctask = self.taskqueue.get()
            if self.ctask.empty:
                # No work available: back off before polling again.
                time.sleep(10)
                continue
            end_t = time.time()
            # Time spent blocked waiting for a task.
            self.log_wait_task(end_t - start_t)
            self.log_get_task()
            start_t = time.time()

            # Factory picks a crawler implementation from the task type.
            c = Crawler().create(self.ctask.type, self.ctask.key,
                                 self.ctask.data)
            if c:
                try:
                    c.crawl()
                    success = True
                    logger.info("CRAWL SUCCEED - <%s> %s" %
                                (self.taskqueue.queueid, c))
                    end_t = time.time()
                    # Time spent actually crawling.
                    self.log_done_task(end_t - start_t)
                except Exception:
                    msg = get_exception_info()
                    success = False
                    logger.error("CRAWL FAILED - <%s> %s, %s" %
                                 (self.taskqueue.queueid, c, msg))
            else:
                # Factory returned nothing for this task type/key.
                logger.error("CRAWL FAILED - <%s> %s" %
                             (self.taskqueue.queueid, self.ctask))
                success = False

            # Always report the result; empty payload when no crawler exists.
            Scheduler.finish(self.ctask.type, self.ctask.key,
                             c.data if c else {}, success)
Пример #3
0

if __name__ == "__main__":
    """
    tasks = Queue()
    for i in range(10):
        tasks.put(Task(i))
    main = Queue()
    p = WorkProcess(tasks)
    p.start()
    print "main process left"
    # """
    # import uuid
    # data = {
    #   #  'source': 'gome',
    #     "priorcategory" : ["冰箱 洗衣机 空调","冰箱/冷柜","冰箱"],
    #     "presentcategory": ["冰箱 洗衣机 空调","冰箱/冷柜","冰箱"],
    #     "uuid": uuid.uuid1()
    # }
#    c = Crawler().create('ecommerce.gome.goodslist', 'cat10000054', {"priorcategory" : ["教育音像"]})
    # c = Crawler().create('ecommerce.jd.goodscomment', '272765', data)
    # keys = 'http://list.yhd.com/c32159-0-0/'
    # c = Crawler().create('ecommerce.yhd.goodslist', keys, data)
    # print c
 #   c.crawl()


    keys = '7天无理由退货正式写入新《消费者权益保护法》'
    c = Crawler().create('zjld.baidu.newstitle', keys, data={})
    print c.crawl()
Пример #4
0
        self.msgqueue.put(msg)


if __name__ == "__main__":
    # NOTE(review): the triple-quoted string below is dead code kept as a
    # reference snippet — it is evaluated once as a no-op string literal,
    # never executed. Consider deleting it along with the # comments.
    """
    tasks = Queue()
    for i in range(10):
        tasks.put(Task(i))
    main = Queue()
    p = WorkProcess(tasks)
    p.start()
    print "main process left"
    # """
    # import uuid
    # data = {
    #   #  'source': 'gome',
    #     "priorcategory" : ["冰箱 洗衣机 空调","冰箱/冷柜","冰箱"],
    #     "presentcategory": ["冰箱 洗衣机 空调","冰箱/冷柜","冰箱"],
    #     "uuid": uuid.uuid1()
    # }
    #    c = Crawler().create('ecommerce.gome.goodslist', 'cat10000054', {"priorcategory" : ["教育音像"]})
    # c = Crawler().create('ecommerce.jd.goodscomment', '272765', data)
    # keys = 'http://list.yhd.com/c32159-0-0/'
    # c = Crawler().create('ecommerce.yhd.goodslist', keys, data)
    # print c
    #   c.crawl()

    # Smoke test: build a single Baidu news-title crawler and run it.
    # NOTE(review): create() may return a falsy value (see run() above),
    # in which case c.crawl() would raise AttributeError — confirm intent.
    keys = '7天无理由退货正式写入新《消费者权益保护法》'
    c = Crawler().create('zjld.baidu.newstitle', keys, data={})
    print c.crawl()