Пример #1
0
 def run(self):
     """Crawl the goods of every queued material module into ``self.queue``.

     Material ids are taken from ``self.q_material`` until it is empty.
     For each id the listing is fetched page by page (starting at page 1)
     until a page comes back without a non-empty ``data.list``.  Every
     product id not yet recorded in ``self.global_goods_ids`` is recorded,
     its detail record is fetched, and the detail's ``data`` is put on
     ``self.queue``.

     ``self.event`` is set when an item is added to an empty queue and
     when the material queue runs dry.  When the output queue is full the
     thread waits on the event, clears it, and then enqueues the item
     (the item used to be silently discarded in that case).
     """
     loop = asyncio.new_event_loop()
     try:
         while True:
             if self.q_material.empty():
                 # Nothing left to crawl: raise the flag and stop.
                 self.event.set()
                 break
             material_id = self.q_material.get()
             print("开始抓取模块%s" % material_id)
             self.q_material.task_done()
             page = 1
             while True:
                 rand = tools.get_random_num(12)
                 resp = loop.run_until_complete(
                     tools.get_goods_by_material_id(self.activity_id, rand,
                                                    material_id, page))
                 # A missing response, missing/empty "data" or
                 # missing/empty "list" all mean the module is exhausted.
                 data = resp.get('data') if resp is not None else None
                 items = data.get('list') if data else None
                 if not items:
                     print("抓取模块%s完毕,总页数:%s" % (material_id, page))
                     break
                 for item in items:
                     goods_id = item.get('product_id')
                     if goods_id in self.global_goods_ids:
                         continue
                     self.global_goods_ids.append(goods_id)
                     detail = loop.run_until_complete(
                         tools.get_goods_by_id(goods_id))
                     if not detail:
                         continue
                     goods = detail.get('data')
                     if self.queue.full():
                         print("队列已满,总数%s" % self.queue.qsize())
                         # Queue full: wait for the flag, then reset it.
                         self.event.wait()
                         if self.event.is_set():
                             self.event.clear()
                         # Enqueue instead of dropping the item; assuming
                         # a standard queue.Queue, put() blocks until a
                         # slot is free.
                         self.queue.put(goods)
                         continue
                     # Only the transition from empty to non-empty raises
                     # the flag that wakes waiting consumer threads.
                     was_empty = self.queue.empty()
                     self.queue.put(goods)
                     if was_empty:
                         self.event.set()
                 page += 1
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")
Пример #2
0
 def run(self):
     """Crawl pages of recommended goods into ``self.queue``.

     The page number to fetch is claimed as ``len(self.global_page)`` and
     appended to ``self.global_page`` before each request.  Crawling stops
     at the first page without a non-empty ``data.list``.

     Items without a product id, with ``sell_num`` below 1 (a missing
     ``sell_num`` is treated as 0 instead of raising ``TypeError``), or
     already recorded in ``self.global_goods_ids`` are skipped.  For the
     rest, the detail record is fetched and its ``data`` is put on
     ``self.queue``, after which ``self.event`` is set.  When the output
     queue is full the thread waits on the event, clears it, and then
     enqueues the item (the item used to be silently discarded).
     """
     loop = asyncio.new_event_loop()
     try:
         page = len(self.global_page)
         self.global_page.append(page)
         while True:
             resp = loop.run_until_complete(
                 tools.get_recommend_goods(page))
             # A missing response, missing/empty "data" or missing/empty
             # "list" all mean there is nothing more to crawl.
             data = resp.get('data') if resp is not None else None
             items = data.get('list') if data else None
             if not items:
                 print("抓取精选完毕,总页数:%s" % (page))
                 break
             for item in items:
                 goods_id = item.get('product_id')
                 if not goods_id:
                     continue
                 # Guard against a missing sell_num: None < 1 raises.
                 sell_num = item.get('sell_num') or 0
                 if sell_num < 1:
                     continue
                 if goods_id in self.global_goods_ids:
                     print("重复")
                     continue
                 self.global_goods_ids.append(goods_id)
                 detail = loop.run_until_complete(
                     tools.get_goods_by_id(goods_id))
                 if not detail:
                     continue
                 goods = detail.get('data')
                 if self.queue.full():
                     print("队列已满,总数%s" % self.queue.qsize())
                     # Queue full: wait for the flag, then reset it.
                     self.event.wait()
                     if self.event.is_set():
                         self.event.clear()
                     # Enqueue instead of dropping the item; assuming a
                     # standard queue.Queue, put() blocks until a slot
                     # is free.
                     self.queue.put(goods)
                     continue
                 # Queue has room: add the item and raise the flag to
                 # wake waiting consumer threads.
                 self.queue.put(goods)
                 self.event.set()
             page = len(self.global_page)
             self.global_page.append(page)
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")
Пример #3
0
 def run(self):
     """Fetch the detail of every queued goods id into ``self.queue``.

     Goods ids are taken from ``self.q_goods`` until it is empty, at which
     point ``self.event`` is set and the loop ends.  A goods id is skipped
     when its detail response or ``data`` is missing, when ``data`` has no
     name (reported as off the shelf), or when the id is already recorded
     in ``self.global_goods_ids``.  Otherwise the id is recorded, ``data``
     is put on ``self.queue`` and ``self.event`` is set.

     When the output queue is full the thread waits on the event, clears
     it, and then enqueues the item (the item used to be silently
     discarded in that case).
     """
     loop = asyncio.new_event_loop()
     try:
         while True:
             if self.q_goods.empty():
                 # Nothing left to fetch: raise the flag and stop.
                 self.event.set()
                 break
             goods_id = self.q_goods.get()
             print(u"开始抓取商品%s" % goods_id)
             self.q_goods.task_done()
             detail = loop.run_until_complete(
                 tools.get_goods_by_id(goods_id))
             goods = detail.get('data') if detail else None
             if not goods:
                 continue
             # A missing or empty name marks the goods as off the shelf.
             if not goods.get('name'):
                 print("下架: %s" % goods_id)
                 continue
             if goods_id in self.global_goods_ids:
                 continue
             self.global_goods_ids.append(goods_id)
             if self.queue.full():
                 print("队列已满,总数%s" % self.queue.qsize())
                 # Queue full: wait for the flag, then reset it.
                 self.event.wait()
                 if self.event.is_set():
                     self.event.clear()
                 # Enqueue instead of dropping the item; assuming a
                 # standard queue.Queue, put() blocks until a slot is
                 # free.
                 self.queue.put(goods)
                 continue
             # Queue has room: add the item and raise the flag to wake
             # waiting consumer threads.
             self.queue.put(goods)
             self.event.set()
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")
Пример #4
0
 def run(self):
     """Crawl the goods of every queued shop into ``self.queue``.

     Shops are taken from ``self.q_shops`` until it is empty.  For each
     shop two paginated listings are walked from page 0:
     ``tools.get_first_goods_by_shop`` and then ``tools.get_goods_by_shop``.
     Each listing ends at the first page without a non-empty
     ``data.list``.  For every listed product the detail record is
     fetched, and at most one detail per distinct
     ``"<shop_id> <shop_tel>"`` key (per queued shop) is put on
     ``self.queue``.  Product ids are also de-duplicated through
     ``self.global_goods_ids``.

     Fixes relative to the previous implementation:

     * the shop id being crawled is no longer overwritten by the shop id
       found in a detail record, so later page requests and log lines
       keep referring to the queued shop;
     * a listing whose ``data`` lacks ``list`` ends the crawl instead of
       raising ``TypeError``;
     * a detail response without ``data`` is skipped instead of raising
       ``AttributeError``;
     * an item arriving while the output queue is full is enqueued after
       the wait instead of being silently discarded.
     """
     loop = asyncio.new_event_loop()

     def enqueue(goods, always_signal):
         """Put ``goods`` on the output queue, waiting first if it is full.

         With ``always_signal`` the event is set after every put that
         found room; otherwise only when the queue was empty beforehand.
         """
         if self.queue.full():
             print("队列已满,总数%s" % self.queue.qsize())
             # Queue full: wait for the flag, then reset it.
             self.event.wait()
             if self.event.is_set():
                 self.event.clear()
             # Assuming a standard queue.Queue, put() blocks until a
             # slot is free, so the item is no longer lost.
             self.queue.put(goods)
             return
         was_empty = self.queue.empty()
         self.queue.put(goods)
         if always_signal or was_empty:
             # Wake consumer threads waiting for data.
             self.event.set()

     try:
         while True:
             if self.q_shops.empty():
                 # Nothing left to crawl: raise the flag and stop.
                 self.event.set()
                 break
             shop_id = self.q_shops.get().shop_id
             print("开始抓取店铺%s" % shop_id)
             self.q_shops.task_done()
             # "<shop_id> <shop_tel>" keys already emitted for this shop.
             seen_shop_keys = set()

             # First listing: product ids are de-duplicated before the
             # detail request is made.
             page = 0
             while True:
                 resp = loop.run_until_complete(
                     tools.get_first_goods_by_shop(shop_id, page))
                 data = resp.get('data') if resp is not None else None
                 items = data.get('list') if data else None
                 if not items:
                     print("抓取店铺%s完毕,总页数:%s" % (shop_id, page))
                     break
                 for item in items:
                     goods_id = item.get('product_id')
                     if goods_id in self.global_goods_ids:
                         continue
                     self.global_goods_ids.append(goods_id)
                     detail = loop.run_until_complete(
                         tools.get_goods_by_id(goods_id))
                     goods = detail.get('data') if detail else None
                     if not goods:
                         continue
                     key = '%s %s' % (goods.get('shop_id'),
                                      goods.get('shop_tel'))
                     if key in seen_shop_keys:
                         continue
                     seen_shop_keys.add(key)
                     enqueue(goods, True)
                 page += 1

             # Second listing: the detail is fetched first and the
             # product id is de-duplicated afterwards.
             page = 0
             while True:
                 resp = loop.run_until_complete(
                     tools.get_goods_by_shop(shop_id, page))
                 data = resp.get('data') if resp is not None else None
                 items = data.get('list') if data else None
                 if not items:
                     print("抓取店铺%s完毕,总页数:%s" % (shop_id, page))
                     break
                 for item in items:
                     goods_id = item.get('product_id')
                     detail = loop.run_until_complete(
                         tools.get_goods_by_id(goods_id))
                     if not detail:
                         # Show the listing entry whose detail is missing.
                         print(item)
                         continue
                     goods = detail.get('data')
                     if not goods:
                         continue
                     key = '%s %s' % (goods.get('shop_id'),
                                      goods.get('shop_tel'))
                     if key in seen_shop_keys:
                         continue
                     seen_shop_keys.add(key)
                     if goods_id in self.global_goods_ids:
                         continue
                     self.global_goods_ids.append(goods_id)
                     enqueue(goods, False)
                 page += 1
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")
Пример #5
0
 def run(self):
     """Crawl the goods of every queued category into ``self.queue``.

     Category records (mappings with ``id``, ``cids`` and ``parent_id``)
     are taken from ``self.q_category`` until it is empty.  For each one
     the listing is fetched page by page from page 0 until a page comes
     back without a non-empty ``data.list``.

     Items with ``sell_num`` below 1 (a missing ``sell_num`` is treated
     as 0 instead of raising ``TypeError``) or already recorded in
     ``self.global_goods_ids`` are skipped.  For the rest, the detail
     record is fetched and its ``data`` is put on ``self.queue``.
     ``self.event`` is set when an item is added to an empty queue and
     when the category queue runs dry.  When the output queue is full the
     thread waits on the event, clears it, and then enqueues the item
     (the item used to be silently discarded in that case).
     """
     loop = asyncio.new_event_loop()
     try:
         while True:
             if self.q_category.empty():
                 # Nothing left to crawl: raise the flag and stop.
                 self.event.set()
                 break
             category = self.q_category.get()
             print("开始抓取分类%s" % category["id"])
             self.q_category.task_done()
             page = 0
             while True:
                 resp = loop.run_until_complete(
                     tools.get_goods_by_category(category["cids"],
                                                 category["id"],
                                                 category["parent_id"],
                                                 page))
                 # A missing response, missing/empty "data" or
                 # missing/empty "list" all mean the category is done.
                 data = resp.get('data') if resp is not None else None
                 items = data.get('list') if data else None
                 if not items:
                     print("抓取分类%s完毕,总页数:%s" % (category["id"], page))
                     break
                 for item in items:
                     # Guard against a missing sell_num: None < 1 raises.
                     sell_num = item.get('sell_num') or 0
                     goods_id = item.get('product_id')
                     if sell_num < 1:
                         continue
                     if goods_id in self.global_goods_ids:
                         continue
                     self.global_goods_ids.append(goods_id)
                     detail = loop.run_until_complete(
                         tools.get_goods_by_id(goods_id))
                     if not detail:
                         continue
                     goods = detail.get('data')
                     if self.queue.full():
                         print("队列已满,总数%s" % self.queue.qsize())
                         # Queue full: wait for the flag, then reset it.
                         self.event.wait()
                         if self.event.is_set():
                             self.event.clear()
                         # Enqueue instead of dropping the item; assuming
                         # a standard queue.Queue, put() blocks until a
                         # slot is free.
                         self.queue.put(goods)
                         continue
                     # Only the transition from empty to non-empty raises
                     # the flag that wakes waiting consumer threads.
                     was_empty = self.queue.empty()
                     self.queue.put(goods)
                     if was_empty:
                         self.event.set()
                 page += 1
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")
Пример #6
0
 def run(self):
     """Crawl the goods of every queued detail id into ``self.queue``.

     Ids are taken from ``self.q_ids`` until it is empty.  For each id the
     listing is fetched page by page from page 0.  The first page that
     comes back empty (no response, or missing/empty ``data``) is
     tolerated and skipped; the second empty page ends the crawl for that
     id.  Previously a ``None`` response after the first empty page raised
     ``AttributeError`` before the stop condition could be reached.

     Items with ``sell_num`` below 1 (a missing ``sell_num`` is treated
     as 0 instead of raising ``TypeError``) or already recorded in
     ``self.global_goods_ids`` are skipped.  For the rest, the detail
     record is fetched and its ``data`` is put on ``self.queue``.
     ``self.event`` is set when an item is added to an empty queue and
     when the id queue runs dry.  When the output queue is full the
     thread waits on the event, clears it, and then enqueues the item
     (the item used to be silently discarded in that case).
     """
     loop = asyncio.new_event_loop()
     try:
         while True:
             if self.q_ids.empty():
                 # Nothing left to crawl: raise the flag and stop.
                 self.event.set()
                 break
             detail_id = self.q_ids.get()
             print("开始抓取ID%s" % detail_id)
             self.q_ids.task_done()
             page = 0
             has_over = False  # True once one empty page has been seen
             while True:
                 resp = loop.run_until_complete(
                     tools.get_goods_by_detail_id(self.mertial_id,
                                                  detail_id, page))
                 items = resp.get('data') if resp is not None else None
                 if not items:
                     if not has_over:
                         # First empty page: note it and try the next one.
                         has_over = True
                         page += 1
                         continue
                     print("抓取ID%s完毕,总页数:%s" % (detail_id, page))
                     break
                 for item in items:
                     # Guard against a missing sell_num: None < 1 raises.
                     sell_num = item.get('sell_num') or 0
                     goods_id = item.get('product_id')
                     if sell_num < 1:
                         continue
                     if goods_id in self.global_goods_ids:
                         continue
                     self.global_goods_ids.append(goods_id)
                     detail = loop.run_until_complete(
                         tools.get_goods_by_id(goods_id))
                     if not detail:
                         continue
                     goods = detail.get('data')
                     if self.queue.full():
                         print("队列已满,总数%s" % self.queue.qsize())
                         # Queue full: wait for the flag, then reset it.
                         self.event.wait()
                         if self.event.is_set():
                             self.event.clear()
                         # Enqueue instead of dropping the item; assuming
                         # a standard queue.Queue, put() blocks until a
                         # slot is free.
                         self.queue.put(goods)
                         continue
                     # Only the transition from empty to non-empty raises
                     # the flag that wakes waiting consumer threads.
                     was_empty = self.queue.empty()
                     self.queue.put(goods)
                     if was_empty:
                         self.event.set()
                 page += 1
     finally:
         # Release the loop's resources even if a request raised.
         loop.close()
     print(self.name + "结束")