def shop_change(shop):
    """Record a crawled shop, detect changes, and schedule its goods listing.

    The shop's tracked fields (as produced by ``trans_shop_change``) are
    compared with the snapshot stored in the Redis hash ``historical_shops``:

    * no snapshot         -> the shop is pushed to ``cache_shop_insert``;
    * snapshot identical  -> only the shop id goes to ``cache_shop_update_time``;
    * snapshot differs    -> the shop is pushed to both ``cache_shop_update``
      and ``cache_shop_change_insert``.

    In every case the Redis snapshot is rewritten and a level-1
    ``goods_list`` task for the shop is put on ``queue_shopee``.

    Args:
        shop: mapping describing the shop; must contain ``'shopid'``.
    """
    tracked = trans_shop_change(shop)
    shop_id = shop['shopid']

    # Serialise once. The same string is stored in Redis below, and its
    # decoded form is what gets compared with the stored snapshot.
    snapshot_json = json.dumps(dict(tracked), cls=DateEnconding)

    stored = redis_db.hget("historical_shops", shop_id)
    if stored is None:
        print('shopee_shope,shopee_shope_change insert')
        cache_shop_insert.put(json.dumps(dict(shop), cls=DateEnconding))
    else:
        historical_shop = json.loads(stored)
        # Compare like with like: the stored snapshot has been through a JSON
        # round trip, so the fresh one must be too. Otherwise any value the
        # encoder rewrites (non-JSON-native types, non-string keys, tuples)
        # would be reported as changed on every run.
        differences = json_tools.diff(json.loads(snapshot_json), historical_shop)
        if not differences:
            print('shopee_shope update time')
            cache_shop_update_time.put(shop_id)
        else:
            print('shopee_shope update,hopee_shope_change insert')
            shop_json = json.dumps(dict(shop), cls=DateEnconding)
            cache_shop_update.put(shop_json)
            cache_shop_change_insert.put(shop_json)

    # Whether the shop is new or changed, keep the Redis snapshot in sync.
    redis_db.hset('historical_shops', shop_id, snapshot_json)

    # Queue a task that starts walking through all goods of this shop.
    new_task = trans_task(shop)
    new_task['parse_type'] = 'goods_list'
    new_task['level'] = 1
    queue_shopee.put(json.dumps(dict(new_task), cls=DateEnconding))
def product_list(task):
    """Crawl one page of a shop's goods listing and fan out the other pages.

    For a level-1 task the first listing page (offset 0, sorted by sales) is
    parsed via ``product_list_parse``; its reported ``total_count`` is then
    used to enqueue one level-2 task per remaining page (30 items per page,
    at most 100 extra pages). Any other level is parsed directly from the
    URL already present on the task.

    On an unexpected error the original task is put back on ``queue_shopee``.

    Args:
        task: mapping with at least ``'shopid'`` and ``'level'``; level-1
            tasks get their ``'url'`` key (re)written in place.
    """
    page_size = 30
    max_extra_pages = 100
    # Sort options supported by the endpoint: by=pop (popularity),
    # by=ctime (newest), by=sales (sales volume).
    url_template = (
        "https://ph.xiapibuy.com/api/v2/search_items/?by=sales&limit=30"
        "&match_id={shopid}&newest={offset}&order=desc&page_type=shop&version=2"
    )

    shopid = task['shopid']
    try:
        if task['level'] != 1:
            product_list_parse(task)
            return

        task['url'] = url_template.format(shopid=shopid, offset=0)
        total_count = product_list_parse(task)
        if total_count is None:
            # product_list_parse returns None after it has already requeued
            # the task on failure. Falling through would raise a TypeError
            # below and requeue the very same task a second time.
            return

        # Offset 0 has just been fetched, so only the pages after it remain.
        # (total_count - 1) // page_size avoids requesting an empty page when
        # the count is an exact multiple of the page size.
        extra_pages = min(max(total_count - 1, 0) // page_size, max_extra_pages)
        for page in range(1, extra_pages + 1):
            new_task = trans_task(task)
            new_task['level'] = 2
            new_task['url'] = url_template.format(
                shopid=shopid, offset=page * page_size)
            queue_shopee.put(json.dumps(dict(new_task), cls=DateEnconding))
    except Exception as e:
        traceback.print_exc()
        print('goods_list异常--', e)
        # Put the task back so it is retried later.
        queue_shopee.put(json.dumps(dict(task), cls=DateEnconding))
def shopee_search():
    """Poll ``queue_shopee_search`` forever and dispatch every task found.

    Each queue entry is decoded and parsed as JSON:

    * a dict is converted with ``trans_task`` and handed to ``deal``;
    * a list is processed element by element, skipping any element that
      contains ``'fastjson'``;
    * anything else only has its type printed.

    When the queue yields ``None`` the loop sleeps for 10 seconds before
    polling again. This function never returns.
    """
    while True:
        raw = queue_shopee_search.get_nowait()
        if raw is None:
            # Nothing to do right now (assumes get_nowait signals an empty
            # queue with None rather than by raising -- TODO confirm).
            time.sleep(10)
            continue

        payload = json.loads(raw.decode())
        if isinstance(payload, dict):
            deal(trans_task(payload))
        elif isinstance(payload, list):
            for entry in payload:
                if 'fastjson' in entry:
                    continue
                deal(trans_task(entry))
        else:
            print(type(payload))
def product_list_parse(task):
    """Fetch one goods-listing page and enqueue a ``goods`` task per item.

    The page at ``task['url']`` is downloaded with ``get_html`` and parsed as
    JSON. Every entry of its ``items`` list whose ``historical_sold`` is not
    0 becomes a new task (``spu`` = item id, ``parse_type`` = ``'goods'``)
    on ``queue_shopee``.

    Args:
        task: mapping that must contain ``'url'``; it is used as the template
            for the per-item tasks via ``trans_task``.

    Returns:
        The ``total_count`` value reported by the page, or ``None`` when the
        page could not be fetched or parsed. In the failure case the original
        task has already been put back on ``queue_shopee``.
    """
    url = task['url']
    try:
        html = get_html(url)
        listing = json.loads(html)
        total_count = listing['total_count']

        # Build every payload before enqueuing anything. If a later item is
        # malformed the whole page is requeued, and goods already enqueued
        # from this attempt would be queued again on retry.
        payloads = []
        for item in listing['items']:
            # Items with zero lifetime sales are discarded.
            if item['historical_sold'] == 0:
                continue
            new_task = trans_task(task)
            new_task['spu'] = item['itemid']
            new_task['parse_type'] = 'goods'
            payloads.append(json.dumps(dict(new_task), cls=DateEnconding))

        for payload in payloads:
            queue_shopee.put(payload)
        return total_count
    except Exception as e:
        traceback.print_exc()
        print('goods_list异常--', e)
        # Put the task back so the page is retried later.
        queue_shopee.put(json.dumps(dict(task), cls=DateEnconding))
        return None