def update_main():
    """Walk the goods collection batch by batch, logging every page visited.

    Each round asks ``TbkGoods.find_goods_by_cond`` for the ``title``,
    ``num_id`` and ``update_time`` fields of the next batch, remembers the
    ``_id`` of the last document it iterated over and uses that value as a
    ``$gt`` lower bound for the following query.  The walk stops, printing
    ``done``, as soon as a batch yields no documents.

    The hand-off to ``update_worker`` is currently commented out, so the
    thread pool is created and shut down without receiving any work.
    """
    batch_size = 2000
    page = 20  # the starting page is hard-coded
    workers = ThreadPool(16)
    goods_obj = TbkGoods()

    last_id = ''
    while True:
        # NOTE(review): the query is bounded by `_id > last_id` *and* the
        # page number keeps growing; if find_goods_by_cond turns `page` into
        # a skip offset this would jump over documents -- confirm.
        cond = {'_id': {'$gt': last_id}} if last_id else {}
        batch = goods_obj.find_goods_by_cond(
            cond, page, batch_size, ['title', 'num_id', 'update_time'])

        # Drain the batch; only the _id of the final document is kept.
        last_id = ''
        for doc in batch:
            last_id = doc['_id']

        if not last_id:
            # Nothing came back: the walk is finished.
            print("done")
            break

        LOG.info("page: %s ok", page)
        # Dispatch to the pool is disabled for now:
        # workers.apply_async(update_worker, (batch, page))
        page += 1

    workers.close()
    workers.join()
def local_search(params):
    """Search goods by keyword and return them in the searcher's rank order.

    ``params`` is a mapping read with ``.get``:

    * ``keyword`` -- the search text; a ``unicode`` value is encoded to UTF-8.
    * ``page`` / ``count`` -- paging values, defaulting to 1 and 20.
    * ``tid`` -- sort selector, defaulting to 1.  8 sorts on
      ``coupon_amount`` with direction -1, 6 on ``sales`` with direction -1,
      7 on ``coupon_fee`` with direction 1, and 9 hands the whole request to
      ``super_search_miniapp``.  Any other value sends an empty sort spec.

    If ``find_goods_info(keyword)`` yields a truthy record, that record is
    stored through ``save_goods_info`` and returned as the only result.
    Otherwise the hits from ``searcher.search`` are grouped by their
    ``table`` (default ``goods``), the matching documents are loaded per
    table, and the shipped documents are returned in the order of the hits.

    Returns a dict ``{'errcode': 0, 'data': [...]}`` (or whatever
    ``super_search_miniapp`` returns when ``tid`` is 9).
    """
    keyword = params.get("keyword")
    page = int(params.get("page", 1))
    count = int(params.get("count", 20))
    sort = int(params.get('tid', 1))
    if isinstance(keyword, unicode):
        keyword = keyword.encode("utf-8")

    # Short-circuit: the keyword itself resolved to a single goods record.
    shared = find_goods_info(keyword)
    if shared:
        save_goods_info(shared['num_id'], shared)
        return {'errcode': 0, 'data': [_ship_miniapp(shared)]}

    if sort == 9:
        return super_search_miniapp({
            'keyword': keyword,
            'page': page,
            'count': count,
            'yq': 0,
            'tid': 0
        })

    sort_specs = {
        8: {'coupon_amount': -1},
        6: {'sales': -1},
        7: {'coupon_fee': 1},
    }
    # Copy so the searcher always receives a fresh dict.
    sort_dict = dict(sort_specs.get(sort, {}))

    hits = searcher.search(keyword, sort_dict, page=page, count=count)
    LOG.info('keyword: %s, ret: %s', keyword, len(hits))

    # Group the hit ids by table while remembering the overall rank order.
    ids_by_table = {}
    ranked_ids = []
    for hit in hits:
        goods_id = int(hit['id'])
        ids_by_table.setdefault(hit.get("table", 'goods'), []).append(goods_id)
        ranked_ids.append(goods_id)

    # Load the documents table by table, keyed by num_id.
    goods_obj = TbkGoods()
    shipped_by_id = {}
    for table, ids in ids_by_table.items():
        goods_obj.__table__ = table
        rows = goods_obj.find_goods_by_cond(
            {'num_id': {'$in': ids}}, 1, count=100)
        for row in rows:
            if row.get('num_id') is None:
                # Documents without a num_id cannot be matched back to a hit.
                LOG.info(row['_id'])
                continue
            shipped_by_id[row['num_id']] = _ship_miniapp(row)

    # Re-establish rank order, dropping hits with no (truthy) document.
    result = [shipped_by_id[_id] for _id in ranked_ids
              if shipped_by_id.get(_id)]
    return {'errcode': 0, 'data': result}
def list_goods(cid=None, page=1, count=20):
    """Return one page of goods whose ``coupon_expire`` is 0.

    :param cid: optional category id; when truthy the query is narrowed to
        documents with that ``category_id``.
    :param page: page value forwarded to ``find_goods_by_cond``.
    :param count: count value forwarded to ``find_goods_by_cond``.
    :return: ``{'goods': [...]}`` where each entry is the result of
        ``_ship_db_goods`` for one document.
    """
    cond = {'coupon_expire': 0}
    if cid:
        cond['category_id'] = cid
    rows = TbkGoods().find_goods_by_cond(cond, page, count)
    return {'goods': [_ship_db_goods(row) for row in rows]}
def get_data(self, params):
    """Fetch one page of goods from the ``haitao`` table.

    ``params`` is first passed to ``self.validate``; if the returned dict
    has a non-zero ``errcode`` it is returned unchanged.  Otherwise the
    table is queried without any filter using ``self.page`` and
    ``self.count``, and ``.sort`` is called on the result when
    ``self.get_sort_field()`` returns something truthy.

    :return: the validation result on failure, else
        ``{'errcode': 0, 'data': <query result>}``.
    """
    check = self.validate(params)
    if check['errcode'] != 0:
        return check

    store = TbkGoods()
    store.__table__ = 'haitao'
    order = self.get_sort_field()
    found = store.find_goods_by_cond({}, self.page, self.count)
    if order:
        # The return value of sort() is ignored, so this relies on the call
        # modifying `found` itself.
        found.sort(order)
    return {'errcode': 0, 'data': found}
def get_data(self, params):
    """Fetch one page of goods matching the condition built by this object.

    ``params`` is first passed to ``self.validate``; if the returned dict
    has a non-zero ``errcode`` it is returned unchanged.  Otherwise the
    filter from ``self.build_condition()`` is queried with ``self.page`` and
    ``self.count``, and ``.sort`` is called on the result when
    ``self.get_sort_field()`` returns something truthy.

    :return: the validation result on failure, else
        ``{'errcode': 0, 'data': <query result>}``.
    """
    check = self.validate(params)
    if check['errcode'] != 0:
        return check

    cond = self.build_condition()
    order = self.get_sort_field()
    LOG.info("cond: %s, sort: %s", cond, order)

    found = TbkGoods().find_goods_by_cond(cond, self.page, self.count)
    if order:
        # The return value of sort() is ignored, so this relies on the call
        # modifying `found` itself.
        found.sort(order)
    return {'errcode': 0, 'data': found}
def update_similar():
    """Fill in ``similar_goods`` for goods that do not have it yet.

    Pages through all goods 100 at a time, starting at the hard-coded page
    6.  Goods with a truthy ``similar_goods`` value are skipped; for the
    rest ``crawler_similar(num_id)`` is called and, unless it returns
    ``None``, the result is written back through ``TbkGoods.update``.  The
    next page number is printed after every page, and the loop stops after
    the first page that yields no goods.
    """
    page = 6
    page_size = 100
    goods_obj = TbkGoods()

    while True:
        saw_any = False
        for item in goods_obj.find_goods_by_cond({}, page, page_size):
            saw_any = True
            if item.get('similar_goods'):
                continue
            num_id = item['num_id']
            found = crawler_similar(num_id)
            if found is not None:
                TbkGoods(num_id=num_id).update({'similar_goods': found})

        # The counter is advanced and reported even for the final, empty
        # page, exactly as before the loop exits.
        page += 1
        print(page)
        if not saw_any:
            break
def update_one_by_one(table):
    """Refresh every goods document in ``table`` that is older than an hour.

    Pages through the table 1000 documents at a time.  A document is skipped
    when it has a truthy ``update_time`` that lies less than one hour
    (3,600,000 ms) before the timestamp taken for the current page; every
    other document is passed to ``update_goods``.  Paging stops after the
    first page that yields no documents.

    The number of ``update_goods`` calls is printed at the end.  Previously
    the counter was never incremented, so the summary always printed 0.

    :param table: name of the table to walk; also forwarded to
        ``update_goods``.
    """
    fresh_window_ms = 3600000  # one hour, in milliseconds
    page = 1
    count = 1000
    update_count = 0
    goods_obj = TbkGoods()
    goods_obj.__table__ = table
    LOG.info(table)

    have_data = True
    while have_data:
        have_data = False
        goods_list = goods_obj.find_goods_by_cond({}, page, count)
        # One timestamp per page, in milliseconds like `update_time`.
        now = int(time.time() * 1000)
        for goods in goods_list:
            have_data = True
            update_time = goods.get('update_time')
            if update_time and now - update_time < fresh_window_ms:
                # Refreshed recently enough; leave it alone.
                continue
            update_goods(goods['title'], goods['num_id'], table)
            update_count += 1
        page += 1
        LOG.info("page: %s", page)

    print(update_count)
def get_one_goods(cat=None):
    """Pick one random, not-yet-sent goods document from a category.

    When ``cat`` is ``None`` a category id is drawn at random from
    ``Category(recommend=1).all_category()``; otherwise ``cat`` is used as
    the category id directly.  The query then asks for goods in that
    category with ``coupon_amount`` > 5, ``sales`` > 3000, ``created`` within
    the last eight days (milliseconds), ``coupon_expire`` equal to 0 and no
    ``sended`` field, requesting page 1 with ``count=20``.

    :param cat: category id, or ``None`` to choose a category at random.
    :return: one randomly chosen goods document, or ``{}`` when there is
        nothing to choose from (no categories, or no matching goods).
    """
    if cat is None:
        cat_ids = [int(item['id'])
                   for item in Category(recommend=1).all_category()]
        if not cat_ids:
            # random.choice() raises IndexError on an empty sequence; report
            # "nothing found" the same way as an empty goods result instead.
            return {}
        cat_id = random.choice(cat_ids)
    else:
        cat_id = cat

    # Lower bound for `created`: eight days ago (seconds here, scaled to
    # milliseconds in the condition below).
    start = time.time() - 8 * 86400
    cond = {
        "coupon_amount": {'$gt': 5},
        "created": {'$gt': start * 1000},
        "sales": {'$gt': 3000},
        'category_id': cat_id,
        "sended": {'$exists': False},
        "coupon_expire": 0
    }
    LOG.debug(cond)

    goods_list = list(TbkGoods().find_goods_by_cond(cond, 1, count=20))
    if not goods_list:
        return {}
    return random.choice(goods_list)
def similar_goods(goods_id):
    """Return up to 20 goods related to ``goods_id``, ordered by sales.

    The source goods is looked up with ``get_goods_info_by_id`` and, failing
    that, with ``TbkGoods(num_id=goods_id).find_goods_by_id()``.  If neither
    finds it the result is ``(True, [])``.  When the source goods has a
    non-empty ``similar_goods`` list the query is restricted to those
    ``num_id`` values; otherwise any goods with ``coupon_expire`` equal to 0
    qualifies.  The query result is sorted on ``sales`` with direction -1
    and limited to 20.

    Each returned entry is the output of ``_ship_db_goods`` with its
    ``is_tmall`` value replaced by the string ``'inline'`` (truthy) or
    ``'none'`` (falsy) -- presumably CSS display values; confirm with the
    template that consumes them.

    :return: a ``(True, list)`` tuple.
    """
    source = get_goods_info_by_id(goods_id)
    if not source:
        source = TbkGoods(num_id=goods_id).find_goods_by_id()
        if not source:
            return True, []

    related_ids = source.get("similar_goods", [])
    goods_obj = TbkGoods()
    if related_ids:
        cond = {'num_id': {'$in': related_ids}, 'coupon_expire': 0}
    else:
        cond = {"coupon_expire": 0}

    matches = goods_obj.find_goods_by_cond(cond, 1, 100)
    matches = matches.sort([('sales', -1)]).limit(20)

    result = []
    for row in matches:
        shipped = _ship_db_goods(row)
        shipped['is_tmall'] = 'inline' if shipped['is_tmall'] else 'none'
        result.append(shipped)
    return True, result