示例#1
0
    def __get_rec_item_ids(self, item_id, sim_cat_ids):
        """
        Collect the recommended item ids for a query item.

        Candidates are gathered category by category via
        __find_sim_item_from_a_list, merged into one list, ordered by the
        score of each (item_id, score) pair (highest first) and truncated.

        Args:
            item_id: item_id of the query item.
            sim_cat_ids: list of (cat_id, sim_value) pairs, the similar
                cat_ids of item_id. Only the first
                self.strategy.get_max_sim_cat_process() entries are used;
                the sim_value of each pair is not read here.
        Returns:
            [rec_item_id, rec_item_id, ...] in descending score order, at
            most self.strategy.get_total_rec_num() entries long.
            None when item_id is not present in self.dim_items_index.
        """

        target_item = self.dim_items_index.get(item_id, -1)
        if target_item == -1:
            write_log(sys._getframe().f_lineno, "cannot get info of item_id:%d" % (item_id) )
            # Callers may rely on None to detect an unknown item, so the
            # original "no value" result is kept rather than an empty list.
            return None

        rec_items = []
        sim_cat_ids = sim_cat_ids[0: self.strategy.get_max_sim_cat_process()]
        count = 0
        timer_total = Timer()
        # The position of the category in the (truncated) list is forwarded
        # to the per-category search, which uses it to decide its quota.
        for i, (cat_id, _cat_sim) in enumerate(sim_cat_ids):
            sim_item_ids = self.cat_to_item_rindex.get(cat_id, [])
            if len(sim_item_ids) == 0:
                write_log(sys._getframe().f_lineno, "cat_id:%d has no item" % (cat_id) )
                continue
            timer = Timer()
            count += len(sim_item_ids)
            res_list = self.__find_sim_item_from_a_list(target_item, sim_item_ids, i)
            write_log(msg = "__find_sim_item_from_a_list cost time:%f, sim_item_ids size:%d, i:%d" % (timer.get_diff(), len(sim_item_ids), i))
            rec_items.extend(res_list)
        write_log(msg = "all__find_sim_item_from_a_list cost time:%f, all_sim_item_ids size:%d" % (timer_total.get_diff(), count))

        write_log(msg = 'process item_id:%d, rec_items size:%d' % (item_id, len(rec_items)) )
        # key/reverse sorting replaces the cmp-style comparator (removed in
        # Python 3); it is stable, so ties keep their collection order.
        rec_items.sort(key=lambda pair: pair[1], reverse=True)
        rec_items = rec_items[0: self.strategy.get_total_rec_num()]

        # Only the ids are returned; the scores are dropped after ranking.
        return [rec_id for (rec_id, _score) in rec_items]
示例#2
0
    def __find_sim_item_from_a_list(self, target_item, sim_item_ids, idx, max_candidates=20000):
        """
        Score the candidate items of one category against the target item.

        Args:
            target_item: record of the query item; element [1] is used as
                its title.
            sim_item_ids: iterable of candidate item ids. Ids missing from
                self.dim_items_index are skipped.
            idx: position of the category being processed, passed to
                self.strategy.num_to_return_of_this_sim_cat() to obtain the
                number of results to keep.
            max_candidates: upper bound on how many known candidates are
                scored before the scan stops (default 20000, the value that
                was previously hard-coded).
        Returns:
            List of (sim_id, similarity) tuples. When more candidates were
            scored than the quota allows, the list is sorted by descending
            similarity and cut to the quota; otherwise it is returned in
            scan order, unsorted.
        """
        target_title = target_item[1]

        count = 0
        rec_items = []
        for sim_id in sim_item_ids:
            sim_item = self.dim_items_index.get(sim_id, -1)
            if sim_item == -1:
                continue

            # Only ids that resolve to a known item count towards the cap.
            count += 1
            if count > max_candidates:
                break

            sim_title = sim_item[1]
            timer = Timer()
            val = self.__cal_title_sim(target_title, sim_title)
            # NOTE(review): this logs once per scored candidate (up to
            # max_candidates lines per call); kept to preserve behaviour.
            write_log(msg = "__cal_title_sim cost time:%f, target_title size:%d, sim_title size:%d" % (timer.get_diff(), len(target_title), len(sim_title)))
            rec_items.append( (sim_id, val) )

        num_to_get = self.strategy.num_to_return_of_this_sim_cat(idx)
        if len(rec_items) > num_to_get:
            # key/reverse sorting replaces the cmp-style comparator (removed
            # in Python 3); it is stable, so ties keep their scan order.
            rec_items.sort(key=lambda pair: pair[1], reverse=True)
            rec_items = rec_items[0: num_to_get]
        return rec_items