def run(train_set, test_set, ranker, num_interation, click_model, num_rankers):
    """Online LTR loop that optimises the ranker's weights with CMA-ES.

    Interactions are processed in batches of 10. Each batch asks CMA-ES for
    candidate weight vectors, accumulates a counterfactual fitness over the
    batch's click records, and tells the fitness back to the optimiser.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores = []   # offline nDCG@10 on test_set
    cndcg_scores = []  # online nDCG@10 of the list actually shown
    query_set = train_set.get_all_querys()
    # Pre-sample query indices for the whole run.
    index = np.random.randint(query_set.shape[0], size=num_interation)
    # NOTE(review): FEATURE_SIZE and cma are assumed to be module-level names — confirm.
    es = cma.CMAEvolutionStrategy(np.zeros((FEATURE_SIZE, )), 3)
    opt = cma.CMAOptions()
    # NOTE(review): `opt` is never passed to `es`, so this option likely has no effect — verify.
    opt.set('CSA_dampfac', 0.3)
    #es.sp.popsize = 500
    record = []  # click records accumulated across the whole run
    batch_size = 10
    for j in range(0, num_interation // batch_size):
        canditate_rankers = es.ask()  # candidate weight vectors from CMA-ES
        fitness = np.zeros((len(canditate_rankers, )))
        for i in index[j * batch_size:j * batch_size + batch_size]:
            qid = query_set[i]
            result_list = ranker.get_query_result_list(train_set, qid)
            clicked_doc, click_label, _ = click_model.simulate(
                qid, result_list, train_set)
            # if no clicks, skip the update but still log current performance.
            if len(clicked_doc) == 0:
                all_result = ranker.get_all_query_result_list(test_set)
                ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
                cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
                ndcg_scores.append(ndcg)
                cndcg_scores.append(cndcg)
                continue
            # flip click label. exp: [1,0,1,0,0] -> [0,1,0,0,0]
            last_click = np.where(click_label == 1)[0][-1]
            click_label[:last_click + 1] = 1 - click_label[:last_click + 1]
            # bandit record
            record.append(
                (qid, result_list, click_label, ranker.get_current_weights()))
            # NOTE(review): the [1:] slice implies ranker.fitness returns one
            # extra leading entry (presumably the current ranker) — confirm shape.
            fitness += ranker.fitness(canditate_rankers, record, train_set)[1:]
        # CMA-ES minimises its objective; verify ranker.fitness sign convention.
        es.tell(canditate_rankers, fitness / 10)
        # NOTE(review): argmax over fitness[1:] yields an index shifted by one
        # relative to canditate_rankers — possible off-by-one; verify intent.
        best = np.argmax(fitness[1:])
        ranker.assign_weights(canditate_rankers[best])
        all_result = ranker.get_all_query_result_list(test_set)
        ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        ndcg_scores.append(ndcg)
        cndcg_scores.append(cndcg)
    final_weight = ranker.get_current_weights()
    # NOTE(review): raises NameError if the batch loop never ran (num_interation < 10).
    print(ndcg, cndcg)
    return ndcg_scores, cndcg_scores, final_weight
def run(train_intents, ranker, num_interation, click_model, group_sequence):
    """Online LTR over a sequence of intent groups.

    The active training set switches every 50000 interactions following
    ``group_sequence``. ndcg_scores[0] tracks the active intent; slot k+1
    tracks intent k at every 1000-interaction checkpoint.

    Returns (ndcg_scores, cndcg_scores).
    """
    ndcg_scores = []
    # One list for the active-intent curve plus one per intent.
    for x in range(len(train_intents) + 1):
        ndcg_scores.append([])
    cndcg_scores = []  # online nDCG@10 per interaction
    # All intents are assumed to share the same query ids (taken from intent 0).
    query_set = train_intents[0].get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    current_train_set = train_intents[group_sequence[0]]
    for i in index:
        # Every 50k interactions, log the old intent once and switch groups.
        if num_iter % 50000 == 0 and num_iter > 0:
            print("Change intent to", int(num_iter / 50000), "group id",
                  group_sequence[int(num_iter / 50000)])
            all_result = ranker.get_all_query_result_list(current_train_set)
            ndcg = evl_tool.average_ndcg_at_k(current_train_set, all_result, 10)
            ndcg_scores[0].append(ndcg)
            current_train_set = train_intents[group_sequence[int(num_iter / 50000)]]
        qid = query_set[i]
        result_list, scores = ranker.get_query_result_list(
            current_train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, current_train_set)
        ranker.update_to_clicks(
            click_label, result_list, scores,
            current_train_set.get_all_features_by_query(qid))
        # Checkpoint: offline nDCG@10 on the active intent and on every intent.
        if num_iter % 1000 == 0:
            all_result = ranker.get_all_query_result_list(current_train_set)
            ndcg = evl_tool.average_ndcg_at_k(current_train_set, all_result, 10)
            ndcg_scores[0].append(ndcg)
            for intent in range(len(train_intents)):
                all_result = ranker.get_all_query_result_list(
                    train_intents[intent])
                ndcg = evl_tool.average_ndcg_at_k(train_intents[intent],
                                                  all_result, 10)
                ndcg_scores[intent + 1].append(ndcg)
        cndcg = evl_tool.query_ndcg_at_k(current_train_set, result_list, qid, 10)
        cndcg_scores.append(cndcg)
        # print(num_iter, ndcg)
        num_iter += 1
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, eta, gamma, reward_method, num_interation,
        click_model):
    """Online policy-gradient LTR with inverse-propensity-weighted rewards.

    Propensities follow a position-bias model 1/rank**eta; rewards are the
    listwise MDP-style DCG rewards discounted by ``gamma``.

    Returns (ndcg_scores, cndcg_scores), sampled every 1000 interactions.
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    for i in index:
        qid = query_set[i]
        result_list = ranker.get_query_result_list(train_set, qid)
        clicked_doces, click_labels, _ = click_model.simulate(
            qid, result_list, train_set)
        # if no click data, skip this session (but keep the logging cadence).
        if len(clicked_doces) == 0:
            if num_iter % 1000 == 0:
                all_result = ranker.get_all_query_result_list(test_set)
                ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
                ndcg_scores.append(ndcg)
                cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
                cndcg_scores.append(cndcg)
            num_iter += 1
            continue
        # Examination propensity per rank: (1/rank)**eta.
        propensities = np.power(
            np.divide(1, np.arange(1.0, len(click_labels) + 1)), eta)
        # directly using pointwise rewards
        # rewards = get_DCG_rewards(click_labels, propensities, reward_method)
        # using listwise rewards
        rewards = get_DCG_MDPrewards(click_labels, propensities, reward_method,
                                     gamma=gamma)
        # ranker.record_episode(qid, result_list, rewards)
        ranker.update_policy(qid, result_list, rewards, train_set)
        if num_iter % 1000 == 0:
            all_result = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
            ndcg_scores.append(ndcg)
            cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
            cndcg_scores.append(cndcg)
        # all_result = ranker.get_all_query_result_list(test_set)
        # ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        # print(num_iter, ndcg)
        num_iter += 1
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, num_interation, click_model, num_rankers):
    """Online LTR via an evolution-strategy style gradient estimated with SNIPS.

    Each interaction samples ``num_rankers`` random perturbation directions,
    scores the corresponding candidate rankers counterfactually (SNIPS), and
    steps along the advantage-weighted average direction.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    # Parameter is reused as the interaction counter from here on.
    num_interation = 0
    for i in index:
        num_interation += 1
        qid = query_set[i]
        result_list = ranker.get_query_result_list(train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(qid, result_list,
                                                           train_set)
        # if no clicks, skip the update but still log current performance.
        if len(clicked_doc) == 0:
            all_result = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
            cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
            ndcg_scores.append(ndcg)
            cndcg_scores.append(cndcg)
            continue
        # flip click label. exp: [1,0,1,0,0] -> [0,1,0,0,0]
        last_click = np.where(click_label == 1)[0][-1]
        click_label[:last_click + 1] = 1 - click_label[:last_click + 1]
        # bandit record
        record = (qid, result_list, click_label, ranker.get_current_weights())
        unit_vectors = ranker.sample_random_vectors(num_rankers)
        canditate_rankers = ranker.sample_canditate_rankers(unit_vectors)
        # Self-normalised IPS estimates, one per candidate; None means
        # the record could not be evaluated.
        R = ranker.get_SNIPS(canditate_rankers, record)
        if R is not None:
            # Drop the leading entry (presumably the current ranker) so R
            # aligns with unit_vectors — TODO confirm get_SNIPS's layout.
            R = R[1:]
            # Standardised advantages.
            A = (R - np.mean(R)) / np.std(R)
            # NOTE(review): `sigma` is not defined in this function — it is
            # assumed to be a module-level perturbation scale; confirm.
            gradient = np.dot(unit_vectors.T, A) / (num_rankers * sigma)
            ranker.update(gradient)
        all_result = ranker.get_all_query_result_list(test_set)
        ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        ndcg_scores.append(ndcg)
        cndcg_scores.append(cndcg)
    final_weight = ranker.get_current_weights()
    # print(num_interation, ndcg, cndcg)
    return ndcg_scores, cndcg_scores, final_weight
def run(train_set, test_set, ranker, num_interation, click_model, num_rankers):
    """Dueling-bandit style online LTR.

    Per interaction: simulate clicks on the shown list, flip labels up to the
    last click, let the ranker pick winners among perturbed candidates, and
    step toward the mean winning direction. Logs offline nDCG@10 on
    ``test_set`` and online nDCG@10 on the shown list every interaction.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores, cndcg_scores = [], []
    query_set = train_set.get_all_querys()
    sampled = np.random.randint(query_set.shape[0], size=num_interation)
    for idx in sampled:
        qid = query_set[idx]
        result_list = ranker.get_query_result_list(train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, train_set)
        if len(clicked_doc) == 0:
            # No clicks: nothing to learn from; just record performance.
            offline = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, offline, 10)
            cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
            ndcg_scores.append(ndcg)
            cndcg_scores.append(cndcg)
            continue
        # Flip labels up to the last click, e.g. [1,0,1,0,0] -> [0,1,0,0,0].
        last_click = np.where(click_label == 1)[0][-1]
        click_label[:last_click + 1] = 1 - click_label[:last_click + 1]
        # One bandit record of this interaction.
        record = (qid, result_list, click_label, ranker.get_current_weights())
        directions = ranker.sample_unit_vectors(num_rankers)
        candidates = ranker.sample_canditate_rankers(directions)
        winners = ranker.infer_winners(candidates, record)
        if winners is not None:
            # Average the winning perturbation directions and step.
            step = np.sum(directions[winners - 1], axis=0) / winners.shape[0]
            ranker.update(step)
        offline = ranker.get_all_query_result_list(test_set)
        ndcg = evl_tool.average_ndcg_at_k(test_set, offline, 10)
        cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        ndcg_scores.append(ndcg)
        cndcg_scores.append(cndcg)
    final_weight = ranker.get_current_weights()
    print(ndcg, cndcg)
    return ndcg_scores, cndcg_scores, final_weight
def run(train_set, test_set, ranker, num_interation, click_model):
    """Basic online LTR loop: update the ranker from simulated clicks.

    Every 1000 interactions logs offline nDCG@10 on ``test_set`` and online
    nDCG@10 of the list shown for the sampled query.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    picks = np.random.randint(query_set.shape[0], size=num_interation)
    for step, pick in enumerate(picks):
        qid = query_set[pick]
        result_list, scores = ranker.get_query_result_list(train_set, qid)
        _, click_label, _ = click_model.simulate(qid, result_list, train_set)
        ranker.update_to_clicks(click_label, result_list, scores,
                                train_set.get_all_features_by_query(qid))
        if step % 1000 == 0:
            full_ranking = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, full_ranking, 10)
            # print(ndcg)
            ndcg_scores.append(ndcg)
            cndcg_scores.append(
                evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10))
        # Snapshot after every interaction; the last one is returned.
        final_weight = ranker.get_current_weights()
    return ndcg_scores, cndcg_scores, final_weight
def run(train_set, test_set, ranker, num_interation):
    """Train the policy ranker against oracle DCG rewards (no click model).

    Uses the true relevance-derived DCGs for each shown list as the reward
    signal. Logs offline and online nDCG@10 every 1000 interactions.

    Returns (ndcg_scores, cndcg_scores).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    draws = np.random.randint(query_set.shape[0], size=num_interation)
    for step, draw in enumerate(draws):
        qid = query_set[draw]
        result_list = ranker.get_query_result_list(train_set, qid)
        rewards = get_real_DCGs(qid, result_list, train_set)
        # ranker.record_episode(qid, result_list, rewards)
        ranker.update_policy(qid, result_list, rewards, train_set)
        if step % 1000 == 0:
            full_ranking = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, full_ranking, 10)
            ndcg_scores.append(ndcg)
            print(step, ndcg)
            cndcg_scores.append(
                evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10))
        # all_result = ranker.get_all_query_result_list(test_set)
        # ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, num_interation, click_model):
    """Online LTR loop that evaluates after every single interaction.

    Unlike the checkpointed variants, this logs offline nDCG@10 on
    ``test_set`` and online nDCG@10 for every interaction.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    for i in index:
        qid = query_set[i]
        # Returns the scored result list (length at most 10) and the score
        # list for all documents associated with the current query.
        result_list, scores = ranker.get_query_result_list(train_set, qid)
        # Three return values: the clicked documents, click labels for all
        # shown documents, and whether the user was satisfied.
        clicked_doc, click_label, _ = click_model.simulate(qid, result_list,
                                                           train_set)
        ranker.update_to_clicks(click_label, result_list, scores,
                                train_set.get_all_features_by_query(qid))
        all_result = ranker.get_all_query_result_list(test_set)
        ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        ndcg_scores.append(ndcg)
        cndcg_scores.append(cndcg)
    final_weight = ranker.get_current_weights()
    # print(ndcg, cndcg)
    return ndcg_scores, cndcg_scores, final_weight
def run(train_sets, rankers, num_interation, click_model):
    """Online LTR with one ranker per intent, intents drawn uniformly.

    Each interaction picks one of four intents with probability 0.25 and
    trains only that intent's ranker on that intent's training set.
    Every 1000 interactions logs nDCG@10 for the active intent's ranker.

    Returns (ndcg_scores, cndcg_scores).
    """
    ndcg_scores = []
    cndcg_scores = []
    # All intents are assumed to share the same query ids (taken from set 0).
    query_set = train_sets[0].get_all_querys()
    picks = np.random.randint(query_set.shape[0], size=num_interation)
    intents_probs = [0.25, 0.25, 0.25, 0.25]
    for step, pick in enumerate(picks):
        intent = np.random.choice(4, 1, p=intents_probs)[0]
        data = train_sets[intent]
        active = rankers[intent]
        qid = query_set[pick]
        result_list, scores = active.get_query_result_list(data, qid)
        _, click_label, _ = click_model.simulate(qid, result_list, data)
        active.update_to_clicks(click_label, result_list, scores,
                                data.get_all_features_by_query(qid))
        if step % 1000 == 0:
            ranking = active.get_all_query_result_list(data)
            ndcg_scores.append(evl_tool.average_ndcg_at_k(data, ranking, 10))
            cndcg_scores.append(
                evl_tool.query_ndcg_at_k(data, result_list, qid, 10))
        # print(step, ndcg)
    return ndcg_scores, cndcg_scores
def run(train_set, ranker, num_interation, click_model, num_rankers):
    """Online LTR via probabilistic multileaving.

    Each interaction multileaves the current ranker with ``num_rankers``
    perturbed candidates, infers winners from simulated clicks on the
    interleaved list, and steps toward the mean winning perturbation.

    Fix: the candidate-sampling loop previously reused the outer loop
    variable ``i`` (``for i in range(num_rankers)``), shadowing the query
    index; it now uses its own name.

    Returns (ndcg_scores, cndcg_scores).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    current_train_set = train_set
    for i in index:
        qid = query_set[i]
        query_features = current_train_set.get_all_features_by_query(qid)
        # Candidate pool: current ranker first, then perturbed candidates.
        rankers = [ranker]
        us = []  # perturbation direction for each candidate
        for _cand in range(num_rankers):
            new_ranker, new_u = ranker.get_new_candidate()
            rankers.append(new_ranker)
            us.append(new_u)
        (inter_list, a) = ranker.probabilistic_multileave(rankers,
                                                          query_features, 10)
        _, click_label, _ = click_model.simulate(qid, inter_list,
                                                 current_train_set)
        outcome = ranker.probabilistic_multileave_outcome(
            inter_list, rankers, click_label, query_features)
        # Candidates that beat the current ranker (index 0).
        winners = np.where(np.array(outcome) > outcome[0])
        if np.shape(winners)[1] != 0:
            # Average the winning directions (winner index 0 is the current
            # ranker, hence the -1 offset into `us`) and take a step.
            u = np.zeros(ranker.feature_size)
            for winner in winners[0]:
                u += us[winner - 1]
            u = u / np.shape(winners)[1]
            ranker.update_weights(u, alpha=ranker.learning_rate)
        if num_iter % 1000 == 0:
            all_result = ranker.get_all_query_result_list(
                current_train_set, ranker.get_current_weights())
            ndcg = evl_tool.average_ndcg_at_k(current_train_set, all_result, 10)
            ndcg_scores.append(ndcg)
            cndcg = evl_tool.query_ndcg_at_k(current_train_set, inter_list,
                                             qid, 10)
            cndcg_scores.append(cndcg)
        num_iter += 1
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, eta, gamma, reward_method, num_interation,
        click_model):
    """Position-based online loop: reveal one document per rank and compute
    an IPS-corrected DCG reward for each placement.

    NOTE(review): the ``ranker.update_policy`` call below is commented out,
    so as written the ranker is never updated — confirm this is intentional.

    Returns (ndcg_scores, cndcg_scores).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    for i in index:
        qid = query_set[i]
        displayed_docids = []
        # Build the displayed list one rank at a time (top 10 at most).
        for rank in range(10):
            result_list = ranker.get_query_result_list(train_set, qid)
            if len(displayed_docids) == len(result_list):
                break
            # Pick the highest-ranked document not yet displayed.
            # NOTE(review): if none remains, `docid` silently keeps the last
            # loop value — relies on the length check above; fragile.
            for docid in result_list:
                if docid not in displayed_docids:
                    displayed_docids.append(docid)
                    break
            # Candidates for this slot: the chosen doc plus all undisplayed docs.
            candidate_docids = []
            for id in result_list:
                if id == docid or id not in displayed_docids:
                    candidate_docids.append(id)
            click_label, propensity = click_model.simulate_with_position(
                qid, docid, train_set, rank)
            # DCG discount for this rank position.
            dcg = 1 / np.log2(rank + 2.0)
            # IPS-corrected reward: positive term for a click weighted by
            # 1/propensity, negative term for a skip.
            neg_reward = dcg * (click_label - 1) + (
                (1 - propensity) / propensity) * dcg * click_label
            pos_reward = dcg / propensity * click_label
            reward = pos_reward + neg_reward
            # ranker.update_policy(qid, candidate_docids, [reward], train_set)
        if num_iter % 1000 == 0:
            all_result = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
            ndcg_scores.append(ndcg)
            # print(num_iter, ndcg)
            cndcg = evl_tool.query_ndcg_at_k(train_set, displayed_docids, qid, 10)
            cndcg_scores.append(cndcg)
        # all_result = ranker.get_all_query_result_list(test_set)
        # ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        # print(num_iter, ndcg)
        num_iter += 1
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, num_interation, click_model, batch_size):
    """Mini-batched online LTR: accumulate per-interaction gradients and apply
    them once every ``batch_size`` interactions.

    Also tracks batch-averaged online MRR@10 and nDCG@10.

    Returns (ndcg_scores, cndcg_scores, cmrr_scores, final_weights).
    """
    ndcg_scores = []
    cndcg_scores = []
    cmrr_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    # Gradient accumulator, reset after every applied batch.
    gradients = np.zeros(train_set._feature_size)
    cmrr = 0   # running online MRR within the current batch
    cndcg = 0  # running online nDCG within the current batch
    for i in tqdm(index):
        num_iter += 1
        qid = query_set[i]
        result_list, scores = ranker.get_query_result_list(train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, train_set)
        # return_gradients=True: accumulate instead of updating in place.
        gradients += ranker.update_to_clicks(
            click_label, result_list, scores,
            train_set.get_all_features_by_query(qid), return_gradients=True)
        cmrr += evl_tool.online_mrr_at_k(click_label, 10)
        cndcg += evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        if num_iter % batch_size == 0:
            # Flush batch averages, apply the summed gradient, and evaluate.
            cmrr_scores.append(cmrr / batch_size)
            cndcg_scores.append(cndcg / batch_size)
            cmrr = 0
            cndcg = 0
            # gradients = gradients/batch_size
            ranker.update_to_gradients(gradients)
            gradients = np.zeros(train_set._feature_size)
            all_result = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
            ndcg_scores.append(ndcg)
            # print(ndcg)
    final_weights = ranker.get_current_weights()
    return ndcg_scores, cndcg_scores, cmrr_scores, final_weights
def run(train_set, test_set, ranker1, ranker2, num_interation, click_model):
    """Side-by-side comparison of two rankers on skewed query traffic.

    ``ranker1`` trains on raw simulated clicks; ``ranker2`` trains on clicks
    denoised with the true relevance labels, plus the last-examined rank.
    Queries are drawn from a softmax-skewed distribution over the query set.

    Fix: ``last_exam`` was only assigned inside commented-out code but is
    passed to ``ranker2.update_to_clicks`` below, raising NameError on the
    first iteration. It is now computed as in the commented-out lines: the
    rank just past the last click, or None when there were no clicks.

    Returns (ndcg_scores1, cndcg_scores1, final_weight1,
             ndcg_scores2, cndcg_scores2, final_weight2).
    """
    click_predictor = SDBN()
    ndcg_scores1 = []
    cndcg_scores1 = []
    ndcg_scores2 = []
    cndcg_scores2 = []
    query_set = train_set.get_all_querys()
    np.random.shuffle(query_set)
    # Kept although unused so the RNG consumption (and hence reproducibility)
    # of existing experiments is unchanged.
    index = np.random.randint(query_set.shape[0], size=num_interation)
    # Softmax (temperature 0.2) over normal draws -> skewed query popularity.
    pdf = np.random.normal(size=query_set.shape[0])
    e_x = np.exp((pdf - np.max(pdf)) / 0.2)
    probs = e_x / e_x.sum(axis=0)
    querys = np.random.choice(query_set, replace=True, p=probs,
                              size=num_interation)
    num_interaction = 0
    correct = 0
    wrong = 0
    test1 = 0
    test2 = 0
    for qid in querys:
        num_interaction += 1
        result_list1, scores1 = ranker1.get_query_result_list(train_set, qid)
        result_list2, scores2 = ranker2.get_query_result_list(train_set, qid)
        clicked_doc1, click_label1, _ = click_model.simulate(
            qid, result_list1, train_set)
        clicked_doc2, click_label2, _ = click_model.simulate(
            qid, result_list2, train_set)
        # Rank just past the last click (None when no clicks); consumed by
        # ranker2's update below.
        last_exam = None
        if len(clicked_doc2) > 0:
            last_exam = np.where(click_label2 == 1)[0][-1] + 1
        # click_predictor.online_training(qid, result_list2, click_label2)
        # reduce, reduced_index = click_predictor.click_noise_reduce(
        #     qid, result_list2, click_label2, 0.5, 20)
        # if reduce:
        #     for rank in reduced_index:
        #         if train_set.get_relevance_label_by_query_and_docid(
        #                 qid, result_list2[rank]) == 0:
        #             correct += 1
        #         else:
        #             wrong += 1
        # print(correct, wrong)
        # Oracle denoising: drop clicks on truly non-relevant documents.
        # NOTE(review): this checks result_list2[clicked_doc_index] (the
        # top-k documents in order), not result_list2[j] (the clicked rank) —
        # possibly intended as result_list2[j]; verify before changing.
        clicked_doc_index = 0
        for j in np.where(click_label2 == 1)[0]:
            rel = train_set.get_relevance_label_by_query_and_docid(
                qid, result_list2[clicked_doc_index])
            if rel == 0:
                click_label2[j] = 0
            clicked_doc_index += 1
        ranker1.update_to_clicks(click_label1, result_list1, scores1,
                                 train_set.get_all_features_by_query(qid))
        ranker2.update_to_clicks(click_label2, result_list2, scores2,
                                 train_set.get_all_features_by_query(qid),
                                 last_exam)
        all_result1 = ranker1.get_all_query_result_list(test_set)
        ndcg1 = evl_tool.average_ndcg_at_k(test_set, all_result1, 10)
        cndcg1 = evl_tool.query_ndcg_at_k(train_set, result_list1, qid, 10)
        all_result2 = ranker2.get_all_query_result_list(test_set)
        ndcg2 = evl_tool.average_ndcg_at_k(test_set, all_result2, 10)
        cndcg2 = evl_tool.query_ndcg_at_k(train_set, result_list2, qid, 10)
        ndcg_scores1.append(ndcg1)
        cndcg_scores1.append(cndcg1)
        ndcg_scores2.append(ndcg2)
        cndcg_scores2.append(cndcg2)
        final_weight1 = ranker1.get_current_weights()
        final_weight2 = ranker2.get_current_weights()
        test1 += ndcg1
        test2 += ndcg2
        print(test1, test2)
    print(np.mean(ndcg_scores1), np.mean(ndcg_scores2))
    return (ndcg_scores1, cndcg_scores1, final_weight1, ndcg_scores2,
            cndcg_scores2, final_weight2)
def run(train_intents, ranker, num_interation, click_model, group_sequence):
    """Online LTR with drifting intent mixture.

    The dominant intent (probability 0.7; the others 0.1 each) rotates every
    50000 interactions. ndcg_scores[0] tracks the mixture-weighted overall
    nDCG@10; slot k+1 tracks intent k.

    NOTE(review): ``group_sequence`` is unused here; the dominant intent is
    simply ``num_iter // 50000`` — confirm that is intended.

    Returns (ndcg_scores, cndcg_scores).
    """
    intents_probs = [0.7, 0.1, 0.1, 0.1]
    ndcg_scores = []
    # One list for the mixture-weighted curve plus one per intent.
    for x in range(len(train_intents) + 1):
        ndcg_scores.append([])
    cndcg_scores = []
    query_set = train_intents[0].get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    for i in index:
        # Every 50k interactions: log the mixture-weighted nDCG once, then
        # shift the dominant intent.
        if num_iter % 50000 == 0 and num_iter > 0:
            over_all_ndcg = []
            for intent in range(len(train_intents)):
                all_result = ranker.get_all_query_result_list(
                    train_intents[intent])
                ndcg = evl_tool.average_ndcg_at_k(train_intents[intent],
                                                  all_result, 10)
                over_all_ndcg.append(ndcg)
            over_all_ndcg = np.array(over_all_ndcg)
            over_all_ndcg = np.sum(over_all_ndcg * intents_probs)
            ndcg_scores[0].append(over_all_ndcg)
            # Make intent (num_iter // 50000) dominant.
            for k in range(len(intents_probs)):
                if int(num_iter / 50000) == k:
                    intents_probs[k] = 0.7
                else:
                    intents_probs[k] = 0.1
        current_intent = np.random.choice(4, 1, p=intents_probs)[0]
        current_train_set = train_intents[current_intent]
        qid = query_set[i]
        result_list, scores = ranker.get_query_result_list(
            current_train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, current_train_set)
        ranker.update_to_clicks(
            click_label, result_list, scores,
            current_train_set.get_all_features_by_query(qid))
        # Checkpoint every 1000 interactions.
        if num_iter % 1000 == 0:
            over_all_ndcg = []
            for intent in range(len(train_intents)):
                all_result = ranker.get_all_query_result_list(
                    train_intents[intent])
                ndcg = evl_tool.average_ndcg_at_k(train_intents[intent],
                                                  all_result, 10)
                ndcg_scores[intent + 1].append(ndcg)
                over_all_ndcg.append(ndcg)
            over_all_ndcg = np.array(over_all_ndcg)
            over_all_ndcg = np.sum(over_all_ndcg * intents_probs)
            ndcg_scores[0].append(over_all_ndcg)
            cndcg = evl_tool.query_ndcg_at_k(current_train_set, result_list,
                                             qid, 10)
            cndcg_scores.append(cndcg)
        # print(num_iter, ndcg)
        num_iter += 1
    return ndcg_scores, cndcg_scores
def _log_intent_ndcgs(ranker, train_intents, current_train_set, ndcg_scores):
    """Append active-intent nDCG@10 (slot 0) and per-intent nDCG@10 (slots 1..)."""
    all_result = ranker.get_all_query_result_list(current_train_set)
    ndcg_scores[0].append(
        evl_tool.average_ndcg_at_k(current_train_set, all_result, 10))
    for intent in range(len(train_intents)):
        intent_result = ranker.get_all_query_result_list(train_intents[intent])
        ndcg_scores[intent + 1].append(
            evl_tool.average_ndcg_at_k(train_intents[intent], intent_result, 10))


def run(train_intents, ranker, num_interation, click_model, num_rankers):
    """Dueling-bandit online LTR across a sequence of intent groups.

    The active training set switches to intent (num_iter // 50000) every
    50000 interactions. Generalised: the score lists and per-intent
    evaluation loops were hard-coded to exactly 4 intents
    (``[[], [], [], [], []]`` and ``range(4)``); they are now sized from
    ``len(train_intents)`` with identical behaviour for 4 intents.

    Returns (ndcg_scores, cndcg_scores).
    """
    # Slot 0 tracks the active intent; slot k+1 tracks intent k.
    ndcg_scores = [[] for _ in range(len(train_intents) + 1)]
    cndcg_scores = []
    query_set = train_intents[0].get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    num_iter = 0
    current_train_set = train_intents[0]
    for i in index:
        # Every 50k interactions, log the old intent once and switch.
        if num_iter % 50000 == 0 and num_iter > 0:
            all_result = ranker.get_all_query_result_list(current_train_set)
            ndcg_scores[0].append(
                evl_tool.average_ndcg_at_k(current_train_set, all_result, 10))
            current_train_set = train_intents[int(num_iter / 50000)]
        qid = query_set[i]
        result_list = ranker.get_query_result_list(current_train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, current_train_set)
        # No clicks: nothing to learn from, but keep the evaluation cadence.
        if len(clicked_doc) == 0:
            if num_iter % 1000 == 0:
                _log_intent_ndcgs(ranker, train_intents, current_train_set,
                                  ndcg_scores)
                cndcg_scores.append(
                    evl_tool.query_ndcg_at_k(current_train_set, result_list,
                                             qid, 10))
            num_iter += 1
            continue
        # Flip labels up to the last click, e.g. [1,0,1,0,0] -> [0,1,0,0,0].
        last_click = np.where(click_label == 1)[0][-1]
        click_label[:last_click + 1] = 1 - click_label[:last_click + 1]
        # Bandit record of this interaction.
        record = (qid, result_list, click_label, ranker.get_current_weights())
        unit_vectors = ranker.sample_unit_vectors(num_rankers)
        # canditate_rankers are ranker weights, not ranker objects.
        canditate_rankers = ranker.sample_canditate_rankers(unit_vectors)
        # winner_rankers are indices of candidates that win the evaluation.
        winner_rankers = ranker.infer_winners(canditate_rankers[:num_rankers],
                                              record)
        if winner_rankers is not None:
            gradient = np.sum(unit_vectors[winner_rankers - 1],
                              axis=0) / winner_rankers.shape[0]
            ranker.update(gradient)
        if num_iter % 1000 == 0:
            _log_intent_ndcgs(ranker, train_intents, current_train_set,
                              ndcg_scores)
            cndcg_scores.append(
                evl_tool.query_ndcg_at_k(current_train_set, result_list, qid,
                                         10))
        num_iter += 1
    return ndcg_scores, cndcg_scores
def run(train_set, test_set, ranker, num_interation, click_model, num_rankers):
    """Dueling-bandit online LTR using the renormalised winner inference.

    Same loop as the plain dueling-bandit variant, but winners are inferred
    with ``infer_winners_renomalize``. Performance is logged every interaction.

    Returns (ndcg_scores, cndcg_scores, final_weight).
    """
    ndcg_scores = []
    cndcg_scores = []
    query_set = train_set.get_all_querys()
    index = np.random.randint(query_set.shape[0], size=num_interation)
    # Parameter is reused as the interaction counter from here on.
    num_interation = 0
    correct = 0  # used only by the commented-out sanity check below
    wrong = 0
    for i in index:
        num_interation += 1
        qid = query_set[i]
        result_list = ranker.get_query_result_list(train_set, qid)
        clicked_doc, click_label, _ = click_model.simulate(
            qid, result_list, train_set)
        # if no clicks, skip the update but still log current performance.
        if len(clicked_doc) == 0:
            all_result = ranker.get_all_query_result_list(test_set)
            ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
            cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
            ndcg_scores.append(ndcg)
            cndcg_scores.append(cndcg)
            continue
        # flip click label. exp: [1,0,1,0,0] -> [0,1,0,0,0]
        last_click = np.where(click_label == 1)[0][-1]
        click_label[:last_click + 1] = 1 - click_label[:last_click + 1]
        # bandit record
        record = (qid, result_list, click_label, ranker.get_current_weights())
        unit_vectors = ranker.sample_unit_vectors(num_rankers)
        canditate_rankers = ranker.sample_canditate_rankers(
            unit_vectors
        )  # canditate_rankers are ranker weights, not ranker class
        winner_rankers = ranker.infer_winners_renomalize(
            canditate_rankers[:num_rankers], record
        )  # winner_rankers are index of candidates rankers who win the evaluation
        #### This part of code is used to test correctness of counterfactual evaluation ####
        # if winner_rankers is not None:
        #     all_result = utility.get_query_result_list(ranker.get_current_weights(), train_set, qid)
        #     current_ndcg = evl_tool.query_ndcg_at_k(train_set, all_result, qid, 10)
        #     for weights in canditate_rankers[winner_rankers - 1]:
        #         canditate_all_result = utility.get_query_result_list(weights, train_set, qid)
        #         canditate_all_result_ndcg = evl_tool.query_ndcg_at_k(train_set, canditate_all_result, qid, 10)
        #
        #         if canditate_all_result_ndcg >= current_ndcg:
        #             correct += 1
        #         else:
        #             wrong += 1
        #     print(correct, wrong, correct / (correct + wrong))
        ######################################################################################
        if winner_rankers is not None:
            # Average the winning perturbation directions (index 0 is the
            # current ranker, hence the -1 offset) and take a step.
            gradient = np.sum(unit_vectors[winner_rankers - 1],
                              axis=0) / winner_rankers.shape[0]
            ranker.update(gradient)
        all_result = ranker.get_all_query_result_list(test_set)
        ndcg = evl_tool.average_ndcg_at_k(test_set, all_result, 10)
        cndcg = evl_tool.query_ndcg_at_k(train_set, result_list, qid, 10)
        ndcg_scores.append(ndcg)
        cndcg_scores.append(cndcg)
    final_weight = ranker.get_current_weights()
    # print(num_interation, ndcg, cndcg)
    return ndcg_scores, cndcg_scores, final_weight