def job(model_type, f, train_set, test_set, output_fold, batch_size, pc, ps):
    cm = SDBN(pc, ps)
    for r in range(1, 2):
        # np.random.seed(r)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
        print("PDGD fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, cmrr_scores, final_weights = run(
            train_set, test_set, ranker, NUM_INTERACTION, cm, batch_size)

        # create directory if it does not exist
        os.makedirs(os.path.dirname("{}/fold{}/".format(output_fold, f)), exist_ok=True)
        with open("{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("{}/fold{}/{}_run{}_cmrr.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(cmrr_scores, fp)
        with open("{}/fold{}/{}_run{}_weights.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(final_weights, fp)
        print("PDGD fold{} {} run{} finished!".format(f, model_type, r))
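# --- usage sketch -----------------------------------------------------------
# A minimal, hypothetical driver for job(), assuming the per-fold
# multiprocessing pattern used by the dataset-generation script further down.
# The fold paths, the five-fold layout, batch_size=1, and the navigational
# pc/ps values are illustrative assumptions, not the experiment's settings.
if __name__ == "__main__":
    import multiprocessing as mp

    pc = [0.05, 0.3, 0.5, 0.7, 0.95]  # assumed navigational click probabilities
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]    # assumed stop probabilities
    processes = []
    for f in range(1, 6):  # hypothetical 5-fold dataset layout
        train_set = LetorDataset("../datasets/mq2007/fold{}/train.txt".format(f), FEATURE_SIZE)
        test_set = LetorDataset("../datasets/mq2007/fold{}/test.txt".format(f), FEATURE_SIZE)
        p = mp.Process(target=job,
                       args=("navigational", f, train_set, test_set,
                             "../results/PDGD", 1, pc, ps))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()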
def job(model_type, f, train_set, test_set, tau, sigma, gamma, num_rankers,
        learning_rate_decay, output_fold):
    if model_type == "perfect":
        pc = [0.0, 0.2, 0.4, 0.8, 1.0]
        ps = [0.0, 0.0, 0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.3, 0.5, 0.7, 0.95]
        ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.6, 0.7, 0.8, 0.9]
        ps = [0.1, 0.2, 0.3, 0.4, 0.5]
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = ESLinearRanker(FEATURE_SIZE, Learning_rate, sigma, tau, gamma,
                                learning_rate_decay=learning_rate_decay)
        print("ES fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set, ranker,
                                                      NUM_INTERACTION, cm, num_rankers)
        with open("{}/fold{}/{}_sigma{}_run{}_ndcg.txt".format(output_fold, f, model_type, sigma, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("{}/fold{}/{}_sigma{}_run{}_cndcg.txt".format(output_fold, f, model_type, sigma, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("{}/fold{}/{}_sigma{}_run{}_final_weight.txt".format(output_fold, f, model_type, sigma, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("ES {} sigma{} fold{} {} run{} finished!".format(output_fold, sigma, f, model_type, r))
def job(model_type, f, train_set, test_set, tau, r):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]
    cm = SDBN(pc, ps)

    # np.random.seed(r)
    ranker1 = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
    ranker2 = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
    print("PDGD tau{} fold{} {} run{} start!".format(tau, f, model_type, r))
    # run() returns the ndcg/cndcg traces and final weights for both rankers;
    # only the weights are saved here
    (ndcg_scores1, cndcg_scores1, final_weight1,
     ndcg_scores2, cndcg_scores2, final_weight2) = run(train_set, test_set,
                                                       ranker1, ranker2,
                                                       NUM_INTERACTION, cm)
    with open("./results/multiple_ranker/mq2007/PDGD/fold{}/{}_tau{}_run{}_ranker1_weights.txt"
              .format(f, model_type, tau, r), "wb") as fp:
        pickle.dump(final_weight1, fp)
    with open("./results/multiple_ranker/mq2007/PDGD/fold{}/{}_tau{}_run{}_ranker2_weights.txt"
              .format(f, model_type, tau, r), "wb") as fp:
        pickle.dump(final_weight2, fp)
    print("PDGD tau{} fold{} {} run{} finished!".format(tau, f, model_type, r))
def job(model_type, f, train_set, test_set, tau):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
        print("PDGD tau{} fold{} {} run{} start!".format(tau, f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set, ranker,
                                                      NUM_INTERACTION, cm)
        with open("../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_ndcg.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_cndcg.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_final_weight.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("PDGD tau{} fold{} {} run{} finished!".format(tau, f, model_type, r))
def job(model_type, Learning_rate, NUM_INTERACTION, f, train_set, intent_paths,
        output_fold, num_groups, group_sequence):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]
    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    elif model_type == "noisy":
        pc = [0.4, 0.6]
        ps = [0.0, 0.0]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths, num_groups=num_groups)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
        print("PDGD intent change {} fold{} run{} start!".format(model_type, f, r))
        ndcg_scores, cndcg_scores = run(datasets, ranker, NUM_INTERACTION, cm, group_sequence)

        # create directory if it does not exist
        os.makedirs(os.path.dirname("{}/current_intent/fold{}/".format(output_fold, f)), exist_ok=True)
        with open("{}/current_intent/fold{}/{}_run{}_cndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("{}/current_intent/fold{}/{}_run{}_ndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(ndcg_scores[0], fp)

        # per-intent ndcg lists start at index 1
        for i in range(len(ndcg_scores) - 1):
            # create directory if it does not exist
            os.makedirs(os.path.dirname("{}/intent{}/fold{}/".format(output_fold, i + 1, f)), exist_ok=True)
            with open("{}/intent{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores[i + 1], fp)
        print("PDGD intent change {} run{} finish!".format(model_type, r))
        print()
def job(model_type, f, train_set, test_set, tau, step_size, gamma, num_rankers,
        learning_rate_decay, output_fold):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]
    # 5-level click settings kept for reference:
    # if model_type == "perfect":
    #     pc = [0.0, 0.2, 0.4, 0.8, 1.0]
    #     ps = [0.0, 0.0, 0.0, 0.0, 0.0]
    # elif model_type == "navigational":
    #     pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    #     ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    # elif model_type == "informational":
    #     pc = [0.4, 0.6, 0.7, 0.8, 0.9]
    #     ps = [0.1, 0.2, 0.3, 0.4, 0.5]
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = COLTRLinearRanker(FEATURE_SIZE, Learning_rate, step_size, tau, gamma,
                                   learning_rate_decay=learning_rate_decay)
        print("COLTR {} tau{} fold{} {} run{} start!".format(output_fold, tau, f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set, ranker,
                                                      NUM_INTERACTION, cm, num_rankers)
        with open("{}/fold{}/{}_tau{}_run{}_ndcg.txt".format(output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("{}/fold{}/{}_tau{}_run{}_cndcg.txt".format(output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("{}/fold{}/{}_tau{}_run{}_final_weight.txt".format(output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("COLTR {} tau{} fold{} {} run{} finished!".format(output_fold, tau, f, model_type, r))
        utility.send_progress("@arvin {}".format(model_type), r, 25,
                              "final ndcg {}".format(ndcg_scores[-1]))
def job(model_type, Learning_rate, NUM_INTERACTION, f, train_set, intent_paths,
        output_fold, num_groups):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]
    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    elif model_type == "noisy":
        pc = [0.4, 0.6]
        ps = [0.0, 0.0]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths, num_groups=num_groups)
        # for i in range(len(datasets)):
        for i in [1, 0]:
            ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
            print("PDGD intent fixed {} intent {} run{} start!".format(model_type, i, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker, NUM_INTERACTION, cm)

            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(output_fold, i + 1, f)), exist_ok=True)
            with open("{}/group{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open("{}/group{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)
            print("PDGD intent fixed {} intent {} run{} finished!".format(model_type, i, r))
            print()
def job(model_type, f, train_set, intent_paths, tau, step_size, gamma, num_rankers,
        learning_rate_decay, output_fold):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]
    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths)
        for i in range(len(datasets)):
            ranker = COLTRLinearRanker(FEATURE_SIZE, Learning_rate, step_size, tau, gamma,
                                       learning_rate_decay=learning_rate_decay)
            print("COLTR fixed intent {} fold{} run{} start!".format(model_type, f, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker, NUM_INTERACTION, cm, num_rankers)

            # create directory if it does not exist
            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(output_fold, i + 1, f)), exist_ok=True)
            with open("{}/group{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open("{}/group{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)
            print("COLTR fixed intent {} run{} finish!".format(model_type, r))
            print()
def job(model_type, f, train_set, intent_paths, delta, alpha, FEATURE_SIZE,
        num_rankers, output_fold):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]
    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths)
        for i in range(len(datasets)):
            ranker = ProbabilisticRanker(delta, alpha, FEATURE_SIZE)
            print("Probabilistic intent fixed {} intent {} run{} start!".format(model_type, i, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker, NUM_INTERACTION, cm, num_rankers)

            # create directory if it does not exist
            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(output_fold, i + 1, f)), exist_ok=True)
            with open("{}/group{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open("{}/group{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)
            print("Probabilistic intent fixed {} intent {} run{} finished!".format(model_type, i, r))
            print()
def job(model_type, f, train_set, test_set, output_fold):
    if model_type == "perfect":
        # pc = [0.0, 0.2, 0.4, 0.8, 1.0]
        # pc = [0.0, 0.5, 1.0]
        pc = [0.0, 1.0]
        # ps = [0.0, 0.0, 0.0, 0.0, 0.0]
        # ps = [0.0, 0.0, 0.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.3, 0.5, 0.7, 0.95]
        # pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.3, 0.5, 0.7, 0.9]
        # ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        # pc = [0.4, 0.6, 0.7, 0.8, 0.9]
        # pc = [0.4, 0.7, 0.9]
        pc = [0.1, 0.9]
        # ps = [0.1, 0.2, 0.3, 0.4, 0.5]
        # ps = [0.1, 0.3, 0.5]
        ps = [0.1, 0.5]
    cm = SDBN(pc, ps)

    for r in range(1, 16):
        # np.random.seed(r)
        FEATURE_SIZE = 136
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
        print("PDGD fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, final_weights = run(train_set, test_set, ranker,
                                                       NUM_INTERACTION, cm)
        # create directory if it does not exist
        os.makedirs(os.path.dirname("{}/fold{}/".format(output_fold, f)), exist_ok=True)
        with open("{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("{}/fold{}/{}_run{}_weights.txt".format(output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(final_weights, fp)
        print("PDGD fold{} {} run{} finished!".format(f, model_type, r))
def job(model_type, f, train_set, test_set, tau, step_size, gamma, num_rankers,
        learning_rate_decay):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = CMAESLinearRanker(FEATURE_SIZE, Learning_rate, step_size, tau, gamma,
                                   learning_rate_decay=learning_rate_decay)
        print("COLTR tau{} fold{} {} run{} start!".format(tau, f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set, ranker,
                                                      NUM_INTERACTION, cm, num_rankers)
        with open("../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_ndcg.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open("../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_cndcg.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open("../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_final_weight.txt"
                  .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("COLTR tau{} fold{} {} run{} finished!".format(tau, f, model_type, r))
from clickModel.RCTR import RCTR
from clickModel.Mixed import Mixed
# these click-model imports are assumed to follow the clickModel package
# layout used by the other scripts in this repo
from clickModel.DCTR import DCTR
from clickModel.SDBN import SDBN
from clickModel.SDBN_reverse import SDBN_reverse
from clickModel.UBM import UBM
from utils import read_file as rf
from utils import utility
from dataset import LetorDataset
# import matplotlib.pyplot as plt
import numpy as np
import multiprocessing as mp

train_path = "../datasets/ltrc_yahoo/set1.train.txt"
print("loading training set.......")
train_set = LetorDataset(train_path, 700)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
mixed_models = [DCTR(pc), SDBN(pc, ps), UBM(pc)]

datasets_simulator = [
    ('SDBN', SDBN(pc, ps)),
    # ('SDCM', SDCM(pc)),
    # ('CM', CM(pc)),
    ('DCTR', DCTR(pc)),
    ('UBM', UBM(pc)),
    ('SDBN_reverse', SDBN_reverse(pc, ps))
]

click_model = RCTR()
for dataset, simulator in datasets_simulator:
    for id in range(1, 16):
        click_log_path = "../click_logs/{}/train_set{}.txt".format(dataset, id)
        click_log = rf.read_click_log(click_log_path)
        click_model.train(click_log)
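# --- evaluation sketch ------------------------------------------------------
# A hedged sketch of sanity-checking the trained RCTR model, reusing the
# get_perplexity/get_MSE calls that the evaluation script further down applies
# to other click models. The seen_set path mirrors the train_set paths above
# and is an assumption, as is pairing the mixed-trained model with an SDBN
# simulator for the MSE check.
test_log = rf.read_click_log("../click_logs/SDBN/seen_set1.txt")
perplexities = click_model.get_perplexity(np.array(test_log))
MSEs = click_model.get_MSE(np.array(test_log), train_set, SDBN(pc, ps))
print("perplexity per rank:", perplexities)
print("MSE per rank:", MSEs)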
# plt.show()

# %%
if __name__ == "__main__":
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.LetorDataset.txt"
    print("loading training set.......")
    with open(train_path, "rb") as fp:
        train_set = pickle.load(fp)
    # %%
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    datasets_simulator = [
        ('SDBN', SDBN(pc, ps)),
        # ('SDCM', SDCM(pc)),
        # ('CM', CM(pc)),
        ('DCTR', DCTR(pc)),
        ('UBM', UBM(pc)),
        # ('SDBN_reverse', SDBN_reverse(pc, ps))
    ]
    progress = 0
    for dataset, simulator in datasets_simulator:
        for id in range(2, 16):
            click_log_path = "../click_logs/{}/train_set{}.txt".format(dataset, id)
            test_click_log_path = "../click_logs/{}/seen_set{}.txt".format(dataset, id)
            query_frequency_path = "../click_logs/{}/query_frequency{}.txt".format(dataset, id)
print("loading testing set.......") test_set = LetorDataset(test_path, 700) # %% # pc = [0.4, 0.6, 0.7, 0.8, 0.9] # ps = [0.1, 0.2, 0.3, 0.4, 0.5] pc = [0.05, 0.3, 0.5, 0.7, 0.95] ps = [0.2, 0.3, 0.5, 0.7, 0.9] for id in range(1, 16): p1 = mp.Process(target=generate_dataset, args=(train_set, test_set, DCTR(pc), "../feature_click_datasets/DCTR/", id)) p2 = mp.Process(target=generate_dataset, args=(train_set, test_set, CM(pc), "../feature_click_datasets/CM/", id)) p3 = mp.Process(target=generate_dataset, args=(train_set, test_set, SDBN(pc, ps), "../feature_click_datasets/SDBN/", id)) p4 = mp.Process(target=generate_dataset, args=(train_set, test_set, SDCM(pc), "../feature_click_datasets/SDCM/", id)) p1.start() p2.start() p3.start() p4.start() p1.join() p2.join() p3.join() p4.join() print(" ")
import pickle
import bz2
import sys

import tensorflow as tf

from dataset import LetorDataset
from clickModel.SDBN import SDBN
# these two imports are assumed from the companion NCM scripts in this repo
from clickModel.NCM_TF import NCM
from utils import read_file as rf

train_path = "../datasets/ltrc_yahoo/set1.train.txt"
print("loading training set.......")
dataset = LetorDataset(train_path, 700)

model = NCM(64, 1024, 10240)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
simulator = SDBN(pc, ps)
click_log_path = "../feature_click_datasets/{}/train_set_test.txt".format("SDBN")
# click_log_path = "../click_logs/{}/train_set{}_small.txt".format("SDBN", "1")
click_log = rf.read_click_log(click_log_path)
model.initial_representation(click_log)
# model.save_training_set_numpy(click_log, "test", "SDBN")
# model.save_training_set(click_log, "../click_logs/{}/train_set{}_small_NCM.tfrecord".format("SDBN", "1"), "SDBN")
model.save_training_tfrecord(click_log, "NCM_test.tfrecord", "SDBN")
# model.save_training_set_numpy(click_log, "../click_logs/{}/train_set{}_NCM".format("SDBN", "1"), "SDBN")
# model.save_training_set_numpy(click_log, "test", "SDBN")
# plt.show()

# %%
if __name__ == "__main__":
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.train.txt"
    test_path = "../datasets/ltrc_yahoo/set1.test.txt"
    print("loading training set.......")
    train_set = LetorDataset(train_path, 700)
    # %%
    # print("loading testing set.......")
    # test_set = LetorDataset(test_path, 700)

    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    datasets_simulator = [('SDBN', SDBN(pc, ps)),
                          ('SDCM', SDCM(pc)),
                          ('CM', CM(pc)),
                          ('DCTR', DCTR(pc))]
    # datasets = ['CM']
    for dataset, simulator in datasets_simulator:
        for id in range(1, 2):
            click_log_path = "../feature_click_datasets/{}/train_set{}.txt".format(dataset, id)
            test_click_log_path = "../feature_click_datasets/{}/seen_set{}.txt".format(dataset, id)
            query_frequency_path = "../feature_click_datasets/{}/query_frequency{}.txt".format(dataset, id)
            click_log = rf.read_click_log(click_log_path)
            test_click_log = rf.read_click_log(test_click_log_path)
            query_frequency = rf.read_query_frequency(query_frequency_path)
            click_models = [SDBN(), SDCM(), CM(), DCTR()]
    for freq in frequencies:
        perplexities = click_model.get_perplexity(np.array(test_logs[freq]))
        MSEs = click_model.get_MSE(np.array(test_logs[freq]), dataset, simulator)
        perplexity_line = "Frequency " + freq + " perplexities:"
        MSEs_line = "Frequency " + freq + " MSE:"
        for perp in perplexities:
            perplexity_line += " " + str(perp)
        for MSE in MSEs:
            MSEs_line += " " + str(MSE)
        f.write(perplexity_line + "\n")
        f.write(MSEs_line + "\n")
    f.close()


if __name__ == "__main__":
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    Mixed_models = [DCTR(pc), SDBN(pc, ps), UBM(pc)]
    # simulators = [SDBN(pc, ps), Mixed(Mixed_models), DCTR(pc), UBM(pc)]
    simulators = [SDBN(pc, ps), DCTR(pc), UBM(pc)]
    dataset_path = "../datasets/ltrc_yahoo/set1.train.txt"
    print("loading training set.......")
    dataset = LetorDataset(dataset_path, 700)
    for r in range(1, 2):
        for simulator in simulators:
            run(simulator, dataset, r)
generator = "Mixed" click_log_path = "../feature_click_datasets/{}/train_set1.txt".format(generator) test_click_log_path = "../feature_click_datasets/{}/seen_set1.txt".format(generator) click_log = rf.read_click_log(click_log_path) test_click_log = rf.read_click_log(test_click_log_path) # # dataset = tf.data.TFRecordDataset(filenames='../feature_click_datasets/{}/train_set1.tfrecord'.format(generator)) # # # test_dataset = tf.data.TFRecordDataset(filenames='../feature_click_datasets/SDBN/seen_set1.tfrecord') # # #%% pc = [0.05, 0.3, 0.5, 0.7, 0.95] ps = [0.2, 0.3, 0.5, 0.7, 0.9] Mixed_models = [DCTR(pc), CM(pc), SDBN(pc, ps), SDCM(pc), UBM(pc)] simulator = Mixed(Mixed_models) print(click_log.shape) print(test_click_log.shape) # click_model = LSTMv2(700, 1024, train_set, batch_size=128, epoch=5) print(click_model.get_MSE(test_click_log[np.random.choice(test_click_log.shape[0], 1000)], train_set, simulator)) click_model.train(dataset) print(click_model.get_MSE(test_click_log[np.random.choice(test_click_log.shape[0], 1000)], train_set, simulator)) click_model.model.save("../click_model_results/LSTM_models/{}_train_set1.h5".format(generator)) # test model
def run(train_set, test_set, ranker1, ranker2, num_interactions, click_model):
    click_predictor = SDBN()
    ndcg_scores1 = []
    cndcg_scores1 = []
    ndcg_scores2 = []
    cndcg_scores2 = []
    query_set = train_set.get_all_querys()
    np.random.shuffle(query_set)
    index = np.random.randint(query_set.shape[0], size=num_interactions)

    # sample queries from a softmax (temperature 0.2) over Gaussian noise,
    # which gives a heavily skewed query-frequency distribution
    pdf = np.random.normal(size=query_set.shape[0])
    e_x = np.exp((pdf - np.max(pdf)) / 0.2)
    probs = e_x / e_x.sum(axis=0)
    querys = np.random.choice(query_set, replace=True, p=probs, size=num_interactions)

    num_interaction = 0
    correct = 0
    wrong = 0
    test1 = 0
    test2 = 0
    for qid in querys:
        num_interaction += 1
        # qid = query_set[i]
        result_list1, scores1 = ranker1.get_query_result_list(train_set, qid)
        result_list2, scores2 = ranker2.get_query_result_list(train_set, qid)

        clicked_doc1, click_label1, _ = click_model.simulate(qid, result_list1, train_set)
        clicked_doc2, click_label2, _ = click_model.simulate(qid, result_list2, train_set)

        # rank just below ranker2's last click, passed to update_to_clicks
        # as the examination cutoff
        last_exam = None
        if len(clicked_doc2) > 0:
            last_exam = np.where(click_label2 == 1)[0][-1] + 1

        # # click_predictor.online_training(qid, result_list2, click_label2)
        # reduce, reduced_index = click_predictor.click_noise_reduce(qid, result_list2, click_label2, 0.5, 20)
        # if reduce:
        #     for rank in reduced_index:
        #         # print(train_set.get_relevance_label_by_query_and_docid(qid, result_list2[rank]))
        #         if train_set.get_relevance_label_by_query_and_docid(qid, result_list2[rank]) == 0:
        #             correct += 1
        #         else:
        #             wrong += 1
        # print(correct, wrong)

        # oracle denoising for ranker2: drop clicks that landed on
        # non-relevant documents
        for j in np.where(click_label2 == 1)[0]:
            rel = train_set.get_relevance_label_by_query_and_docid(qid, result_list2[j])
            if rel == 0:
                click_label2[j] = 0

        ranker1.update_to_clicks(click_label1, result_list1, scores1,
                                 train_set.get_all_features_by_query(qid))
        ranker2.update_to_clicks(click_label2, result_list2, scores2,
                                 train_set.get_all_features_by_query(qid), last_exam)

        all_result1 = ranker1.get_all_query_result_list(test_set)
        ndcg1 = evl_tool.average_ndcg_at_k(test_set, all_result1, 10)
        cndcg1 = evl_tool.query_ndcg_at_k(train_set, result_list1, qid, 10)
        all_result2 = ranker2.get_all_query_result_list(test_set)
        ndcg2 = evl_tool.average_ndcg_at_k(test_set, all_result2, 10)
        cndcg2 = evl_tool.query_ndcg_at_k(train_set, result_list2, qid, 10)

        ndcg_scores1.append(ndcg1)
        cndcg_scores1.append(cndcg1)
        ndcg_scores2.append(ndcg2)
        cndcg_scores2.append(cndcg2)
        final_weight1 = ranker1.get_current_weights()
        final_weight2 = ranker2.get_current_weights()
        test1 += ndcg1
        test2 += ndcg2
        print(test1, test2)

    print(np.mean(ndcg_scores1), np.mean(ndcg_scores2))
    return ndcg_scores1, cndcg_scores1, final_weight1, ndcg_scores2, cndcg_scores2, final_weight2
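# --- sampling illustration --------------------------------------------------
# A self-contained sketch of the query-sampling step in run() above, to make
# its effect concrete: a softmax with temperature 0.2 over Gaussian noise puts
# most of the probability mass on a few queries, so the simulated click stream
# is dominated by a handful of "frequent" queries. The query ids below are
# made up for illustration.
if __name__ == "__main__":
    import numpy as np

    query_set = np.array([101, 102, 103, 104, 105])
    pdf = np.random.normal(size=query_set.shape[0])
    e_x = np.exp((pdf - np.max(pdf)) / 0.2)
    probs = e_x / e_x.sum(axis=0)
    sampled = np.random.choice(query_set, replace=True, p=probs, size=1000)
    ids, counts = np.unique(sampled, return_counts=True)
    print(dict(zip(ids, counts)))  # typically one or two ids dominate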
import pickle

import numpy as np

from clickModel.NCM_TF import NCM
from clickModel.SDBN import SDBN
from dataset import LetorDataset
from utils import read_file as rf

model = NCM(774, 100, 10240 + 1024 + 1, 2)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
simulator = SDBN(pc, ps)

click_log_path = "../feature_click_datasets/{}/train_set{}.txt".format("SDBN", "_test")
click_log = rf.read_click_log(click_log_path)
model.initial_representation(click_log)

# session = np.array(['1112', '16', '3', '45', '37', '31', '22', '5', '34', '17',
#                     '21', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0'])
# model.save_training_set(click_log, "")
# with open("X.txt", "rb") as fp:
#     X = pickle.load(fp)
#
# with open("Y.txt", "rb") as fp:
#     Y = pickle.load(fp)
#
# train_path = "../datasets/ltrc_yahoo/set1.train.txt"
#