def run(simulator, dataset, run):
    """Evaluate a saved DFBNCM click model on the seen-query test log.

    Loads the Keras model stored for ``simulator.name`` / ``run``, initialises
    the click model's representation from the training click log, then writes
    perplexity and MSE values for each query-frequency bucket to
    ``../click_model_results/<simulator.name>/seen_set<run>_DFBNCM_result.txt``.

    Args:
        simulator: Click simulator; its ``name`` attribute selects the input
            and output directories, and the object itself is passed to
            ``click_model.get_MSE``.
        dataset: Passed unchanged to the ``DFBNCM`` constructor and to
            ``click_model.get_MSE``.
        run: Run index used in every file name. NOTE: this parameter shadows
            the function's own name inside the body; it is kept for interface
            compatibility with existing callers.
    """
    model_path = '../click_model_results/DFBNCM_model/{}/train_set{}.h5'.format(
        simulator.name, run)
    # NOTE(review): the meaning of the numeric constructor arguments is not
    # visible here -- see the DFBNCM class definition.
    click_model = DFBNCM(256, 1024, 10240, 700, 700, dataset,
                         model=load_model(model_path))

    click_log_path = "../click_logs/{}/train_set{}.txt".format(simulator.name, run)
    click_log = rf.read_click_log(click_log_path)
    click_model.initial_representation(click_log)

    # Training is disabled; the model is loaded from the saved .h5 file above.
    # The original training/saving calls are kept here for reference:
    # click_model.train_tfrecord('../click_logs/{}/train_set{}_DFBNCM.tfrecord'.format(simulator.name, run),
    #                            batch_size=64,
    #                            epoch=20,
    #                            steps_per_epoch=1)
    #
    # click_model.model.save("../click_model_results/DFBNCM_model/{}/train_set{}.h5".format(simulator.name, run))

    test_click_log_path = "../click_logs/{}/seen_set{}.txt".format(simulator.name, run)
    query_frequency_path = "../click_logs/{}/query_frequency{}.txt".format(simulator.name, run)
    test_click_log = rf.read_click_log(test_click_log_path)
    query_frequency = rf.read_query_frequency(query_frequency_path)

    # Group test sessions by the frequency label of their query (first field
    # of each session). A label outside these four buckets raises KeyError.
    frequencies = ['10', '100', '1000', '10000']
    test_logs = {freq: [] for freq in frequencies}
    # assumes read_click_log returns an array whose first axis indexes
    # sessions (the original indexed it via .shape[0]) -- TODO confirm
    for session in test_click_log:
        qid = session[0]
        test_logs[query_frequency[qid]].append(session)

    result_path = "../click_model_results/{}/seen_set{}_{}_result.txt".format(
        simulator.name, run, "DFBNCM")
    # The context manager guarantees the file is closed even when one of the
    # evaluation calls below raises.
    with open(result_path, "w+") as f:
        f.write("Click Model:" + "DFBNCM" + "\n")
        for freq in frequencies:
            # A fresh array is built for each call, as in the original code.
            perplexities = click_model.get_perplexity(np.array(test_logs[freq]))
            MSEs = click_model.get_MSE(np.array(test_logs[freq]), dataset, simulator)

            perplexity_line = "Frequency " + freq + " perplexities:" + "".join(
                " " + str(perp) for perp in perplexities)
            MSEs_line = "Frequency " + freq + " MSE:" + "".join(
                " " + str(MSE) for MSE in MSEs)

            f.write(perplexity_line + "\n")
            f.write(MSEs_line + "\n")
# Parameter lists handed to the click simulators below.
pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]

# (result-directory name, simulator instance) pairs to evaluate.
datasets_simulator = [
    ('SDBN', SDBN(pc, ps)),
    ('SDCM', SDCM(pc)),
    ('CM', CM(pc)),
    ('DCTR', DCTR(pc)),
]

# Spawning processes must be guarded so that child processes which re-import
# this module (spawn/forkserver start methods) do not launch workers again.
if __name__ == "__main__":
    processors = []
    for dataset, simulator in datasets_simulator:
        for run_id in range(1, 2):
            print(dataset, "DFBNCM", "running!")
            # run() accepts exactly (simulator, dataset, run) and reads its
            # own click logs and opens its own result file. The previous call
            # passed seven positional arguments (pre-loaded logs, a baseline
            # click model and an open file handle), which fails with TypeError
            # in the child; opening those handles in "w+" mode also truncated
            # the baseline models' result files.
            # NOTE(review): `train_set` is not defined in this section of the
            # file; presumably it is the dataset object loaded earlier --
            # confirm it is what run() expects as its `dataset` argument.
            p = mp.Process(target=run, args=(simulator, train_set, run_id))
            p.start()
            processors.append(p)

    # Wait for every worker so failures surface before the script exits.
    for p in processors:
        p.join()