def train_and_evaluate(player1, player2, games, evaluation_period, experiment_name=EXPERIMENT_NAME, silent=False):
    simulation = Othello(player1, player2)
    start_time = datetime.now()

    for i in range(games // evaluation_period):
        simulation.run_simulations(episodes=evaluation_period, silent=silent)
        evaluation.evaluate_all([player1, player2], 20)

        if not silent:
            Printer.print_inplace(
                "Episode %s/%s" % (evaluation_period * (i + 1), games),
                evaluation_period * (i + 1) / games * 100,
                datetime.now() - start_time)

    # save artifacts
    player1.plotter.clear_plots(experiment_name)
    for player in (player1, player2):
        player.plotter.plot_results(experiment_name)
        player.plotter.plot_scores(experiment_name)
        player.save(experiment_name)
def train_continuous(player1, player2, games, experiment_name, iterations, start_time=None):
    """Trains a pair of players for @games, selects the stronger of the two to continue and repeats the process for @iterations"""
    # resolve the default at call time, not at import time
    start_time = start_time or datetime.now()
    print("Experiment name: %s" % experiment_name)

    # Initial evaluation
    evaluate_all([player1, player2], 8)

    for i in range(iterations):
        train(player1, player2, games, experiment_name)
        evaluate_all([player1, player2], 20)
        generate_and_save_artefacts([player1, player2], experiment_name)

        # keep the stronger player and clone it as the new opponent
        player1, player2 = (player1, player2) if compare_players(player1, player2, silent=(i != iterations - 1)) >= 0 else (player2, player1)
        player2 = player1.copy_with_inversed_color()

        print("Iteration %s/%s Simulation time: %s\n" % (i + 1, iterations, str(datetime.now() - start_time).split(".")[0]))

    return player1, player2
def train_continuous_asymmetrical(player1, games, experiment_name, iterations, start_time=None, best=None):
    """Only train player1 while player2 is fixed to the currently best iteration and does not train"""
    # resolve the default at call time, not at import time
    start_time = start_time or datetime.now()
    print("Experiment name: %s" % experiment_name)

    if not best:
        best = player1.copy_with_inversed_color()
        best.add_to_name("-BEST-")
        best.replaced = []

    # Initial evaluation
    evaluate_all([player1, best], 8)

    # continuously improve
    for i in range(iterations):
        best.train = False
        train(player1, best, games, silent=True)
        evaluate_all([player1, best], 16)
        generate_and_save_artefacts([player1, best], experiment_name)

        # replace the frozen best player whenever the training player beats it
        if compare_players(player1, best, games=40, silent=(i != iterations - 1)) >= 0:
            best.value_function = player1.value_function.copy()
            best.plotter = player1.plotter.copy()
            best.opponents = player1.opponents.copy()
            best.replaced.append(i)

        Printer.print_inplace(text="Iteration %s/%s" % (i + 1, iterations),
                              percentage=100 * (i + 1) / iterations,
                              time_taken=str(datetime.now() - start_time).split(".")[0],
                              comment=" | Best player replaced at: %s\n" % best.replaced)

    print()
    evaluate_all([player1, best], 80, silent=False)
    return player1, best
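# A minimal usage sketch for the two self-play loops above, not part of the original
# script: the player constructor and the config/vF/EXPERIMENT_NAME names are assumed to
# match the test script further below, and the game/iteration counts are placeholders.
player1 = TDPlayer(color=config.BLACK, strategy=vF.LargeValueFunction, lr=0.1)
player2 = player1.copy_with_inversed_color()

# symmetric self-play: both players train and the stronger one is kept each round
player1, player2 = train_continuous(player1, player2, games=10000,
                                    experiment_name=EXPERIMENT_NAME, iterations=10)

# asymmetric self-play: only player1 trains against a frozen "best" snapshot
player1, best = train_continuous_asymmetrical(player1, games=10000,
                                              experiment_name=EXPERIMENT_NAME, iterations=10)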
# start evaluation
for i in range(0, number_of_evaluation):
    q = train_queries[random.choice(train_queries.keys())]

    l_rem = rem_learner.get_ranked_list(q)
    l_full = full_learner.get_ranked_list(q)

    c_rem = user_model.get_clicks(l_rem, q.get_labels())
    c_full = user_model.get_clicks(l_full, q.get_labels())

    s_rem = rem_learner.update_solution(c_rem)
    s_full = full_learner.update_solution(c_full)

    rem_ndcg_evaluation_train.append(
        evaluation.evaluate_all(s_rem, train_queries))
    full_ndcg_evaluation_train.append(
        evaluation.evaluate_all(s_full, train_queries))
    rem_ndcg_evaluation_test.append(
        evaluation.evaluate_all(s_rem, test_queries))
    full_ndcg_evaluation_test.append(
        evaluation.evaluate_all(s_full, test_queries))

# write the result to file
timestamp = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y-%m-%d %H:%M:%S')
f = open(
    "../../output/experiment2/" + timestamp + "k_" + str(k) + "d_" +
    str(d) + "rem_train.txt", "w")
f.write("%s" % str(rem_ndcg_evaluation_train) + "\n")
f.close()
("zjet_loss", "Validation/Z+jet")], title="Loss(CrossEntropy)", xlabel="Step", ylabel="Loss") meter.add_plot( x="step", ys=[("train_acc", "Train/Dijet"), ("dijet_acc", "Validation/Dijet"), ("zjet_acc", "Validation/Z+jet")], title="Accuracy", xlabel="Step", ylabel="Acc.") meter.add_plot( x="step", ys=[("train_auc", "Train/Dijet"), ("dijet_auc", "Validation/Dijet"), ("zjet_auc", "Validation/Z+jet")], title="AUC", xlabel="Step", ylabel="AUC") meter.finish() config.finish() return log_dir if __name__ == "__main__": from evaluation import evaluate_all log_dir = train() evaluate_all(log_dir)
import sys, random

try:
    import include, pickle
except ImportError:
    pass

import retrieval_system, environment, evaluation, query

learner = retrieval_system.ListwiseLearningSystem(64, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')
user_model = environment.CascadeUserModel('--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')
evaluation = evaluation.NdcgEval()

training_queries = query.load_queries(sys.argv[1], 64)
test_queries = query.load_queries(sys.argv[2], 64)

# run 10 simulated interactions and keep the latest solution
for i in range(10):
    q = training_queries[random.choice(training_queries.keys())]
    l = learner.get_ranked_list(q)
    c = user_model.get_clicks(l, q.get_labels())
    s = learner.update_solution(c)
    print i

print evaluation.evaluate_all(s, test_queries)
pickle.dump(learner.ranker, open("QueryData/" + "generalRanker" + ".data", "wb"))
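# A minimal sketch of re-using the dumped ranker in a later session. It only assumes
# the standard pickle API and the same evaluation/test_queries setup as above; whether
# the raw ranker object can be passed to evaluate_all directly may depend on the
# framework version, so treat the second line as illustrative.
general_ranker = pickle.load(open("QueryData/generalRanker.data", "rb"))
print evaluation.evaluate_all(general_ranker, test_queries)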
import sys
import include
import evaluation, query, ranker
import numpy as np
from ranker.AbstractRankingFunction import AbstractRankingFunction

evaluation = evaluation.NdcgEval()
bm25ranker = AbstractRankingFunction(["ranker.model.BM25"], 'first', 3, sample="utils.sample_fixed")
queries = query.load_queries(sys.argv[1], 64)
#print evaluation.evaluate_all(bm25ranker, queries)

fh = open(sys.argv[1] + ".out.missing-b0.45.txt", "w")
for k1 in sorted([2.6, 2.5]):
    for b in sorted([0.45]):
        #for k1 in np.arange(19.5, 100, 0.5):
        #    for b in np.arange(-1, 1.2, 0.1):
        #for k3 in np.arange(100*itt, 100*(itt+1), 10):
        k3 = 0.0
        bm25ranker.update_weights(np.array([k1, k3, b]))
        print >> fh, "k1:%f k3:%f b:%f score:%f" % (k1, k3, b, evaluation.evaluate_all(bm25ranker, queries))
fh.close()
print "d : " + str(d) print "========================" # start k number of runs for m in range(0, k): # for each k, we have different A matrix # as mentioned on the REMBO paper rem_learner = retrieval_system.ListwiseLearningSystemREMBO(64,d,'-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunctionREMBO -s 3 -d 0.1 -a 0.01') # start evaluation for i in range(0,number_of_evaluation): q = train_queries[random.choice(train_queries.keys())] l = rem_learner.get_ranked_list(q) c = user_model.get_clicks(l, q.get_labels()) s = rem_learner.update_solution(c) temp_ndcg_evaluation_train.append(evaluation.evaluate_all(s, train_queries)) temp_ndcg_evaluation_test.append(evaluation.evaluate_all(s, test_queries)) # calculate average ndcg for all evaluation # rem_ndcg_result[n][idx] = sum(temp_ndcg_evaluation_test) / float(len(temp_ndcg_evaluation_test)) # rem_ndcg_result[n][idx] = temp_ndcg_evaluation_test timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S') f = open("../../output/experiment1/" + timestamp + "k_" + str(k) + "d_" + str(d) + "_train.txt", "w") f.write("%s" % str(temp_ndcg_evaluation_train) + "\n") f.close() f2 = open("../../output/experiment1/" + timestamp + "k_" + str(k) + "d_" + str(d) + "_test.txt", "w") f2.write("%s" % str(temp_ndcg_evaluation_test) + "\n") f2.close() # write the result to file
query_samples = 10      # how many queries we sample
d_array = [3, 4, 5, 6]

# init user model, evaluation methods
user_model = environment.CascadeUserModel('--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')
evaluation = evaluation.NdcgEval()

# calculate using the full 64 dimensions
full_learner = retrieval_system.ListwiseLearningSystem(64, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')
full_ndcg_result = []
for i in range(0, query_samples):
    q = queries[random.choice(queries.keys())]
    l = full_learner.get_ranked_list(q)
    c = user_model.get_clicks(l, q.get_labels())
    s = full_learner.update_solution(c)
    full_ndcg_result.append(evaluation.evaluate_all(s, queries))
full_ranker = full_learner.get_solution()

# calculate using the lower dimensional slice(s) in d_array
rem_ndcg_result = [[0 for i in range(len(d_array))] for j in range(query_samples)]
rem_ranker = []
for idx in range(0, len(d_array)):
    d = d_array[idx]
    rem_learner = retrieval_system.ListwiseLearningSystemREMBO(64, d, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunctionREMBO -s 3 -d 0.1 -a 0.01')
    for i in range(0, query_samples):
        q = queries[random.choice(queries.keys())]
        l = rem_learner.get_ranked_list(q)
        c = user_model.get_clicks(l, q.get_labels())
        s = rem_learner.update_solution(c)
        rem_ndcg_result[i][idx] = evaluation.evaluate_all(s, queries)
    rem_ranker.append(rem_learner.get_solution())
import sys
import include
import evaluation, query, ranker
import numpy as np
from ranker.AbstractRankingFunction import AbstractRankingFunction

# build a unit weight vector that selects a single feature
w = [0] * 64
w[int(sys.argv[2])] = 1
wstr = ",".join([str(x) for x in w])

evaluation = evaluation.NdcgEval()
bm25ranker = AbstractRankingFunction(["ranker.model.Linear"], 'first', 64, init=wstr, sample="utils.sample_fixed")
queries = query.load_queries(sys.argv[1], 64)
print evaluation.evaluate_all(bm25ranker, queries)
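# Example invocation (the script filename is hypothetical, the arguments are placeholders):
#   python single_feature_ndcg.py <queries_file> <feature_index>
# argv[1] is a 64-feature query file and argv[2] selects the single feature to score with NDCG.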
    player1.plotter.clear_plots(experiment_name)
    for player in (player1, player2):
        player.plotter.plot_results(experiment_name)
        player.plotter.plot_scores(experiment_name)
        player.save(experiment_name)


if __name__ == "__main__":
    """ This script is run in order to test if all available ValueFunctions can be trained as expected """

    strategies = [vF.LargeValueFunction]

    for strategy in strategies:
        """ Parameters """
        player1 = TDPlayer(color=config.BLACK, strategy=strategy, lr=0.1)
        player2 = HeuristicPlayer(color=config.WHITE, strategy=vF.NoValueFunction)

        """ Continue training """
        # player1 = config.load_player("TDPlayer_Black_ValueFunction|TDvsMC|")
        # player2 = config.load_player("MCPlayer_White_ValueFunction|TDvsMC|")

        TOTAL_GAMES = 200000
        EVALUATION_PERIOD = TOTAL_GAMES // 4

        """ Execution """
        start = datetime.now()
        print("Experiment name: %s" % EXPERIMENT_NAME)
        print("Training %s VS %s" % (player1.player_name, player2.player_name))

        evaluation.evaluate_all([player1, player2], 8)
        train_and_evaluate(player1, player2, TOTAL_GAMES, EVALUATION_PERIOD)
        print("Training completed, took %s\n" % (datetime.now() - start))
for m in range(0, k):
    # for each of the k runs we have a different A matrix,
    # as described in the REMBO paper
    rem_learner = retrieval_system.ListwiseLearningSystemREMBO(
        64, d,
        '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunctionREMBO -s 3 -d 0.1 -a 0.01'
    )

    # start evaluation
    for i in range(0, number_of_evaluation):
        q = train_queries[random.choice(train_queries.keys())]
        l = rem_learner.get_ranked_list(q)
        c = user_model.get_clicks(l, q.get_labels())
        s = rem_learner.update_solution(c)
        temp_ndcg_evaluation_train.append(
            evaluation.evaluate_all(s, train_queries))
        temp_ndcg_evaluation_test.append(
            evaluation.evaluate_all(s, test_queries))

    timestamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d %H:%M:%S')
    f = open(
        "../../output/experiment1/" + timestamp + "k_" + str(k) + "d_" +
        str(d) + "_train.txt", "w")
    f.write("%s" % str(temp_ndcg_evaluation_train) + "\n")
    f.close()
    f2 = open(
        "../../output/experiment1/" + timestamp + "k_" + str(k) + "d_" +
        str(d) + "_test.txt", "w")
    f2.write("%s" % str(temp_ndcg_evaluation_test) + "\n")
    f2.close()
full_learner = retrieval_system.ListwiseLearningSystem(64, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')

# start evaluation
for i in range(0, number_of_evaluation):
    q = train_queries[random.choice(train_queries.keys())]

    l_rem = rem_learner.get_ranked_list(q)
    l_full = full_learner.get_ranked_list(q)

    c_rem = user_model.get_clicks(l_rem, q.get_labels())
    c_full = user_model.get_clicks(l_full, q.get_labels())

    s_rem = rem_learner.update_solution(c_rem)
    s_full = full_learner.update_solution(c_full)

    rem_ndcg_evaluation_train.append(evaluation.evaluate_all(s_rem, train_queries))
    full_ndcg_evaluation_train.append(evaluation.evaluate_all(s_full, train_queries))
    rem_ndcg_evaluation_test.append(evaluation.evaluate_all(s_rem, test_queries))
    full_ndcg_evaluation_test.append(evaluation.evaluate_all(s_full, test_queries))

# write the result to file
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')

f = open("../../output/experiment2/" + timestamp + "k_" + str(k) + "d_" + str(d) + "rem_train.txt", "w")
f.write("%s" % str(rem_ndcg_evaluation_train) + "\n")
f.close()

f = open("../../output/experiment2/" + timestamp + "k_" + str(k) + "d_" + str(d) + "full_train.txt", "w")
f.write("%s" % str(full_ndcg_evaluation_train) + "\n")
f.close()

f = open("../../output/experiment2/" + timestamp + "k_" + str(k) + "d_" + str(d) + "rem_test.txt", "w")
feature_count = 64

learner = retrieval_system.ListwiseLearningSystem(
    feature_count,
    '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01'
)
user_model = environment.CascadeUserModel(
    '--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')
evaluation = evaluation.NdcgEval()

training_queries = query.load_queries(sys.argv[1], feature_count)

# count how often each label-list length occurs in the training set
query_freq = {}
for train in training_queries:
    if len(train.__labels__) in query_freq:
        query_freq[len(train.__labels__)] += 1
    else:
        query_freq[len(train.__labels__)] = 1
print query_freq

test_queries = query.load_queries(sys.argv[2], feature_count)

for i in range(20):
    q = training_queries[random.choice(training_queries.keys())]
    l = learner.get_ranked_list(q)
    c = user_model.get_clicks(l, q.get_labels())
    s = learner.update_solution(c)
    print evaluation.evaluate_all(s, test_queries)
    print s.w
    rankerDict.add(q, s.w)

pickle.dump(rankerDict, open("save.p", "wb"))
test = pickle.load(open("save.p", "rb"))
print test.query_ranker