def createBanditInstancesAndSimulate(params,n_mc_sim):
    """Simulate six agents for every parameter set and plot each comparison.

    For each entry of ``params`` the agents are wrapped in a ``Bandit`` one
    after another, passed through ``mc_simulate`` and the collected results
    are handed to ``plot`` together with the entry itself.

    Args:
        params: Collection indexable by ``0 .. len(params) - 1``; every entry
            must provide ``'time_horizon'``, ``'number_of_arms'`` and
            ``'number_of_exploration_per_arm'``.
        n_mc_sim: Value forwarded as the first argument of ``mc_simulate``.

    Returns:
        None. Results are only passed on to ``plot``.
    """
    for index in range(len(params)):
        config = params[index]
        horizon = config['time_horizon']
        arms = config['number_of_arms']
        explorations = config['number_of_exploration_per_arm']

        # All agents are constructed before the first simulation starts, in
        # this exact order, in case the constructors have side effects.
        explore_exploit = ExploreThenExploit(horizon, arms, explorations)
        half_epsilon = EpsilonGreedy(horizon, arms, [1/2] * horizon)
        # Constant epsilon equal to the explore-then-exploit exploration
        # share: explorations * arms / horizon.
        rate_epsilon = EpsilonGreedy(
            horizon, arms, [explorations * arms / horizon] * horizon)
        ucb = UBC1Agent(horizon, arms)
        elimination = SuccessiveEliminationAgent(horizon, arms)
        baseline = Agent()

        # (agent, extra positional arguments for mc_simulate). Only the two
        # epsilon-greedy runs pass an explicit third argument; the others
        # rely on whatever default mc_simulate declares.
        lineup = [
            (baseline, ()),
            (explore_exploit, ()),
            (rate_epsilon, ("constant-epsilon=rate-of-explore-exploit",)),
            (half_epsilon, ("constant-epsilon=0.5",)),
            (ucb, ()),
            (elimination, ()),
        ]

        results = []
        for agent, extra in lineup:
            bandit = Bandit(horizon, arms, agent)
            results.append(mc_simulate(n_mc_sim, bandit, *extra))

        plot(results, horizon, config)
def experiment2():
    """Compare five agents on six (time horizon, arm count) settings.

    Horizons 500 and 5000 are combined with 5, 10 and 20 arms. For every
    setting the agents ``random``, ``epsilon-greedy``,
    ``explore-then-exploit`` (called with ``time_horizon/100``), ``ucb1``
    and ``successive-elimination`` are obtained from ``agentFactory``, each
    is wrapped in a ``Bandit`` and run through ``mc_simulate``, and the
    results are passed to ``plot``.

    ``n_sim`` is not assigned in this function; it is resolved from the
    surrounding module scope.

    Returns:
        None. Results are only passed on to ``plot``.
    """
    params = [
        {"time_horizon": horizon, "number_of_arms": arms}
        for arms in (5, 10, 20)
        for horizon in (500, 5000)
    ]

    for setting in params:
        time_horizon = setting["time_horizon"]
        number_of_arms = setting["number_of_arms"]

        random_agent = agentFactory("random", time_horizon, number_of_arms)

        # Decaying exploration schedule for rounds t = 1..T:
        #     eps_t = min(1, (K * ln(t) / t) ** (1/3))
        # The round index belongs in the denominator; multiplying by t
        # instead yields values above 1 for every t >= 2, which never decay
        # and are not probabilities. Early rounds of the corrected formula
        # can still exceed 1, hence the clamp.
        epsilons = [
            min(1.0, math.pow(number_of_arms * math.log(t) / t, 1/3))
            for t in range(1, time_horizon + 1)
        ]
        greedy_agent = agentFactory(
            "epsilon-greedy", time_horizon, number_of_arms, epsilons)
        explore_agent = agentFactory(
            "explore-then-exploit", time_horizon, number_of_arms,
            time_horizon/100)
        ucb_agent = agentFactory("ucb1", time_horizon, number_of_arms)
        elimination_agent = agentFactory(
            "successive-elimination", time_horizon, number_of_arms)

        # (agent, extra positional arguments for mc_simulate). Only the
        # explore-then-exploit run passes an explicit third argument.
        lineup = [
            (random_agent, ()),
            (greedy_agent, ()),
            (explore_agent, ("N=T/100",)),
            (ucb_agent, ()),
            (elimination_agent, ()),
        ]

        results = []
        for agent, extra in lineup:
            bandit = Bandit(time_horizon, number_of_arms, agent)
            results.append(mc_simulate(n_sim, bandit, *extra))

        plot(results, time_horizon, setting)
def experiment1():
    """Compare three explore-then-exploit budgets on six settings.

    Horizons 1000 and 10000 are combined with 5, 10 and 20 arms. For every
    setting three ``explore-then-exploit`` agents are requested from
    ``agentFactory`` with fourth argument ``5``, ``time_horizon/10`` and
    ``time_horizon/100``; each is wrapped in a ``Bandit``, run through
    ``mc_simulate`` with a matching label, and the results go to ``plot``.

    ``n_sim`` is not assigned in this function; it is resolved from the
    surrounding module scope.

    Returns:
        None. Results are only passed on to ``plot``.
    """
    settings = [
        {"time_horizon": horizon, "number_of_arms": arms}
        for arms in (5, 10, 20)
        for horizon in (1000, 10000)
    ]

    for setting in settings:
        horizon = setting["time_horizon"]
        arms = setting["number_of_arms"]

        # All three agents are created before the first simulation runs.
        labelled_agents = [
            ("N=5",
             agentFactory("explore-then-exploit", horizon, arms, 5)),
            ("N=T/10",
             agentFactory("explore-then-exploit", horizon, arms, horizon/10)),
            ("N=T/100",
             agentFactory("explore-then-exploit", horizon, arms, horizon/100)),
        ]

        results = []
        for label, agent in labelled_agents:
            bandit = Bandit(horizon, arms, agent)
            results.append(mc_simulate(n_sim, bandit, label))

        plot(results, horizon, setting)
def create_evaluation_report(
        results,
        k=5,
        setups=None,
        score='tra_score',
        show_plots=False,
        lower_is_better=True):
    """Print and render the best configurations found in ``results``.

    Columns listed in ``nonrelevant_keys`` are dropped first. The best rows
    are then selected with ``find_best`` (and, when more than one sample is
    present, also with ``find_best_average``). For every selection and every
    sample in it, a table is rendered through ``report.render_table`` and,
    optionally, threshold plots are drawn through ``report.plot``.

    Args:
        results: Table with at least the columns ``'sample'``, ``'setup'``,
            ``'iteration'``, ``'merge_function'``, ``'threshold'``,
            ``'seg_score'`` and ``'tra_score'``.
        k: Forwarded to the selection helpers and printed in the heading.
        setups: Optional table that is left-joined on ``'setup'`` into each
            rendered table when it is not ``None`` and not empty.
        score: Forwarded to the selection helpers.
        show_plots: When true, also call ``report.plot`` for every sample.
        lower_is_better: Forwarded to the selection helpers.

    Returns:
        None. Output is printed and handed to ``report``.
    """
    unwanted = set(results.keys()).intersection(set(nonrelevant_keys))
    results = results.drop(columns=list(unwanted))

    selection = dict(k=k, score=score, lower_is_better=lower_is_better)

    # show average only if there is more than one sample
    has_several_samples = len(np.unique(results['sample'])) > 1
    best_results = (
        [find_best_average(results, **selection)]
        if has_several_samples else []
    )
    best_results.append(find_best(results, **selection))

    print(f"Evaluated setups: {np.unique(results['setup'])!s}")

    # Columns that always lead the rendered table, in this order.
    leading_columns = [
        'sample',
        'setup',
        'iteration',
        'merge_function',
        'threshold',
        'seg_score',
        'tra_score']

    for best in best_results:

        samples = np.unique(best['sample'])
        configuration_keys = list(
            set(results.keys()) - set(metric_keys) - set(nonrelevant_keys))

        print("%d best configurations:" % k)

        for sample in samples:

            sample_best = best[best['sample'] == sample]

            trailing_columns = [
                key for key in results.keys()
                if not (key in leading_columns or key in nonrelevant_keys)
            ]
            table_frame = sample_best.loc[:, leading_columns + trailing_columns]
            if setups is not None and len(setups) > 0:
                table_frame = pandas.merge(
                    table_frame, setups, how='left', on='setup')
            report.render_table(table_frame)

            if not show_plots:
                continue

            groups = [{'sample': sample}]
            figures = [
                {'x_axis': 'threshold', 'y_axis': 'seg_score', 'title': 'SEG'},
                {'x_axis': 'threshold', 'y_axis': 'tra_score', 'title': 'TRA'},
            ]

            # for all thresholds
            line_configurations = [
                {
                    **{c: curate_value(row[c]) for c in configuration_keys},
                    'style': 'line',
                }
                for _, row in sample_best.iterrows()
            ]
            # for best threshold only
            threshold_keys = configuration_keys + ['threshold']
            point_configurations = [
                {c: curate_value(row[c]) for c in threshold_keys}
                for _, row in sample_best.iterrows()
            ]

            report.plot(
                groups,
                figures,
                line_configurations + point_configurations,
                results.sort_values(by='threshold'))