def run_different_parameter(thread_id, gamma):
    figure_name = ('gamma' + str(gamma)).replace('.', 'dot')
    bandit_env = StochasticEnvironment(k, sigma_noise)
    policies = [EXP3('EXP3', k, gamma, 'black')]
    simulator = Simulator(bandit_env, policies, time_horizon)
    regret_dict = simulator.run(num_thread, num_mc)
    plot_regret(figure_name, policies, regret_dict)
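# --- Hypothetical usage sketch (not from the original source) ---
# One plausible way to fan run_different_parameter out over several EXP3
# learning rates; the gamma grid and the use of multiprocessing.Process are
# assumptions added purely for illustration.
from multiprocessing import Process

def sweep_exp3_gammas(gammas=(0.01, 0.05, 0.1, 0.2)):
    jobs = [Process(target=run_different_parameter, args=(tid, gamma))
            for tid, gamma in enumerate(gammas)]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()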
def parameter_multiprocessing_run(thread_id, sigma_noise):
    figure_name = ('sigma_noise' + str(sigma_noise)).replace('.', 'dot')
    bandit_env = Environment(k, d, sigma_noise)
    policies = [LinUCB('LinUCB', k, d, lambda_, arm_norm_bound, theta_norm_bound,
                       delta, sigma_noise, 'black')]
    simulator = Simulator(bandit_env, policies, time_horizon)
    regret_dict = simulator.multiprocessing_run(num_thread, num_mc)
    plot_regret(figure_name, policies, regret_dict, jupyter_notebook)
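# --- Hypothetical sweep sketch (not from the original source) ---
# parameter_multiprocessing_run relies on module-level globals (k, d, lambda_,
# arm_norm_bound, theta_norm_bound, delta, time_horizon, num_thread, num_mc,
# jupyter_notebook) already being defined. Assuming they are, one call per
# observation-noise level produces one regret figure per noise setting; the
# noise grid below is illustrative only.
def sweep_noise_levels(noise_levels=(0.1, 0.5, 1.0)):
    for sigma in noise_levels:
        parameter_multiprocessing_run(thread_id=0, sigma_noise=sigma)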
def solo_processing():
    # solo_processing
    option = 'solo_processing'
    sigma_noise = 0.1
    figure_name = ('ucb_convergence_verification_sigma' + str(sigma_noise)).replace('.', 'dot')
    policies = [UCB(sigma_noise, k, delta)]
    avg_regret = run_experiment(num_mc, option)
    plot_regret(figure_name, line_name, avg_regret, jupyter_notebook)
def multi_processing():
    global num_thread, sigma_noise, policies
    # multi_processing
    option = 'multi_processing'
    num_thread = 10
    sigma_noise = 0.1
    figure_name = ('ucb_convergence_verification_sigma' + str(sigma_noise)).replace('.', 'dot')
    policies = [UCB(sigma_noise, k, delta)]
    avg_regret = run_experiment(num_mc, option)
    plot_regret(figure_name, line_name, avg_regret, jupyter_notebook)
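# --- Hypothetical entry point (not from the original source) ---
# A quick way to run the two drivers back to back; the timing wrapper and the
# helper name are assumptions added for illustration only.
import time

def compare_drivers():
    for driver in (solo_processing, multi_processing):
        start = time.time()
        driver()
        print('%s took %.1fs' % (driver.__name__, time.time() - start))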
def run_model():
    data, true_labels = ldl.get_data_linear()
    true_buckets = [util.bucket(t) for t in true_labels]
    data = np.tile(data, (DATA_MULTIPLIER, 1))
    print("DATA SHAPE:", data.shape)
    true_buckets = np.tile(true_buckets, DATA_MULTIPLIER)

    # tuples of (batch_id, total regret, error while training, eval error, precision, recall)
    batch_results = []
    for T in range(NUM_BATCHES):
        model = Lin_UCB(ALPHA)
        # model = LASSO_BANDIT()

        # Disabled code path: reload the raw data and swap in one of the
        # baseline dosing policies instead of the bandit model.
        if False:
            data, true_labels, columns_dict, values_dict = dl.get_data()
            true_buckets = [util.bucket(t) for t in true_labels]
            # model = Fixed_Dose(columns_dict, values_dict)
            # model = Warfarin_Clinical_Dose(columns_dict, values_dict)
            # model = Warfarin_Pharmacogenetic_Dose(columns_dict, values_dict)

        batch_id = str(random.randint(100000, 999999))
        print()
        print("Start Batch: ", batch_id)

        # Shuffle features and labels together so each batch sees a new ordering.
        zipped_data = list(zip(data, true_buckets))
        random.shuffle(zipped_data)
        data, true_buckets = zip(*zipped_data)
        data = np.array(data)

        model.train(data, true_buckets)
        pred_buckets = model.evaluate(data)

        print(batch_id, "Performance on " + str(model))
        acc, precision, recall = util.evaluate_performance(pred_buckets, true_buckets)
        print("\tAccuracy:", acc)
        print("\tPrecision:", precision)
        print("\tRecall:", recall)

        plot_regret(model.regret, ALPHA, batch_id)
        plot_error_rate(model.error_rate, ALPHA, batch_id)

        batch_results.append(
            (batch_id, model.get_regret()[-1], model.get_error_rate()[-1],
             1 - acc, precision, recall))

        with open('batch/regret' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.regret, fp)
        with open('batch/error' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.error_rate, fp)

    return batch_results
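# --- Hypothetical summary helper (not from the original source) ---
# run_model() returns one tuple per batch laid out as documented inside the
# function: (batch_id, total regret, training error, eval error, precision,
# recall). A minimal sketch of printing those results; the helper name and
# output format are assumptions.
def summarize_batch_results(batch_results):
    for batch_id, regret, train_err, eval_err, precision, recall in batch_results:
        print("batch %s: regret=%.1f train_err=%.3f eval_err=%.3f"
              % (batch_id, regret, train_err, eval_err))
        print("\tprecision:", precision, "\trecall:", recall)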
            # tail of the per-arm scoring loop: UCB score = point estimate + alpha-scaled confidence width
            tmp = features.T.dot(self.A_inv[i]).dot(features)
            p[i] = self.theta[i].dot(features) + self.alpha * np.sqrt(tmp)
        choose_action = np.argmax(p)
        return choose_action


def test_lin_ucb_full(data, true_buckets, alpha=0.1):
    lin_ucb = Lin_UCB(alpha=alpha)
    lin_ucb.train(data, true_buckets)
    pred_buckets = lin_ucb.evaluate(data)
    acc, precision, recall = util.evaluate_performance(pred_buckets, true_buckets)
    # print("accuracy on linear UCB: " + str(acc))


if __name__ == '__main__':
    data, true_labels = ldl.get_data_linear()
    true_buckets = [util.bucket(t) for t in true_labels]

    ALPHA = 0.1
    lin_ucb = Lin_UCB(alpha=ALPHA)
    lin_ucb.train(data, true_buckets)
    pred_buckets = lin_ucb.evaluate(data)
    acc, precision, recall = util.evaluate_performance(pred_buckets, true_buckets)
    # print("accuracy on linear UCB: " + str(acc))

    plot_regret(lin_ucb.regret, ALPHA)
    plot_error_rate(lin_ucb.error_rate, ALPHA)
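# --- Minimal disjoint LinUCB sketch (illustrative, not the repo's Lin_UCB class) ---
# The fragment above shows only the scoring step,
#   p[i] = theta_i . x + alpha * sqrt(x^T A_inv_i x).
# Below is a self-contained version of the same idea under the usual
# disjoint-model assumptions (one ridge-regression pair (A, b) per arm);
# the class and variable names are invented for illustration.
import numpy as np

class MiniLinUCB:
    """One ridge-regression model (A, b) per arm."""

    def __init__(self, num_arms, dim, alpha=0.1):
        self.alpha = alpha
        self.A = [np.eye(dim) for _ in range(num_arms)]    # Gram matrices (identity ridge prior)
        self.b = [np.zeros(dim) for _ in range(num_arms)]  # reward-weighted feature sums

    def select(self, x):
        scores = np.empty(len(self.A))
        for i, (A, b) in enumerate(zip(self.A, self.b)):
            A_inv = np.linalg.inv(A)
            theta = A_inv.dot(b)
            # exploitation term + alpha-scaled confidence width
            scores[i] = theta.dot(x) + self.alpha * np.sqrt(x.dot(A_inv).dot(x))
        return int(np.argmax(scores))

    def update(self, arm, x, reward):
        self.A[arm] += np.outer(x, x)
        self.b[arm] += reward * x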