def optimistic_initial_values(runs=2000, time=1000):
    bandits = []
    bandits.append(Bandit(epsilon=0, initial=5, step_size=0.1))
    bandits.append(Bandit(epsilon=0.1, initial=0, step_size=0.1))
    best_action_counts, _ = simulate(runs, time, bandits)

    plt.plot(best_action_counts[0], label='epsilon = 0, q = 5')
    plt.plot(best_action_counts[1], label='epsilon = 0.1, q = 0')
    plt.xlabel('Steps')
    plt.ylabel('% optimal action')
    plt.legend()

    plt.savefig('../images/figure_2_3.png')
    plt.close()
def UCB(runs=2000, time=1000):
    bandits = []
    bandits.append(Bandit(epsilon=0, UCB_param=2, sample_averages=True))
    bandits.append(Bandit(epsilon=0.1, sample_averages=True))
    _, average_rewards = simulate(runs, time, bandits)

    plt.plot(average_rewards[0], label='UCB c = 2')
    plt.plot(average_rewards[1], label='epsilon greedy epsilon = 0.1')
    plt.xlabel('Steps')
    plt.ylabel('Average reward')
    plt.legend()

    plt.savefig('../images/figure_2_4.png')
    plt.close()
def init_bandits(reward_interval_lists):
    # init six bandits with their rewards
    bandit_list = []
    for i in range(len(reward_interval_lists)):
        b = Bandit(i, reward_interval_lists[i])
        bandit_list.append(b)
    return bandit_list
def run_experiment(true_means, N, upper_limit):
    bandits = []
    # pdb.set_trace()
    for tm in true_means:
        bandits.append(Bandit(tm, upper_limit))

    data = np.empty(N, dtype=np.float16)
    for n in range(N):
        # Greedy selection: the optimistic initial estimates provide the exploration.
        i = np.argmax([b.est_mean for b in bandits])
        sample = bandits[i].pull()
        data[n] = sample
        bandits[i].update(sample)

    mean_winning = np.cumsum(data) / np.arange(1, N + 1)
    plt.figure()
    plt.plot(mean_winning)
    plt.title('Mean Winnings')

    colors = ['orange', 'blue', 'green']
    for b, c in zip(bandits, colors):
        # plt.plot(np.arange(1, N + 1), b.est_mean * np.ones((N,)))
        plt.fill_between(np.arange(1, N + 1), b.true_mean,
                         b.est_mean * np.ones((N,)), color=c)
        plt.annotate(str(b.true_mean), xy=(N - 5, b.true_mean))
        print(b.est_mean, b.true_mean)
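# For reference, a minimal sketch of the Bandit interface the experiment above
# assumes (the names true_mean, est_mean, pull, update come from the snippet;
# the Gaussian reward model and the optimistic upper_limit initialization are
# assumptions):
import numpy as np

class Bandit:
    """One slot machine: fixed true mean, running estimate of that mean."""
    def __init__(self, true_mean, upper_limit=10.0):
        self.true_mean = true_mean
        self.est_mean = upper_limit  # optimistic initial value drives exploration
        self.N = 0                   # number of pulls so far

    def pull(self):
        # Assumed reward model: unit-variance Gaussian around the true mean.
        return np.random.randn() + self.true_mean

    def update(self, x):
        # Incremental sample-average update: Q += (x - Q) / n.
        self.N += 1
        self.est_mean += (x - self.est_mean) / self.N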
def gradient_bandit(runs=2000, time=1000):
    bandits = []
    bandits.append(Bandit(gradient=True, step_size=0.1, gradient_baseline=True, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.1, gradient_baseline=False, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.4, gradient_baseline=True, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.4, gradient_baseline=False, true_reward=4))
    best_action_counts, _ = simulate(runs, time, bandits)
    labels = ['alpha = 0.1, with baseline',
              'alpha = 0.1, without baseline',
              'alpha = 0.4, with baseline',
              'alpha = 0.4, without baseline']

    for i in range(len(bandits)):
        plt.plot(best_action_counts[i], label=labels[i])
    plt.xlabel('Steps')
    plt.ylabel('% Optimal action')
    plt.legend()

    plt.savefig('../images/figure_2_5.png')
    plt.close()
def main():
    if len(sys.argv) < 3:
        print_usage()
        exit()

    num_pulls = int(sys.argv[1])
    goal = int(sys.argv[2])
    arms = [Arm(arm.split(",")) for arm in sys.argv[3:]]
    bandit = Bandit(arms)

    alg = IncrementalUniformBandit(num_pulls, bandit)
    print("NOTE: cumulative regret is incorrect right now")
    print("uniform simple regret", alg.get_simple_regret())
    print("uniform cumulative regret", alg.get_cum_regret())
    print("uniform best arm", alg.get_best_arm())

    alg = UCBBandit(num_pulls, bandit)
    print("ucb simple regret", alg.get_simple_regret())
    print("ucb cumulative regret", alg.get_cum_regret())
    print("ucb best arm", alg.get_best_arm())
def run_experiment(true_means, N, eps):
    """Run one epsilon-greedy experiment; written as a function so it can be
    repeated with different settings."""
    bandits = []
    for tm in true_means:
        bandits.append(Bandit(tm))

    data = np.empty(N)
    for pix in range(N):
        p = np.random.random()
        if p < eps:
            # Explore: choose any slot machine uniformly (note: chosen equally,
            # without excluding the current 'best').
            j = np.random.choice(len(bandits))
        else:
            # Exploit: recompute the estimates every step, then take the best one.
            j = np.argmax([b.est_mean for b in bandits])
        pull_result = bandits[j].pull()
        bandits[j].update(pull_result)
        data[pix] = pull_result

    # see how the average reward has fluctuated
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)

    plt.figure()
    plt.plot(cumulative_average)
    for tm in true_means:
        plt.plot(np.ones(N) * tm, c='orange')
        plt.annotate(str(tm), xy=(0, tm))
    for b in bandits:
        plt.plot(np.ones(N) * b.est_mean, c='blue')
        plt.annotate(str(b.est_mean), xy=(N - 1, b.est_mean))
    # plt.xscale('log')
    plt.title('eps=' + str(eps))
    plt.show()

    # serves as a test of how quickly you get to the optimal arm
    return cumulative_average
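# A possible driver for run_experiment above; the arm means, horizon, and
# epsilon values here are illustrative assumptions, not from the source.
if __name__ == '__main__':
    # Larger epsilon explores more and learns faster but keeps paying for
    # exploration; smaller epsilon converges slower but plateaus higher.
    c_10 = run_experiment([1.0, 2.0, 3.0], 100000, eps=0.10)
    c_05 = run_experiment([1.0, 2.0, 3.0], 100000, eps=0.05)
    c_01 = run_experiment([1.0, 2.0, 3.0], 100000, eps=0.01)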
def greedy(runs=2000, time=1000):
    epsilons = [0, 0.1, 0.01]
    bandits = [Bandit(epsilon=eps, sample_averages=True) for eps in epsilons]
    best_action_counts, rewards = simulate(runs, time, bandits)

    plt.figure(figsize=(10, 20))

    plt.subplot(2, 1, 1)
    for eps, reward in zip(epsilons, rewards):  # avoid shadowing `rewards`
        plt.plot(reward, label='epsilon = %.02f' % eps)
    plt.xlabel('steps')
    plt.ylabel('average reward')
    plt.legend()

    plt.subplot(2, 1, 2)
    for eps, counts in zip(epsilons, best_action_counts):
        plt.plot(counts, label='epsilon = %.02f' % eps)
    plt.xlabel('steps')
    plt.ylabel('% optimal action')
    plt.legend()

    plt.savefig('../images/figure_2_2.png')
    plt.close()
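# The figure functions above (greedy, optimistic_initial_values, UCB,
# gradient_bandit) all rely on a simulate() helper that is not shown. A minimal
# sketch of what it presumably does; the Bandit methods reset/act/step and the
# best_action attribute are assumptions consistent with how the results are used.
import numpy as np

def simulate(runs, time, bandits):
    """Return per-bandit curves of % optimal action and reward, averaged over runs."""
    best_action_counts = np.zeros((len(bandits), runs, time))
    rewards = np.zeros((len(bandits), runs, time))
    for i, bandit in enumerate(bandits):
        for r in range(runs):
            bandit.reset()
            for t in range(time):
                action = bandit.act()
                rewards[i, r, t] = bandit.step(action)
                if action == bandit.best_action:
                    best_action_counts[i, r, t] = 1
    # Average over runs, leaving one curve of length `time` per bandit.
    return best_action_counts.mean(axis=1), rewards.mean(axis=1)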
def encounter(self):
    encounterChance = random.randint(0, 1)
    if encounterChance == 1:
        if self.difficulty == "Easy":
            chance = random.randint(1, 6)
            if chance == 1:
                randAttacker = random.randint(0, 1)
                if randAttacker == 0:
                    if len(self.player.ship.inventory) > 0:
                        self.npc = Police()
                    else:
                        self.npc = Bandit(self.difficulty)
                else:
                    self.npc = Bandit(self.difficulty)
            else:
                self.npc = Trader(self.curr_region)
        if self.difficulty == "Medium":
            chance = random.randint(1, 6)
            if chance > 1 and chance < 4:
                randAttacker = random.randint(0, 1)
                if randAttacker == 0:
                    if len(self.player.ship.inventory) > 0:
                        self.npc = Police()
                    else:
                        self.npc = Bandit(self.difficulty)
                else:
                    self.npc = Bandit(self.difficulty)
            else:
                self.npc = Trader(self.curr_region)
        if self.difficulty == "Hard":
            chance = random.randint(3, 6)
            if chance > 3:
                randAttacker = 0
                if randAttacker == 0:
                    if len(self.player.ship.inventory) > 0:
                        self.npc = Police()
                    else:
                        self.npc = Bandit(self.difficulty)
                else:
                    self.npc = Bandit(self.difficulty)
            else:
                self.npc = Trader(self.curr_region)
    else:
        self.npc = None
import matplotlib.pyplot as plt
from Bandit import Bandit
from Player import Player
from EpsilonGreedyStrategy import EpsilonGreedyStrategy
from UCBStrategy import UCBStrategy
import random

bandit1 = Bandit(10)
playerA = UCBStrategy(bandit1, "A")  # from the perspective of some player


def plotRegret(history):
    # Cumulative regret on the y-axis against time on the x-axis.
    time = [i for i in range(len(history))]
    plt.plot(time, history)


def run_simulation(max_time, player, bandit):
    curr_time = 0
    while curr_time < max_time:
        player.playArm(curr_time)
        curr_time += 1
    print("Player estimates:{}".format(player.estimated_values))
    print("True bandit:{}".format(bandit.arms))
    print("Player's reward: {}".format(player.reward))
    plotRegret(player.cum_regret_history)
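# The script defines run_simulation but never calls it; a plausible driver,
# reusing the 3000-step horizon that plotRegret originally hard-coded
# (the plt.show() call is an assumption):
if __name__ == "__main__":
    run_simulation(3000, playerA, bandit1)
    plt.show()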
from Bandit import Bandit
from Agent import Agent
import numpy as np
import matplotlib.pyplot as plt
import pickle

# initialise variables
no_of_iterations = 2000
no_of_time_steps = 1000
all_rewards = np.zeros((no_of_time_steps, no_of_iterations))

# learn: a fresh 10-armed bandit and agent for every iteration
for i in np.arange(no_of_iterations):
    bandit = Bandit(10)
    agent = Agent(bandit, no_of_time_steps, 0.1, 0)
    agent.learn()
    all_rewards[:, i] = agent.rewards_per_time
    # print(bandit.action_values)
    # print(agent.action_selection_counts)
    # print(agent.action_value_estimate)

# plot the reward curve averaged over all iterations
plt.plot(np.arange(0, 1000) + 1, np.mean(all_rewards, axis=1))
plt.show()

# save to file
pickle.dump(all_rewards, open("stationary_rewards_eps0.1.pkl", "wb"))
# all_rewards = pickle.load(open("save.pkl", "rb"))
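# The Agent class is not shown. Judging from Agent(bandit, no_of_time_steps, 0.1, 0),
# the last two arguments are presumably epsilon and the initial action-value
# estimate. A minimal epsilon-greedy sketch under that assumption; bandit.k and
# bandit.step(a) are also assumed names.
import numpy as np

class Agent:
    """Epsilon-greedy agent with incremental sample-average estimates."""
    def __init__(self, bandit, time_steps, epsilon, initial):
        self.bandit = bandit
        self.time_steps = time_steps
        self.epsilon = epsilon
        self.action_value_estimate = np.full(bandit.k, float(initial))
        self.action_selection_counts = np.zeros(bandit.k)
        self.rewards_per_time = np.zeros(time_steps)

    def learn(self):
        for t in range(self.time_steps):
            if np.random.random() < self.epsilon:
                a = np.random.randint(self.bandit.k)            # explore
            else:
                a = int(np.argmax(self.action_value_estimate))  # exploit
            r = self.bandit.step(a)  # assumed reward call
            self.action_selection_counts[a] += 1
            # Incremental sample average: Q += (R - Q) / n.
            self.action_value_estimate[a] += (
                (r - self.action_value_estimate[a]) / self.action_selection_counts[a]
            )
            self.rewards_per_time[t] = r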
class ActiveLearning:
    def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=251, optimize=50, datasetname="dataset"):
        self.datasetname = datasetname
        self.Lx = Lx
        self.Ly = Ly
        self.Ux = Ux
        self.Uy = Uy  # TODO should not be here
        self.Tx = Tx  # TODO should not be here
        self.Ty = Ty  # TODO should not be here
        self.optimize = optimize
        self.budget = budget
        self.accuracys = []
        self.clf = Classification(self.Lx, self.Ly, method=method, Vx=Lx + Ux, Vy=Ly + Uy)
        self.clf.train()
        self.mab = Bandit(algos=np.arange(0., 1.1, 0.1), method="boltzmann")
        # self.mab = Bandit(algos=np.arange(0., 1.1, 0.1), method="UCB")
        # self.mab2 = Bandit(algos=["disag1", "disag2"], method="boltzmann")
        self.mab2 = Bandit(algos=["disag1", "disag2"], method="reinforcement")
        # self.mab2 = Bandit(algos=["disag1", "disag2"], method="EXP3")

    # ---------------------------------------
    def train(self, mtd="margin", backupfile="backupfile.txt"):
        # TODO implement sample_weight + make method to shuffle and return sublist with data_limit
        for i in range(self.budget):
            if len(self.Ux) <= 1:
                break
            # Dispatch to the requested query strategy.
            if mtd == "margin": ids, scores = self.query_margin()
            if mtd == "proba": ids, scores = self.query_proba()
            if mtd == "entropy": ids, scores = self.query_entropy()
            if mtd == "random": ids, scores = self.query_random()
            if mtd == "weight": ids, scores = self.query_sufficient_weight()
            if mtd == "eer": ids, scores = self.query_eer()
            if mtd == "dist": ids, scores = self.query_sufficient_distance()
            if mtd == "disag1": ids, scores = self.query_disagreement1()
            if mtd == "disag2": ids, scores = self.query_disagreement2()
            if mtd == "disag3": ids, scores = self.query_disagreement3()
            if mtd == "balance": ids, scores = self.query_balance()
            if mtd == "balanced_disag1": ids, scores = self.query_balanced_disag1()
            if mtd == "balanced_disag2": ids, scores = self.query_balanced_disag2()
            if mtd == "disag1_balanced": ids, scores = self.query_disag1_balanced()
            if mtd == "disag2_balanced": ids, scores = self.query_disag2_balanced()
            if mtd == "exp": ids, scores = self.query_explote_explore()
            if mtd == "test": ids, scores = self.query_disagreement_test()

            # Move the top-ranked unlabeled point into the labeled set.
            id = ids[0]
            qx = self.Ux[id]
            qy = self.Uy[id]
            self.Lx.append(qx)
            self.Ly.append(qy)
            self.Ux.pop(id)
            self.Uy.pop(id)

            self.clf.X = self.Lx
            self.clf.Y = self.Ly
            self.clf.train()
            test_accuracy = self.clf.getTestAccuracy(self.Tx, self.Ty)
            self.accuracys.append(test_accuracy)

            print("i=", i + 1, "-- acc=%.4f" % (test_accuracy * 100),
                  "-- %.4f" % (np.mean(self.accuracys) * 100),
                  "%.4f" % (np.average(self.accuracys, weights=range(1, 1 + len(self.accuracys))) * 100),
                  "--", scores[0], scores[1])

            if (i + 1) % 10 == 0:
                Util.pickleSave(backupfile, self)
                # viz = Visualize()
                # viz.plot([range(len(self.accuracys)), self.accuracys], fig=backupfile + ".png", color='r', marker='-')
                '''
                colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90']
                viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' )
                viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' )
                viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' )
                viz.end_plot( fig = backupfile+"_.png" )
                '''

    # ---------------------------------------
    def sort_scores(self, scores):
        # Fall back to margin uncertainty when every score is zero.
        if sum(scores) == 0.:
            scores = [self.clf.uncertainty_margin(x) for x in self.Ux]
        ids = (-np.array(scores)).argsort()
        sorted_scores = [scores[id] for id in ids]
        return ids, sorted_scores

    # ---------------------------------------
    def query_margin(self):
        return self.sort_scores([self.clf.uncertainty_margin(x) for x in self.Ux])

    # ---------------------------------------
    def query_proba(self):
        return self.sort_scores([self.clf.uncertainty_prediction(x) for x in self.Ux])

    # ---------------------------------------
    def query_entropy(self):
        return self.sort_scores([self.clf.uncertainty_entropy(x) for x in self.Ux])

    # ---------------------------------------
    def query_random(self):
        return self.sort_scores([random.uniform(0., 1.) for x in self.Ux])

    # ---------------------------------------
    def query_sufficient_weight(self):
        ids, _ = self.query_margin()
        return self.sort_scores([self.clf.uncertainty_weight(x, self.Lx, self.Ly)
                                 if ix in ids[:self.optimize] else 0.
                                 for ix, x in enumerate(self.Ux)])

    # ---------------------------------------
    def query_eer(self, limit_Y=20):
        ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize]:
                YP = self.clf.predict(x, all=True)
                YP.sort(key=operator.itemgetter(1), reverse=True)
                sums = 0.
                for ir, (yy, proba) in enumerate(YP):
                    if ir == limit_Y:
                        break
                    temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method=self.clf.method)
                    temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C  # TODO FIXME: do it in general, not specifically for svm
                    temp_clf.train()
                    e_h1 = sum([temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x])
                    sums += proba * e_h1
                informativeness = 1. / sums
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_sufficient_distance(self):
        ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize]:
                y1, y2, p1, p2 = self.clf.getMarginInfo(x)
                C = [dp for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2]
                CDx = [Util.dist(dp, x) for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2]
                idsC = (np.array(CDx)).argsort()
                xx = Util.medoid([C[idp] for idp in idsC[:1]])
                # Binary search for the smallest step toward xx that flips the label.
                step = 0.01
                lower = 0.
                upper = 1.
                while (upper - lower) > step:
                    w = (upper + lower) / 2.
                    px = np.array(x) + w * (np.array(xx) - np.array(x))
                    if self.clf.predict_label(px) != y1:
                        upper = w
                    else:
                        lower = w
                informativeness = 1. - w
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_disagreement1(self, weighted=False, op=1):
        ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize * op]:
                # true_y = self.Uy[ix]
                true_y = self.clf.predict_label(x)
                temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                if not weighted:
                    diff = sum([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                                for dp in self.Ux if dp != x])
                else:
                    diff = sum([abs(temp_clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))
                                if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                                for dp in self.Ux if dp != x])
                    # diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
                informativeness = diff
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_disagreement2(self, weighted=False, op=1):
        ids, _ = self.query_margin()
        scores = []
        commitee = []
        for idp, dp in enumerate(self.Ux):
            if idp in ids[:self.optimize * op]:
                # true_y = self.Uy[idp]
                true_y = self.clf.predict_label(dp)
                temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                commitee.append((temp_clf, 1))
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize * op]:
                preds = Counter()
                if weighted:  # weight using the probability distribution of the committee
                    for (clf, _) in commitee:
                        if self.clf.predict_label(x) != clf.predict_label(x):
                            YP = zip(clf.h.classes_, clf.h.predict_proba(x)[0])
                            for (y, p) in YP:
                                preds[y] += p
                    preds = preds.most_common()
                    diff = 0. if preds == [] else preds[0][1]
                    # diff = 0. if preds == [] else (preds[0][1] - preds[1][1] if len(preds) > 1 else preds[0][1])
                else:
                    # confis = [clf.getPredictProba(1, x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
                    labels = [clf.predict_label(x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
                    preds = Counter(labels)
                    preds = preds.most_common()
                    diff = 0. if preds == [] else sum([pred[1] for pred in preds])
                informativeness = diff
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_disagreement3(self):
        # Let a bandit choose between the two disagreement strategies.
        id_algo = self.mab2.choose()
        algo = self.mab2.algos[id_algo]
        print("Chosen =", algo, "nb_choices =", self.mab2.nb_choices,
              "mean rew=", [np.mean(L) for L in self.mab2.rewards])
        if algo == "disag1":
            ids, scores = self.query_disagreement1(weighted=True)
        if algo == "disag2":
            ids, scores = self.query_disagreement2()
        reward = self.get_change(self.Ux[ids[0]], self.Uy[ids[0]])
        self.mab2.update(id_algo, reward)
        return ids, scores

    # ---------------------------------------
    def query_balanced_disag1(self, weighted=True, op=1):
        ids, _ = self.query_margin()
        scores = []
        scores_B = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize * op]:
                # true_y = self.Uy[ix]
                true_y = self.clf.predict_label(x)
                temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                if not weighted:
                    diff = sum([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                                for dp in self.Ux if dp != x])
                else:
                    diff = sum([abs(temp_clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))
                                if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                                for dp in self.Ux if dp != x])
                    # diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
                balance = self.get_balance(x)
                informativeness = diff
            else:
                informativeness = 0.
                balance = 0.
            scores.append(informativeness)
            scores_B.append(balance)
        # scores_B = Util.normalize(scores_B)
        scores = [scr * scores_B[iscr] for iscr, scr in enumerate(scores)]
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_balanced_disag2(self, weighted=True, op=1):
        ids, _ = self.query_margin()
        scores = []
        scores_B = []
        commitee = []
        for idp, dp in enumerate(self.Ux):
            if idp in ids[:self.optimize * op]:
                # true_y = self.Uy[idp]
                true_y = self.clf.predict_label(dp)
                temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                commitee.append((temp_clf, 1))
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize * op]:
                preds = Counter()
                if weighted:  # weight using the probability distribution of the committee
                    for (clf, _) in commitee:
                        if self.clf.predict_label(x) != clf.predict_label(x):
                            YP = zip(clf.h.classes_, clf.h.predict_proba(x)[0])
                            for (y, p) in YP:
                                preds[y] += p
                    preds = preds.most_common()
                    diff = 0. if preds == [] else preds[0][1]
                    # diff = 0. if preds == [] else (preds[0][1] - preds[1][1] if len(preds) > 1 else preds[0][1])
                else:
                    # confis = [clf.getPredictProba(1, x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
                    labels = [clf.predict_label(x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
                    preds = Counter(labels)
                    preds = preds.most_common()
                    diff = 0. if preds == [] else sum([pred[1] for pred in preds])
                balance = self.get_balance(x)
                informativeness = diff
            else:
                informativeness = 0.
                balance = 0.
            scores.append(informativeness)
            scores_B.append(balance)
        # scores_B = Util.normalize(scores_B)
        scores = [scr * scores_B[iscr] for iscr, scr in enumerate(scores)]
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_balance(self):
        ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize * 4]:
                informativeness = self.get_balance(x)
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_disag1_balanced(self, weighted=True):
        ids, _ = self.query_disagreement1(weighted=weighted, op=2)
        # ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize // 2]:  # integer division (this was Python 2 code)
                informativeness = self.get_balance(x)
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_disag2_balanced(self, weighted=True):
        ids, _ = self.query_disagreement2(weighted=weighted, op=2)
        # ids, _ = self.query_margin()
        scores = []
        for ix, x in enumerate(self.Ux):
            if ix in ids[:self.optimize // 2]:
                informativeness = self.get_balance(x)
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def query_explote_explore(self):
        # Bandit-chosen epsilon: explore with query_random, exploit with disagreement.
        id_eps = self.mab.choose()
        eps = self.mab.algos[id_eps]
        # print("Chosen =", eps, "Expected =", sum([a*l for a, l in zip(self.mab.algos, self.mab.nb_choices)]) / sum(self.mab.nb_choices))
        rnd = random.uniform(0., 1.)
        # if rnd > eps: ids, scores = self.query_disagreement1(weighted=False)
        # if rnd > eps: ids, scores = self.query_disagreement1(weighted=True)
        if rnd > eps:
            ids, scores = self.query_disagreement2()
        # else: ids, scores = self.query_balance()
        else:
            ids, scores = self.query_random()
        reward = self.get_change(self.Ux[ids[0]], self.Uy[ids[0]])
        self.mab.update(id_eps, reward)
        return ids, scores

    # ---------------------------------------
    def get_disag1(self, x, weighted=False):
        true_y = self.Uy[self.Ux.index(x)]
        # true_y = self.clf.predict_label(x)
        temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method=self.clf.method)
        temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
        temp_clf.train()
        if not weighted:
            diff = sum([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                        for dp in self.Ux if dp != x])
        else:
            diff = sum([1. - abs(temp_clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))
                        if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                        for dp in self.Ux if dp != x])
        informativeness = diff
        return informativeness

    # ---------------------------------------
    def get_disag2(self, x, commitee, weighted=False):
        preds = Counter()
        if weighted:  # weight using the probability distribution of the committee
            for (clf, _) in commitee:
                if self.clf.predict_label(x) != clf.predict_label(x):
                    YP = zip(clf.h.classes_, clf.h.predict_proba(x)[0])
                    for (y, p) in YP:
                        preds[y] += p
            preds = preds.most_common()
            diff = 0. if preds == [] else preds[0][1]
        else:
            # confis = [clf.getPredictProba(1, x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
            labels = [clf.predict_label(x) for (clf, _) in commitee if self.clf.predict_label(x) != clf.predict_label(x)]
            preds = Counter(labels)
            preds = preds.most_common()
            diff = 0. if preds == [] else sum([pred[1] for pred in preds])
        informativeness = diff
        return informativeness

    # ---------------------------------------
    def query_disagreement_test(self):
        ids, _ = self.query_margin()
        scores = []
        plots_Y = []
        plots_X0 = []; plots_X1 = []; plots_X2 = []; plots_X3 = []
        plots_X4 = []; plots_X5 = []; plots_X6 = []
        viz = Visualize()
        commitee = []
        for idp, dp in enumerate(self.Ux):
            if idp in ids[:self.optimize]:
                true_y = self.Uy[idp]
                # true_y = self.clf.predict_label(dp)
                temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                commitee.append((temp_clf, 1))
        # ===========================
        # sampled = random.sample(ids, 100)
        for ix, x in enumerate(self.Ux):
            # if ix in sampled:
            if ix in ids[:self.optimize * 9999999]:
                informativeness1 = self.get_disag1(x, weighted=False)
                informativeness2 = self.get_disag2(x, commitee, weighted=False)
                informativeness3 = self.get_disag1(x, weighted=True)
                informativeness4 = self.get_disag2(x, commitee, weighted=True)
                informativeness5 = self.clf.uncertainty_prediction(x)
                informativeness6 = self.get_balance(x)
                temp_clf = Classification(self.Lx + [x], self.Ly + [self.Uy[ix]], method=self.clf.method)
                temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
                temp_clf.train()
                acc = temp_clf.getTestAccuracy(self.Tx, self.Ty)
                plots_X0.append(acc)
                plots_X1.append(informativeness1)
                plots_X2.append(informativeness2)
                plots_X3.append(informativeness3)
                plots_X4.append(informativeness4)
                plots_X5.append(informativeness5)
                plots_X6.append(informativeness6)
                plots_Y.append('r' if self.Uy[ix] != self.clf.predict_label(x) else 'b')

                # Scatter each informativeness measure against the others / accuracy.
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X1, plots_X2, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.1-2.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X3, plots_X4, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.3-4.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X1, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.1-acc.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X2, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.2-acc.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X3, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.3-acc.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X4, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.4-acc.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X5, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.5-acc.png'); plt.close()
                fig, axs = plt.subplots(1, 1, sharex=True)
                axs.scatter(plots_X6, plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                plt.savefig(str(len(self.Lx)) + self.datasetname + '.6-acc.png'); plt.close()

                # plots = [plots_X1, plots_X2, plots_X3, plots_X4, plots_X5, plots_X6]
                # fig, axs = plt.subplots(5, 1, sharex=True)
                # axs[0].scatter(Util.normalize(plots_X1), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # axs[1].scatter(Util.normalize(plots_X2), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # axs[2].scatter(Util.normalize(plots_X3), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # axs[3].scatter(Util.normalize(plots_X4), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # axs[4].scatter(Util.normalize(plots_X5), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # axs[5].scatter(Util.normalize(plots_X6), plots_X0, c=plots_Y, marker="o", cmap=plt.copper())
                # plt.savefig(str(len(self.Lx)) + self.datasetname + '.png')
                # plt.close()

                informativeness = acc
            else:
                informativeness = 0.
            scores.append(informativeness)
        return self.sort_scores(scores)

    # ---------------------------------------
    def get_balance(self, x):
        # y = self.Uy[self.Ux.index(x)]
        y = self.clf.predict_label(x)
        temp_clf = Classification(self.Lx + [x], self.Ly + [y], method=self.clf.method)
        temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
        temp_clf.train()
        cnt = Counter()
        for dp in self.Ux:
            cnt[temp_clf.predict_label(dp)] += 1. / len(self.Ux)
        P = [cnt[key] for key in cnt]
        # Entropy of the predicted class distribution (log base = number of classes).
        informativeness = -1.0 * sum([p * math.log(p, len(P)) for p in P if p > 0])
        return informativeness

    # ---------------------------------------
    def get_change(self, x, y=None):
        if y is None:
            y = self.Uy[self.Ux.index(x)]
            # y = self.clf.predict_label(x)
        temp_clf = Classification(self.Lx + [x], self.Ly + [y], method=self.clf.method)
        temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
        temp_clf.train()
        v1 = [self.clf.getPredictProba(1, dp) for dp in self.Ux if x != dp]
        v2 = [temp_clf.getPredictProba(1, dp) for dp in self.Ux if x != dp]
        # informativeness = Util.dist(v1, v2)
        informativeness = math.acos(cosine_similarity(v1, v2)) / math.pi
        # v1 = []; v2 = []
        # for dp in self.Ux:
        #     if x != dp:
        #         v1 += [v for v in self.clf.h.predict_proba(dp)[0]]
        #         v2 += [v for v in temp_clf.h.predict_proba(dp)[0]]
        # informativeness = distance.cosine(v1, v2)
        return informativeness
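# A plausible way to drive the class above; the data variables and the
# Classification/Util/Visualize helpers are assumed to come from the
# surrounding project, so this is only an illustrative sketch:
# al = ActiveLearning(Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=251, optimize=50)
# al.train(mtd="disag3", backupfile="backup_disag3.txt")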
def solve_bandit_randomly(bandit, timesteps=1000):
    '''
    Choose random actions on a k-arm bandit for a certain number of timesteps,
    keeping track of accumulated reward. Use for benchmarking against more
    intelligent methods.
    '''
    n_steps = 0
    average_reward = 0
    for _ in range(timesteps):
        arm_i = random.randint(0, bandit.k - 1)
        reward = bandit.crank_arm(arm_i)
        # incremental update of the average reward
        n_steps += 1
        average_reward += 1 / n_steps * (reward - average_reward)
    reward_ratio = max(average_reward / bandit.max_possible_expected_reward() * 100, 0)
    return average_reward, reward_ratio


# observe some runs
bandit = Bandit(5)
print(bandit.max_possible_expected_reward())
print(solve_bandit_randomly(bandit))
print(solve_bandit(bandit))
print(solve_bandit(bandit))
print()

bandit = Bandit(5)
print(bandit.max_possible_expected_reward())
print(solve_bandit_randomly(bandit))
print(solve_bandit(bandit))
print(solve_bandit(bandit))
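# The benchmark above assumes a Bandit exposing k, crank_arm, and
# max_possible_expected_reward (solve_bandit is defined elsewhere). A minimal
# sketch consistent with those calls; the Gaussian arms are an assumption.
import numpy as np

class Bandit:
    """k-armed bandit whose arms pay unit-variance Gaussian rewards."""
    def __init__(self, k):
        self.k = k
        self.arm_means = np.random.randn(k)  # assumed: random true means

    def crank_arm(self, arm_i):
        return np.random.randn() + self.arm_means[arm_i]

    def max_possible_expected_reward(self):
        return float(self.arm_means.max())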
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 22 14:45:55 2020

Upper Confidence Bound 1 (UCB1) implementation

@author: Aditya Ojha
"""

### Libraries needed
from Bandit import Bandit
import numpy as np
import matplotlib.pyplot as plt
import random

winrates = [0.1, .4, .6, .7]
bandits = []
for winrate in winrates:
    bandits.append(Bandit(winrate))

win_list = []
total_time_steps = int(1e5)
band_n = [0.1, 0.1, 0.1, 0.1]  # number of times each bandit was chosen (0.1 avoids dividing by zero)
win_avg = [0, 0, 0, 0]  # running average reward per bandit
win_UCB = [0, 0, 0, 0]  # UCB1 score per bandit
wins = 0  # total number of wins
best_bandit = 0  # guess that the best bandit is zero
data = []
cum_avg = 0

for t in range(total_time_steps):
    band_choosen = np.argmax(win_UCB)
    reward = bandits[band_choosen].pull_arm()  # pull the slot machine's arm
    data.append(reward)
    wins += reward  # update total wins
    band_n[band_choosen] += 1  # increment counter for this bandit
    # Assumed completion (the original snippet is truncated here): the standard
    # UCB1 update, i.e. the incremental mean for the chosen arm followed by
    # score = mean + sqrt(2*ln(t) / n_j) for every arm.
    win_avg[band_choosen] += (reward - win_avg[band_choosen]) / band_n[band_choosen]
    for j in range(len(bandits)):
        win_UCB[j] = win_avg[j] + np.sqrt(2 * np.log(t + 1) / band_n[j])
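# The Bandit imported above takes a win rate and exposes pull_arm(); a minimal
# Bernoulli sketch consistent with that usage (the class body is an assumption):
import numpy as np

class Bandit:
    """Bernoulli arm: pays 1 with probability winrate, else 0."""
    def __init__(self, winrate):
        self.winrate = winrate

    def pull_arm(self):
        return 1 if np.random.random() < self.winrate else 0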
# epsilon, array of bandits, number of trials
def epsilonGreedy(epsilon, bandits, iterations):
    bestBandit = 0
    totalReward = 0
    for i in range(iterations):
        # Uniform draw in [0, 1) for the explore test (np.random.randn(), a
        # standard normal draw, would make the comparison against epsilon meaningless).
        if np.random.random() <= epsilon:
            # explore: pick a bandit at random
            choice = np.random.choice(len(bandits))
            bandit = bandits[choice]
        else:
            # exploit: pull the current best bandit
            bandit = bandits[bestBandit]
        reward = bandit.pull()
        totalReward += reward
        bandit.updateMean(reward)
        bestBandit = np.argmax([b.xBar for b in bandits])
    return totalReward


bandits = []
for i in range(4):
    bandits.append(Bandit(i))

# Note: the same bandit objects, and their learned estimates, carry over
# between these three calls.
print(epsilonGreedy(0.1, bandits, 10000))
print(epsilonGreedy(0.01, bandits, 10000))
print(epsilonGreedy(0.001, bandits, 10000))
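# A minimal sketch of the Bandit these calls assume, where xBar is the running
# sample mean (the Gaussian reward around the constructor argument is an assumption):
import numpy as np

class Bandit:
    def __init__(self, true_mean):
        self.true_mean = true_mean
        self.xBar = 0.0  # running sample mean of observed rewards
        self.n = 0

    def pull(self):
        return np.random.randn() + self.true_mean  # assumed reward model

    def updateMean(self, reward):
        self.n += 1
        self.xBar += (reward - self.xBar) / self.n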
class OnlineActiveLearning:
    def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=1000):
        self.Lx0 = Lx[:]
        self.Ly0 = Ly[:]
        self.Lx = Lx
        self.Ly = Ly
        self.Ux = Ux  # TODO should not be here
        self.Uy = Uy  # TODO should not be here
        self.Tx = Tx  # TODO should not be here
        self.Ty = Ty  # TODO should not be here

        self.th = 0.9
        self.queried = 0
        self.queries = []
        self.ths = []
        self.infos = []
        self.accuracys = []

        self.clf = Classification(self.Lx, self.Ly, method=method, Vx=Lx + Ux, Vy=Ly + Uy)
        self.clf.train()

        self.sup_infos = []  # TODO should not be here
        self.sup_accuracys = []  # TODO should not be here
        self.sup_clf = Classification(self.Lx, self.Ly, method=method, Vx=Lx + Ux, Vy=Ly + Uy)
        self.sup_clf.train()  # TODO should not be here

        # self.mab = Bandit(algos=np.arange(0., 1.1, 0.1), method="UCB", alpha=1)
        self.mab = Bandit(algos=np.arange(0., 1.1, 0.1), method="reinforcement", alpha=1)

    # ---------------------------------------
    def train(self, mtd="margin", backupfile="backupfile.txt"):
        for i, x in enumerate(self.Ux):
            y1 = self.clf.predict_label(x)
            if mtd == "supervised":
                informativeness = sys.float_info.max
            if mtd == "margin":
                informativeness = self.clf.uncertainty_margin(x)

            # ===============================
            # Let the bandit pick the query threshold for this step.
            id_th = self.mab.choose()
            self.th = self.mab.algos[id_th]
            print("Chosen =", self.th, "nb_choices =", self.mab.nb_choices,
                  ("avg rwd=", [np.mean(L) for L in self.mab.rewards] if self.mab.rewards[0] != [] else " "),
                  "expected=", sum([a * l for a, l in zip(self.mab.algos, self.mab.nb_choices)]) / sum(self.mab.nb_choices))
            prev_clf = Classification(self.Lx, self.Ly, method=self.clf.method)
            prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C
            prev_clf.train()
            # ===============================
            # avg_rewards = [np.mean(L[:-20]) if len(L) > 0 else 1. for L in self.mab.rewards]
            # self.th = sum([a * l for a, l in zip(self.mab.algos, avg_rewards)]) / sum(avg_rewards)
            # print("Chosen =", self.th, "avg rwd=", avg_rewards)
            # ===============================

            if informativeness > self.th:
                qx = x
                qy = self.Uy[i]
                self.Lx.append(qx)
                self.Ly.append(qy)
                self.queried += 1
                self.clf.X = self.Lx
                self.clf.Y = self.Ly
                self.clf.train()

            # ===============================
            # Reward the chosen threshold for keeping the query rate near 10%.
            reward = 1. - abs(0.1 - self.queried / (i + 1.))
            self.mab.update(id_th, reward)
            # ===============================
            # for idt in range(len(self.mab.algos)):
            #     reward = 1. - abs(0.3 - (self.queried - 1 + 1) / (i + 1.)) if informativeness > self.mab.algos[idt] else 1. - abs(0.4 - (self.queried - 1) / (i + 1.))
            #     self.mab.update(idt, reward)
            # ===============================

            self.ths.append(self.th)
            self.infos.append(informativeness)
            self.accuracys.append(self.clf.getTestAccuracy(self.Tx, self.Ty))
            self.queries.append(self.queried)

            self.sup_infos.append(self.sup_clf.uncertainty_margin(x))  # TODO should not be here
            self.sup_clf.X = self.Lx0 + self.Ux[:i + 1]
            self.sup_clf.Y = self.Ly0 + self.Uy[:i + 1]
            self.sup_clf.train()  # TODO should not be here
            self.sup_accuracys.append(self.sup_clf.getTestAccuracy(self.Tx, self.Ty))  # TODO should not be here

            '''
            if i > 10:
                # last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:]
                # self.th = np.mean( last_infos )
                if informativeness > self.th:  # queried
                    if y1 == qy:  # but was correctly predicted
                        self.th = self.th + 0.1 * (informativeness - self.th)
                else:
                    if y1 != qy:
                        self.th = self.th - 0.1 * (self.th - informativeness)
            '''

            print("i=", i + 1, self.queried, self.queried / (i + 1.),
                  "-- acc=%.4f" % (self.accuracys[-1] * 100), "%.4f" % (self.sup_accuracys[-1] * 100),
                  "-- %.4f" % (np.mean(self.accuracys) * 100),
                  "%.4f" % (np.average(self.accuracys, weights=range(1, 1 + len(self.accuracys))) * 100),
                  "--", informativeness)

            if (i + 1) % 10 == 0:
                Util.pickleSave(backupfile, self)
                viz = Visualize()

                viz.do_plot([range(len(self.infos)), self.ths], color='b', marker='-')
                viz.do_plot([range(len(self.infos)), self.infos], color='r', marker='-')
                viz.do_plot([range(len(self.sup_infos)), self.sup_infos], color='y', marker='-')
                viz.end_plot(fig=backupfile + "_stream_inf.png")

                viz.do_plot([range(len(self.accuracys)), self.accuracys], color='r', marker='-')
                viz.do_plot([range(len(self.sup_accuracys)), self.sup_accuracys], color='y', marker='-')
                viz.end_plot(fig=backupfile + "_stream_acc.png")

                viz.do_plot([range(len(self.queries)), self.queries], color='r', marker='-')
                viz.do_plot([range(len(self.queries)), range(len(self.queries))], color='y', marker='-')
                viz.end_plot(fig=backupfile + "_stream_lab.png")
                '''
                colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90']
                viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' )
                viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' )
                viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' )
                viz.end_plot( fig = backupfile+"_.png" )
                '''

    # ---------------------------------------
    def get_change(self, prev_clf, curr_clf, U):
        v1 = [prev_clf.getPredictProba(1, dp) for dp in U]
        v2 = [curr_clf.getPredictProba(1, dp) for dp in U]
        if v1 == v2:
            return 0.
        return math.acos(cosine_similarity(v1, v2)) / math.pi
def main(args):
    steps = 10000
    runs = 300

    # average rewards over time steps
    avgReward1 = np.zeros(steps)
    avgReward2 = np.zeros(steps)
    # percent of optimal actions
    opActions1 = np.zeros(steps)
    opActions2 = np.zeros(steps)

    # validate command line args
    if len(args) != 2:
        print("Error: Output file not provided")
        print("Usage: driver.py result.out")
        exit()

    """ Bandits using sample averages """
    for r in range(runs):
        bandit = Bandit(steps)
        for step in range(1, steps + 1):
            reward = bandit.takeStep(step)
            avgReward1[step - 1] += reward
        opActions1 += bandit.getOpActions()
    opActions1 /= runs
    avgReward1 /= runs

    """ Bandits using step size parameters """
    for r in range(runs):
        bandit = Bandit(steps)
        for step in range(1, steps + 1):
            reward = bandit.takeStep(step, stepSize=True)
            avgReward2[step - 1] += reward
        opActions2 += bandit.getOpActions()
    opActions2 /= runs
    avgReward2 /= runs

    # save data in file ('with' closes the file even when an error occurs)
    try:
        with open(args[1], 'w') as f:
            np.savetxt(f, (avgReward1, opActions1), newline="\n")
            np.savetxt(f, (avgReward2, opActions2), newline="\n")
    except FileNotFoundError as e:
        print(f"Error: {e.strerror}")
def run_experiment(m1, m2, m3, N, method="eps", eps=None, decay=True, upper_limit=None):
    """
    m1, m2, m3 = means of the three bandits to be compared
    eps = epsilon for Epsilon-Greedy
    upper_limit = initial value for the mean reward estimate
    N = int, the number of times we pull

    Returns the cumulative average after every play.
    """
    data = np.empty(N)

    if method == "eps":
        b1 = Bandit(m1, eps=eps)
        b2 = Bandit(m2, eps=eps)
        b3 = Bandit(m3, eps=eps)
        bandits = [b1, b2, b3]
        for i in range(N):
            if decay:
                eps = 1 / (i + 0.01)
            p = np.random.random()
            # Epsilon-greedy part
            if p < eps:
                chosen = random.choice([0, 1, 2])
                target = bandits[chosen]
            else:
                bandits_means = [bi.mean for bi in bandits]
                target = bandits[np.argmax(bandits_means)]
            new_reward = target.pull()
            target.update(new_reward)
            data[i] = new_reward

    elif method == "upper_limit" or method == "ucb1":
        b1 = Bandit(m1, upper_limit=upper_limit, method=method)
        b2 = Bandit(m2, upper_limit=upper_limit, method=method)
        b3 = Bandit(m3, upper_limit=upper_limit, method=method)
        bandits = [b1, b2, b3]
        if method == "upper_limit":
            for i in range(N):
                bandits_means = [bi.mean for bi in bandits]
                target = bandits[np.argmax(bandits_means)]
                new_reward = target.pull()
                target.update(new_reward)
                data[i] = new_reward
        elif method == "ucb1":
            for i in range(N):
                # UCB1 score: estimated mean plus the exploration bonus
                # sqrt(2*ln(t) / n_i), where t is the number of plays so far
                # and n_i the number of pulls of arm i; the small constant
                # guards against division by zero.
                bandits_means = [
                    bi.mean + sqrt(2 * log(i + 1) / (bi.N + pow(10, -5)))
                    for bi in bandits
                ]
                target = bandits[np.argmax(bandits_means)]
                new_reward = target.pull()
                target.update(new_reward)
                data[i] = new_reward
    else:
        raise ValueError("The explore-exploit method chosen is not recognized")

    cumulative_avg = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_avg, label=method)
    plt.plot(np.ones(N) * m1)
    plt.plot(np.ones(N) * m2)
    plt.plot(np.ones(N) * m3)
    plt.xlabel("Iteration")
    plt.ylabel("Reward mean")

    print(b1.mean)
    print(b2.mean)
    print(b3.mean)
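# A possible comparison driver for run_experiment above; the means, horizon,
# and the plt.legend()/plt.show() calls are illustrative assumptions:
if __name__ == "__main__":
    N = 100000
    run_experiment(1.0, 2.0, 3.0, N, method="eps", eps=0.1, decay=True)
    run_experiment(1.0, 2.0, 3.0, N, method="upper_limit", upper_limit=10)
    run_experiment(1.0, 2.0, 3.0, N, method="ucb1", upper_limit=10)
    plt.legend()
    plt.show()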