def run_experiment(p1, p2, p3, N):
    """Run a Thompson-sampling experiment over three Bernoulli bandits.

    Args:
        p1, p2, p3: true win probabilities of the three bandits.
        N: number of trials to play.

    Plots the running average reward against each bandit's true rate.
    """
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        # Thompson sampling: draw one sample from each arm's posterior
        # and play the arm whose sample is largest.
        samples = [b.sample() for b in bandits]
        j = np.argmax(samples)
        x = bandits[j].pull()
        bandits[j].update(x)
        data[i] = x
        # NOTE: removed leftover debug print of the per-trial samples —
        # it produced N lines of console output per run.
    # Running mean of rewards after each trial.
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)
    # Labels added so plt.legend() has artists to show; previously it was
    # called with no labeled lines and emitted a warning instead of a legend.
    plt.plot(cumulative_average, label='running average')
    plt.plot(np.ones(N) * p1, label='p1')
    plt.plot(np.ones(N) * p2, label='p2')
    plt.plot(np.ones(N) * p3, label='p3')
    plt.ylim((0, 1))
    plt.xscale('log')  # early trials are where the interesting behaviour is
    plt.legend()
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Play three Bernoulli bandits for N rounds with Thompson sampling
    and plot the cumulative click-through rate against the true rates."""
    arms = [Bandit(p) for p in (p1, p2, p3)]
    rewards = np.empty(N)  # one 0/1 outcome ("click or not") per trial
    for trial in range(N):
        # One posterior draw per arm; greedily play the best-looking one.
        chosen = arms[np.argmax([arm.sample() for arm in arms])]
        outcome = chosen.pull()
        chosen.update(outcome)
        rewards[trial] = outcome
    # Cumulative CTR: total clicks so far divided by trials so far.
    cumulative_average_ctr = np.cumsum(rewards) / (np.arange(N) + 1)
    plt.plot(cumulative_average_ctr)
    for true_rate in (p1, p2, p3):
        plt.plot(np.ones(N) * true_rate)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Run a Thompson-sampling bandit experiment.

    Args:
        p1, p2, p3: true click probabilities, one per bandit.
        N: number of trials.

    Plots the moving-average CTR against the three true probabilities.
    """
    # define each bandit
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    # keep track of all the data we get: 1 for click, 0 for no-click
    data = np.empty(N)
    # BUG FIX: was xrange(N) — Python 2 only, raises NameError on Python 3;
    # range matches the other experiment functions in this file.
    for i in range(N):
        # Thompson sampling: play the arm with the largest posterior sample.
        j = np.argmax([b.sample() for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)
        # for the plot, keep track of x which is the data at i
        data[i] = x
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)
    # plot moving average ctr
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N) * p1)
    plt.plot(np.ones(N) * p2)
    plt.plot(np.ones(N) * p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N):
    'args: probabilities for 3 bandits and number of experiments.'
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        # Thompson sampling: one posterior draw per arm, play the highest.
        best_bandito = np.argmax([bandito.sample() for bandito in bandits])
        result = bandits[best_bandito].pull()
        # BUG FIX: the posterior was never updated, so every trial sampled
        # from the unchanged prior and the algorithm could not learn.
        # Feed the observed reward back into the chosen bandit.
        bandits[best_bandito].update(result)
        data[i] = result
    # Running click-through rate after each trial.
    cumulative_avg_ctr = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_avg_ctr)
    # Distinct line styles mark each bandit's true probability.
    plt.plot(np.ones(N) * p1, '--')
    plt.plot(np.ones(N) * p2, '-.')
    plt.plot(np.ones(N) * p3, ':')
    plt.ylim((0, .6))
    plt.xscale('log')
    plt.xlabel('Number of Trials')
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Thompson-sampling run over three bandits; plots the average of the
    decisions together with each bandit's true probability."""
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        winner = np.argmax([b.sample() for b in bandits])  # best posterior draw
        reward = bandits[winner].pull()      # pull that lever
        bandits[winner].update(reward)       # refine that arm's distribution
        data[i] = reward                     # store the outcome
    # Average reward observed up to each trial.
    running_mean = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(running_mean)
    for true_p in (p1, p2, p3):
        plt.plot(np.ones(N) * true_p)  # each bandit's true probability
    plt.ylim((0, 1))
    plt.xscale('log')  # log axis: easier to see given the iteration count
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Simulate N Thompson-sampling plays over three Bernoulli bandits and
    plot the moving-average CTR alongside each true rate."""
    bandits = [Bandit(p) for p in (p1, p2, p3)]
    data = np.empty(N)
    trial = 0
    while trial < N:
        # Thompson sampling step: sample every posterior, play the max.
        draws = [b.sample() for b in bandits]
        pick = int(np.argmax(draws))
        outcome = bandits[pick].pull()
        bandits[pick].update(outcome)
        data[trial] = outcome  # recorded for the plot
        trial += 1
    # Moving-average click-through rate.
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N) * p1)
    plt.plot(np.ones(N) * p2)
    plt.plot(np.ones(N) * p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N): bandits = [Bandit(p1), Bandit(p2), Bandit(p3)] data = np.empty(N)