示例#1
0
def run_experiment(p1, p2, p3, N):
    """Play three bandits for N rounds and plot the running mean result.

    Each round draws one sample from every bandit, pulls the bandit whose
    sample is largest, and feeds the observed result back to that bandit.
    The per-round samples and the final cumulative average are printed.

    Args:
        p1, p2, p3: Values handed to the three ``Bandit`` constructors and
            drawn as horizontal reference lines (presumably each bandit's
            true success probability -- confirm against ``Bandit``).
        N: Number of rounds to play.
    """
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)

    for i in range(N):
        b_list = [b.sample() for b in bandits]

        # Act on the bandit with the largest sample, then let it learn
        # from the observed result.
        j = np.argmax(b_list)
        x = bandits[j].pull()
        bandits[j].update(x)

        print(b_list)

        data[i] = x

    # Running mean of the results after 1, 2, ..., N rounds.
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)
    print(cumulative_average)

    # Every curve carries a label: without labelled artists plt.legend()
    # has nothing to show and only emits a warning.
    plt.plot(cumulative_average, label='cumulative average')
    plt.plot(np.ones(N) * p1, label='p1 = {}'.format(p1))
    plt.plot(np.ones(N) * p2, label='p2 = {}'.format(p2))
    plt.plot(np.ones(N) * p3, label='p3 = {}'.format(p3))

    plt.ylim((0, 1))
    plt.xscale('log')
    plt.legend()
    plt.show()
示例#2
0
def run_experiment(p1, p2, p3, N):
    """Run a three-bandit Thompson-sampling experiment and plot the result.

    For each of the N trials, one sample is drawn from every bandit, the
    bandit with the largest sample is pulled, and the observed outcome is
    passed back to that bandit. The cumulative average of the outcomes
    (the running click-through rate) is then plotted on a log-scaled x axis
    together with horizontal lines at p1, p2 and p3.

    Args:
        p1, p2, p3: Values used to construct the three ``Bandit`` objects.
        N: Number of trials.
    """
    arms = [Bandit(p) for p in (p1, p2, p3)]
    outcomes = np.empty(N)  # one entry per trial

    for trial in range(N):
        # Thompson sampling: act on the arm whose sample is the largest.
        draws = [arm.sample() for arm in arms]
        chosen = arms[np.argmax(draws)]
        outcome = chosen.pull()
        chosen.update(outcome)

        outcomes[trial] = outcome

    # Total of the outcomes so far divided by the number of trials so far.
    trials_so_far = np.arange(N) + 1
    cumulative_average_ctr = np.cumsum(outcomes) / trials_so_far

    plt.plot(cumulative_average_ctr)
    for level in (p1, p2, p3):
        plt.plot(np.ones(N) * level)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
示例#3
0
def run_experiment(p1, p2, p3, N):
    """Run a three-bandit Thompson-sampling experiment and plot the result.

    Args:
        p1, p2, p3: Values used to construct the three ``Bandit`` objects;
            also drawn as horizontal reference lines.
        N: Number of trials.
    """
    # define each bandit
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]

    # keep track of all the data we get. 1 for click, 0 for no-click
    data = np.empty(N)

    # ``range`` instead of ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    for i in range(N):
        # thompson sampling: act on the bandit with the largest sample
        j = np.argmax([b.sample() for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)

        # for the plot, keep track of x which is the data at i.
        data[i] = x

    # running mean of the outcomes after 1, 2, ..., N trials
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)

    # plot moving average ctr
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N) * p1)
    plt.plot(np.ones(N) * p2)
    plt.plot(np.ones(N) * p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
示例#4
0
def run_experiment(p1, p2, p3, N):
    """Run a three-bandit sampling experiment and plot the running average.

    Each round draws one sample from every bandit, pulls the bandit with the
    largest sample, and feeds the result back to that bandit so that later
    samples reflect what has been observed.

    Args:
        p1, p2, p3: Probabilities for the three bandits; also drawn as
            horizontal reference lines.
        N: Number of experiments (rounds).
    """
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]

    data = np.empty(N)

    for i in range(N):
        best_bandito = np.argmax([bandito.sample() for bandito in bandits])
        result = bandits[best_bandito].pull()
        # Feed the result back to the chosen bandit; without this step the
        # bandits never learn and the samples never change distribution.
        # NOTE(review): assumes Bandit exposes update(result) -- confirm
        # against the Bandit class used with this function.
        bandits[best_bandito].update(result)
        data[i] = result

    # Running mean of the results after 1, 2, ..., N rounds.
    cumulative_avg_ctr = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_avg_ctr)
    plt.plot(np.ones(N) * p1, '--')
    plt.plot(np.ones(N) * p2, '-.')
    plt.plot(np.ones(N) * p3, ':')
    plt.ylim((0, .6))
    plt.xscale('log')
    plt.xlabel('Number of Trials')
    plt.show()
示例#5
0
def run_experiment(p1, p2, p3, N):
    """Simulate N rounds over three bandits and plot the running average.

    In every round a sample is taken from each bandit, the bandit with the
    largest sample is pulled, and that bandit is updated with the outcome.
    The cumulative average of the outcomes is plotted on a log-scaled x axis
    alongside horizontal lines at p1, p2 and p3.

    Args:
        p1, p2, p3: Values used to construct the three ``Bandit`` objects.
        N: Number of rounds to simulate.
    """
    levels = (p1, p2, p3)
    bandits = [Bandit(level) for level in levels]
    data = np.empty(N)

    for step in range(N):
        samples = [bandit.sample() for bandit in bandits]
        winner = bandits[np.argmax(samples)]  # largest sample wins the round
        outcome = winner.pull()
        winner.update(outcome)
        data[step] = outcome

    # Divide the running total by the number of rounds played so far.
    rounds_played = np.arange(N) + 1
    cumulative_average = np.cumsum(data) / rounds_played

    plt.plot(cumulative_average)
    for level in levels:
        plt.plot(np.ones(N) * level)  # flat reference line per bandit
    plt.ylim((0, 1))
    plt.xscale('log')  # log axis keeps the early rounds readable
    plt.show()
def run_experiment(p1, p2, p3, N):
  """Run N Thompson-sampling trials over three bandits and plot the CTR.

  Per trial: sample every bandit, pull the one with the largest sample,
  update it with the outcome, and record the outcome. Afterwards the
  cumulative average of the recorded outcomes is plotted together with
  horizontal lines at p1, p2 and p3.

  Args:
    p1, p2, p3: Values used to construct the three ``Bandit`` objects.
    N: Number of trials.
  """
  params = [p1, p2, p3]
  bandits = [Bandit(param) for param in params]
  outcomes = np.empty(N)

  for trial in range(N):
    # thompson sampling: the bandit with the largest sample is played
    sampled = [bandit.sample() for bandit in bandits]
    picked = bandits[np.argmax(sampled)]
    outcome = picked.pull()
    picked.update(outcome)

    # recorded for the plot
    outcomes[trial] = outcome

  running_total = np.cumsum(outcomes)
  cumulative_average_ctr = running_total / (np.arange(N) + 1)

  # moving average ctr plus one flat reference line per bandit
  plt.plot(cumulative_average_ctr)
  for param in params:
    plt.plot(np.ones(N)*param)
  plt.ylim((0,1))
  plt.xscale('log')
  plt.show()
def run_experiment(p1, p2, p3, N):
  bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]

  data = np.empty(N)