def run_experiment(p1, p2, p3, N):
    """Run a Thompson-sampling experiment over three Bernoulli bandits.

    Args:
        p1, p2, p3: true win probabilities of the three bandits.
        N: number of trials to play.

    Plots the running average reward against each bandit's true rate.
    """
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        # Thompson sampling: draw one sample from each arm's posterior
        # and play the arm whose sample is largest.
        samples = [b.sample() for b in bandits]
        j = np.argmax(samples)
        x = bandits[j].pull()
        bandits[j].update(x)
        data[i] = x
        # NOTE: removed leftover debug print of the per-trial samples —
        # it produced N lines of console output per run.
    # Running mean of rewards after each trial.
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)
    # Labels added so plt.legend() has artists to show; previously it was
    # called with no labeled lines and emitted a warning instead of a legend.
    plt.plot(cumulative_average, label='running average')
    plt.plot(np.ones(N) * p1, label='p1')
    plt.plot(np.ones(N) * p2, label='p2')
    plt.plot(np.ones(N) * p3, label='p3')
    plt.ylim((0, 1))
    plt.xscale('log')  # early trials are where the interesting behaviour is
    plt.legend()
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Play three Bernoulli bandits for N rounds with Thompson sampling
    and plot the cumulative click-through rate against the true rates."""
    arms = [Bandit(p) for p in (p1, p2, p3)]
    rewards = np.empty(N)  # one 0/1 outcome ("click or not") per trial
    for trial in range(N):
        # One posterior draw per arm; greedily play the best-looking one.
        chosen = arms[np.argmax([arm.sample() for arm in arms])]
        outcome = chosen.pull()
        chosen.update(outcome)
        rewards[trial] = outcome
    # Cumulative CTR: total clicks so far divided by trials so far.
    cumulative_average_ctr = np.cumsum(rewards) / (np.arange(N) + 1)
    plt.plot(cumulative_average_ctr)
    for true_rate in (p1, p2, p3):
        plt.plot(np.ones(N) * true_rate)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Run a Thompson-sampling bandit experiment.

    Args:
        p1, p2, p3: true click probabilities, one per bandit.
        N: number of trials.

    Plots the moving-average CTR against the three true probabilities.
    """
    # define each bandit
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    # keep track of all the data we get: 1 for click, 0 for no-click
    data = np.empty(N)
    # BUG FIX: was xrange(N) — Python 2 only, raises NameError on Python 3;
    # range matches the other experiment functions in this file.
    for i in range(N):
        # Thompson sampling: play the arm with the largest posterior sample.
        j = np.argmax([b.sample() for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)
        # for the plot, keep track of x which is the data at i
        data[i] = x
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)
    # plot moving average ctr
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N) * p1)
    plt.plot(np.ones(N) * p2)
    plt.plot(np.ones(N) * p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N):
    'args: probabilities for 3 bandits and number of experiments.'
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        # Thompson sampling: one posterior draw per arm, play the highest.
        best_bandito = np.argmax([bandito.sample() for bandito in bandits])
        result = bandits[best_bandito].pull()
        # BUG FIX: the posterior was never updated, so every trial sampled
        # from the unchanged prior and the algorithm could not learn.
        # Feed the observed reward back into the chosen bandit.
        bandits[best_bandito].update(result)
        data[i] = result
    # Running click-through rate after each trial.
    cumulative_avg_ctr = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_avg_ctr)
    # Distinct line styles mark each bandit's true probability.
    plt.plot(np.ones(N) * p1, '--')
    plt.plot(np.ones(N) * p2, '-.')
    plt.plot(np.ones(N) * p3, ':')
    plt.ylim((0, .6))
    plt.xscale('log')
    plt.xlabel('Number of Trials')
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Thompson-sampling run over three bandits; plots the average of the
    decisions together with each bandit's true probability."""
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]
    data = np.empty(N)
    for i in range(N):
        winner = np.argmax([b.sample() for b in bandits])  # best posterior draw
        reward = bandits[winner].pull()      # pull that lever
        bandits[winner].update(reward)       # refine that arm's distribution
        data[i] = reward                     # store the outcome
    # Average reward observed up to each trial.
    running_mean = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(running_mean)
    for true_p in (p1, p2, p3):
        plt.plot(np.ones(N) * true_p)  # each bandit's true probability
    plt.ylim((0, 1))
    plt.xscale('log')  # log axis: easier to see given the iteration count
    plt.show()
def run_experiment(p1, p2, p3, N):
    """Simulate N Thompson-sampling plays over three Bernoulli bandits and
    plot the moving-average CTR alongside each true rate."""
    bandits = [Bandit(p) for p in (p1, p2, p3)]
    data = np.empty(N)
    trial = 0
    while trial < N:
        # Thompson sampling step: sample every posterior, play the max.
        draws = [b.sample() for b in bandits]
        pick = int(np.argmax(draws))
        outcome = bandits[pick].pull()
        bandits[pick].update(outcome)
        data[trial] = outcome  # recorded for the plot
        trial += 1
    # Moving-average click-through rate.
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N) * p1)
    plt.plot(np.ones(N) * p2)
    plt.plot(np.ones(N) * p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()
def run_experiment(p1, p2, p3, N): bandits = [Bandit(p1), Bandit(p2), Bandit(p3)] data = np.empty(N)