Example #1
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)
    self.budget += reward  # track the cumulated reward (budget)
    # incremental update of the empirical mean of this arm
    self.estmeans[arm] = (self.estmeans[arm] *
                          (self.pulls[arm] - 1) + reward) / self.pulls[arm]
    if reward > 0:
        self.successes[arm] += 1  # count positive rewards as successes
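This fragment presupposes that the surrounding class defines budget, estmeans, and successes, and inherits pulls from UCB. A minimal sketch of the constructor such a subclass would need (the class name is hypothetical, chosen for illustration):

import numpy as np
from SMPyBandits.Policies import UCB

class BudgetedMeanUCB(UCB):  # hypothetical name, not an SMPyBandits class
    def __init__(self, nbArms):
        super().__init__(nbArms)
        self.budget = 0.0                             # cumulated reward so far
        self.estmeans = np.zeros(nbArms)              # empirical mean per arm
        self.successes = np.zeros(nbArms, dtype=int)  # positive-reward counts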
Example #2
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)
    if reward >= 0:
        # incremental update of the mean of non-negative rewards
        self.positive_count[arm] += 1
        self.positive_mean[arm] = (self.positive_mean[arm] *
                                   (self.positive_count[arm] - 1) +
                                   reward) / self.positive_count[arm]
    else:
        # incremental update of the mean of negative rewards
        self.negative_count[arm] += 1
        self.negative_mean[arm] = (self.negative_mean[arm] *
                                   (self.negative_count[arm] - 1) +
                                   reward) / self.negative_count[arm]
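Both this example and the previous one rely on the standard incremental mean update, which avoids storing past observations: after the n-th reward x_n recorded in a given bucket,

\bar{x}_n = \frac{(n-1)\,\bar{x}_{n-1} + x_n}{n} = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n}.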
Example #3
""" Example of use of SMPyBandits.
See https://SMPyBandits.GitHub.io/API.html for more details!"""
import numpy as np
np.random.seed(0)  # for reproducibility
from SMPyBandits.Arms import Bernoulli
arms = [Bernoulli(0.1), Bernoulli(0.9)]
from SMPyBandits.Environment import MAB
my_MAB_problem = MAB(arms)
nbArms = my_MAB_problem.nbArms  # 2 arms!
from SMPyBandits.Policies import UCB
my_UCB_algo = UCB(nbArms)
my_UCB_algo.startGame()  # reset internal memory

horizon = 1000
for t in range(horizon):  # simulation loop
    chosen_arm = my_UCB_algo.choice()
    observed_reward = my_MAB_problem.draw(chosen_arm)
    my_UCB_algo.getReward(chosen_arm, observed_reward)

cumulated_reward = sum(my_UCB_algo.rewards)  # random!
number_of_plays = sum(my_UCB_algo.pulls)  # horizon = 1000
mean_reward = cumulated_reward / number_of_plays
print("The UCB algorithm obtains here a mean reward =", mean_reward)
Example #4
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)
    self.budget += reward  # the only extra bookkeeping: a running budget
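This is the smallest variant of the pattern: the subclass only needs to initialize the budget. A complete, minimal sketch with usage (the class name is hypothetical):

from SMPyBandits.Policies import UCB

class UCBWithBudget(UCB):  # hypothetical name, for illustration only
    def __init__(self, nbArms):
        super().__init__(nbArms)
        self.budget = 0.0  # running total of collected rewards

    def getReward(self, arm, reward):
        UCB.getReward(self, arm, reward)
        self.budget += reward

algo = UCBWithBudget(2)
algo.startGame()
algo.getReward(0, 1.0)
print(algo.budget)  # -> 1.0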
Example #5
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)      # update the UCB statistics
    SafeAlg.getReward(self, arm, reward)  # also notify the safety component
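Here the observation is forwarded to two parent classes, which suggests a policy built by multiple inheritance: UCB handles the index statistics while a safety component keeps its own bookkeeping. A sketch of that shape (SafeAlg below is a stand-in, not a known SMPyBandits class):

from SMPyBandits.Policies import UCB

class SafeAlg:  # stand-in for the safety mix-in assumed by the snippet
    def getReward(self, arm, reward):
        pass    # e.g. update safety/constraint statistics here

class SafeUCB(UCB, SafeAlg):  # hypothetical combined policy
    def getReward(self, arm, reward):
        UCB.getReward(self, arm, reward)
        SafeAlg.getReward(self, arm, reward)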
Example #6
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)
    # keep every observed reward of this arm, in sorted order
    self.reward_samples[arm] = np.sort(
        np.append(self.reward_samples[arm], [reward]))
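Keeping the per-arm samples sorted makes order statistics cheap to read off, which is what quantile-based indices (e.g. for risk-aware bandits) need. A toy sketch, assuming reward_samples holds one sorted numpy array per arm:

import numpy as np

samples = np.sort(np.array([0.9, 0.1, 0.7, 0.4]))  # rewards of one arm, sorted
print(np.quantile(samples, 0.25))  # empirical lower quartile -> 0.325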
Example #7
def getReward(self, arm, reward):
    UCB.getReward(self, arm, reward)
    Budgeted.getReward(self, reward)  # the budget needs only the reward
    BernoulliEstimator.getReward(self, arm, reward)  # the estimator needs the arm too
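The differing call signatures are the point here: Budgeted tracks an arm-independent budget, so it only receives the reward, while BernoulliEstimator needs the arm as well. A sketch of what two such mix-ins could look like (their bodies are assumptions, not the SMPyBandits implementations):

import numpy as np

class Budgeted:  # assumed mix-in: a single running budget
    def __init__(self, nbArms):
        self.budget = 0.0
    def getReward(self, reward):
        self.budget += reward

class BernoulliEstimator:  # assumed mix-in: per-arm success counts
    def __init__(self, nbArms):
        self.successes = np.zeros(nbArms, dtype=int)
    def getReward(self, arm, reward):
        self.successes[arm] += int(reward > 0)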