Exemplos de ArmFinite em Python, exemplos de arms.ArmFinite em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: bandits.py Projeto: GeoffNN/BanditsGameTheoryRL

 def __init__(self, parameter_list=None):
     """Create one finite-support arm per ``(x, p)`` pair.

     Args:
         parameter_list: iterable of ``(x, p)`` pairs; each pair is
             forwarded as ``arms.ArmFinite(x, p)``. When omitted (``None``)
             the single pair ``(0, 1)`` is used.
     """
     if parameter_list is None:
         parameter_list = [(0, 1)]
     self.parameter_list = parameter_list
     # Build the arms from the resolved list: iterating the raw argument
     # would raise TypeError whenever the default (None) is used.
     self.arms = [arms.ArmFinite(x, p) for x, p in self.parameter_list]
     self.n_arms = len(self.arms)

Exemplo n.º 2

0

Exibir arquivo

def construct_non_parametric_MAB():
    """Return a list of five arms of mixed reward distributions.

    The list contains, in order: one Bernoulli arm, two Beta arms, one
    exponential arm and one finite-support arm. The first four arms each
    receive a ``random_state`` drawn from ``np.random.randint(1, 312414)``;
    the finite-support arm is built without an explicit ``random_state``.
    """
    def draw_seed():
        # One draw from NumPy's global RNG per seeded arm, in creation order.
        return np.random.randint(1, 312414)

    bernoulli = arms.ArmBernoulli(0.30, random_state=draw_seed())
    beta_half = arms.ArmBeta(0.5, 0.5, random_state=draw_seed())
    beta_skewed = arms.ArmBeta(1., 3., random_state=draw_seed())
    exponential = arms.ArmExp(1., random_state=draw_seed())

    # Finite arm: the second array sums to 1 (0.2 + 0.3 + 0.4 + 0.1).
    support = np.array([0., 0.1, 0.5, 0.8])
    weights = np.array([0.2, 0.3, 0.4, 0.1])
    finite = arms.ArmFinite(support, weights)

    return [bernoulli, beta_half, beta_skewed, exponential, finite]

Exemplo n.º 3

0

Exibir arquivo

Arquivo: mainTP2_SMAB.py Projeto: VincentPlassier/Reinforcement-Learning-MVA


# Figure 3: one curve per strategy stored in R, plus the oracle curve.
plt.figure(3)
plt.clf()
for _k, _label in enumerate(('Expected regret of UCB1',
                             'Expected regret of TS',
                             'Eps_Greedy')):
    plt.plot(list_t, R[_k], label=_label)
# The oracle curve is drawn last so it ends up last in the legend.
plt.plot(list_t, oracle, label='Oracle')
plt.legend()


## Question 1:
# Build four arms of different distribution families. Each arm gets its own
# random_state drawn from NumPy's global RNG, so the draws below happen in
# this exact order.
arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.20, 0.30, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.25, random_state=np.random.randint(1, 312414))
# NOTE(review): presumably (support values, probabilities); the second array
# sums to 1 (0.5 + 0.1 + 0.4) -- confirm against arms.ArmFinite.
arm4 = arms.ArmFinite(np.array([0.3,0.5,0.2]), np.array([0.5,0.1,0.4]), random_state=np.random.randint(1, 312414))

# The bandit problem is represented as a plain list of arms.
MAB = [arm1, arm2, arm3, arm4]


def TS_non_binarity(T,MAB):
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms) # number of draws of arms up to time t
    S = np.zeros_like(N) # sum of rewards gathered up to time t
    tau = np.zeros(nb_arms)
    for t in range(T):
        for a in range(nb_arms):
            if N[a] == 0:
                tau[a] = np.random.rand()
            else:

Exemplo n.º 4

0

Exibir arquivo

Arquivo: mainTP2_SMAB.py Projeto: AmineKheldouni/Reinforcement-Learning-Overview

    # Plot mu_max1 * t minus the cumulative sum of r2 for t = 1..T, labelled
    # with the current rho.
    # NOTE(review): presumably the cumulative regret for this rho; the
    # enclosing loop is not visible in this excerpt -- confirm.
    ax2.plot(np.arange(1, T + 1),
             mu_max1 * np.arange(1, T + 1) - np.cumsum(r2),
             label='rho=' + str(rho))
# Add the legend and show the figure(s).
plt.legend()
plt.show()

##################### Question 2 - Implementation #########################

# (Expected) regret curve for UCB and Thompson Sampling
# Six arms of mixed distribution families; each one receives its own
# random_state drawn from NumPy's global RNG, in this order.
arm1 = arms.ArmBernoulli(0.50, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.3, 0.45, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.20, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmExp(0.10, random_state=np.random.randint(1, 312414))
arm5 = arms.ArmBernoulli(0.1, random_state=np.random.randint(1, 312414))
# Finite arm: P sums to 1 (0.2 + 0.4 + 0.1 + 0.3).
# NOTE(review): presumably X holds the reward values and P their
# probabilities -- confirm against arms.ArmFinite.
arm6 = arms.ArmFinite(X=np.array([0.1, 0.3, 0.7, 0.8]),
                      P=np.array([0.2, 0.4, 0.1, 0.3]),
                      random_state=np.random.randint(1, 312414))

MAB = [arm1, arm2, arm3, arm4, arm5, arm6]
print("Means of diversified MAB arms (respectively)")
# Print each arm's mean, one per line, in list order.
for a in MAB:
    print(a.mean)
# bandit : set of arms
nb_arms = len(MAB)
# Collect the arm means and keep the largest one.
means = [el.mean for el in MAB]
mu_max = np.max(means)


def TSnonbinary(T, MAB, N=50):
    draws = np.zeros((N, T))
    rew = np.zeros((N, T))