def __init__(self, parameter_list=None):
    """Build one finite arm per entry of ``parameter_list``.

    Args:
        parameter_list: Iterable of two-element entries ``(x, p)``. Each
            entry is unpacked and forwarded as the two positional arguments
            of ``arms.ArmFinite``. When omitted (``None``), the single
            entry ``(0, 1)`` is used.
            NOTE(review): presumably ``x`` is the arm's support and ``p``
            the matching probabilities -- confirm against ``arms.ArmFinite``.

    Attributes set:
        parameter_list: The list actually used (the default when the
            argument was ``None``).
        arms: The ``arms.ArmFinite`` instances, in input order.
        n_arms: Number of arms created.
    """
    # Resolve the default up front so that every later statement works on
    # the same, never-None, sequence. Previously the arms were built from
    # the raw argument, which failed whenever the default was requested.
    if parameter_list is None:
        parameter_list = [(0, 1)]
    self.parameter_list = parameter_list
    self.arms = [arms.ArmFinite(x, p) for x, p in self.parameter_list]
    self.n_arms = len(self.arms)
def construct_non_parametric_MAB():
    """Return a list of five arms drawn from four different arm classes.

    The bandit contains, in order: ``ArmBernoulli(0.30)``,
    ``ArmBeta(0.5, 0.5)``, ``ArmBeta(1., 3.)``, ``ArmExp(1.)`` and an
    ``ArmFinite`` built from two four-element arrays. The first four arms
    each receive a ``random_state`` drawn from ``np.random.randint``; the
    ``ArmFinite`` arm is created without one.

    Returns:
        list: ``[arm1, arm2, arm3, arm4, arm5]`` in the order above.
    """
    def draw_seed():
        # One fresh integer per arm, taken from numpy's global generator.
        return np.random.randint(1, 312414)

    # Seeds are drawn immediately before each constructor call, in list
    # order, so the global generator is consumed exactly once per arm.
    bandit = [
        arms.ArmBernoulli(0.30, random_state=draw_seed()),
        arms.ArmBeta(0.5, 0.5, random_state=draw_seed()),
        arms.ArmBeta(1., 3., random_state=draw_seed()),
        arms.ArmExp(1., random_state=draw_seed()),
    ]

    # The last arm takes two parallel arrays and no explicit random_state.
    first_array = np.array([0., 0.1, 0.5, 0.8])
    second_array = np.array([0.2, 0.3, 0.4, 0.1])
    bandit.append(arms.ArmFinite(first_array, second_array))
    return bandit
plt.figure(3) plt.clf() plt.plot(list_t, R[0], label='Expected regret of UCB1') plt.plot(list_t, R[1], label='Expected regret of TS') plt.plot(list_t, R[2], label='Eps_Greedy') plt.plot(list_t,oracle, label='Oracle') # we display plt.legend() ## Question 1: arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414)) arm2 = arms.ArmBeta(0.20, 0.30, random_state=np.random.randint(1, 312414)) arm3 = arms.ArmExp(0.25, random_state=np.random.randint(1, 312414)) arm4 = arms.ArmFinite(np.array([0.3,0.5,0.2]), np.array([0.5,0.1,0.4]), random_state=np.random.randint(1, 312414)) MAB = [arm1, arm2, arm3, arm4] def TS_non_binarity(T,MAB): nb_arms = len(MAB) rew, draw = np.zeros(T), np.zeros(T) N = np.zeros(nb_arms) # number of draws of arms up to time t S = np.zeros_like(N) # sum of rewards gathered up to time t tau = np.zeros(nb_arms) for t in range(T): for a in range(nb_arms): if N[a] == 0: tau[a] = np.random.rand() else:
ax2.plot(np.arange(1, T + 1), mu_max1 * np.arange(1, T + 1) - np.cumsum(r2), label='rho=' + str(rho)) plt.legend() plt.show() ##################### Question 2 - Implementation ######################### # (Expected) regret curve for UCB and Thompson Sampling arm1 = arms.ArmBernoulli(0.50, random_state=np.random.randint(1, 312414)) arm2 = arms.ArmBeta(0.3, 0.45, random_state=np.random.randint(1, 312414)) arm3 = arms.ArmExp(0.20, random_state=np.random.randint(1, 312414)) arm4 = arms.ArmExp(0.10, random_state=np.random.randint(1, 312414)) arm5 = arms.ArmBernoulli(0.1, random_state=np.random.randint(1, 312414)) arm6 = arms.ArmFinite(X=np.array([0.1, 0.3, 0.7, 0.8]), P=np.array([0.2, 0.4, 0.1, 0.3]), random_state=np.random.randint(1, 312414)) MAB = [arm1, arm2, arm3, arm4, arm5, arm6] print("Means of diversified MAB arms (respectively)") for a in MAB: print(a.mean) # bandit : set of arms nb_arms = len(MAB) means = [el.mean for el in MAB] mu_max = np.max(means) def TSnonbinary(T, MAB, N=50): draws = np.zeros((N, T)) rew = np.zeros((N, T))