    def analyse_suboptimal_arm_pulls(self):
        # Compute the gaps (deltas) and the theoretical upper bound on the number of pulls of each sub-optimal arm.
        self.best_arm = mh.get_maximum_index(self.true_means)
        mean_of_best_arm = self.true_means[self.best_arm]

        for i in range(self.K):
            self.deltas[i] = mean_of_best_arm - self.true_means[i]

        del_sq_invs = mh.get_instance_dependent_square_inverses(
            self.deltas, self.best_arm)

        addi_constant = rvh.func_of_pi(add=1, power=2, mult=1 / 3)

        time_series = np.arange(self.T + 1)

        logarithmic_time_series = rvh.natural_logarithm(time_series)

        del_sq_inv_row_matrix = np.reshape(np.array(del_sq_invs), (1, -1))
        logarithmic_time_series_column_matrix = np.reshape(
            logarithmic_time_series, (-1, 1))

        # Column (T+1, 1) times row (1, K): entry (t, i) is ln(t) * del_sq_invs[i].
        matrix = np.dot(logarithmic_time_series_column_matrix,
                        del_sq_inv_row_matrix)

        self.theoretical_bounds_arm_pulls = matrix + addi_constant
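The reshape-and-dot above is just an outer product between the log-time column and the per-arm row. A minimal standalone sketch in plain NumPy, assuming func_of_pi(add=1, power=2, mult=1 / 3) evaluates to 1 + pi**2 / 3 and using placeholder per-arm values:

import numpy as np

T = 1000
del_sq_invs = np.array([0.0, 25.0, 4.0])  # placeholder per-arm terms; 0.0 for the best arm
addi_constant = 1 + np.pi ** 2 / 3        # assumed value of func_of_pi(add=1, power=2, mult=1 / 3)

log_t = np.log(np.arange(1, T + 1))       # start at t = 1 to avoid log(0)
bounds = np.outer(log_t, del_sq_invs) + addi_constant  # shape (T, K)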
Example #2
    def test_library_random_variables(self):

        # Uniform distribution: 10 samples from U(0, 1).
        result_1 = rvh.get_uniform_sample(0, 1, 10)

        # Bernoulli distribution: a single draw with p = 0.8.
        result_2 = rvh.get_bernoulli_sample(0.8)

        # Bernoulli distribution: 10 draws with p = 0.5.
        result_3 = rvh.get_bernoulli_sample(p=0.5, size=10)

        # Basic sanity checks (assuming a scalar draw for result_2 and
        # length-10 samples otherwise).
        assert len(result_1) == 10
        assert result_2 in (0, 1)
        assert len(result_3) == 10
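The helpers above are only exercised, not shown. A plausible implementation consistent with these calls, assuming they wrap NumPy's generators (the real rvh module may differ):

import numpy as np

_rng = np.random.default_rng()

def get_uniform_sample(low, high, size=None):
    # Hypothetical stand-in for rvh.get_uniform_sample.
    return _rng.uniform(low, high, size)

def get_bernoulli_sample(p, size=None):
    # Hypothetical stand-in for rvh.get_bernoulli_sample: a Bernoulli(p)
    # draw is a binomial with a single trial.
    return _rng.binomial(1, p, size)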
Example #3
    def play_arms(self):
        rewards = [0]  # rewards[t] holds the reward received at time t
        n = 0

        # At time t = 0, play every arm once to initialise the estimates.
        for arm_number in range(self.K):
            reward = super().pull_arm(arm_number)
            rewards.append(reward)

            n = n + 1

        # From time t = 1, proceed in doubling epochs.
        for t in range(1, rvh.ceiled_log_base_2(self.N) + 1):
            self.revise_ucbs(n)

            # Pull the arm with the highest UCB 2**t times.
            pulls_this_iteration = 2**t

            arm_with_highest_ucb = mh.get_maximum_index(
                self.upper_confidence_bound)

            for i in range(pulls_this_iteration):
                if n >= self.N:
                    break

                reward = super().pull_arm(arm_with_highest_ucb)
                rewards.append(reward)
                n = n + 1
            # end for
        # end for

        return rewards
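Because epoch t makes 2**t pulls, the loop bound covers the whole budget: the sum of 2**t for t = 1 .. ceil(log2(N)) equals 2**(ceil(log2(N)) + 1) - 2, which is at least N for N >= 2, and the n >= N check stops pulling exactly at the budget. A presumed implementation of the helper, if rvh defines it the obvious way:

import math

def ceiled_log_base_2(n):
    # Smallest integer e with 2**e >= n, for n >= 1.
    return math.ceil(math.log2(n))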
Example #4
    def get_arms(arm_count, tape_size):
        # Draw the true means uniformly from [0, 1) and build one Arm,
        # with a pre-sampled reward tape, per mean.
        true_means = rvh.get_uniform_sample(0, 1, arm_count)
        arms = []
        for i in range(arm_count):
            arm = Arm(true_means[i], size=tape_size)
            arms.append(arm)

        return true_means, arms
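A hypothetical driver for this helper: build a five-armed Bernoulli bandit with 10**6 pre-sampled rewards per arm.

true_means, arms = get_arms(arm_count=5, tape_size=10**6)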
Example #5
    def analyse_common_stats(self):
        # Compute the gaps (deltas) and the theoretical upper bound on the cumulative regret of UCB1.
        self.best_arm = mh.get_maximum_index(self.true_means)
        mean_of_best_arm = self.true_means[self.best_arm]

        for i in range(self.K):
            self.deltas[i] = mean_of_best_arm - self.true_means[i]

        sum_del_inv, sum_del = mh.get_instance_dependent_values(
            self.best_arm, self.deltas)

        mult_constant, addi_constant = mh.get_theoretical_constants(
            sum_del_inv, sum_del)

        time_series = np.arange(self.T + 1)
        self.cum_regret_theo_bound = mult_constant * rvh.natural_logarithm(
            time_series) + addi_constant
        self.cum_optimal_reward = time_series * mean_of_best_arm
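In closed form, these last lines evaluate the logarithmic bound mult_constant * ln(t) + addi_constant together with the optimal cumulative reward t * mean_of_best_arm, so measured cumulative regret can later be compared against both curves.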
Example #6
    def get_theoretical_constants(sum_del_inv, sum_del):
        # Slope of the logarithmic regret term and its additive offset;
        # func_of_pi(add=1, power=2, mult=1 / 3) is presumably the usual
        # 1 + pi**2 / 3 constant from the UCB1 analysis.
        mult_constant = 2 * sum_del_inv
        addi_constant = rvh.func_of_pi(add=1, power=2, mult=1 / 3) * sum_del

        return mult_constant, addi_constant
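A worked numeric example with hypothetical gaps, again assuming func_of_pi(add=1, power=2, mult=1 / 3) evaluates to 1 + pi**2 / 3:

# Three arms with gaps deltas = [0, 0.2, 0.5] relative to the best arm.
sum_del_inv = 1 / 0.2 + 1 / 0.5   # 7.0 (the best arm is excluded)
sum_del = 0.2 + 0.5               # 0.7
# mult_constant = 2 * 7.0 = 14.0
# addi_constant = (1 + pi**2 / 3) * 0.7 ≈ 3.00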
Example #7
    def __init__(self, mean, size=10**7):
        self._mean = mean

        # Create a tape of values to return.
        self._tape = rvh.get_bernoulli_sample(p=self._mean, size=size)
        self._tape_index = 0
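The source does not show how an Arm hands out rewards, but a minimal companion method consistent with the tape design would read sequentially and wrap around (hypothetical, not from the source):

    def pull(self):
        # Return the next pre-sampled reward off the tape, wrapping
        # around once the tape is exhausted.
        reward = self._tape[self._tape_index]
        self._tape_index = (self._tape_index + 1) % len(self._tape)
        return reward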