Example #1
0
    def _compute_IL_posterior(self, t):
        # W = 0
        # for agent, reputation in self.agent_reputations_overall.items():
        #     W += int(min(1, reputation) == 1)
        max_pulls = max([arm.pulls for arm in self.bandit.arms])  # only used by the commented-out report-capping lines below
        for (arm_index, arm) in enumerate(self.bandit.arms):
            self.prediction_history[arm_index]=[]
            self.posterior_history[arm_index] = [BetaDistribution(0.5, 1-0.5)]
            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())
            num_reports = 0

            weight_0 = 1
            weight = copy.deepcopy(weight_0)  # have to make this dependent on the initial reputation and ...
            running_sum = 0.5 * weight_0

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                # print(agent.id)
                gamma = min(self.agent_reputations_overall[agent], 1)
                # give full weight to the current agent's reports
                temp_running_sum = running_sum + (self.agency.agent_reports[agent][arm_index])
                temp_weight = weight + 1
                q_j = temp_running_sum / temp_weight

                alpha_j = q_j * (agent.num_reports) #+ pre_alpha
                beta_j = (1-q_j) * (agent.num_reports) # pre_beta

                running_sum += self.agency.agent_reports[agent][arm_index] * gamma
                weight += gamma

                self.prediction_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_j), copy.deepcopy(beta_j)))

                q_j_tilde = running_sum/weight
                # (1-gamma)*q_j_tilde + gamma*(q_j)
                num_reports += gamma * agent.num_reports

                alpha_tilde = q_j_tilde * (num_reports) 
                beta_tilde = (1-q_j_tilde) * (num_reports)
                self.posterior_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_tilde), copy.deepcopy(beta_tilde)))
    
            running_sum -= 0.5 * weight_0
            weight -= weight_0
            q_j_tilde = running_sum/weight
            # num_reports = min(num_reports, max(100 - arm.pulls, 0))
            # # num_reports = min(num_reports, 30)
            # # num_reports = min(num_reports, max_pulls-arm.pulls)
            alpha_tilde = q_j_tilde * (num_reports) 
            beta_tilde = (1-q_j_tilde) * (num_reports)
            # print("arm:", arm_index)
            # print("pre:",pre_alpha, pre_beta)
            # print("post:", pre_alpha + alpha_tilde, pre_beta + beta_tilde)
            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha, beta_tilde  + pre_beta)
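
# A minimal standalone sketch of the weighting scheme above, assuming reports
# are means in [0, 1], reputations are non-negative floats, and num_reports
# gives each agent's report count. The helper name and signature are
# illustrative, not part of the class.
def influence_limited_beta(reports, reputations, num_reports, prior_mean=0.5):
    # each agent's weight gamma is its reputation clipped at 1
    gammas = [min(1.0, r) for r in reputations]
    # reputation-weighted mean of the reports, seeded with a unit-weight prior
    # that is removed again before the final conversion (mirroring the code above)
    weight = 1.0
    running_sum = prior_mean * weight
    effective_reports = 0.0
    for report, gamma, n in zip(reports, gammas, num_reports):
        running_sum += gamma * report
        weight += gamma
        effective_reports += gamma * n
    running_sum -= prior_mean
    weight -= 1.0
    q = running_sum / weight if weight > 0 else prior_mean
    # convert the weighted mean into Beta pseudo-counts
    return q * effective_reports, (1 - q) * effective_reports

# Example: two agents with 10 reports each, the second barely trusted, so its
# report hardly moves the estimate:
# influence_limited_beta([0.9, 0.1], [1.0, 0.05], [10, 10]) -> (~9.05, ~1.45)
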
    def _compute_IL_posterior(self, t):
        for (arm_index, arm) in enumerate(self.bandit.arms):
            self.prediction_history[arm_index]=[]
            q_j_tilde = 0.5
            self.posterior_history[arm_index] = [BetaDistribution(0.5, 1-0.5)]
            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())
            num_reports = 0

            weight_0 = np.log(t)
            weight = copy.deepcopy(weight_0)  # have to make this dependent on the initial reputation and ...
            running_sum = 0.5 * weight

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                # print(agent.id)
                time_factor = 1
                gamma = min(1, self.agent_reputations[agent])
                # give full weight to the current agent's reports
                temp_running_sum = running_sum + (self.agency.agent_reports[agent][arm_index] * time_factor)
                temp_weight = weight + time_factor
                q_j = temp_running_sum / temp_weight

                alpha_j = q_j * (agent.num_reports) #+ pre_alpha
                beta_j = (1-q_j) * (agent.num_reports) # pre_beta

                running_sum += self.agency.agent_reports[agent][arm_index] * gamma
                weight += gamma

                self.prediction_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_j), copy.deepcopy(beta_j)))

                q_j_tilde = running_sum/weight
                # (1-gamma)*q_j_tilde + gamma*(q_j)
                num_reports += gamma * agent.num_reports

                alpha_tilde = q_j_tilde * (num_reports) 
                beta_tilde = (1-q_j_tilde) * (num_reports)
                self.posterior_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_tilde), copy.deepcopy(beta_tilde)))
    
            # if alpha_cuml != 0:
            #     arm.influence_reward_dist.set_params(alpha_cuml + pre_alpha, beta_cuml + pre_beta)
            # else:
            #     arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha, beta_tilde + pre_beta)
            running_sum -= 0.5 * weight_0
            weight -= weight_0
            q_j_tilde = running_sum/weight
            # print("round", t)
            # print(num_reports)
            alpha_tilde = q_j_tilde * (num_reports) 
            beta_tilde = (1-q_j_tilde) * (num_reports)
            # print(alpha_tilde, beta_tilde)
            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha, beta_tilde  + pre_beta)
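
# The only change from the previous variant is the prior weight: the 0.5
# pseudo-report now carries weight log(t) instead of 1 while the reports are
# folded in, and is removed again before the final Beta conversion. A minimal
# illustration of the intermediate blend, ignoring the per-agent reputation
# weights (names are illustrative):
import numpy as np

def smoothed_mean(reports_so_far, t, prior_mean=0.5):
    # intermediate estimate before the log(t)-weighted prior is subtracted out
    w0 = np.log(t)
    return (prior_mean * w0 + sum(reports_so_far)) / (w0 + len(reports_so_far))

# smoothed_mean([0.9], t=2) ~= 0.74 while smoothed_mean([0.9], t=100) ~= 0.57:
# the same single report is pulled harder toward 0.5 at larger t.
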
    def _compute_IL_posterior(self, t):
        # print("reputations:", self.agent_reputations)
        for (arm_index, arm) in enumerate(self.bandit.arms):
            self.posterior_history[arm_index] = [BetaDistribution(1, 1)]
            self.prediction_history[arm_index] = []

            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())

            weight = max(0, 1 - (np.log(self.bandit.T) / t))
            running_weighted_sum = 0.5 * weight

            # test = weight*copy.deepcopy(arm.reward_dist.mean()) + (1-weight) * (0.5)
            # self.posterior_history[arm_index] = [BetaDistribution(test, (1-test))]

            num_trust = 1

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                gamma = min(1, self.agent_reputations[agent_index])

                if gamma >= 1:
                    num_trust += 1

                alpha_j = self.agency.agent_reports[agent_index][arm_index] * (
                    agent.num_reports)
                beta_j = (1 - self.agency.agent_reports[agent_index][arm_index]
                          ) * (agent.num_reports)

                self.prediction_history[arm_index].append(
                    BetaDistribution(alpha_j, beta_j))

                running_weighted_sum += gamma * self.agency.agent_reports[
                    agent_index][arm_index]
                weight += gamma

                # running_alpha_sum += gamma * self.agency.agent_reports[agent_index][arm_index] * (agent.num_reports)
                # running_beta_sum += gamma * (1-self.agency.agent_reports[agent_index][arm_index]) * (agent.num_reports)
                # weights += gamma

                # alpha_tilde = running_alpha_sum/weights
                # beta_tilde = running_beta_sum/weights

                q_tilde = running_weighted_sum / weight
                alpha_tilde = q_tilde * (agent.num_reports * num_trust)
                beta_tilde = (1 - q_tilde) * (agent.num_reports * num_trust)
                self.posterior_history[arm_index].append(
                    BetaDistribution(alpha_tilde, beta_tilde))

            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha,
                                                 beta_tilde + pre_beta)
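
# In this third variant the prior weight follows the schedule
# max(0, 1 - log(T)/t): it is zero for the first few rounds and then rises
# toward 1, so the 0.5 anchor only enters once t exceeds log(T). A quick look
# at the schedule (illustrative only):
import numpy as np

def prior_weight(t, T):
    return max(0.0, 1.0 - np.log(T) / t)

# For T = 500: prior_weight(3, 500) == 0.0, prior_weight(10, 500) ~= 0.38,
# prior_weight(100, 500) ~= 0.94.
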
Example #4
0
    def _compute_IL_posterior(self, t):
        for (arm_index, arm) in enumerate(self.bandit.arms):
            self.prediction_history[arm_index]=[]
            q_j_tilde = 0.5
            self.posterior_history[arm_index] = [BetaDistribution(q_j_tilde, 1-q_j_tilde)]
            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())

            weight = 0
            running_sum = 0
            num_reports = 0
            N = len(self.agency.agents)
            alpha_test, beta_test = 0, 0  # only referenced by the commented-out fallback below

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                #get gamma
                gamma = min(1, self.agent_reputations[agent_index])

                # give full weight to the current agent's reports
                temp_running_sum = running_sum + (self.agency.agent_reports[agent_index][arm_index] * 1)
                temp_weight = weight + 1
                q_j = temp_running_sum / temp_weight

                temp_num_reports = num_reports + agent.num_reports

                alpha_j = q_j * (agent.num_reports * temp_num_reports) #+ pre_alpha
                beta_j = (1-q_j) * (agent.num_reports * temp_num_reports) # pre_beta

                running_sum += self.agency.agent_reports[agent_index][arm_index] * gamma
                num_reports += gamma * agent.num_reports
                weight += gamma

                self.prediction_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_j), copy.deepcopy(beta_j)))

                q_j = copy.deepcopy(alpha_j/(alpha_j + beta_j))
                q_j_tilde = (1-gamma)*q_j_tilde + gamma*(q_j)

                alpha_tilde = q_j_tilde * (num_reports) 
                beta_tilde = (1-q_j_tilde) * (num_reports)
                self.posterior_history[arm_index].append(BetaDistribution(copy.deepcopy(alpha_tilde), copy.deepcopy(beta_tilde)))
    
            # if alpha_test != 0:
            #     arm.influence_reward_dist.set_params(alpha_test + pre_alpha, beta_test + pre_beta)
            # else:
            #     arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha, beta_tilde + pre_beta)
            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha, beta_tilde + pre_beta)
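
# A minimal sketch of the recurrence used above: each agent's estimate q_j is
# folded in with a convex combination gated by its reputation, so a
# zero-reputation agent leaves the running estimate untouched. Names are
# illustrative, not part of the class.
def blend_reports(qs, reputations, prior_mean=0.5):
    q_tilde = prior_mean
    for q, rep in zip(qs, reputations):
        gamma = min(1.0, rep)
        q_tilde = (1 - gamma) * q_tilde + gamma * q
    return q_tilde

# blend_reports([0.9, 0.1], [1.0, 0.05]) -> 0.86: the fully trusted report
# replaces the 0.5 prior outright, the barely trusted one nudges it slightly.
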
    def _compute_IL_posterior(self, t):
        # print("reputations:", self.agent_reputations)
        for (arm_index, arm) in enumerate(self.bandit.arms):
            # self.posterior_history[arm_index] = [BetaDistribution(1, 1)]
            self.prediction_history[arm_index] = []

            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())
            # self.posterior_history[arm_index] = [copy.deepcopy(arm.reward_dist)]
            # k = 2/(len(self.agency.agents) + 1)
            pre_mean = copy.deepcopy(arm.reward_dist.mean())
            prev_ema = copy.deepcopy(self.agency.agent_reports[0][arm_index])
            # q_j_tilde = copy.deepcopy(0.5)
            k = 0.75  # overridden each iteration by k = 1 - 1/(t + 1) below
            self.posterior_history[arm_index] = [
                BetaDistribution(0.5, 1 - 0.5)
            ]
            q_j_tilde = copy.deepcopy(0.5)

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                # print("agent:", agent_index)
                # print("agent reputation:", self.agent_reputations[agent_index])
                k = 1 - 1 / (t + 1)
                gamma = min(1, self.agent_reputations[agent_index])
                current_ema = (self.agency.agent_reports[agent_index]
                               [arm_index] - prev_ema) * k + prev_ema
                prev_ema = copy.deepcopy(current_ema)

                alpha_j = current_ema * (agent.num_reports)  #+ pre_alpha
                beta_j = (1 - current_ema) * (agent.num_reports)  # pre_beta

                self.prediction_history[arm_index].append(
                    BetaDistribution(alpha_j, beta_j))

                q_j = copy.deepcopy(alpha_j / (alpha_j + beta_j))
                q_j_tilde = (1 - gamma) * q_j_tilde + gamma * (q_j)

                alpha_tilde = q_j_tilde * (agent.num_reports)
                beta_tilde = (1 - q_j_tilde) * (agent.num_reports)
                self.posterior_history[arm_index].append(
                    BetaDistribution(alpha_tilde, beta_tilde))

            # print("final:", alpha_tilde + pre_alpha, beta_tilde + pre_beta)
            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha,
                                                 beta_tilde + pre_beta)
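
# The variant above first smooths the raw reports with an exponential moving
# average whose factor k = 1 - 1/(t + 1) approaches 1 as t grows, so later
# rounds let each new report dominate the running EMA. A minimal sketch of
# that smoothing step (names are illustrative):
def ema_reports(reports, t):
    k = 1.0 - 1.0 / (t + 1)
    ema = reports[0]
    smoothed = []
    for r in reports:
        ema = (r - ema) * k + ema
        smoothed.append(ema)
    return smoothed

# ema_reports([0.2, 0.9, 0.4], t=1) -> [0.2, 0.55, 0.475] (k = 0.5), while at
# t = 9 (k = 0.9) the EMA tracks each new report much more closely.
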
Example #6
0
    def _compute_IL_posterior(self, t):
        # print("reputations:", self.agent_reputations)
        for (arm_index, arm) in enumerate(self.bandit.arms):
            # self.posterior_history[arm_index] = [BetaDistribution(1, 1)]
            self.prediction_history[arm_index] = []

            pre_alpha, pre_beta = copy.deepcopy(arm.reward_dist.get_params())
            new_mean = copy.deepcopy(arm.reward_dist.mean())
            weight = 1
            running_weighted_sum = weight * new_mean
            q_tilde = running_weighted_sum / weight

            self.posterior_history[arm_index] = [
                BetaDistribution(q_tilde, 1 - q_tilde)
            ]
            k = 2 / (len(self.agency.agents) + 1)
            prev_ema = self._compute_SMA(arm_index)

            #iterate through each agent and process their report
            for agent_index, agent in enumerate(self.agency.agents):
                gamma = min(1, self.agent_reputations[agent_index])
                current_ema = (self.agency.agent_reports[agent_index]
                               [arm_index] - prev_ema) * k + prev_ema
                alpha_j = current_ema * (agent.num_reports)
                beta_j = (1 - current_ema) * (agent.num_reports)

                self.prediction_history[arm_index].append(
                    BetaDistribution(alpha_j, beta_j))

                q_j = copy.deepcopy(current_ema)

                running_weighted_sum += gamma * q_j
                weight += gamma

                q_tilde = running_weighted_sum / weight

                alpha_tilde = q_tilde * (agent.num_reports)
                beta_tilde = (1 - q_tilde) * (agent.num_reports)
                self.posterior_history[arm_index].append(
                    BetaDistribution(alpha_tilde, beta_tilde))

            # print("final:", alpha_tilde + pre_alpha, beta_tilde + pre_beta)
            arm.influence_reward_dist.set_params(alpha_tilde + pre_alpha,
                                                 beta_tilde + pre_beta)
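
# This last variant seeds the weighted average with the arm's current
# posterior mean at weight 1 rather than a flat 0.5, and smooths the reports
# with the standard EMA factor k = 2/(N + 1), seeded from _compute_SMA.
# _compute_SMA is not shown in the snippet; a plausible reading, stated here
# as an assumption, is a plain mean over the agents' reports for the arm:
def compute_sma(agent_reports, arm_index):
    # hypothetical stand-in for self._compute_SMA(arm_index)
    values = [reports[arm_index] for reports in agent_reports]
    return sum(values) / len(values)
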
Example #7
0
def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a, 0), scipy.stats.sem(a, 0)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, h
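
# Example usage of mean_confidence_interval: for a (num_runs, horizon) array
# it returns the column-wise mean and the half-width of the 95% confidence
# interval, both of shape (horizon,). Illustrative numbers only.
_demo = np.array([[1.0, 2.0, 3.0], [2.0, 4.0, 6.0], [3.0, 6.0, 9.0]])
_demo_mean, _demo_h = mean_confidence_interval(_demo)
# _demo_mean == [2., 4., 6.]; _demo_h is roughly [2.48, 4.97, 7.45]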


T = 500
K = 5
num_exp = 10
num_reports = 10
trust = [False, False, True, False]
initial_reputations = 1

world_priors = [BetaDistribution(1, 1) for k in range(K)]
nature = Nature(K, world_priors, len(trust))

bayes_ucb = BayesUCB(T, K, world_priors)
random = Random(T, K, world_priors)
thompson = ThompsonSampling(T, K, world_priors)
oracle = Oracle(copy.deepcopy(bayes_ucb), nature.agency)
bandits = [thompson, bayes_ucb, random]

key_map = {thompson: "Thompson", bayes_ucb: "Bayes UCB", random: "Random"}
key_color = {thompson: "red", bayes_ucb: "blue", random: "green"}

cumulative_regret_history = {
    bandit: np.zeros((num_exp, T))
    for bandit in bandits
}
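
# A sketch of how these pieces are typically combined once the experiment loop
# (not shown in this snippet) has filled cumulative_regret_history: average the
# num_exp runs with mean_confidence_interval and plot the mean regret with a
# confidence band. matplotlib is an assumption of this sketch, not something
# the snippet imports.
def plot_cumulative_regret(cumulative_regret_history, bandits, key_map, key_color, T):
    import matplotlib.pyplot as plt
    for bandit in bandits:
        mean, h = mean_confidence_interval(cumulative_regret_history[bandit])
        plt.plot(mean, color=key_color[bandit], label=key_map[bandit])
        plt.fill_between(range(T), mean - h, mean + h,
                         color=key_color[bandit], alpha=0.2)
    plt.xlabel("round")
    plt.ylabel("cumulative regret")
    plt.legend()
    plt.show()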