def test_find_best_budgets_1(self):
        """find_best_budgets puts the whole budget on sub-campaign 0 (2 sub-campaigns, 3 arms)."""
        campaign = Campaign(2, 100, 3)
        for sub_idx, values in enumerate([[3, 7, 14],
                                          [2, 5, 7]]):
            campaign.set_sub_campaign_values(sub_idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 16)
        # assert_array_equal reports the mismatching elements (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(best_budgets, np.array([100, 0]))
    def test_find_best_budgets_2(self):
        """find_best_budgets puts the whole budget on sub-campaign 0 (3 sub-campaigns, 4 arms)."""
        campaign = Campaign(3, 90, 4)
        for sub_idx, values in enumerate([[0, 3, 12, 20],
                                          [0, 2, 7, 10],
                                          [0, 5, 8, 12]]):
            campaign.set_sub_campaign_values(sub_idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 20)
        # assert_array_equal reports the mismatching elements (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(best_budgets, np.array([90, 0, 0]))
    def test_optimize_1(self):
        """_optimize returns the expected pair of matrices for 2 sub-campaigns and 3 arms."""
        campaign = Campaign(2, 100, 3)
        campaign.set_sub_campaign_values(0, [3, 7, 14])
        campaign.set_sub_campaign_values(1, [2, 5, 7])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt_matrix = np.array([[0, 0, 0],
                                        [3, 7, 14],
                                        [5, 9, 16]])
        expected_max_idx_matrix = np.array([[-1, -1, -1],
                                            [0, 1, 2],
                                            [0, 0, 0]])

        # assert_array_equal reports which entries differ (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(opt_matrix, expected_opt_matrix)
        np.testing.assert_array_equal(max_idx_matrix, expected_max_idx_matrix)
    def test_find_best_budgets_3(self):
        """find_best_budgets splits the budget between sub-campaigns 0 and 3 (4 sub-campaigns, 5 arms)."""
        campaign = Campaign(4, 100, 5)
        for sub_idx, values in enumerate([[0, 3, 12, 20, 15],
                                          [0, 2, 7, 10, 9],
                                          [0, 5, 8, 12, 18],
                                          [0, 9, 9, 10, 7]]):
            campaign.set_sub_campaign_values(sub_idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 29)
        # assert_array_equal reports the mismatching elements (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(best_budgets, np.array([75, 0, 0, 25]))
    def test_optimize_2(self):
        """_optimize returns the expected pair of matrices for 3 sub-campaigns and 4 arms."""
        campaign = Campaign(3, 90, 4)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt_matrix = np.array([[0, 0, 0, 0],
                                        [0, 3, 12, 20],
                                        [0, 3, 12, 20],
                                        [0, 5, 12, 20]])
        expected_max_idx_matrix = np.array([[-1, -1, -1, -1],
                                            [0, 1, 2, 3],
                                            [0, 0, 0, 0],
                                            [0, 1, 0, 0]])

        # assert_array_equal reports which entries differ (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(opt_matrix, expected_opt_matrix)
        np.testing.assert_array_equal(max_idx_matrix, expected_max_idx_matrix)
    def test_optimize_3(self):
        """_optimize returns the expected pair of matrices for 4 sub-campaigns and 5 arms."""
        campaign = Campaign(4, 100, 5)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20, 15])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10, 9])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12, 18])
        campaign.set_sub_campaign_values(3, [0, 9, 9, 10, 7])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt_matrix = np.array([[0, 0, 0, 0, 0],
                                        [0, 3, 12, 20, 20],
                                        [0, 3, 12, 20, 22],
                                        [0, 5, 12, 20, 25],
                                        [0, 9, 14, 21, 29]])
        expected_max_idx_matrix = np.array([[-1, -1, -1, -1, -1],
                                            [0, 1, 2, 3, 3],
                                            [0, 0, 0, 0, 1],
                                            [0, 1, 0, 0, 1],
                                            [0, 1, 1, 1, 1]])

        # assert_array_equal reports which entries differ (and shape
        # mismatches) instead of a bare "False is not true".
        np.testing.assert_array_equal(opt_matrix, expected_opt_matrix)
        np.testing.assert_array_equal(max_idx_matrix, expected_max_idx_matrix)
    def test_find_best_budgets_4(self):
        """find_best_budgets handles saturating click curves (3 sub-campaigns, 11 arms)."""
        campaign = Campaign(3, 100, 11)
        # Each curve grows linearly per arm step and then (for 1 and 2) saturates:
        #   sub-campaign 0: +300 per step, never saturates (max 3000)
        #   sub-campaign 1: +400 per step, flat at 2000 from the 6th arm on
        #   sub-campaign 2: +500 per step, flat at 1500 from the 4th arm on
        campaign.set_sub_campaign_values(0, [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000])
        campaign.set_sub_campaign_values(1, [0, 400, 800, 1200, 1600, 2000, 2000, 2000, 2000, 2000, 2000])
        campaign.set_sub_campaign_values(2, [0, 500, 1000, 1500, 1500, 1500, 1500, 1500, 1500, 1500, 1500])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        # Fill the steepest curves up to their saturation point first; the
        # remainder goes to sub-campaign 0: 600 + 2000 + 1500 clicks.
        # NOTE(review): assumes the 11 arms are budgets 0, 10, ..., 100 — confirm.
        self.assertEqual(max_clicks, 4100)
        np.testing.assert_array_equal(best_budgets,
                                      np.array([20.0, 50.0, 30.0]))
Exemplo n.º 8
0
# Load the scenario with get_mean_function=True and take the click functions
# of its first phase (indexed per sub-campaign below).
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME,
    get_mean_function=True,
)
click_function_list: List[IStochasticFunction] = (
    mean_scenario.get_phases()[0].get_n_clicks_function())

# Optimal point computation: sample every click function on the budget grid,
# then let the optimizer pick the best allocation.
campaign = Campaign(
    n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
    cum_budget=BUDGET,
    n_arms=N_ARMS_ADV,
)
budget_grid = np.linspace(0, BUDGET, N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    click_function = click_function_list[i]
    sub_campaign_values = [click_function.draw_sample(b) for b in budget_grid]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

# # Compute regret
# if CSV_CUM_REGRET:
#     mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     std_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     mean_regret_data[-1] = np.arange(n_days) + 1
#     std_regret_data[-1] = np.arange(n_days) + 1
#
#     for bandit_idx in range(len(BANDIT_NAME)):
#         n_exp = len(total_reward_list[bandit_idx])
#
#         for curr_day in range(n_days):
#             daily_values = []
#             for exp in range(n_exp):
    def pull_budget(self) -> List[int]:
        """Pick the budget allocation and rebuild the balanced price learner.

        For every price arm, the estimated clicks of each sub-campaign are
        scaled by ``self.value_per_clicks_per_price_idx[arm][sub_campaign]``
        and the resulting campaign is optimized; the allocation with the
        highest optimum over all arms is kept (the first one on ties).

        As a side effect ``self.unique_price_learner`` is replaced by a fresh
        ``self.price_bandit_class(**self.price_bandit_kwargs)`` instance. It
        is updated with rewards sampled without replacement from
        ``self.rewards_per_arm_per_user_class`` so that, for each arm, the
        number of rewards taken from each sub-campaign is proportional to the
        clicks estimated for that sub-campaign at the chosen budgets.

        :return: for each sub-campaign, the index in
            ``self.campaign.get_budgets()`` of the chosen budget
        :raises ValueError: if ``self.arm_values`` is empty
        """
        # Estimated clicks table, indexed below as [sub-campaign, budget index].
        clicks_per_budget = self.campaign.get_sub_campaigns()
        budgets = self.campaign.get_budgets()
        n_sub_campaigns = self.campaign.get_n_sub_campaigns()
        n_arms = len(self.arm_values)

        max_best_clicks = None
        max_best_budgets = None
        for arm_idx in range(n_arms):
            arm_value_campaign = Campaign(n_sub_campaigns,
                                          self.campaign.get_cum_budget(),
                                          len(budgets))
            for sub_idx, sub_clicks in enumerate(clicks_per_budget):
                arm_value_campaign.set_sub_campaign_values(
                    sub_idx, sub_clicks *
                    self.value_per_clicks_per_price_idx[arm_idx][sub_idx])
            # Optimize the value-weighted campaign built for this arm.
            # Optimizing self.campaign here would ignore the weighting and
            # make every arm yield the same result.
            max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
                arm_value_campaign)

            # Always accept the first arm so an all-zero estimate still
            # produces an allocation instead of leaving it as None.
            if max_best_budgets is None or max_best_clicks < max_clicks:
                max_best_clicks = max_clicks
                max_best_budgets = best_budgets

        if max_best_budgets is None:
            raise ValueError("pull_budget requires at least one price arm")

        # Balance price learner with number of clicks distribution estimation
        budget_value_to_index = {
            value: i
            for i, value in enumerate(budgets)
        }
        estimated_clicks = np.array([
            clicks_per_budget[sub_idx, budget_value_to_index[budget]]
            for sub_idx, budget in enumerate(max_best_budgets)
        ])
        total_estimated_clicks = np.sum(estimated_clicks)
        if total_estimated_clicks > 0:
            user_probabilities = estimated_clicks / total_estimated_clicks
        else:
            # No clicks estimated anywhere: no share can be defined, so no
            # reward is fed to the learner below.
            user_probabilities = np.zeros(len(estimated_clicks))
        # Only classes with a positive share constrain the balanced sample;
        # a zero share would otherwise give x/0 = inf or 0/0 = NaN, and a NaN
        # would wipe out the rewards of every class for that arm.
        has_share = user_probabilities > 0

        self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
            **self.price_bandit_kwargs)

        for arm_idx in range(n_arms):
            rewards_per_subcampaign = [
                self.rewards_per_arm_per_user_class[sub_idx][arm_idx]
                for sub_idx in range(n_sub_campaigns)
            ]
            rewards_len = np.array(
                [len(rewards) for rewards in rewards_per_subcampaign])

            if has_share.any():
                # Largest total sample size such that no class needs more
                # rewards than it has collected.
                solution = np.min(rewards_len[has_share] /
                                  user_probabilities[has_share])
                balanced_rewards_len = np.array(np.floor(solution *
                                                         user_probabilities),
                                                dtype=int)
            else:
                balanced_rewards_len = np.zeros_like(rewards_len)

            # Clip values of balanced rewards length between 0 and original rewards length
            balanced_rewards_len = np.maximum(balanced_rewards_len, 0)
            balanced_rewards_len = np.minimum(balanced_rewards_len,
                                              rewards_len)

            for rewards, sample_size in zip(rewards_per_subcampaign,
                                            balanced_rewards_len):
                sampled_rewards = np.random.choice(rewards,
                                                   size=sample_size,
                                                   replace=False)
                for reward in sampled_rewards:
                    self.unique_price_learner.update(arm_idx, reward)

        return [budget_value_to_index[budget] for budget in max_best_budgets]