@classmethod
def _optimize(cls, campaign: Campaign) -> Tuple[np.ndarray, np.ndarray]:
        """
        Optimize the combinatorial problem of the advertising campaign by using a dynamic programming algorithm

        :param campaign: the campaign to be optimized
        :return:
            - the optimization matrix of shape (N+1) x M containing, for each pair (budget, set of sub-campaigns),
              the maximum value achievable (the value may be, for instance, the number of clicks, or the profit)
            - the maximum-index matrix of shape (N+1) x M, aligned with the optimization matrix, containing, for
              each pair (budget, set of sub-campaigns), the budget index assigned to the newly added sub-campaign
              on top of the optimum of the previous set of sub-campaigns (i.e. the previous row)
            where N is the number of sub-campaigns and M is the number of discrete budgets
        """
        n_budgets = len(campaign.get_budgets())
        optimization_matrix = np.zeros(
            shape=(campaign.get_n_sub_campaigns() + 1, n_budgets))
        # np.int was removed from recent NumPy versions: use the plain int dtype
        max_idx_matrix = np.full_like(optimization_matrix,
                                      fill_value=-1,
                                      dtype=int)
        prev_row = 0
        clicks_matrix = campaign.get_sub_campaigns()
        for row in range(1, optimization_matrix.shape[0]):
            # Reversed click values of the sub-campaign added at this row
            temp_clicks = clicks_matrix[row - 1][::-1]

            for col in range(n_budgets):
                # Recurrence: opt[row, col] = max_j (clicks[row-1][col-j] + opt[row-1, j]),
                # i.e. split budget index "col" between the new sub-campaign
                # and the previously optimized set of sub-campaigns
                cum_sum_clicks = temp_clicks[n_budgets - col - 1:] + \
                    optimization_matrix[prev_row, :col + 1]
                idx_max = np.argmax(cum_sum_clicks)

                optimization_matrix[row, col] = cum_sum_clicks[idx_max]
                # Budget index assigned to the newly added sub-campaign
                max_idx_matrix[row, col] = col - idx_max
            prev_row = row
        return optimization_matrix, max_idx_matrix
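# find_best_budgets (exercised by the tests below) is not part of this excerpt.
# The following is only a sketch, assuming the Campaign API used above, of how
# max_idx_matrix can be unwound into a per-sub-campaign allocation; the function
# name is hypothetical.
def _sketch_backtrack_best_budgets(campaign: Campaign):
    opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)
    budgets = campaign.get_budgets()

    col = int(np.argmax(opt_matrix[-1]))  # best cumulative budget index
    best_budgets = np.zeros(campaign.get_n_sub_campaigns())
    for row in range(max_idx_matrix.shape[0] - 1, 0, -1):
        idx = max_idx_matrix[row, col]  # budget index of sub-campaign row-1
        best_budgets[row - 1] = budgets[idx]
        col -= idx  # budget left for the remaining sub-campaigns
    return opt_matrix[-1].max(), best_budgets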
    def __init__(self, campaign: Campaign,
                 model_list: List[DiscreteRegressor]):
        """
        :param campaign: campaign that will be optimized
        :param model_list: regressors used to estimate the quantity of interest in each sub-campaign. There is
        one regressor per sub-campaign
        """
        assert len(model_list) == campaign.get_n_sub_campaigns()

        self.campaign: Campaign = campaign
        self.t: int = 0
        self.collected_rewards: List[float] = []
        self.pulled_superarm_list: List[List] = []
        self.model_list: List[DiscreteRegressor] = model_list

        self.collected_rewards_sub_campaign: List[List] = [
            [] for _ in range(campaign.get_n_sub_campaigns())
        ]
        self.pulled_arm_sub_campaign: List[List] = [
            [] for _ in range(campaign.get_n_sub_campaigns())
        ]

        for sub_index, model in enumerate(self.model_list):
            sub_campaign_values = model.sample_distribution()
            self.campaign.set_sub_campaign_values(sub_index,
                                                  sub_campaign_values)
def main(args):
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)

    campaign = Campaign(scenario.get_n_subcampaigns(), args.cum_budget,
                        args.n_arms)
    bandit = build_combinatorial_bandit(bandit_name=args.bandit_name,
                                        campaign=campaign,
                                        init_std=args.init_std,
                                        args=args)
    budget_allocation = [0] * campaign.get_n_sub_campaigns()

    for t in range(0, args.n_rounds):
        # Choose arm
        budget_allocation_indexes = bandit.pull_arm()
        budget_allocation = [
            int(campaign.get_budgets()[i]) for i in budget_allocation_indexes
        ]

        # Observe reward
        env.set_budget_allocation(budget_allocation=budget_allocation)
        env.next_day()
        rewards = env.get_daily_visits_per_sub_campaign()

        # Update bandit
        bandit.update(pulled_arm=budget_allocation_indexes,
                      observed_reward=rewards)

    return bandit.collected_rewards, budget_allocation
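# Hedged driver sketch for main(): the flag names mirror the attributes
# accessed above (args.scenario_name, args.bandit_name, ...), but every
# default value here is an assumption, not the project's configuration.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--scenario_name", type=str, required=True)
    parser.add_argument("--bandit_name", type=str, required=True)
    parser.add_argument("--cum_budget", type=float, default=100)  # assumed budget
    parser.add_argument("--n_arms", type=int, default=11)  # assumed granularity
    parser.add_argument("--init_std", type=float, default=1.0)  # assumed prior std
    parser.add_argument("--n_rounds", type=int, default=100)  # assumed horizon
    collected_rewards, final_allocation = main(parser.parse_args())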
    def test_find_best_budgets_3(self):
        campaign = Campaign(4, 100, 5)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20, 15])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10, 9])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12, 18])
        campaign.set_sub_campaign_values(3, [0, 9, 9, 10, 7])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 29)
        self.assertTrue((best_budgets == np.array([75, 0, 0, 25])).all())
    def __init__(self, campaign: Campaign, arm_values: np.ndarray,
                 price_learner_class: Type[DiscreteBandit],
                 price_learner_kwargs: dict,
                 number_of_visit_model_list: List[DiscreteRegressor],
                 ad_value_strategy: AdValueStrategy):
        super().__init__(campaign)
        assert len(
            number_of_visit_model_list) == campaign.get_n_sub_campaigns()

        # General problem data
        self.arm_values: np.ndarray = arm_values

        # Data structure to save overall data
        self.collected_rewards_sub_campaign: List[List] = [
            [] for _ in range(campaign.get_n_sub_campaigns())
        ]
        self.pulled_arm_sub_campaign: List[List] = [
            [] for _ in range(campaign.get_n_sub_campaigns())
        ]

        self.value_per_clicks_per_price_idx: Dict[int, List[float]] = {}
        for arm_idx in range(len(self.arm_values)):
            rewards_per_subcampaign = [
                [] for _ in range(self.campaign.get_n_sub_campaigns())
            ]

            self.value_per_clicks_per_price_idx[arm_idx] = \
                ad_value_strategy.get_estimated_value_per_clicks(
                    rewards_per_subcampaign, self.day_t)

        self.rewards_per_arm_per_user_class: Dict[
            int, List[List[float]]] = defaultdict(list)
        for i in range(self.campaign.get_n_sub_campaigns()):
            for j in range(len(self.arm_values)):
                self.rewards_per_arm_per_user_class[i].append([])

        # Learners
        self.price_bandit_class = price_learner_class
        self.price_bandit_kwargs = price_learner_kwargs

        self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
            **self.price_bandit_kwargs)
        self.number_of_visit_model_list: List[
            DiscreteRegressor] = number_of_visit_model_list
        self.ad_value_strategy: AdValueStrategy = ad_value_strategy
    def __init__(self, campaign: Campaign, ads_learner: CombinatorialBandit,
                 price_learner: List[DiscreteBandit],
                 ad_value_strategy: AdValueStrategy):
        assert len(price_learner) == campaign.get_n_sub_campaigns()

        super().__init__(campaign=campaign)
        self.ads_learner: CombinatorialBandit = ads_learner
        self.price_learner: List[DiscreteBandit] = price_learner
        self.n_arms = price_learner[0].n_arms
        self.ad_value_strategy = ad_value_strategy

        self.estimated_ad_value = None
    def __init__(self, campaign: Campaign, number_of_visit_model_list: List[DiscreteRegressor],
                 n_arms_profit: int, arm_profit: np.array, min_std: float):
        assert len(number_of_visit_model_list) == campaign.get_n_sub_campaigns()

        super().__init__(campaign=campaign)

        # Number of visit data structure
        self.number_of_visit_model_list: List[DiscreteRegressor] = number_of_visit_model_list
        self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]
        self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]

        # Pricing data structure
        self.n_arms_price: int = n_arms_profit
        self.arm_profit = arm_profit
        self.profit_arm_reward_list = [[[] for _ in range(n_arms_profit)] for _ in
                                       range(campaign.get_n_sub_campaigns())]
        self.min_std = min_std

        # Current data structure
        # A single arm index (randint with size=1 would return a length-1 array)
        self.current_pricing_arm_idx = int(np.random.randint(low=0, high=n_arms_profit))
        self.curr_best_budget_idx: List[int] = [0 for _ in range(campaign.get_n_sub_campaigns())]
        self.current_budget_allocation = [0 for _ in range(campaign.get_n_sub_campaigns())]

        # Initialize budget values by sampling each visit regressor
        for sub_index, model in enumerate(self.number_of_visit_model_list):
            sub_campaign_values = model.sample_distribution()
            self.campaign.set_sub_campaign_values(sub_index, sub_campaign_values)
        _, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
        self.curr_best_budget_idx = [np.where(self.campaign.get_budgets() == budget)[0][0] for budget in
                                     best_budgets]
    def test_optimize_3(self):
        campaign = Campaign(4, 100, 5)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20, 15])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10, 9])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12, 18])
        campaign.set_sub_campaign_values(3, [0, 9, 9, 10, 7])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        true_opt_matrix = np.array([[0, 0, 0, 0, 0],
                                    [0, 3, 12, 20, 20],
                                    [0, 3, 12, 20, 22],
                                    [0, 5, 12, 20, 25],
                                    [0, 9, 14, 21, 29]])
        true_max_idx_matrix = np.array([[-1, -1, -1, -1, -1],
                                        [0, 1, 2, 3, 3],
                                        [0, 0, 0, 0, 1],
                                        [0, 1, 0, 0, 1],
                                        [0, 1, 1, 1, 1]])
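
        # e.g. the bottom-right entry: 29 = 9 (sub-campaign 3 at budget index 1,
        # i.e. budget 25, as recorded in max_idx_matrix) + 20 (optimum of the
        # first three sub-campaigns with the remaining budget 75)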

        self.assertTrue((true_opt_matrix == opt_matrix).all())
        self.assertTrue((true_max_idx_matrix == max_idx_matrix).all())
    def test_find_best_budgets_4(self):
        campaign = Campaign(3, 100, 11)
        # Click curves are linear in the budget with slopes 30, 40 and 50 clicks
        # per unit of budget, saturating at 3000, 2000 and 1500 clicks respectively
        campaign.set_sub_campaign_values(0, [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000])
        campaign.set_sub_campaign_values(1, [0, 400, 800, 1200, 1600, 2000, 2000, 2000, 2000, 2000, 2000])
        campaign.set_sub_campaign_values(2, [0, 500, 1000, 1500, 1500, 1500, 1500, 1500, 1500, 1500, 1500])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        my_best_budgets = np.array([20.0, 50.0, 30.0])

        self.assertEqual(max_clicks, 4100)
        self.assertTrue((best_budgets == my_best_budgets).all())
    def test_find_best_budgets_1(self):
        campaign = Campaign(2, 100, 3)
        campaign.set_sub_campaign_values(0, [3, 7, 14])
        campaign.set_sub_campaign_values(1, [2, 5, 7])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 16)
        self.assertTrue((best_budgets == np.array([100, 0])).all())
    def test_find_best_budgets_2(self):
        campaign = Campaign(3, 90, 4)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 20)
        self.assertTrue((best_budgets == np.array([90, 0, 0])).all())
    def test_optimize_1(self):
        campaign = Campaign(2, 100, 3)
        campaign.set_sub_campaign_values(0, [3, 7, 14])
        campaign.set_sub_campaign_values(1, [2, 5, 7])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        true_opt_matrix = np.array([[0, 0, 0],
                                    [3, 7, 14],
                                    [5, 9, 16]])
        true_max_idx_matrix = np.array([[-1, -1, -1],
                                        [0, 1, 2],
                                        [0, 0, 0]])

        self.assertTrue((true_opt_matrix == opt_matrix).all())
        self.assertTrue((true_max_idx_matrix == max_idx_matrix).all())
def main(args):
    # Retrieve scenario
    scenario = EnvironmentManager.load_scenario(args.scenario_name)

    # Retrieve bandit and basic information
    campaign = Campaign(scenario.get_n_subcampaigns(), args.cum_budget,
                        args.n_arms_ads)
    prices = get_prices(args=args)
    arm_profit = prices - args.unit_cost
    bandit = get_bandit(args=args, arm_values=arm_profit, campaign=campaign)

    # Create environment
    env = PricingAdvertisingJointEnvironment(scenario=scenario)

    # Choose whether to pull one arm per day for each class, or one arm per user
    if args.daily_price:
        learn_per_day(bandit, env, campaign, prices, arm_profit,
                      scenario.get_n_subcampaigns())
    else:
        learn_per_user(bandit, env, campaign, prices, arm_profit)

    return bandit.get_daily_reward(), env.day_breakpoints
    def test_optimize_2(self):
        campaign = Campaign(3, 90, 4)
        campaign.set_sub_campaign_values(0, [0, 3, 12, 20])
        campaign.set_sub_campaign_values(1, [0, 2, 7, 10])
        campaign.set_sub_campaign_values(2, [0, 5, 8, 12])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        true_opt_matrix = np.array([[0, 0, 0, 0],
                                    [0, 3, 12, 20],
                                    [0, 3, 12, 20],
                                    [0, 5, 12, 20]])
        true_max_idx_matrix = np.array([[-1, -1, -1, -1],
                                        [0, 1, 2, 3],
                                        [0, 0, 0, 0],
                                        [0, 1, 0, 0]])

        self.assertTrue((true_opt_matrix == opt_matrix).all())
        self.assertTrue((true_max_idx_matrix == max_idx_matrix).all())
def get_bandit(args, arm_values: np.array, campaign: Campaign) -> IJointBandit:
    bandit_name: str = args.joint_bandit_name
    ads_bandit = build_combinatorial_bandit(bandit_name=args.ads_bandit_name,
                                            campaign=campaign,
                                            init_std=args.init_std,
                                            args=args)
    price_bandit_class, price_bandit_kwargs = get_bandit_class_and_kwargs(
        bandit_name=args.pricing_bandit_name,
        n_arms=len(arm_values),
        arm_values=arm_values,
        args=args)
    price_bandit_list = [
        price_bandit_class(**price_bandit_kwargs)
        for _ in range(campaign.get_n_sub_campaigns())
    ]

    # Bandit names containing "Exp" select the expectation-based ad value
    # strategy, otherwise the quantile-based one; a trailing "V" means the
    # bandit also learns the number of visits
    ad_value_strategy = ExpectationAdValueStrategy(np.max(arm_values)) \
        if "Exp" in bandit_name else QuantileAdValueStrategy(np.max(arm_values), args.min_std_q)
    is_learn_visits = bandit_name.endswith('V')

    if bandit_name in ["JBExp", "JBExpV", "JBQV", "JBQ"]:
        bandit = JointBanditDiscriminatory(ads_learner=ads_bandit,
                                           price_learner=price_bandit_list,
                                           campaign=campaign,
                                           ad_value_strategy=ad_value_strategy,
                                           is_learn_visits=is_learn_visits)
    elif bandit_name in ["JBIExpV", "JBIQV"]:
        bandit = JointBanditDiscriminatoryImproved(
            ads_learner=ads_bandit,
            price_learner=price_bandit_list,
            campaign=campaign,
            ad_value_strategy=ad_value_strategy)
    elif bandit_name in ["JBBQ", "JBBExp"]:
        model_list: List[DiscreteRegressor] = [
            DiscreteGPRegressor(list(campaign.get_budgets()),
                                args.init_std,
                                args.alpha,
                                args.n_restart_opt,
                                normalized=True)
            for _ in range(campaign.get_n_sub_campaigns())
        ]
        bandit = JointBanditBalanced(campaign=campaign,
                                     arm_values=arm_values,
                                     price_learner_class=price_bandit_class,
                                     price_learner_kwargs=price_bandit_kwargs,
                                     number_of_visit_model_list=model_list,
                                     ad_value_strategy=ad_value_strategy)
    elif bandit_name == "JBFQ":
        assert args.daily_price, "This joint bandit requires running with daily pricing"

        model_list: List[DiscreteRegressor] = [
            DiscreteGPRegressor(list(campaign.get_budgets()),
                                args.init_std,
                                args.alpha,
                                args.n_restart_opt,
                                normalized=True)
            for _ in range(campaign.get_n_sub_campaigns())
        ]
        bandit = JointBanditFixedDailyPriceQuantile(
            campaign=campaign,
            number_of_visit_model_list=model_list,
            min_std=args.min_std_q,
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    elif bandit_name == "JBFTS":
        assert args.daily_price, "This joint bandit requires running with daily pricing"

        model_list: List[DiscreteRegressor] = [
            DiscreteGPRegressor(list(campaign.get_budgets()),
                                args.init_std,
                                args.alpha,
                                args.n_restart_opt,
                                normalized=True)
            for _ in range(campaign.get_n_sub_campaigns())
        ]
        bandit = JointBanditFixedDailyPriceTS(
            campaign=campaign,
            number_of_visit_model_list=model_list,
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    else:
        # argparse.ArgumentParser is not an exception type and cannot be raised
        raise ValueError(
            "The name of the bandit to be used is not among the available ones")

    return bandit
std_df.rename(columns={n_bandit: "day"}, inplace=True)

total_df = mean_df.merge(std_df, left_on="day", right_on="day")
total_df.to_csv("{}instant_reward.csv".format(folder_path_with_date),
                index=False)

# Load the scenario with mean functions to retrieve the true click curves
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME, get_mean_function=True)
click_function_list: List[IStochasticFunction] = \
    mean_scenario.get_phases()[0].get_n_clicks_function()

# Optimal point computation
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=BUDGET,
                    n_arms=N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    sub_campaign_values = [
        click_function_list[i].draw_sample(b)
        for b in np.linspace(0, BUDGET, N_ARMS_ADV)
    ]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

# # Compute regret
# if CSV_CUM_REGRET:
#     mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     std_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     mean_regret_data[-1] = np.arange(n_days) + 1
#     std_regret_data[-1] = np.arange(n_days) + 1
    def pull_budget(self) -> List[int]:
        max_best_clicks = 0
        max_best_budgets = None

        for arm_idx in range(len(self.arm_values)):
            subcampaign_values = self.campaign.get_sub_campaigns()
            arm_value_campaign = Campaign(self.campaign.get_n_sub_campaigns(),
                                          self.campaign.get_cum_budget(),
                                          len(self.campaign.get_budgets()))
            for sub_idx in range(len(subcampaign_values)):
                arm_value_campaign.set_sub_campaign_values(
                    sub_idx, subcampaign_values[sub_idx] *
                    self.value_per_clicks_per_price_idx[arm_idx][sub_idx])
            # Optimize the value-weighted campaign built just above
            max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
                arm_value_campaign)

            if max_best_clicks < max_clicks:
                max_best_clicks = max_clicks
                max_best_budgets = best_budgets

        # Balance price learner with number of clicks distribution estimation
        budget_value_to_index = {
            value: i
            for i, value in enumerate(self.campaign.get_budgets())
        }
        estimated_clicks = np.array([
            self.campaign.get_sub_campaigns()[sub_idx,
                                              budget_value_to_index[budget]]
            for sub_idx, budget in enumerate(max_best_budgets)
        ])
        user_probabilities = estimated_clicks / np.sum(estimated_clicks)

        # Re-initialize the shared price learner; it is re-fit below on rewards
        # re-sampled according to the estimated user-class probabilities
        self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
            **self.price_bandit_kwargs)

        for arm_idx in range(len(self.arm_values)):
            rewards_per_subcampaign = []
            for sub_idx in range(self.campaign.get_n_sub_campaigns()):
                rewards_per_subcampaign.append(
                    self.rewards_per_arm_per_user_class[sub_idx][arm_idx])
            rewards_len = np.array(
                [len(rewards) for rewards in rewards_per_subcampaign])

            solution = np.min(rewards_len / user_probabilities)
            balanced_rewards_len = np.array(np.floor(solution *
                                                     user_probabilities),
                                            dtype=int)
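
            # Worked example (hypothetical numbers): with rewards_len = [10, 2, 6]
            # and user_probabilities = [0.5, 0.2, 0.3], the ratios are [20, 10, 20],
            # so solution = 10 and balanced_rewards_len = floor(10 * probs) = [5, 2, 3]:
            # the replayed rewards then follow the estimated traffic split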

            # Clip values of balanced rewards length between 0 and original rewards length
            balanced_rewards_len = np.maximum(balanced_rewards_len, 0)
            balanced_rewards_len = np.minimum(balanced_rewards_len,
                                              rewards_len)

            for sub_idx in range(self.campaign.get_n_sub_campaigns()):
                sampled_rewards = np.random.choice(
                    rewards_per_subcampaign[sub_idx],
                    size=balanced_rewards_len[sub_idx],
                    replace=False)
                for reward in sampled_rewards:
                    self.unique_price_learner.update(arm_idx, reward)

        return [budget_value_to_index[budget] for budget in max_best_budgets]