def main(args):
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)

    campaign = Campaign(scenario.get_n_subcampaigns(), args.cum_budget,
                        args.n_arms)
    bandit = build_combinatorial_bandit(bandit_name=args.bandit_name,
                                        campaign=campaign,
                                        init_std=args.init_std,
                                        args=args)
    # One budget entry per subcampaign, initially empty
    budget_allocation = [0] * scenario.get_n_subcampaigns()

    for _ in range(args.n_rounds):
        # Choose arm
        budget_allocation_indexes = bandit.pull_arm()
        budget_allocation = [
            int(campaign.get_budgets()[i]) for i in budget_allocation_indexes
        ]

        # Observe reward
        env.set_budget_allocation(budget_allocation=budget_allocation)
        env.next_day()
        rewards = env.get_daily_visits_per_sub_campaign()

        # Update bandit
        bandit.update(pulled_arm=budget_allocation_indexes,
                      observed_reward=rewards)

    return bandit.collected_rewards, budget_allocation
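For reference, a minimal sketch of the pull/update contract this loop assumes: pull_arm() returns one budget-arm index per subcampaign, and update() receives the matching list of observed rewards. The class name and the greedy per-subcampaign choice are illustrative assumptions, not this repository's bandits, which allocate arms jointly under the cumulative budget constraint.

import numpy as np

class MeanCombinatorialBanditSketch:
    """Tracks a running mean reward per (subcampaign, budget arm)."""

    def __init__(self, n_sub_campaigns, n_arms):
        self.means = np.zeros((n_sub_campaigns, n_arms))
        self.counts = np.zeros((n_sub_campaigns, n_arms))
        self.collected_rewards = []

    def pull_arm(self):
        # Greedy per-subcampaign choice; a real campaign bandit would instead
        # solve a knapsack over the cumulative budget (see CampaignOptimizer).
        return [int(np.argmax(row)) for row in self.means]

    def update(self, pulled_arm, observed_reward):
        # One (arm, reward) pair per subcampaign, as in the loop above
        for i, (arm, r) in enumerate(zip(pulled_arm, observed_reward)):
            self.counts[i, arm] += 1
            self.means[i, arm] += (r - self.means[i, arm]) / self.counts[i, arm]
        self.collected_rewards.append(float(np.sum(observed_reward)))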
Example #2
def main(args):
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)
    env.set_budget_allocation([args.budget] * scenario.get_n_subcampaigns())

    prices = get_prices(args=args)
    arm_profit = prices - args.unit_cost
    bandit = build_discrete_bandit(bandit_name=args.bandit_name, n_arms=len(arm_profit),
                                   arm_values=arm_profit, args=args)

    # Advance the environment until the first day begins
    while not env.next_day():
        pass

    for _ in range(args.n_rounds):
        # Choose arm
        price_idx = bandit.pull_arm()

        # Observe reward
        env.next_user()
        reward, elapsed_day = env.round(price=prices[price_idx])
        reward = reward * arm_profit[price_idx]

        if elapsed_day:
            # A day has ended; advance until the next one begins
            while not env.next_day():
                pass

        # Update bandit
        bandit.update(pulled_arm=price_idx, reward=reward)

    return bandit.collected_rewards, env.get_day_breakpoints()
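Since the same day-advancing idiom appears twice above, it could be factored into a small helper (name hypothetical):

def advance_to_next_day(env):
    """Step the environment until next_day() signals that a new day started."""
    while not env.next_day():
        pass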
Example #3
def main(args):
    # Retrieve scenario
    scenario = EnvironmentManager.load_scenario(args.scenario_name)

    # Retrieve bandit and basic information
    campaign = Campaign(scenario.get_n_subcampaigns(), args.cum_budget,
                        args.n_arms_ads)
    prices = get_prices(args=args)
    arm_profit = prices - args.unit_cost
    bandit = get_bandit(args=args, arm_values=arm_profit, campaign=campaign)

    # Create environment
    env = PricingAdvertisingJointEnvironment(scenario=scenario)

    # Choose whether to pull one arm per day for each class, or one arm per user
    if args.daily_price:
        learn_per_day(bandit, env, campaign, prices, arm_profit,
                      scenario.get_n_subcampaigns())
    else:
        learn_per_user(bandit, env, campaign, prices, arm_profit)

    return bandit.get_daily_reward(), env.day_breakpoints
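learn_per_day and learn_per_user are defined elsewhere in the repository; a plausible skeleton of the per-user variant, modeled directly on Example #2's loop (every control-flow detail here is an assumption):

def learn_per_user_sketch(bandit, env, prices, arm_profit, n_rounds):
    # Assumed shape, mirroring Example #2: one price arm per arriving user
    for _ in range(n_rounds):
        price_idx = bandit.pull_arm()
        env.next_user()
        reward, elapsed_day = env.round(price=prices[price_idx])
        if elapsed_day:
            while not env.next_day():
                pass
        bandit.update(pulled_arm=price_idx,
                      reward=reward * arm_profit[price_idx])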
Example #4
for bandit_idx, name in enumerate(BANDIT_NAME):
    # Rename per-bandit mean-reward columns (mirrors the std_df loop below)
    mean_df.rename(columns={bandit_idx: "mean_reward_{}".format(name)},
                   inplace=True)

mean_df.rename(columns={n_bandit: "day"}, inplace=True)

for bandit_idx, name in enumerate(BANDIT_NAME):
    std_df.rename(columns={bandit_idx: "mean_std_{}".format(name)},
                  inplace=True)

std_df.rename(columns={n_bandit: "day"}, inplace=True)

total_df = mean_df.merge(std_df, on="day")
total_df.to_csv("{}instant_reward.csv".format(folder_path_with_date),
                index=False)

# Load the scenario's mean (expected-value) click functions
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME, get_mean_function=True)
click_function_list: List[IStochasticFunction] = \
    mean_scenario.get_phases()[0].get_n_clicks_function()

# Optimal point computation
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=BUDGET,
                    n_arms=N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    sub_campaign_values = [
        click_function_list[i].draw_sample(b)
        for b in np.linspace(0, BUDGET, N_ARMS_ADV)
    ]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
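CampaignOptimizer.find_best_budgets is not shown in this excerpt. Because the budget grid above is equally spaced and starts at 0, index sums map linearly to budget sums, so the optimal split can be computed with a standard dynamic program over budget indexes. The sketch below is one such implementation under that assumption, not the project's code:

import numpy as np

def find_best_budgets_sketch(values, budgets):
    """values: (n_sub, n_arms) estimated clicks per budget arm;
    budgets: shared, equally spaced grid starting at 0.
    Returns (best total clicks, list of per-subcampaign budgets)."""
    n_sub, n_arms = values.shape
    table = values[0].astype(float).copy()  # best value, subcampaign 0 only
    choices = []                            # per-step argmax, for backtracking
    for k in range(1, n_sub):
        new_table = np.full(n_arms, -np.inf)
        choice = np.zeros(n_arms, dtype=int)
        for b in range(n_arms):             # total budget index so far
            for j in range(b + 1):          # share given to subcampaign k
                v = table[b - j] + values[k][j]
                if v > new_table[b]:
                    new_table[b], choice[b] = v, j
        choices.append(choice)
        table = new_table
    # Backtrack from the best total budget index to recover the split
    b = int(np.argmax(table))
    best_value = float(table[b])
    allocation = []
    for choice in reversed(choices):
        j = int(choice[b])
        allocation.append(budgets[j])
        b -= j
    allocation.append(budgets[b])           # remainder goes to subcampaign 0
    allocation.reverse()
    return best_value, allocation

Here values[i][j] plays the role of click_function_list[i].draw_sample(b) on the same np.linspace grid.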
    fd.write("Bandit algorithm for pricing: {}\n".format(
        args.pricing_bandit_name))
    fd.write("Bandit algorithm for ads: {}\n".format(args.ads_bandit_name))
    fd.write("User prices: {}\n".format(get_prices(args=args)))
    fd.write("Cumulative budget: {}\n\n".format(args.cum_budget))

    fd.write("Bandit parameters \n")
    fd.write(
        "Min quantile standard deviation (For quantile bandits): {}\n".format(
            args.min_std_q))
    fd.write("Gamma parameter (EXP3) {}\n".format(args.gamma))
    fd.write("CRP upper bound {}\n".format(args.crp_upper_bound))
    fd.write("Alpha GP: {}\n".format(args.alpha))
    fd.write("Number of GP optimizer restarts: {}\n".format(
        args.n_restart_opt))
    fd.write("Initial standard deviation GP: {}\n".format(args.init_std))

    fd.close()
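    # Design note: a with-block ("with open(path, 'w') as fd:", path being
    # whichever settings file was opened above) would guarantee the file is
    # closed even if one of the writes raises.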

    # Plot the instantaneous reward averaged over runs
    rewards = np.mean(total_rewards, axis=0)
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)

    os.chdir(folder_path_with_date)

    plt.plot(rewards, 'g')
    plt.xlabel("t")
    plt.ylabel("Instantaneous Reward")
    plt.title(str(args.n_runs) + " Experiments")
    plt.savefig(fname="Reward.png", format="png")