Пример #1
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(),
                      MemoryModelStorage(),
                      action_storage,
                      gamma=gamma,
                      random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="gamma changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = Exp3(MemoryHistoryStorage(),
                  MemoryModelStorage(),
                  action_storage,
                  gamma=gamma_opt,
                  random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #2
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension,
                        alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="alpha changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=alpha_opt)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #3
0
def main():  # pylint: disable=too-many-locals
    n_rounds = 1000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]

    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(
        3000, context_dimension, actions, "Exp4P", random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)

    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions, historystorage, modelstorage,
                       delta=delta, p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="delta changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, actions, "Exp4P", random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions, historystorage, modelstorage,
                   delta=delta_opt, p_min=None)

    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #4
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                      action_storage, gamma=gamma, random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="gamma changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage, gamma=gamma_opt, random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #5
0
def main():
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret Analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        if desired_actions[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #6
0
def main():  # pylint: disable=too-many-locals
    n_rounds = 1000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]

    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(3000,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)

    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions,
                       historystorage,
                       modelstorage,
                       delta=delta,
                       p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="delta changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions,
                   historystorage,
                   modelstorage,
                   delta=delta_opt,
                   p_min=None)

    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #7
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)

    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param,
                              R=0.01,
                              epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=param,
                              epsilon=0.5,
                              random_state=random_state)

        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=0.01,
                              epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_delta,
             'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_r,
             'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_epsilon,
             'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(),
                          MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt,
                          R=r_opt,
                          epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Пример #8
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)

    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param, R=0.01, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=param, epsilon=0.5,
                              random_state=random_state)

        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=0.01, epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(np.arange(0.01, 0.99, 0.1), ctr_delta, 'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1), ctr_r, 'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1), ctr_epsilon, 'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt, R=r_opt, epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()