示例#1
0
    def test_add_action(self):
        """Actions added after a recommendation are tracked by the model.

        A first round is requested before two new actions are added, then
        rewarded afterwards; the stored model is expected to hold a total
        reward of 1.0 and a count of 1 for each new action.  A second round
        rewards both new actions directly and the counters are expected to
        advance accordingly.
        """
        bandit = self.policy
        first_history_id, _ = bandit.get_action(context=None, n_actions=2)

        added = [Action() for _ in range(2)]
        bandit.add_action(added)
        n_known_actions = len(added) + len(self.actions)
        self.assertEqual(n_known_actions, bandit._action_storage.count())

        bandit.reward(first_history_id, {3: 1})
        state = bandit._model_storage.get_model()
        rounds_after_first = len(self.actions) + len(added) + 1
        for new_action in added:
            new_id = new_action.id
            self.assertEqual(state['total_action_reward'][new_id], 1.0)
            self.assertEqual(state['action_times'][new_id], 1)
            self.assertEqual(state['n_rounds'], rounds_after_first)

        second_history_id, recommendations = bandit.get_action(
            context=None, n_actions=4)
        self.assertEqual(len(recommendations), 4)
        second_rewards = {added[0].id: 4, added[1].id: 5}
        bandit.reward(second_history_id, second_rewards)

        state = bandit._model_storage.get_model()
        rounds_after_second = len(self.actions) + len(added) + 1 + 2
        for new_action in added:
            new_id = new_action.id
            # The second reward must have moved the total away from 1.0.
            self.assertNotEqual(state['total_action_reward'][new_id], 1.0)
            self.assertEqual(state['action_times'][new_id], 2)
            self.assertEqual(state['n_rounds'], rounds_after_second)
示例#2
0
def get_data():
    """Load the tab-separated input tables and build the action storage.

    All files are read from the current working directory.

    Returns:
        A 5-tuple ``(streaming_batch, user_feature, actions, reward_list,
        action_context)`` where every element except ``actions`` is a pandas
        DataFrame and ``actions`` is a ``MemoryActionStorage`` holding one
        ``Action`` per entry of the ``movie_id`` column of ``actions.csv``.
    """

    def read_tsv(path, **options):
        # Every table shares the same separator and parser engine.
        return pd.read_csv(path, sep='\t', engine='c', **options)

    streaming_batch = read_tsv('streaming_batch.csv', names=['user_id'])
    user_feature = read_tsv('user_feature.csv', header=0, index_col=0)
    movie_ids = list(read_tsv('actions.csv', header=0)['movie_id'])
    reward_list = read_tsv('reward_list.csv', header=0)
    action_context = read_tsv('action_context.csv', header=0)

    movie_actions = [Action(movie_id) for movie_id in movie_ids]
    actions = MemoryActionStorage()
    actions.add(movie_actions)
    return streaming_batch, user_feature, actions, reward_list, action_context
示例#3
0
    def test_add_action(self):
        """Actions added between rounds start untrained and can be trained.

        After two actions are added and an earlier round is rewarded, the
        ``A`` matrix stored for each new action must still equal the identity
        matrix.  Once the new actions themselves are rewarded in a second
        round, their ``A`` matrices must no longer equal the identity.
        """
        policy = self.policy
        context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, _ = policy.get_action(context1, 2)
        new_actions = [Action() for _ in range(2)]
        policy.add_action(new_actions)
        self.assertEqual(
            len(new_actions) + len(self.actions),
            policy._action_storage.count())
        policy.reward(history_id, {3: 1})
        model = policy._model_storage.get_model()

        # Use the fixture's dimension for both checks instead of a literal 2,
        # so the "changed" assertion below cannot pass merely because of a
        # shape mismatch if the fixture's dimension is ever changed.
        untrained_a = np.identity(self.context_dimension)
        for action in new_actions:
            self.assertTrue((model['A'][action.id] == untrained_a).all())

        context2 = {1: [1, 1], 2: [2, 2], 3: [3, 3], 4: [4, 4], 5: [5, 5]}
        history_id2, recommendations = policy.get_action(context2, 4)
        self.assertEqual(len(recommendations), 4)
        policy.reward(history_id2, {
            new_actions[0].id: 4,
            new_actions[1].id: 5
        })
        model = policy._model_storage.get_model()
        for action in new_actions:
            self.assertFalse((model['A'][action.id] == untrained_a).all())
    def test_add_action(self):
        """Newly added actions start with weight 1.0 and grow when rewarded.

        The first round is rewarded only for an action recommended before
        the new actions were added, so the ``w`` entry of each new action is
        expected to be exactly 1.0.  The second round requests every action
        (``n_actions=-1``) and rewards both new actions, after which their
        ``w`` entries are expected to exceed 1.0.
        """
        bandit = self.policy
        first_history_id, first_recs = bandit.get_action(
            context=None, n_actions=2)

        added = [Action() for _ in range(2)]
        bandit.add_action(added)
        n_known_actions = len(added) + len(self.actions)
        self.assertEqual(n_known_actions, bandit._action_storage.count())

        rewarded_id = first_recs[0].action.id
        bandit.reward(first_history_id, {rewarded_id: 1.})
        weights = bandit._model_storage.get_model()['w']
        for new_action in added:
            self.assertEqual(weights[new_action.id], 1.0)

        second_history_id, second_recs = bandit.get_action(
            context=None, n_actions=-1)
        self.assertEqual(len(second_recs), len(added) + len(self.actions))
        second_rewards = {added[0].id: 4, added[1].id: 5}
        bandit.reward(second_history_id, second_rewards)

        weights = bandit._model_storage.get_model()['w']
        for new_action in added:
            self.assertGreater(weights[new_action.id], 1.0)
示例#5
0
def main():
    """Tune ``gamma`` for Exp3 on simulated data, then plot average regret.

    Phase one evaluates one Exp3 policy per candidate ``gamma`` on 1000
    simulated rounds and plots the resulting tuning curve.  Phase two runs a
    fresh policy with the best ``gamma`` for 10000 rounds, rewarding 1 when
    the recommended action matches the desired one and 0 otherwise, then
    plots the policy's average regret.
    """
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    # One generator is shared by every policy built below.
    random_state = np.random.RandomState(0)

    # Parameter tuning
    n_tuning_rounds = 1000
    gammas = np.arange(0.001, 1, 0.03)
    ctr_by_gamma = np.zeros(shape=len(gammas))
    tuning_context, tuning_targets = simulation.simulate_data(
        n_tuning_rounds, context_dimension, action_storage, random_state=0)
    for idx, gamma in enumerate(gammas):
        candidate = Exp3(MemoryHistoryStorage(),
                         MemoryModelStorage(),
                         action_storage,
                         gamma=gamma,
                         random_state=random_state)
        regret_curve = simulation.evaluate_policy(candidate, tuning_context,
                                                  tuning_targets)
        ctr_by_gamma[idx] = n_tuning_rounds - regret_curve[-1]
    ctr_by_gamma /= n_tuning_rounds
    best_gamma = gammas[np.argmax(ctr_by_gamma)]
    simulation.plot_tuning_curve(gammas, ctr_by_gamma, label="gamma changes")

    # Regret Analysis
    n_regret_rounds = 10000
    regret_context, regret_targets = simulation.simulate_data(
        n_regret_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(),
                  MemoryModelStorage(),
                  action_storage,
                  gamma=best_gamma,
                  random_state=random_state)

    for t in range(n_regret_rounds):
        history_id, recommendation = policy.get_action(regret_context[t])
        chosen_id = recommendation.action.id
        payoff = 0 if regret_targets[t] != chosen_id else 1
        policy.reward(history_id, {chosen_id: payoff})

    policy.plot_avg_regret()
    plt.show()
示例#6
0
def main():
    """Tune ``alpha`` for LinUCB on simulated data, then plot average regret.

    Phase one evaluates one LinUCB policy per candidate ``alpha`` on 1000
    simulated rounds and plots the resulting tuning curve.  Phase two runs a
    fresh policy with the best ``alpha`` for 10000 rounds, rewarding 1 when
    the recommended action matches the desired one and 0 otherwise, then
    plots the policy's average regret.
    """
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    n_tuning_rounds = 1000
    alphas = np.arange(0, 3, 0.05)
    # Every slot is overwritten in the loop below before it is read.
    ctr_by_alpha = np.empty(shape=len(alphas))
    tuning_context, tuning_targets = simulation.simulate_data(
        n_tuning_rounds, context_dimension, action_storage, random_state=0)
    for idx, alpha in enumerate(alphas):
        candidate = LinUCB(history_storage=MemoryHistoryStorage(),
                           model_storage=MemoryModelStorage(),
                           action_storage=action_storage,
                           context_dimension=context_dimension,
                           alpha=alpha)
        regret_curve = simulation.evaluate_policy(candidate, tuning_context,
                                                  tuning_targets)
        ctr_by_alpha[idx] = n_tuning_rounds - regret_curve[-1]
    ctr_by_alpha /= n_tuning_rounds
    best_alpha = alphas[np.argmax(ctr_by_alpha)]
    simulation.plot_tuning_curve(alphas, ctr_by_alpha, label="alpha changes")

    # Regret Analysis
    n_regret_rounds = 10000
    regret_context, regret_targets = simulation.simulate_data(
        n_regret_rounds, context_dimension, action_storage, random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=best_alpha)

    for t in range(n_regret_rounds):
        history_id, recommendation = policy.get_action(regret_context[t])
        chosen_id = recommendation.action.id
        payoff = 0 if regret_targets[t] != chosen_id else 1
        policy.reward(history_id, {chosen_id: payoff})

    policy.plot_avg_regret()
    plt.show()
示例#7
0
    def test_add_action(self):
        """Adding actions between rounds keeps recommend/reward working.

        Two actions are added after a first recommendation; the storage
        count must reflect them, the earlier round must still accept a
        reward, and a second round over five contexts must return four
        recommendations and accept rewards for the new actions.
        """
        bandit = self.policy
        first_context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        first_history_id, _ = bandit.get_action(first_context, 2)

        added = [Action() for _ in range(2)]
        bandit.add_action(added)
        n_known_actions = len(added) + len(self.actions)
        self.assertEqual(n_known_actions, bandit._action_storage.count())
        bandit.reward(first_history_id, {3: 1})

        second_context = {
            1: [1, 1], 2: [2, 2], 3: [3, 3], 4: [4, 4], 5: [5, 5]}
        second_history_id, recommendations = bandit.get_action(
            second_context, 4)
        self.assertEqual(len(recommendations), 4)
        second_rewards = {added[0].id: 4, added[1].id: 5}
        bandit.reward(second_history_id, second_rewards)
示例#8
0
def main():
    """Run UCB1 on 10000 simulated rounds and plot its average regret.

    Each round rewards 1 when the recommended action matches the desired
    action from the simulation and 0 otherwise.
    """
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret Analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    history_storage = MemoryHistoryStorage()
    model_storage = MemoryModelStorage()
    policy = UCB1(history_storage, model_storage, action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        chosen_id = recommendation.action.id
        payoff = 0 if desired_actions[t] != chosen_id else 1
        policy.reward(history_id, {chosen_id: payoff})

    policy.plot_avg_regret()
    plt.show()
示例#9
0
def main():
    """Tune LinThompSamp's ``delta``, ``R`` and ``epsilon``, then plot regret.

    Phase one sweeps each of the three parameters over the same grid while
    holding the other two at fixed values (delta=0.5, R=0.01, epsilon=0.5),
    evaluating every configuration on 1000 simulated rounds, and plots the
    three tuning curves.  Phase two runs a fresh policy with the per-sweep
    best values for 10000 rounds, rewarding 1 when the recommended action
    matches the desired one and 0 otherwise, then plots average regret.
    """
    n_tuning_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    # One generator is shared by every policy built below, so the order in
    # which policies are evaluated affects the random draws each one sees.
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(n_tuning_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)

    def tuning_hits(delta, r, epsilon):
        """Return rounds minus final cumulative regret for one setting."""
        candidate = LinThompSamp(MemoryHistoryStorage(),
                                 MemoryModelStorage(),
                                 action_storage,
                                 context_dimension=context_dimension,
                                 delta=delta,
                                 R=r,
                                 epsilon=epsilon,
                                 random_state=random_state)
        cum_regret = simulation.evaluate_policy(candidate, context1,
                                                desired_actions1)
        return n_tuning_rounds - cum_regret[-1]

    # Keep the delta -> R -> epsilon order inside each grid step: the three
    # evaluations consume the shared random_state in sequence.
    for param_i, param in enumerate(tuning_region):
        ctr_delta[param_i] = tuning_hits(delta=param, r=0.01, epsilon=0.5)
        ctr_r[param_i] = tuning_hits(delta=0.5, r=param, epsilon=0.5)
        ctr_epsilon[param_i] = tuning_hits(delta=0.5, r=0.01, epsilon=param)

    ctr_delta /= n_tuning_rounds
    ctr_r /= n_tuning_rounds
    ctr_epsilon /= n_tuning_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result.  The x-axis reuses tuning_region so
    # the curves cannot drift out of sync with the grid that was evaluated.
    curves = (
        (ctr_delta, 'ro-', "delta changes, R = 0.01, eps = 0.5"),
        (ctr_r, 'gs-', "delta = 0.5, R = changes, eps = 0.5"),
        (ctr_epsilon, 'b^-', "delta = 0.5, R = 0.01, eps = changes"),
    )
    for ctr_values, line_format, label in curves:
        plt.plot(tuning_region, ctr_values, line_format, label=label)
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_regret_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_regret_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(),
                          MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt,
                          R=r_opt,
                          epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_regret_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        payoff = 0 if desired_actions2[t] != action_id else 1
        policy.reward(history_id, {action_id: payoff})

    policy.plot_avg_regret()
    plt.show()
def main():  # pylint: disable=too-many-locals
    """Tune ``delta`` for Exp4P on simulated data, then plot average regret.

    Experts are trained on a first simulated data set; the same data set,
    converted to expert advice, is then used to evaluate one Exp4P policy
    per candidate ``delta`` and plot the tuning curve.  A fresh policy with
    the best ``delta`` is finally run for 10000 rounds on a second data set,
    rewarding 1 when the chosen action matches the desired one and 0
    otherwise, and its average regret is plotted.
    """
    # The tuning data set has 3000 rounds, so the CTR must be normalised by
    # 3000 as well; normalising by a different round count mis-scales the
    # tuning curve (the argmax, and hence delta_opt, is unaffected).
    # NOTE(review): assumes simulation.evaluate_policy replays every
    # simulated round, as the other example scripts do - confirm.
    n_tuning_rounds = 3000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]

    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(n_tuning_rounds,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    # Every slot is overwritten in the loop below before it is read.
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)

    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions,
                       historystorage,
                       modelstorage,
                       delta=delta,
                       p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_tuning_rounds - cum_regret[-1]
    ctr_tuning /= n_tuning_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="delta changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions,
                   historystorage,
                   modelstorage,
                   delta=delta_opt,
                   p_min=None)

    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        payoff = 0 if desired_actions2[t] != action_id else 1
        policy.reward(history_id, {action_id: payoff})

    policy.plot_avg_regret()
    plt.show()
示例#11
0
 def test_add_action_from_empty_change_storage(self):
     """Adding to a policy with empty storage stores exactly the new ids."""
     bandit = self.policy_with_empty_action_storage
     added = [Action() for _ in range(2)]
     bandit.add_action(added)
     expected_ids = {new_action.id for new_action in added}
     stored_ids = set(bandit._action_storage.iterids())
     self.assertEqual(expected_ids, stored_ids)
示例#12
0
 def test_add_action_change_storage(self):
     """Adding actions via the policy updates the fixture's action storage."""
     bandit = self.policy
     added = [Action() for _ in range(2)]
     bandit.add_action(added)
     expected_ids = {known.id for known in self.actions + added}
     stored_ids = set(self.action_storage.iterids())
     self.assertEqual(expected_ids, stored_ids)
示例#13
0
 def setUp(self):  # pylint: disable=invalid-name
     """Create fresh in-memory storages and register three actions.

     The actions are constructed from the values 1, 2 and 3 and added to
     ``self.action_storage``.
     """
     self.model_storage = MemoryModelStorage()
     self.history_storage = MemoryHistoryStorage()
     self.action_storage = MemoryActionStorage()
     self.actions = [Action(number) for number in range(1, 4)]
     self.action_storage.add(self.actions)