Example #1
    def test_cb_explore_action_change(self):

        learner = VowpalArgsLearner("--cb_explore 3", VowpalMediatorMocked())
        learner.predict(None, [1, 2, 3])

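        # the action set was fixed by the first predict; changing it requires the adf variant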
        with self.assertRaises(Exception) as e:
            learner.predict(None, [4, 5, 6])

        self.assertIn("`adf`", str(e.exception))
Example #2
    def test_learn_cb(self):

        vw = VowpalMediatorMocked()
        learner = VowpalArgsLearner("--cb_explore", vw)

        learner.predict(None, ['yes', 'no'])
        learner.learn(None, 'no', .5, 0.2, ['yes', 'no'])

        self.assertIsInstance(vw._learn_calls[0], VowpalExampleMock)

        self.assertEqual({'x': None}, vw._learn_calls[0].ns)
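        # VW cb labels are "action:cost:probability": 'no' is action 2 (1-based)
        # and reward .5 is negated to cost -0.5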
        self.assertEqual("2:-0.5:0.2", vw._learn_calls[0].label)
Example #3
    def test_learn_cb_adf(self):

        vw = VowpalMediatorMocked()
        learner = VowpalArgsLearner("--cb_explore_adf", vw)

        learner.predict(None, ['yes', 'no'])
        learner.learn(None, 'yes', 1, 0.2, ['yes', 'no'])

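        # with adf each action becomes its own VW example: a shared 'x' namespace
        # plus an action 'a' namespace, and only the chosen action carries a label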
        self.assertEqual(2, len(vw._learn_calls[0]))

        self.assertEqual({'x': None}, vw._learn_calls[0][0].ns[0])
        self.assertEqual({'a': 'yes'}, vw._learn_calls[0][0].ns[1])
        self.assertEqual("1:-1:0.2", vw._learn_calls[0][0].label)

        self.assertEqual({'x': None}, vw._learn_calls[0][1].ns[0])
        self.assertEqual({'a': 'no'}, vw._learn_calls[0][1].ns[1])
        self.assertEqual(None, vw._learn_calls[0][1].label)
Example #4
    def test_flatten_tuples(self):

        vw = VowpalMediatorMocked()
        learner = VowpalArgsLearner("--cb_explore", vw)

        learner.predict([(0, 0, 1)], ['yes', 'no'])
        learner.learn({'l': (0, 0, 1), 'j': 1}, 'no', .5, 0.2, ['yes', 'no'])

        self.assertIsInstance(vw._learn_calls[0], VowpalExampleMock)

        self.assertEqual({'x': [0, 0, 1]}, vw._predict_calls[0].ns)
        self.assertEqual(None, vw._predict_calls[0].label)

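        # tuple values in the context are flattened into indexed keys before
        # being handed to VW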
        self.assertEqual({'x': {
            'l_0': 0,
            'l_1': 0,
            'l_2': 1,
            'j': 1
        }}, vw._learn_calls[0].ns)
        self.assertEqual("2:-0.5:0.2", vw._learn_calls[0].label)
Example #5
    def test_cb_adf_learning(self):
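        # end-to-end check: on a simulated linear-reward CB problem the learner
        # should clearly beat the ~1/3 uniform-random baseline after training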
        learner = VowpalArgsLearner()

        n_actions = 3
        n_features = 10
        n_examples = 2000

        rng = CobaRandom(11111)

        contexts = [rng.randoms(n_features) for _ in range(n_examples)]

        pre_learn_rewards = []
        for context in contexts[:int(.9 * n_examples)]:

            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
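            # binarize: reward 1 only for the action with the highest linear score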
            rewards = [int(r == max(rewards)) for r in rewards]

            pre_learn_rewards.append(
                rng.choice(rewards,
                           learner.predict(context, actions)[0]))

        for context in contexts[:int(.9 * n_examples)]:

            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
            rewards = [int(r == max(rewards)) for r in rewards]

            probs, info = learner.predict(context, actions)
            choice = rng.choice(list(range(n_actions)), probs)

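            # update on the chosen action with its realized reward and probability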
            learner.learn(context, actions[choice], rewards[choice],
                          probs[choice], info)

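        # evaluate on the held-out last 10% of contexts without further learning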
        post_learn_rewards = []

        for context in contexts[int(.9 * n_examples):]:
            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
            rewards = [int(r == max(rewards)) for r in rewards]

            post_learn_rewards.append(
                rng.choice(rewards,
                           learner.predict(context, actions)[0]))

        average_pre_learn_reward = sum(pre_learn_rewards) / len(
            pre_learn_rewards)
        average_post_learn_reward = sum(post_learn_rewards) / len(
            post_learn_rewards)

        self.assertAlmostEqual(.33, average_pre_learn_reward, places=2)
        self.assertAlmostEqual(.78, average_post_learn_reward, places=2)
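
A side note on the .33 baseline asserted at the end of Example #5: with 3 actions and exactly one paying reward 1, a uniform-random policy earns 1/3 in expectation. Below is a minimal standalone sketch of that baseline, assuming CobaRandom is importable from coba.random and that choice accepts explicit weights, as used in Example #5; it is an illustration, not part of the test suite.

from coba.random import CobaRandom

rng = CobaRandom(11111)
n_actions, n_features, n_examples = 3, 10, 2000

total = 0.0
for _ in range(n_examples):
    context = rng.randoms(n_features)
    actions = [rng.randoms(n_features) for _ in range(n_actions)]
    scores  = [sum(a * c for a, c in zip(action, context)) for action in actions]
    rewards = [int(s == max(scores)) for s in scores]
    # a uniform policy: every action is chosen with probability 1/n_actions
    total += rng.choice(rewards, [1 / n_actions] * n_actions)

print(total / n_examples)  # comes out near 1/3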