Exemplo n.º 1
0
    def test_invalid_log_format(self):
        """Simulator rejects non-string ``log_format`` values with TypeError.

        Both an integer (7) and ``None`` must be rejected; a valid value
        would be a logging format string.
        """
        # Seeded RNG: decisions/rewards/contexts are drawn in keyword-argument
        # order, so the argument order below is load-bearing for the values.
        rng = np.random.RandomState(seed=7)
        with self.assertRaises(TypeError):
            Simulator(bandits=[("example",
                                MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)]
                                for _ in range(10)],
                      scaler=StandardScaler(),
                      test_size=0.4,
                      batch_size=0,
                      is_ordered=True,
                      seed=7,
                      log_format=7)

        # None is also invalid: log_format must be an actual format string.
        with self.assertRaises(TypeError):
            Simulator(bandits=[("example",
                                MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)]
                                for _ in range(10)],
                      scaler=StandardScaler(),
                      test_size=0.4,
                      batch_size=0,
                      is_ordered=True,
                      seed=7,
                      log_format=None)
Exemplo n.º 2
0
    def test_no_neighbors_expectations(self):
        """LSHNearest with no neighbors returns NaN for every arm expectation.

        With n_dimensions=25 the queried contexts hash into empty buckets,
        so predict_expectations yields NaN for all arms.

        NOTE(review): the second ``self.predict`` call below is byte-identical
        to the first (same seed=7) — sibling tests vary the seed between the
        two calls, so this looks like a copy-paste mistake; confirm intent.
        Also note the off-by-one-space indentation in the second
        context_history literal.
        """

        exp, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 2],
                                rewards=[10, 10, 10, -10, -10, -10],
                                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=25),
                                context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                                 [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                                contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]],
                                seed=7,
                                num_run=1,
                                is_predict=False)

        # Every expectation must be NaN when no neighbor is found.
        for index, row in enumerate(exp):
            for key in row.keys():
                self.assertIs(np.nan, row[key])

        exp, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 2],
                                rewards=[10, 10, 10, -10, -10, -10],
                                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=25),
                                context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                                contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]],
                                seed=7,
                                num_run=1,
                                is_predict=False)

        for index, row in enumerate(exp):
            for key in row.keys():
                self.assertIs(np.nan, row[key])
Exemplo n.º 3
0
    def test_no_neighbors(self):
        """LSHNearest falls back to random arm selection when no neighbor exists.

        With n_dimensions=20 the queried contexts hash into empty buckets,
        so the predicted arms depend only on the seed.
        """
        for seed, expected in ((7, [2, 3, 2, 3, 4]), (12, [3, 2, 4, 1, 4])):
            arms, mab = self.predict(
                arms=[1, 2, 3, 4],
                decisions=[1, 1, 1, 2, 2, 2],
                rewards=[10, 10, 10, -10, -10, -10],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=20, n_tables=1),
                context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                 [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1],
                          [0, -1, -2, -3, -5], [-1, -1, -1, -1, -1],
                          [0, -1, -2, -3, -5]],
                seed=seed,
                num_run=1,
                is_predict=True)
            self.assertListEqual(arms, expected)
Exemplo n.º 4
0
    def test_no_neighbors_hash(self):
        """Queried contexts with no neighbors hash into empty LSH buckets."""
        contexts = [[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1],
                    [0, -1, -2, -3, -5], [-1, -1, -1, -1, -1],
                    [0, -1, -2, -3, -5]]
        for seed in (7, 12):
            arms, mab = self.predict(
                arms=[1, 2, 3, 4],
                decisions=[1, 1, 1, 2, 2, 2],
                rewards=[10, 10, 10, -10, -10, -10],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=20, n_tables=1),
                context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                 [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                contexts=contexts,
                seed=seed,
                num_run=1,
                is_predict=True)

            # Each queried context must map to an empty bucket in table 0.
            hashes = mab._imp.get_context_hash(np.asarray(contexts), mab._imp.table_to_plane[0])
            for h in hashes:
                self.assertEqual(len(mab._imp.table_to_hash_to_index[0][h]), 0)
Exemplo n.º 5
0
    def test_greedy1_k2_backend(self):
        """KNearest(2) with epsilon=1 predicts identically across joblib backends."""
        rng = np.random.RandomState(seed=7)
        expected = [2, 1, 1, 3, 3, 1, 2, 2, 3, 3]

        # Each iteration draws a fresh 10x5 context history from the same RNG
        # stream, consuming it in exactly the order the original three
        # sequential calls did.
        for backend in (None, 'loky', 'threading'):
            arms, mab = self.predict(
                arms=[1, 2, 3],
                decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
                rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=1),
                neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                context_history=[[rng.random_sample() for _ in range(5)]
                                 for _ in range(10)],
                contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
                seed=123456,
                num_run=1,
                is_predict=True,
                n_jobs=2,
                backend=backend)
            self.assertListEqual(arms, expected)
Exemplo n.º 6
0
    def test_greedy1_n3(self):
        """Clusters(3) with epsilon=1 predicts identically for serial and parallel jobs."""
        rng = np.random.RandomState(seed=7)
        expected = [2, 1, 1, 3, 3, 1, 2, 2, 3, 3]

        # One fresh 10x5 context history per n_jobs setting, drawn from the
        # same RNG stream in the same order as three sequential calls.
        for n_jobs in (1, 2, -1):
            arms, mab = self.predict(
                arms=[1, 2, 3],
                decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
                rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=1),
                neighborhood_policy=NeighborhoodPolicy.Clusters(3),
                context_history=[[rng.random_sample() for _ in range(5)]
                                 for _ in range(10)],
                contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
                seed=123456,
                num_run=1,
                is_predict=True,
                n_jobs=n_jobs)
            self.assertListEqual(arms, expected)
Exemplo n.º 7
0
    def test_greedy1_r2_backend(self):
        """Radius(2) with epsilon=1 predicts identically across joblib backends."""
        expected = [3, 3, 1, 1, 3, 1, 1, 2, 2, 3]

        for backend in (None, 'loky', 'threading'):
            arms, mab = self.predict(
                arms=[1, 2, 3],
                decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
                rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=1),
                neighborhood_policy=NeighborhoodPolicy.Radius(2),
                context_history=[[0, 0, 0, 0, 0] for _ in range(10)],
                contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
                seed=123456,
                num_run=1,
                is_predict=True,
                n_jobs=2,
                backend=backend)
            self.assertListEqual(arms, expected)
Exemplo n.º 8
0
 def test_invalid_no_context_policy(self):
     """Fitting contexts without a neighborhood policy raises TypeError."""
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     # A context-free bandit must reject a context_history argument.
     with self.assertRaises(TypeError):
         mab.fit([1, 1, 1], [0, 0, 0], [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
Exemplo n.º 9
0
 def test_invalid_no_context_history(self):
     """Fitting a contextual bandit without contexts raises TypeError."""
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0),
               NeighborhoodPolicy.Radius(2))
     # Radius policy requires a context history; omitting it is an error.
     with self.assertRaises(TypeError):
         mab.fit([1, 1, 1], [0, 0, 0])
Exemplo n.º 10
0
    def test_partial_fit_greedy0_r2(self):
        """partial_fit extends the stored decision/reward/context history.

        Fits a KNearest(2) greedy bandit on 10 samples, predicts, then
        partial-fits 3 more samples and checks the internal histories grow
        from 10 to 13 entries while staying one-dimensional.
        """

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.KNearest(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [1, 1])
        # History sizes before partial_fit (duplicate decisions assertion removed).
        self.assertEqual(len(mab._imp.decisions), 10)
        self.assertEqual(len(mab._imp.rewards), 10)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)

        decisions2 = [1, 2, 3]
        rewards2 = [1, 1, 1]
        context_history2 = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
        mab.partial_fit(decisions2, rewards2, context_history2)

        # Histories grow by exactly the 3 new samples.
        self.assertEqual(len(mab._imp.decisions), 13)
        self.assertEqual(len(mab._imp.rewards), 13)
        self.assertEqual(len(mab._imp.contexts), 13)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
Exemplo n.º 11
0
    def test_copy(self):
        """deepcopy of a Clusters implementation clones its learners and kmeans."""
        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        clusters = deepcopy(mab._imp)
        # The copy must be a distinct object graph...
        self.assertIsNot(clusters, mab._imp)
        self.assertIsInstance(clusters.kmeans, KMeans)
        self.assertIsNot(clusters.kmeans, mab._imp.kmeans)
        # ...with per-cluster learners that are fresh objects yet keep the
        # same configuration (epsilon) as the originals.
        for i in (0, 1):
            self.assertIsInstance(clusters.lp_list[i], _EpsilonGreedy)
            self.assertIsNot(clusters.lp_list[i], mab._imp.lp_list[i])
            self.assertEqual(clusters.lp_list[i].epsilon,
                             mab._imp.lp_list[i].epsilon)
Exemplo n.º 12
0
 def test_rewards_inf_df(self):
     """fit rejects infinite rewards supplied via a DataFrame column."""
     # Last reward is +inf, which must be rejected as invalid input.
     history = pd.DataFrame({
         'decision': [1, 1, 1, 2, 2, 2, 3, 3, 3],
         'reward': [0, 0, 0, 0, 0, 0, 1, 1, np.inf]
     })
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(history['decision'], history['reward'])
Exemplo n.º 13
0
    def test_no_neighbors_expectations(self):
        """Radius(.1) with distant contexts yields NaN for every expectation.

        Training contexts are all tens while prediction contexts are near
        zero, so no neighbor falls within radius 0.1 regardless of the seed.
        """
        for seed in (123456, 7):
            exp, mab = self.predict(
                arms=[1, 2, 3, 4],
                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                rewards=[0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                neighborhood_policy=NeighborhoodPolicy.Radius(.1),
                context_history=[[10, 10, 10, 10, 10] for _ in range(10)],
                contexts=[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                          [.01, .01, .01, .01, .01], [0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0]],
                seed=seed,
                num_run=1,
                is_predict=False)

            # Every arm expectation must be NaN when no neighbor is found.
            for row in exp:
                for key in row.keys():
                    self.assertIs(np.nan, row[key])
Exemplo n.º 14
0
 def test_invalid_add_arm(self):
     """add_arm rejects None, NaN, infinity, and a duplicate arm."""
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     # 3 is invalid because it already exists in the arm list.
     for bad_arm in (None, np.nan, np.inf, 3):
         with self.assertRaises(ValueError):
             mab.add_arm(bad_arm)
Exemplo n.º 15
0
 def test_invalid_jobs(self):
     """n_jobs=0 is invalid and must raise ValueError."""
     with self.assertRaises(ValueError):
         self.predict(arms=[1, 2, 3],
                      decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3],
                      rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1],
                      learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                      seed=123456,
                      num_run=4,
                      is_predict=True,
                      n_jobs=0)
Exemplo n.º 16
0
    def test_simulator_contextual(self):
        """Smoke-test Simulator end-to-end with several contextual bandits."""
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        def binarize(decision, reward):
            # Turn the raw reward into a binary signal for Thompson Sampling;
            # arm 0 uses a low cutoff, the others a high one.
            if decision == 0:
                return reward <= 50
            else:
                return reward >= 220

        n_jobs = 1
        # One (name, MAB) pair per learning policy, all sharing the same
        # Radius(10) neighborhood policy.
        policies = [('Random', LearningPolicy.Random()),
                    ('UCB1', LearningPolicy.UCB1(1)),
                    ('ThompsonSampling', LearningPolicy.ThompsonSampling(binarize)),
                    ('EpsilonGreedy', LearningPolicy.EpsilonGreedy(epsilon=.15)),
                    ('Softmax', LearningPolicy.Softmax())]
        contextual_mabs = [(name,
                            MAB([0, 1], lp, NeighborhoodPolicy.Radius(10),
                                n_jobs=n_jobs))
                           for name, lp in policies]

        sim = Simulator(contextual_mabs,
                        decisions,
                        rewards,
                        contexts,
                        scaler=StandardScaler(),
                        test_size=0.5,
                        is_ordered=False,
                        batch_size=0,
                        seed=123456)
        sim.run()

        # The run must populate the per-bandit result containers.
        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)
Exemplo n.º 17
0
    def test_no_neighbors(self):
        """Radius(.1) with distant contexts falls back to seeded random arms.

        Training contexts are all tens while prediction contexts are near
        zero, so no neighbor is within radius 0.1 and the predicted arms
        depend only on the seed.
        """
        for seed, expected in ((123456, [4, 3, 4, 1, 4]), (7, [4, 2, 1, 4, 4])):
            arms, mab = self.predict(
                arms=[1, 2, 3, 4],
                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                rewards=[0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                neighborhood_policy=NeighborhoodPolicy.Radius(.1),
                context_history=[[10, 10, 10, 10, 10] for _ in range(10)],
                contexts=[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                          [.01, .01, .01, .01, .01], [0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0]],
                seed=seed,
                num_run=1,
                is_predict=True)
            self.assertListEqual(arms, expected)
Exemplo n.º 18
0
    def test_epsilon25_series(self):
        """Epsilon-greedy (epsilon=0.25) accepts pandas Series inputs."""
        decisions = pd.Series([1, 1, 1, 2, 2, 2, 3, 3, 3])
        rewards = pd.Series([0, 0, 0, 0, 0, 0, 1, 1, 1])
        arms, mab = self.predict(arms=[1, 2, 3],
                                 decisions=decisions,
                                 rewards=rewards,
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
                                 seed=7,
                                 num_run=4,
                                 is_predict=True)
        self.assertEqual(arms, [2, 3, 3, 3])
Exemplo n.º 19
0
    def test_epsilon_0_missing_decision(self):
        """Pure greedy picks the best rewarded arm even if arm 2 was never used."""
        arm, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 3, 3, 3],
                                rewards=[0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                seed=7,
                                num_run=1,
                                is_predict=True)
        # Arm 3 has the only positive rewards, so greedy must choose it.
        self.assertEqual(arm, 3)
Exemplo n.º 20
0
    def test_greedy_t2(self):
        """Epsilon=0.5 with seed 71 reproduces a fixed arm sequence."""
        history_decisions = [1, 1, 1, 3, 2, 2, 3, 1, 3]
        history_rewards = [0, 1, 1, 0, 1, 0, 1, 1, 1]
        arms, mab = self.predict(arms=[1, 2, 3],
                                 decisions=history_decisions,
                                 rewards=history_rewards,
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                                 seed=71,
                                 num_run=4,
                                 is_predict=True)
        self.assertEqual(arms, [1, 1, 1, 1])
Exemplo n.º 21
0
    def test_greedy_t8(self):
        """String arms with float rewards work under epsilon=0.5 exploration."""
        arms, mab = self.predict(arms=['a', 'b', 'c'],
                                 decisions=['a', 'b', 'c', 'a', 'b',
                                            'c', 'a', 'b', 'c', 'a'],
                                 rewards=[-1.25, 0.7, 12, 10, 12,
                                          9.2, -1, -10, 4, 0],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                                 seed=9,
                                 num_run=4,
                                 is_predict=True)
        self.assertEqual(arms, ['c', 'a', 'b', 'a'])
Exemplo n.º 22
0
    def test_greedy_t4(self):
        """Non-contiguous integer arms (1, 2, 4) work under epsilon=0.5."""
        arms, mab = self.predict(arms=[1, 2, 4],
                                 decisions=[1, 1, 4, 4, 2, 2, 1, 1,
                                            4, 2, 1, 4, 1, 2, 4],
                                 rewards=[7, 9, 10, 20, 2, 5, 8, 15,
                                          17, 11, 0, 5, 2, 9, 3],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                                 seed=23,
                                 num_run=4,
                                 is_predict=True)
        self.assertEqual(arms, [4, 4, 4, 1])
Exemplo n.º 23
0
    def test_epsilon_50(self):
        """Epsilon=0.5 with seed 7 mixes exploration and exploitation deterministically."""
        arms, mab = self.predict(arms=[1, 2, 3],
                                 decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                                 rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                                 seed=7,
                                 num_run=5,
                                 is_predict=True)
        self.assertListEqual(arms, [2, 1, 1, 3, 3])
Exemplo n.º 24
0
    def test_predict_expectation(self):
        """Expectations equal each arm's mean observed reward."""
        exps, mab = self.predict(arms=[1, 2, 3],
                                 decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                                 rewards=[10, 20, 30, -10, 0, 16, 2, 7, 3],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                                 seed=123456,
                                 num_run=1,
                                 is_predict=False)
        # Means: arm 1 -> (10+20+30)/3, arm 2 -> (-10+0+16)/3, arm 3 -> (2+7+3)/3.
        self.assertDictEqual(exps, {1: 20.0, 2: 2.0, 3: 4.0})
Exemplo n.º 25
0
    def test_ts_numpy(self):
        """Numpy-array decisions and rewards are accepted as training input.

        NOTE(review): despite the "ts" name this exercises EpsilonGreedy,
        not Thompson Sampling — consider renaming.
        """
        decisions = np.array([1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1])
        rewards = np.array([10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10])
        arm, mab = self.predict(arms=[1, 2],
                                decisions=decisions,
                                rewards=rewards,
                                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
                                seed=123456,
                                num_run=1,
                                is_predict=True)
        self.assertEqual(arm, 1)
Exemplo n.º 26
0
    def test_unused_arm(self):
        """An arm absent from the history can still be selected via exploration."""
        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                                 rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
                                 seed=23,
                                 num_run=5,
                                 is_predict=True)
        # not used arm (4) can still be picked up thanks to randomness
        self.assertEqual(arms, [3, 3, 3, 3, 4])
Exemplo n.º 27
0
 def test_invalid_2d_context_history_np(self):
     """A 1-D numpy context history is rejected with TypeError."""
     # context_history must be 2-D (one row per decision); np.array([1, 1, 1])
     # is 1-D and therefore invalid.
     with self.assertRaises(TypeError):
         self.predict(arms=[1, 2, 3],
                      decisions=[1, 1, 1],
                      rewards=[0, 0, 0],
                      learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.0),
                      neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                      context_history=np.array([1, 1, 1]),
                      contexts=np.array([[1, 1]]),
                      seed=123456,
                      num_run=1,
                      is_predict=True)
Exemplo n.º 28
0
 def test_too_large_k(self):
     """KNearest(k) with k larger than the history size raises ValueError."""
     # Only 3 historical samples exist, so k=4 cannot be satisfied.
     with self.assertRaises(ValueError):
         self.predict(arms=[1, 2, 3],
                      decisions=[1, 1, 1],
                      rewards=[0, 0, 0],
                      learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.0),
                      neighborhood_policy=NeighborhoodPolicy.KNearest(4),
                      context_history=[[1, 1], [0, 0], [0, 0]],
                      contexts=np.array([[1, 1]]),
                      seed=123456,
                      num_run=1,
                      is_predict=True)
Exemplo n.º 29
0
    def test_mismatch_context(self):
        """Omitting prediction contexts for a contextual bandit raises ValueError."""
        # A context_history is given but no contexts for prediction.
        with self.assertRaises(ValueError):
            self.predict(arms=[1, 2, 3],
                         decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                         rewards=[10, 20, 30, -10, 0, 16, 2, 7, 3],
                         context_history=[[10] for _ in range(9)],
                         learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
                         neighborhood_policy=NeighborhoodPolicy.Radius(),
                         seed=123456,
                         num_run=1,
                         is_predict=False)
Exemplo n.º 30
0
    def test_greedy_t1(self):
        """Pure greedy (epsilon=0) predictions are stable across n_jobs settings."""
        for n_jobs in (1, 2, 3):
            arms, mab = self.predict(
                arms=[1, 2, 3],
                decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3],
                rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1],
                learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                seed=123456,
                num_run=4,
                is_predict=True,
                n_jobs=n_jobs)
            # Arm 1 has the highest mean reward and must always be chosen.
            self.assertEqual(arms, [1, 1, 1, 1])