Example #1
0
    def test_invalid_log_format(self):
        """Simulator must raise TypeError when log_format is not a string."""
        rng = np.random.RandomState(seed=7)
        # Both an int and None are invalid log_format values; each
        # construction attempt must fail with TypeError.
        for bad_format in (7, None):
            with self.assertRaises(TypeError):
                Simulator(bandits=[("example",
                                    MAB([0, 1],
                                        LearningPolicy.EpsilonGreedy()))],
                          decisions=[rng.randint(0, 2) for _ in range(10)],
                          rewards=[rng.randint(0, 100) for _ in range(10)],
                          contexts=[[rng.rand() for _ in range(5)]
                                    for _ in range(10)],
                          scaler=StandardScaler(),
                          test_size=0.4,
                          batch_size=0,
                          is_ordered=True,
                          seed=7,
                          log_format=bad_format)
Example #2
0
    def test_simulator_hyper_parameter(self):
        """Sweep Radius neighborhood sizes 6-9 in a quick offline simulation.

        After the run the simulator must expose per-bandit predictions and
        confusion matrices.
        """
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        n_jobs = 1
        # One UCB1 bandit per candidate radius, named after the radius value.
        hyper_parameter_tuning = [
            ('Radius' + str(radius),
             MAB([0, 1],
                 LearningPolicy.UCB1(1),
                 NeighborhoodPolicy.Radius(radius),
                 n_jobs=n_jobs))
            for radius in range(6, 10)]

        sim = Simulator(hyper_parameter_tuning,
                        decisions,
                        rewards,
                        contexts,
                        scaler=StandardScaler(),
                        test_size=0.5,
                        is_ordered=False,
                        batch_size=0,
                        seed=123456,
                        is_quick=True)
        sim.run()

        self.assertTrue(sim.bandit_to_predictions)
        self.assertTrue(sim.bandit_to_confusion_matrices)
Example #3
0
    def test_simulator_mixed(self):
        """Simulate a contextual and a context-free bandit side by side."""
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        n_jobs = 1
        # A radius-based contextual bandit plus a plain random baseline.
        mixed = []
        mixed.append(('RandomRadius',
                      MAB([0, 1],
                          LearningPolicy.Random(),
                          NeighborhoodPolicy.Radius(10),
                          n_jobs=n_jobs)))
        mixed.append(('Random',
                      MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)))

        sim = Simulator(mixed,
                        decisions,
                        rewards,
                        contexts,
                        scaler=StandardScaler(),
                        test_size=0.5,
                        is_ordered=False,
                        batch_size=0,
                        seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_predictions)
        self.assertTrue(sim.bandit_to_confusion_matrices)
Example #4
0
    def test_simulator_contextual(self):
        """Run an offline simulation over several contextual bandits.

        Every bandit shares the same Radius(10) neighborhood policy but uses
        a different learning policy; the run must produce predictions and
        confusion matrices for each.
        """
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        def binarize(decision, reward):
            # Thompson Sampling needs binary rewards; the success threshold
            # depends on which arm was played.
            if decision == 0:
                return reward <= 50
            else:
                return reward >= 220

        n_jobs = 1
        # (name, learning policy) pairs, each wrapped in the same
        # Radius(10) neighborhood policy below.
        policies = [('Random', LearningPolicy.Random()),
                    ('UCB1', LearningPolicy.UCB1(1)),
                    ('ThompsonSampling',
                     LearningPolicy.ThompsonSampling(binarize)),
                    ('EpsilonGreedy',
                     LearningPolicy.EpsilonGreedy(epsilon=.15)),
                    ('Softmax', LearningPolicy.Softmax())]
        contextual_mabs = [(name,
                            MAB([0, 1],
                                policy,
                                NeighborhoodPolicy.Radius(10),
                                n_jobs=n_jobs))
                           for name, policy in policies]

        sim = Simulator(contextual_mabs,
                        decisions,
                        rewards,
                        contexts,
                        scaler=StandardScaler(),
                        test_size=0.5,
                        is_ordered=False,
                        batch_size=0,
                        seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_predictions)
        self.assertTrue(sim.bandit_to_confusion_matrices)
Example #5
0
    def test_invalid_simulator_stats_scope(self):
        """_set_stats must reject unknown scope names with ValueError."""
        rng = np.random.RandomState(seed=7)
        n = 10
        decisions = np.array([rng.randint(0, 2) for _ in range(n)])
        rewards = np.array([rng.randint(0, 100) for _ in range(n)])
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n)]

        sim = Simulator(bandits=[("example",
                                  MAB([0, 1],
                                      LearningPolicy.EpsilonGreedy()))],
                        decisions=decisions,
                        rewards=rewards,
                        contexts=contexts,
                        scaler=StandardScaler(),
                        test_size=0.4,
                        batch_size=0,
                        is_ordered=True,
                        seed=7)

        # 'validation' is not a recognized stats scope.
        with self.assertRaises(ValueError):
            sim._set_stats('validation', decisions, rewards)
Example #6
0
    def test_invalid_get_arm_stats(self):
        """get_arm_stats must raise TypeError for non-numeric rewards."""
        rng = np.random.RandomState(seed=9)
        decisions = np.array([rng.randint(0, 2) for _ in range(5)])
        rewards = np.array([rng.randint(0, 100) for _ in range(5)])
        # Non-numeric rewards cannot be aggregated into arm statistics.
        new_rewards = np.array(['h', 'e', 'l', 'l', 'o'])

        sim = Simulator(bandits=[("example",
                                  MAB([0, 1],
                                      LearningPolicy.EpsilonGreedy()))],
                        decisions=decisions,
                        rewards=rewards,
                        contexts=[[rng.rand() for _ in range(5)]
                                  for _ in range(5)],
                        scaler=StandardScaler(),
                        test_size=0.4,
                        batch_size=0,
                        is_ordered=True,
                        seed=7)
        with self.assertRaises(TypeError):
            # Fix: dropped the unused `stats =` binding (flake8 F841) —
            # the call must raise before any assignment could complete.
            sim.get_arm_stats(decisions, new_rewards)
Example #7
0
 def test_invalid_plot_args_metric_value(self):
     """plot() must raise ValueError for an unsupported metric name."""
     rng = np.random.RandomState(seed=7)
     bandits = [("example",
                 MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
     sim = Simulator(bandits=bandits,
                     decisions=[rng.randint(0, 2) for _ in range(10)],
                     rewards=[rng.randint(0, 100) for _ in range(10)],
                     contexts=[[rng.rand() for _ in range(5)]
                               for _ in range(10)],
                     scaler=StandardScaler(),
                     test_size=0.4,
                     batch_size=0,
                     is_ordered=True,
                     seed=7)
     sim.run()
     # 'mean' is not an accepted metric value for plot().
     with self.assertRaises(ValueError):
         sim.plot('mean')
Example #8
0
 def test_invalid_get_stats(self):
     """get_stats must raise TypeError for a non-numeric array."""
     non_numeric = np.array(list('hello'))
     with self.assertRaises(TypeError):
         Simulator.get_stats(non_numeric)
Example #9
0
# Pair a radius-based contextual bandit with a context-free random baseline.
# NOTE(review): `n_jobs` is assumed to be defined earlier in the script — confirm.
mixed = [('RandomRadius', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
          ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs))]

# Build one UCB1 bandit per candidate radius (6 through 9) for tuning.
hyper_parameter_tuning = []
for radius in range(6, 10):
    hyper_parameter_tuning.append(('Radius'+str(radius),
                                  MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius),
                                      n_jobs=n_jobs)))

####################################
# Contextual Simulation
####################################

# Time the full simulation run and report the elapsed wall-clock minutes.
# NOTE(review): `contextual_mabs`, `decisions`, `rewards` and `contexts` are
# presumably defined earlier in the script — verify before running standalone.
start = time()
sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0, seed=123456)
sim.run()
end = time()

runtime = (end - start) / 60
print('Complete', str(runtime) + ' minutes')
print('\n')

# Report per-bandit worst / average / best case arm statistics.
for mab_name, mab in sim.bandits:
    print(mab_name)

    # Since simulation is offline, print the bandit stats directly
    print('Worst Case Scenario', sim.bandit_to_arm_to_stats_min[mab_name])
    print('Average Case Scenario', sim.bandit_to_arm_to_stats_avg[mab_name])
    print('Best Case Scenario:', sim.bandit_to_arm_to_stats_max[mab_name])