Exemplo n.º 1
0
    def test_eval_seeds(self):
        """Evaluating with a fixed seed over two shuffles produces deterministic tables."""
        simulation = LambdaSimulation(3, lambda i: i, lambda i, c: [0, 1, 2],
                                      lambda i, c, a: cast(float, a))

        # Benchmark the random learner against both shuffles of the simulation.
        benchmark = Benchmark([simulation], shuffle=[1, 4])
        result = benchmark.evaluate([RandomLearner()], seed=1)

        expected_learners = [(0, "random", "random")]
        expected_simulations = [(0, "LambdaSimulation", "1", "None",
                                 '"LambdaSimulation",{"Shuffle":1}'),
                                (1, "LambdaSimulation", "4", "None",
                                 '"LambdaSimulation",{"Shuffle":4}')]
        expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 2), (0, 0, 3, 1),
                                 (1, 0, 1, 0), (1, 0, 2, 2), (1, 0, 3, 1)]

        # Order is irrelevant, only membership matters.
        self.assertCountEqual(result.learners.to_tuples(), expected_learners)
        self.assertCountEqual(result.simulations.to_tuples(), expected_simulations)
        self.assertCountEqual(result.interactions.to_tuples(), expected_interactions)
Exemplo n.º 2
0
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_1,
                         seed=10),
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_2,
                         seed=10),
        LambdaSimulation(2000, contexts, actions, polynomial_reward_1,
                         seed=10),
    ]

    #define a benchmark: replays each simulation once per seed (5 seeds, one interaction per batch)
    benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5)))

    #create the learners to compare against one another
    learner_factories = [
        RandomLearner(seed=10),
        EpsilonLearner(epsilon=0.025, seed=10),
        UcbTunedLearner(seed=10),
        VowpalLearner(epsilon=0.025, seed=10),
        VowpalLearner(epsilon=0.025, is_adf=False, seed=10),
        VowpalLearner(bag=5, seed=10),
    ]

    #evaluate every learner on every simulation/seed and render the summary plot
    benchmark.evaluate(learner_factories).standard_plot()
Exemplo n.º 3
0
 def test_learn(self):
     """learn() accepts a single observed interaction without raising."""
     RandomLearner().learn(2, None, 1, 1, 1)
Exemplo n.º 4
0
 def test_predict(self):
     """predict() spreads probability uniformly across the four actions."""
     probabilities = RandomLearner().predict(None, [1, 2, 3, 4])
     self.assertEqual([0.25, 0.25, 0.25, 0.25], probabilities)
Exemplo n.º 5
0
 def test_params(self):
     """params exposes the learner's family name."""
     expected = {"family": "random"}
     self.assertEqual(expected, RandomLearner().params)
Exemplo n.º 6
0
from coba.experiments import Experiment
from coba.environments import Environments

#this guard is required by Python so multi-processing workers don't re-execute the script on import
if __name__ == '__main__':

    # These configuration changes aren't ever required.
    # They are simply here to serve as an example.
    # These can also be set automatically by creating a .coba file in your project root.
    CobaContext.cacher.cache_directory = './.coba_cache'
    CobaContext.experiment.processes = 2       # run the experiment in 2 worker processes
    CobaContext.experiment.chunk_by = 'task'

    #First, we define the learners that we want to test
    # NOTE(review): RandomLearner, EpsilonBanditLearner, VowpalEpsilonLearner and CobaContext
    # are not in the visible imports above — confirm they are imported elsewhere in the file.
    learners = [
        RandomLearner(),
        EpsilonBanditLearner(),
        VowpalEpsilonLearner(),
    ]

    #Next we create the environments we'd like to evaluate against
    environments = Environments.from_linear_synthetic(
        1000, n_action_features=0).shuffle([0, 1, 2, 3])

    #We then create and evaluate our experiment from our environments and learners
    result = Experiment(environments, learners).evaluate()

    #After evaluating we can create a quick summary plot to get a sense of how the learners performed
    result.plot_learners(err='sd')

    #We can also create a plot examining how specific learners did across each shuffle of our environments