def test_transaction_resume_1(self):
    sim             = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    working_learner = ModuloLearner()
    broken_learner  = BrokenLearner()

    #the second Experiment should never call broken_learner because
    #we're resuming from the first experiment's transactions.log
    try:
        first_result  = Experiment([sim], [working_learner], evaluation_task=OnlineOnPolicyEvalTask(False)).evaluate("coba/tests/.temp/transactions.log")
        second_result = Experiment([sim], [broken_learner ], evaluation_task=OnlineOnPolicyEvalTask(False)).evaluate("coba/tests/.temp/transactions.log")

        actual_learners     = second_result.learners.to_tuples()
        actual_environments = second_result.environments.to_tuples()
        actual_interactions = second_result.interactions.to_tuples()

        expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0')]
        expected_environments = [(0, 'LambdaSimulation')]
        expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]
    finally:
        if Path('coba/tests/.temp/transactions.log').exists():
            Path('coba/tests/.temp/transactions.log').unlink()

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
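#The tests in this section rely on small helper learners defined elsewhere in
#this test module. A minimal sketch of their assumed shapes follows; the real
#definitions (and the exact Learner interface) may differ in detail.
class ModuloLearner:
    """Deterministically picks the action at index context % n_actions."""

    def __init__(self, param: str = "0"):
        self._param = param

    @property
    def params(self):
        return {"family": "Modulo", "p": self._param}

    def predict(self, context, actions):
        #a one-hot probability distribution over the given actions
        return [int(i == context % len(actions)) for i in range(len(actions))]

    def learn(self, context, action, reward, probability, info):
        pass

class BrokenLearner(ModuloLearner):
    """Raises on predict so tests can verify exception handling."""

    @property
    def params(self):
        return {"family": "Broken"}

    def predict(self, context, actions):
        raise Exception("Broken learner was called.")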
def test_wrapped_not_picklable_learner_with_reduce(self):
    sim1    = LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner = WrappedLearner(NotPicklableLearnerWithReduce())

    #this should complete without error because __reduce__ makes the wrapped learner picklable
    experiment = Experiment([sim1], [learner])
    experiment.evaluate()
def test_wrapped_not_picklable_learner_sans_reduce(self):
    sim1    = LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner = WrappedLearner(NotPicklableLearner())

    experiment = Experiment([sim1], [learner])

    CobaContext.logger = BasicLogger(ListSink())
    experiment.evaluate()

    #without __reduce__ the learner can't be pickled, so a pickle error is logged
    self.assertEqual(1, len(CobaContext.logger.sink.items))
    self.assertIn("pickle", CobaContext.logger.sink.items[0])
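#A sketch of the unpicklable helpers above (assumed shapes; the real helpers
#are defined elsewhere in this module). Both hold a lambda, which the pickle
#module cannot serialize. The second also defines __reduce__, which tells
#pickle how to rebuild the object from scratch, so it can safely cross process
#boundaries despite the lambda.
class NotPicklableLearner(ModuloLearner):
    def __init__(self):
        self._val = lambda x: 1
        super().__init__()

class NotPicklableLearnerWithReduce(NotPicklableLearner):
    def __reduce__(self):
        #pickle will call NotPicklableLearnerWithReduce() to reconstruct the object
        return (NotPicklableLearnerWithReduce, ())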
def test_config_set(self):
    exp = Experiment([], [])

    #with no explicit config the experiment falls back to CobaContext values
    CobaContext.experiment.processes = 10
    self.assertEqual(10, exp.processes)

    CobaContext.experiment.maxchunksperchild = 3
    self.assertEqual(3, exp.maxchunksperchild)

    CobaContext.experiment.chunk_by = 'source'
    self.assertEqual('source', exp.chunk_by)

    #explicit config takes precedence over the CobaContext values
    exp.config(processes=2, maxchunksperchild=5, chunk_by='task')
    self.assertEqual(2, exp.processes)
    self.assertEqual(5, exp.maxchunksperchild)
    self.assertEqual('task', exp.chunk_by)
def test_no_params(self):
    sim1    = NoParamsEnvironment()
    learner = NoParamsLearner()

    experiment = Experiment([sim1], [learner], evaluation_task=OnlineOnPolicyEvalTask(False))
    result     = experiment.evaluate()

    actual_learners     = result.learners.to_tuples()
    actual_environments = result.environments.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, 'NoParamsLearner', 'NoParamsLearner')]
    expected_environments = [(0, 'NoParamsEnvironment')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
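#NoParamsLearner and NoParamsEnvironment are assumed to be helpers that simply
#omit the params property, which is why the expected rows above fall back to
#the bare class name with no parameter column. A hypothetical sketch of the
#learner (the environment is analogous):
class NoParamsLearner:
    def predict(self, context, actions):
        return [int(i == context % len(actions)) for i in range(len(actions))]

    def learn(self, context, action, reward, probability, info):
        pass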
def test_sim(self):
    sim1    = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner = ModuloLearner()

    experiment = Experiment([sim1], [learner], evaluation_task=OnlineOnPolicyEvalTask(False))
    result     = experiment.evaluate()

    actual_learners     = result.learners.to_tuples()
    actual_environments = result.environments.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    #interaction rows are (environment_id, learner_id, interaction index, reward)
    expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0')]
    expected_environments = [(0, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_learners(self):
    sim      = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner1 = ModuloLearner("0") #type: ignore
    learner2 = ModuloLearner("1") #type: ignore

    experiment    = Experiment([sim], [learner1, learner2], evaluation_task=OnlineOnPolicyEvalTask(False))
    actual_result = experiment.evaluate()

    actual_learners     = actual_result.learners.to_tuples()
    actual_environments = actual_result.environments.to_tuples()
    actual_interactions = actual_result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0'), (1, "Modulo", "Modulo(p=1)", '1')]
    expected_environments = [(0, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (0, 1, 1, 0), (0, 1, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_ignore_raise(self):
    CobaContext.logger = IndentLogger(ListSink())

    sim1 = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    sim2 = LambdaSimulation(3, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a))

    experiment = Experiment([sim1, sim2], [ModuloLearner(), BrokenLearner()], evaluation_task=OnlineOnPolicyEvalTask(False))
    result     = experiment.evaluate()

    actual_learners     = result.learners.to_tuples()
    actual_environments = result.environments.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0'), (1, "Broken", "Broken", float('nan'))]
    expected_environments = [(0, 'LambdaSimulation'), (1, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (1, 0, 1, 3), (1, 0, 2, 4), (1, 0, 3, 5)]

    #the broken learner raises on both environments but the experiment carries on
    self.assertIsInstance(CobaContext.logger, IndentLogger)
    self.assertEqual(2, sum([int("Unexpected exception:" in item) for item in CobaContext.logger.sink.items]))

    #nan != nan so the broken learner's param column is checked with math.isnan
    self.assertCountEqual(actual_learners[0], expected_learners[0])
    self.assertCountEqual(actual_learners[1][:3], expected_learners[1][:3])
    self.assertTrue(math.isnan(actual_learners[1][3]))

    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_restore_not_matched_environments(self):
    path = Path("coba/tests/.temp/experiment.log")

    if path.exists(): path.unlink()
    path.write_text('["version",4]\n["experiment",{"n_environments":1,"n_learners":1}]')

    try:
        sim1    = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
        learner = ModuloLearner()

        #the log above records 1 environment and 1 learner so neither experiment matches it
        with self.assertRaises(AssertionError):
            Experiment([sim1, sim1], [learner]).evaluate(str(path))

        with self.assertRaises(AssertionError):
            Experiment([sim1], [learner, learner]).evaluate(str(path))
    finally:
        path.unlink()
#this line is required by Python in order to use multi-processing
if __name__ == '__main__':

    # These configuration changes aren't ever required.
    # They are simply here to serve as an example.
    # They can also be set automatically by creating a .coba file in your project root.
    CobaContext.cacher.cache_directory = './.coba_cache'
    CobaContext.experiment.processes   = 2
    CobaContext.experiment.chunk_by    = 'task'

    #First, we define the learners that we want to test
    learners = [
        RandomLearner(),
        EpsilonBanditLearner(),
        VowpalEpsilonLearner(),
    ]

    #Next we create the environments we'd like to evaluate against
    environments = Environments.from_linear_synthetic(1000, n_action_features=0).shuffle([0, 1, 2, 3])

    #We then create and evaluate our experiment from our environments and learners
    result = Experiment(environments, learners).evaluate()

    #After evaluating we can create a quick summary plot to get a sense of how the learners performed
    result.plot_learners(err='sd')

    #We can also plot how specific learners did across each shuffle of our environments
    result.filter_lrn(full_name="vw").plot_learners(err='sd', each=True)
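    #Passing a file path to evaluate() writes a transaction log as results are
    #produced, so an interrupted experiment can later resume where it left off
    #(the same mechanism exercised by test_transaction_resume_1 above). The
    #path below is purely illustrative:
    #result = Experiment(environments, learners).evaluate("./experiment.log")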