def test_eight_groups(self):
    """ChunkByTask should emit every one of the eight work items as its own group.

    The assertions expect the groups in this order: the two items whose first
    constructor argument is ``None``, then the remaining items ordered by
    their first two constructor arguments, with ``None`` in second position
    coming before ``0`` and ``1``.
    """
    def make_sim():
        return LambdaSimulation(
            5,
            lambda i: i,
            lambda i, c: [0, 1, 2],
            lambda i, c, a: cast(float, a),
        )

    sim1 = make_sim()
    sim2 = make_sim()

    tasks = [
        WorkItem(None, 0, None, None, None),
        WorkItem(None, 1, None, None, None),
        WorkItem(1, None, sim2, None, None),
        WorkItem(0, None, sim1, None, None),
        WorkItem(1, 0, sim1, None, None),
        WorkItem(1, 1, sim1, None, None),
        WorkItem(0, 0, sim1, None, None),
        WorkItem(0, 1, sim2, None, None),
    ]

    groups = list(ChunkByTask().filter(tasks))

    # Index into ``tasks`` of the single item expected in each group, in order.
    expected_task_index = [0, 1, 3, 6, 7, 2, 4, 5]

    self.assertEqual(len(groups), 8)
    for group, task_index in zip(groups, expected_task_index):
        self.assertEqual(group, [tasks[task_index]])
def test_two_sim_two_learns(self):
    """CreateWorkItems over two simulations and two learners should read back eight items.

    Expected breakdown by truthiness of ``environ`` / ``learner``: two
    learner-only items, two environment-only items and four items that
    carry both.
    """
    simulations = [
        LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a)),
        LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a)),
    ]
    learners = [ModuloLearner("1"), ModuloLearner("2")]

    work_items = list(CreateWorkItems(simulations, learners, None, None, None).read())
    self.assertEqual(8, len(work_items))

    learner_only = [w for w in work_items if not w.environ and w.learner]
    self.assertEqual(2, len(learner_only))

    environ_only = [w for w in work_items if w.environ and not w.learner]
    self.assertEqual(2, len(environ_only))

    paired = [w for w in work_items if w.environ and w.learner]
    self.assertEqual(4, len(paired))
def test_wrapped_not_picklable_learner_with_reduce(self):
    """Evaluating a wrapped NotPicklableLearnerWithReduce should complete without raising.

    The test has no assertions; it passes as long as ``evaluate`` returns.
    """
    simulation = LambdaSimulation(
        5,
        lambda i: i,
        lambda i, c: [0, 1, 2],
        lambda i, c, a: cast(float, a),
    )
    wrapped = WrappedLearner(NotPicklableLearnerWithReduce())

    Experiment([simulation], [wrapped]).evaluate()
def test_transaction_resume_1(self):
    """An experiment pointed at an existing transaction log should resume from it.

    A first experiment with a working learner writes the log. A second
    experiment is then built with a BrokenLearner and evaluated against the
    same log; its result is expected to contain the first experiment's
    learner, environment and interaction rows.
    """
    log_file = "coba/tests/.temp/transactions.log"
    log_path = Path(log_file)

    sim = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    working_learner = ModuloLearner()
    broken_learner = BrokenLearner()

    # A log left behind by an aborted earlier run would be picked up by the
    # first experiment below, so start from a clean slate.
    if log_path.exists():
        log_path.unlink()

    # The second Experiment shouldn't ever exercise the broken learner because
    # we're resuming from the first experiment's transaction log.
    try:
        Experiment(
            [sim], [working_learner], evaluation_task=OnlineOnPolicyEvalTask(False)
        ).evaluate(log_file)

        second_result = Experiment(
            [sim], [broken_learner], evaluation_task=OnlineOnPolicyEvalTask(False)
        ).evaluate(log_file)

        actual_learners = second_result.learners.to_tuples()
        actual_environments = second_result.environments.to_tuples()
        actual_interactions = second_result.interactions.to_tuples()
    finally:
        # Always remove the log so later tests do not resume from it.
        if log_path.exists():
            log_path.unlink()

    expected_learners = [(0, "Modulo", "Modulo(p=0)", '0')]
    expected_environments = [(0, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_pickle_n_interactions_2(self):
    """A two-interaction LambdaSimulation should survive a pickle round trip.

    After ``pickle.loads(pickle.dumps(...))`` the simulation must keep its
    string form and params, and reading it must give the same contexts,
    actions and rewards that the C/A/R functions define.
    """
    def C(i: int):
        return [1, 2][i]

    def A(i: int, c: int):
        return [[1, 2, 3], [4, 5, 6]][i]

    def R(i: int, c: int, a: int):
        return a - c

    pickled = pickle.dumps(LambdaSimulation(2, C, A, R))
    simulation = pickle.loads(pickled)
    interactions = list(simulation.read())

    self.assertEqual("LambdaSimulation", str(simulation))
    self.assertEqual({"type": "LambdaSimulation"}, simulation.params)
    self.assertEqual(len(interactions), 2)

    first = interactions[0]
    self.assertEqual(1, first.context)
    self.assertEqual([1, 2, 3], first.actions)
    self.assertEqual([0, 1, 2], first.kwargs["rewards"])

    second = interactions[1]
    self.assertEqual(2, second.context)
    self.assertEqual([4, 5, 6], second.actions)
    self.assertEqual([2, 3, 4], second.kwargs["rewards"])
def test_wrapped_not_picklable_learner_sans_reduce(self):
    """Evaluating a wrapped NotPicklableLearner should log exactly one message mentioning "pickle".

    The global ``CobaContext.logger`` is replaced with a list-backed logger
    so the logged items can be inspected after ``evaluate``.
    """
    simulation = LambdaSimulation(
        5,
        lambda i: i,
        lambda i, c: [0, 1, 2],
        lambda i, c, a: cast(float, a),
    )
    wrapped = WrappedLearner(NotPicklableLearner())
    experiment = Experiment([simulation], [wrapped])

    CobaContext.logger = BasicLogger(ListSink())
    experiment.evaluate()

    logged = CobaContext.logger.sink.items
    self.assertEqual(1, len(logged))
    self.assertIn("pickle", logged[0])
def test_params(self):
    """A LambdaSimulation's params should be exactly ``{"type": "LambdaSimulation"}``."""
    def C(i: int):
        return [1, 2][i]

    def A(i: int, c: int):
        return [[1, 2, 3], [4, 5, 6]][i]

    def R(i: int, c: int, a: int):
        return a - c

    simulation = LambdaSimulation(2, C, A, R)

    self.assertEqual({"type": "LambdaSimulation"}, simulation.params)
def test_simple(self):
    """ProcessWorkItems on one fully-populated work item should run its task once.

    The ObserveTask is expected to have recorded five entries in
    ``observed[1]`` and the first emitted transaction should be
    ``['T3', (1, 1), []]``.
    """
    simulation = LambdaSimulation(
        5,
        lambda i: i,
        lambda i, c: [0, 1, 2],
        lambda i, c, a: cast(float, a),
    )
    learner = ModuloLearner("1")
    observer = ObserveTask()
    work_item = WorkItem(1, 1, simulation, learner, observer)

    transactions = list(ProcessWorkItems().filter([work_item]))

    self.assertEqual(len(observer.observed[1]), 5)
    self.assertEqual(['T3', (1, 1), []], transactions[0])
def test_four_groups(self):
    """ChunkBySource should split the eight work items into four groups.

    Expected groups, in order: each of the two simulation-less items alone,
    then every item built on ``sim1``, then every item built on ``sim2``.
    """
    def make_sim():
        return LambdaSimulation(
            5,
            lambda i: i,
            lambda i, c: [0, 1, 2],
            lambda i, c, a: cast(float, a),
        )

    sim1 = make_sim()
    sim2 = make_sim()

    tasks = [
        WorkItem(None, 0, None, None, None),
        WorkItem(None, 1, None, None, None),
        WorkItem(1, None, sim2, None, None),
        WorkItem(0, None, sim1, None, None),
        WorkItem(0, 0, sim1, None, None),
        WorkItem(2, 0, sim1, None, None),
        WorkItem(0, 1, sim1, None, None),
        WorkItem(1, 1, sim2, None, None),
    ]

    groups = list(ChunkBySource().filter(tasks))

    # Indices into ``tasks`` making up each expected group, in order.
    expected_indices = [[0], [1], [3, 4, 6, 5], [2, 7]]

    self.assertEqual(len(groups), 4)
    for group, indices in zip(groups, expected_indices):
        self.assertEqual(group, [tasks[i] for i in indices])
def test_n_interactions_2_seed_1(self):
    """A seeded LambdaSimulation with rng-accepting callbacks should read two interactions.

    The C/A/R functions take an extra ``rng`` parameter but ignore it, so
    the contexts, actions and rewards are fully determined by the index.
    """
    def C(i: int, rng: CobaRandom):
        return [1, 2][i]

    def A(i: int, c: int, rng: CobaRandom):
        return [[1, 2, 3], [4, 5, 6]][i]

    def R(i: int, c: int, a: int, rng: CobaRandom):
        return a - c

    interactions = list(LambdaSimulation(2, C, A, R, seed=1).read())

    self.assertEqual(len(interactions), 2)

    first = interactions[0]
    self.assertEqual(1, first.context)
    self.assertEqual([1, 2, 3], first.actions)
    self.assertEqual([0, 1, 2], first.kwargs["rewards"])

    second = interactions[1]
    self.assertEqual(2, second.context)
    self.assertEqual([4, 5, 6], second.actions)
    self.assertEqual([2, 3, 4], second.kwargs["rewards"])
def test_ignore_raise(self):
    """An experiment containing a BrokenLearner should log its failures and still return results.

    With two simulations, one working learner and one broken learner, the
    test expects two logged "Unexpected exception:" entries, interaction
    rows for the working learner only, and a learner row for the broken
    learner whose fourth field is NaN.
    """
    CobaContext.logger = IndentLogger(ListSink())

    sim1 = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    sim2 = LambdaSimulation(3, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a))
    experiment = Experiment(
        [sim1, sim2],
        [ModuloLearner(), BrokenLearner()],
        evaluation_task=OnlineOnPolicyEvalTask(False),
    )

    result = experiment.evaluate()
    actual_learners = result.learners.to_tuples()
    actual_environments = result.environments.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners = [
        (0, "Modulo", "Modulo(p=0)", '0'),
        (1, "Broken", "Broken", float('nan')),
    ]
    expected_environments = [(0, 'LambdaSimulation'), (1, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (1, 0, 1, 3), (1, 0, 2, 4), (1, 0, 3, 5)]

    self.assertIsInstance(CobaContext.logger, IndentLogger)

    unexpected_count = sum(
        int("Unexpected exception:" in item) for item in CobaContext.logger.sink.items
    )
    self.assertEqual(2, unexpected_count)

    self.assertCountEqual(actual_learners[0], expected_learners[0])

    # NaN never compares equal to itself, so the broken learner's row is
    # checked in two parts: the first three fields by value, the fourth
    # with isnan. The isnan check must look at the ACTUAL row; checking the
    # expected literal would always pass.
    self.assertCountEqual(actual_learners[1][:3], expected_learners[1][:3])
    self.assertTrue(math.isnan(actual_learners[1][3]))

    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_sims(self):
    """One learner evaluated on two simulations should yield rows for both environments.

    The first simulation has two interactions and the second has three, so
    five interaction rows are expected in total.
    """
    two_step_sim = LambdaSimulation(
        2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a)
    )
    three_step_sim = LambdaSimulation(
        3, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a)
    )
    experiment = Experiment(
        [two_step_sim, three_step_sim],
        [ModuloLearner()],
        evaluation_task=OnlineOnPolicyEvalTask(False),
    )

    outcome = experiment.evaluate()
    learner_rows = outcome.learners.to_tuples()
    environment_rows = outcome.environments.to_tuples()
    interaction_rows = outcome.interactions.to_tuples()

    self.assertCountEqual(learner_rows, [(0, "Modulo", "Modulo(p=0)", '0')])
    self.assertCountEqual(
        environment_rows,
        [(0, 'LambdaSimulation'), (1, 'LambdaSimulation')],
    )
    self.assertCountEqual(
        interaction_rows,
        [(0, 0, 1, 0), (0, 0, 2, 1), (1, 0, 1, 3), (1, 0, 2, 4), (1, 0, 3, 5)],
    )
def test_pickle_n_interactions_none(self):
    """Pickling a LambdaSimulation built with ``None`` interactions should raise CobaException.

    The exception message is expected to mention "pickle".
    """
    def C(i: int):
        return [1, 2][i]

    def A(i: int, c: int):
        return [[1, 2, 3], [4, 5, 6]][i]

    def R(i: int, c: int, a: int):
        return a - c

    with self.assertRaises(CobaException) as raised:
        pickle.loads(pickle.dumps(LambdaSimulation(None, C, A, R)))

    # Inspect the message only after the context manager has captured it.
    message = str(raised.exception)
    self.assertIn("pickle", message)
def test_n_interactions_none_seed_none(self):
    """A LambdaSimulation built with ``None`` interactions should still yield interactions lazily.

    Only the first two interactions are pulled from the iterator and
    checked against the values the C/A/R functions define.
    """
    def C(i: int):
        return [1, 2][i]

    def A(i: int, c: int):
        return [[1, 2, 3], [4, 5, 6]][i]

    def R(i: int, c: int, a: int):
        return a - c

    stream = iter(LambdaSimulation(None, C, A, R).read())

    # Pull and verify one interaction at a time; the stream is never exhausted.
    first = next(stream)
    self.assertEqual(1, first.context)
    self.assertEqual([1, 2, 3], first.actions)
    self.assertEqual([0, 1, 2], first.kwargs["rewards"])

    second = next(stream)
    self.assertEqual(2, second.context)
    self.assertEqual([4, 5, 6], second.actions)
    self.assertEqual([2, 3, 4], second.kwargs["rewards"])
def test_pipe_four_groups(self):
    """ChunkBySource should put both shuffles of one simulation into a single group.

    Two environments come from shuffling one LinearSyntheticSimulation with
    seeds ``[1, 2]``. The assertions expect work items built on either of
    them to share a group, separate from the items built on the standalone
    LambdaSimulation.
    """
    shuffled = Environments(LinearSyntheticSimulation(500)).shuffle([1, 2])._environments
    first_shuffle = shuffled[0]
    second_shuffle = shuffled[1]
    lambda_sim = LambdaSimulation(
        5,
        lambda i: i,
        lambda i, c: [0, 1, 2],
        lambda i, c, a: cast(float, a),
    )

    tasks = [
        WorkItem(None, 0, None, None, None),
        WorkItem(None, 1, None, None, None),
        WorkItem(1, None, lambda_sim, None, None),
        WorkItem(0, None, first_shuffle, None, None),
        WorkItem(0, 0, first_shuffle, None, None),
        WorkItem(2, 0, second_shuffle, None, None),
        WorkItem(0, 1, first_shuffle, None, None),
        WorkItem(1, 1, lambda_sim, None, None),
    ]

    groups = list(ChunkBySource().filter(tasks))

    # Indices into ``tasks`` making up each expected group, in order.
    expected_indices = [[0], [1], [3, 4, 6, 5], [2, 7]]

    self.assertEqual(len(groups), 4)
    for group, indices in zip(groups, expected_indices):
        self.assertEqual(group, [tasks[i] for i in indices])
def test_learners(self):
    """Two learners evaluated on one simulation should each get their own rows.

    The simulation has two interactions, so four interaction rows are
    expected: two per learner.
    """
    sim = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner1 = ModuloLearner("0")  #type: ignore
    learner2 = ModuloLearner("1")  #type: ignore
    experiment = Experiment(
        [sim], [learner1, learner2], evaluation_task=OnlineOnPolicyEvalTask(False)
    )

    actual_result = experiment.evaluate()

    # Go through the result's public accessors rather than its private
    # ``_learners`` / ``_environments`` attributes, so the test exercises
    # the same surface callers use.
    actual_learners = actual_result.learners.to_tuples()
    actual_environments = actual_result.environments.to_tuples()
    actual_interactions = actual_result.interactions.to_tuples()

    expected_learners = [(0, "Modulo", "Modulo(p=0)", '0'), (1, "Modulo", "Modulo(p=1)", '1')]
    expected_environments = [(0, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (0, 1, 1, 0), (0, 1, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_restore_not_matched_environments(self):
    """Restoring from a log whose recorded counts differ from the experiment should fail.

    The log written below records one environment and one learner.
    Evaluating against it with two environments, or with two learners, is
    expected to raise AssertionError.
    """
    path = Path("coba/tests/.temp/experiment.log")

    if path.exists():
        path.unlink()

    path.write_text(
        '["version",4]\n["experiment",{"n_environments":1,"n_learners":1}]'
    )

    try:
        sim1 = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
        learner = ModuloLearner()

        # Two environments against a log that recorded one.
        with self.assertRaises(AssertionError):
            Experiment([sim1, sim1], [learner]).evaluate(str(path))

        # Two learners against a log that recorded one.
        with self.assertRaises(AssertionError):
            Experiment([sim1], [learner, learner]).evaluate(str(path))
    finally:
        # Guard the cleanup so a missing file cannot raise here and mask
        # the real test failure.
        if path.exists():
            path.unlink()
def read(self):
    """Return whatever ``read()`` yields for a fresh two-interaction LambdaSimulation."""
    simulation = LambdaSimulation(
        2,
        lambda i: i,
        lambda i, c: [0, 1, 2],
        lambda i, c, a: cast(float, a),
    )
    return simulation.read()