def test_transaction_resume_1(self):
    sim             = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    working_learner = ModuloLearner()
    broken_learner  = BrokenLearner()
    benchmark       = Benchmark([sim])

    #the second time the broken_learner shouldn't ever be used for learning or choosing
    #because it already worked the first time and we are "resuming" benchmark from transactions.log
    try:
        first_result  = benchmark.evaluate([working_learner], "coba/tests/.temp/transactions.log")
        second_result = benchmark.evaluate([broken_learner ], "coba/tests/.temp/transactions.log")

        actual_learners     = second_result.learners.to_tuples()
        actual_simulations  = second_result.simulations.to_tuples()
        actual_interactions = second_result.interactions.to_tuples()

        expected_learners     = [(0, "Modulo(p=0)", "Modulo", '0')]
        expected_simulations  = [(0, "LambdaSimulation", "None", "None", '"LambdaSimulation"')]
        expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]
    finally:
        if Path('coba/tests/.temp/transactions.log').exists():
            Path('coba/tests/.temp/transactions.log').unlink()

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
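# A minimal, hypothetical sketch of the resume pattern the transaction test above relies on
# (this is not coba's implementation; every name below is invented for illustration): work
# whose results already appear in the log is skipped on the second run, so a broken learner
# that only shows up in the "resumed" run is never actually invoked.
import json
from pathlib import Path

def evaluate_with_resume(tasks, run_task, log_path):
    log_file  = Path(log_path)
    completed = set()

    #load the keys of any tasks that finished on a previous run
    if log_file.exists():
        with log_file.open() as f:
            completed = { json.loads(line)["key"] for line in f if line.strip() }

    with log_file.open("a") as f:
        for key, task in tasks.items():
            if key in completed:
                continue #resumed: this task is never re-run (and its learner never touched)
            result = run_task(task)
            f.write(json.dumps({"key": key, "result": result}) + "\n")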
def test_wrapped_not_picklable_learner_with_reduce(self):
    sim1      = LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner   = WrappedLearner(NotPicklableLearnerWithReduce())
    benchmark = Benchmark([sim1], shuffle=[1, 4])

    benchmark.evaluate([learner])
def test_not_picklable_learner(self):
    sim1      = LambdaSimulation(5, lambda t: t, lambda t: [0,1,2], lambda c,a: a)
    learner   = NotPicklableLearner()
    benchmark = Benchmark([sim1], batch_sizes=[2], ignore_raise=False, seeds=[1,4])

    with self.assertRaises(Exception) as cm:
        benchmark.evaluate([learner])

    self.assertIn("Learners are required to be picklable", str(cm.exception))
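# A short standard-library illustration of why a learner like NotPicklableLearner is expected
# to raise: multiprocess evaluation has to pickle learners, and an instance whose state holds
# a lambda (or other local function) cannot be pickled. LambdaHolder is a made-up stand-in,
# not a coba class.
import pickle

class LambdaHolder:
    def __init__(self):
        self._pick = lambda actions: actions[0] #lambdas can't be pickled

try:
    pickle.dumps(LambdaHolder())
except Exception as e:
    print(f"not picklable: {e}")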
def test_wrapped_not_picklable_learner_sans_reduce(self):
    sim1      = LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner   = WrappedLearner(NotPicklableLearner())
    benchmark = Benchmark([sim1])

    CobaConfig.Logger = BasicLogger(MemorySink())

    benchmark.evaluate([learner])

    self.assertEqual(1, len(CobaConfig.Logger.sink.items))
    self.assertIn("pickle", CobaConfig.Logger.sink.items[0])
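# A hedged sketch of the __reduce__ escape hatch that separates the with_reduce and sans_reduce
# tests above: an object with unpicklable state can still define __reduce__ to tell pickle how
# to rebuild it, so pickling succeeds. ReducibleLambdaHolder is illustrative only and is not
# coba's NotPicklableLearnerWithReduce.
import pickle

class ReducibleLambdaHolder:
    def __init__(self):
        self._pick = lambda actions: actions[0] #still unpicklable state...

    def __reduce__(self):
        #...but pickle is told to reconstruct the object by calling the class again,
        #instead of serializing its __dict__, so round-tripping now works
        return (ReducibleLambdaHolder, ())

restored = pickle.loads(pickle.dumps(ReducibleLambdaHolder()))
print(type(restored).__name__) #ReducibleLambdaHolder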
def test_transaction_resume_1(self):
    sim             = LambdaSimulation(5, lambda t: t, lambda t: [0,1,2], lambda c,a: a)
    working_learner = ModuloLearner()
    broken_learner  = BrokenLearner()
    benchmark       = Benchmark([sim], batch_count=1)

    #the second time the broken_learner shouldn't ever be used for learning or choosing
    #because it already worked the first time and we are "resuming" benchmark from transactions.log
    try:
        first_results  = benchmark.evaluate([working_learner], "coba/tests/.temp/transactions.log")
        second_results = benchmark.evaluate([broken_learner ], "coba/tests/.temp/transactions.log")

        actual_learners, actual_simulations, actual_batches = second_results.to_tuples()

        expected_learners    = [(0, "0", "0")]
        expected_simulations = [(0, '0', ['{"Batch":[None, 1, None]}'], 5, 1, 1, 3)]
        expected_batches     = [(0, 0, [5], [mean([0,1,2,0,1])])]
    finally:
        if Path('coba/tests/.temp/transactions.log').exists():
            Path('coba/tests/.temp/transactions.log').unlink()

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_batches, expected_batches)
def test_seeds(self):
    sim1      = LambdaSimulation(5, lambda t: t, lambda t: [0,1,2], lambda c,a: a)
    learner   = ModuloLearner()
    benchmark = Benchmark([sim1], batch_sizes=[2], ignore_raise=False, seeds=[1,4])

    actual_learners, actual_simulations, actual_batches = benchmark.evaluate([learner]).to_tuples()

    expected_learners    = [(0, "0", "0")]
    expected_simulations = [(0, '0', ['{"Shuffle":1}', '{"Batch":[None, None, [2]]}'], 2, 1, 1, 3),
                            (1, '0', ['{"Shuffle":4}', '{"Batch":[None, None, [2]]}'], 2, 1, 1, 3)]
    expected_batches     = [(0, 0, [2], [mean([1,0])]),
                            (1, 0, [2], [mean([2,0])])]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_batches, expected_batches)
def test_take(self):
    sim1      = LambdaSimulation(5, lambda t: t, lambda t: [0,1,2], lambda c,a: a)
    sim2      = LambdaSimulation(4, lambda t: t, lambda t: [3,4,5], lambda c,a: a)
    learner   = ModuloLearner()
    benchmark = Benchmark([sim1,sim2], batch_count=1, take=5, ignore_raise=False)

    actual_learners, actual_simulations, actual_batches = benchmark.evaluate([learner]).to_tuples()

    expected_learners    = [(0, "0", "0")]
    expected_simulations = [(0, '0', ['{"Take":5}', '{"Batch":[None, 1, None]}'], 5, 1, 1, 3),
                            (1, '1', ['{"Take":5}', '{"Batch":[None, 1, None]}'], 0, 0, 0, 0)]
    expected_batches     = [(0, 0, [5], [mean([0,1,2,0,1])])]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_batches, expected_batches)
def test_learners(self):
    sim       = LambdaSimulation(5, lambda t: t, lambda t: [0,1,2], lambda c,a: a)
    learner1  = ModuloLearner("0") #type: ignore
    learner2  = ModuloLearner("1") #type: ignore
    benchmark = Benchmark([sim], batch_count=1, ignore_raise=False)

    actual_results = benchmark.evaluate([learner1, learner2])
    actual_learners, actual_simulations, actual_batches = actual_results.to_tuples()

    expected_learners    = [(0, "0", "0"), (1, "1", "1")]
    expected_simulations = [(0, '0', ['{"Batch":[None, 1, None]}'], 5, 1, 1, 3)]
    expected_batches     = [(0, 0, [5], [mean([0,1,2,0,1])]),
                            (0, 1, [5], [mean([0,1,2,0,1])])]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_batches, expected_batches)
def test_sources(self):
    sim1      = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner   = ModuloLearner()
    benchmark = Benchmark([sim1])

    result = benchmark.evaluate([learner])

    actual_learners     = result.learners.to_tuples()
    actual_simulations  = result.simulations.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo(p=0)", "Modulo", '0')]
    expected_simulations  = [(0, "LambdaSimulation", "None", "None", '"LambdaSimulation"')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_learners(self):
    sim       = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner1  = ModuloLearner("0") #type: ignore
    learner2  = ModuloLearner("1") #type: ignore
    benchmark = Benchmark([sim])

    actual_result       = benchmark.evaluate([learner1, learner2])
    actual_learners     = actual_result._learners.to_tuples()
    actual_simulations  = actual_result._simulations.to_tuples()
    actual_interactions = actual_result._interactions.to_tuples()

    expected_learners     = [(0, "Modulo(p=0)", "Modulo", '0'),
                             (1, "Modulo(p=1)", "Modulo", '1')]
    expected_simulations  = [(0, "LambdaSimulation", "None", "None", '"LambdaSimulation"')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (0, 1, 1, 0), (0, 1, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_eval_seeds(self):
    sim1      = LambdaSimulation(3, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner   = RandomLearner()
    benchmark = Benchmark([sim1], shuffle=[1, 4])

    result = benchmark.evaluate([learner], seed=1)

    actual_learners     = result.learners.to_tuples()
    actual_simulations  = result.simulations.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "random", "random")]
    expected_simulations  = [(0, "LambdaSimulation", "1", "None", '"LambdaSimulation",{"Shuffle":1}'),
                             (1, "LambdaSimulation", "4", "None", '"LambdaSimulation",{"Shuffle":4}')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 2), (0, 0, 3, 1),
                             (1, 0, 1, 0), (1, 0, 2, 2), (1, 0, 3, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_ignore_raise(self):
    log_sink = MemorySink()
    CobaConfig.Logger = IndentLogger(log_sink)

    sim1      = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    sim2      = LambdaSimulation(3, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a))
    learners  = [ModuloLearner(), BrokenLearner()]
    benchmark = Benchmark([sim1, sim2])

    result = benchmark.evaluate(learners)

    actual_learners     = result.learners.to_tuples()
    actual_simulations  = result.simulations.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo(p=0)", "Modulo", '0'),
                             (1, "Broken", "Broken", float('nan'))]
    expected_simulations  = [(0, "LambdaSimulation", "None", "None", '"LambdaSimulation"'),
                             (1, "LambdaSimulation", "None", "None", '"LambdaSimulation"')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (1, 0, 1, 3), (1, 0, 2, 4), (1, 0, 3, 5)]

    self.assertEqual(2, sum([int("Unexpected exception:" in item) for item in log_sink.items]))

    #nan != nan, so the broken learner's parameter column is checked separately from the rest
    self.assertCountEqual(actual_learners[0], expected_learners[0])
    self.assertCountEqual(actual_learners[1][:3], expected_learners[1][:3])
    self.assertTrue(math.isnan(actual_learners[1][3]))

    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_take(self):
    sim1      = LambdaSimulation(5, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    sim2      = LambdaSimulation(2, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a))
    learner   = ModuloLearner()
    benchmark = Benchmark([sim1, sim2], take=3)

    result = benchmark.evaluate([learner])

    actual_learners     = result.learners.to_tuples()
    actual_simulations  = result.simulations.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo(p=0)", "Modulo", '0')]
    expected_simulations  = [(0, "LambdaSimulation", "None", "3", '"LambdaSimulation",{"Take":3}'),
                             (1, "LambdaSimulation", "None", "3", '"LambdaSimulation",{"Take":3}')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (0, 0, 3, 2)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_simulations, expected_simulations)
    self.assertCountEqual(actual_interactions, expected_interactions)
    #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context
    #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context
    LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_1, seed=10),
    LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_2, seed=10),
    LambdaSimulation(2000, contexts, actions, polynomial_reward_1, seed=10),
]

#define a benchmark: this benchmark replays each simulation once per seed (5 shuffled repeats here)
benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5)))

#create the learner factories
learner_factories = [
    RandomLearner(seed=10),
    EpsilonLearner(epsilon=0.025, seed=10),
    UcbTunedLearner(seed=10),
    VowpalLearner(epsilon=0.025, seed=10),
    VowpalLearner(epsilon=0.025, is_adf=False, seed=10),
    VowpalLearner(bag=5, seed=10),
]

benchmark.evaluate(learner_factories).standard_plot()
#First, we define the learners that we want to test
learners = [
    RandomLearner(),
    EpsilonBanditLearner(epsilon=0.025),
    VowpalLearner(epsilon=.1), #this learner requires that VowpalWabbit be installed
]

#Then we define the simulations that we want to test our learners on
simulations = [ ValidationSimulation(300, context_features=True, action_features=False, seed=1000) ]

#And also define a collection of seeds used to shuffle our simulations
seeds = [0, 1, 2, 3]

#We then create our benchmark using our simulations and seeds
benchmark = Benchmark(simulations, shuffle=seeds)

#Finally we evaluate our learners on our benchmark (passing a file path to evaluate, as in the
#transaction tests above, would also save the results to disk)
result = benchmark.evaluate(learners)

#After evaluating we can create a quick summary plot to get a sense of how the learners performed
result.plot_learners()

#We can also create a plot examining how one specific learner did across each shuffle of our simulation
result.plot_shuffles(learner_pattern="vw")