def test_invalid_no_context_policy(self):
    """Supplying a context history to a context-free learning policy raises TypeError."""
    history_decisions = [1, 1, 1]
    history_rewards = [0, 0, 0]
    history_contexts = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    with self.assertRaises(TypeError):
        mab.fit(history_decisions, history_rewards, history_contexts)
def test_invalid_no_context_history(self):
    """Fitting a contextual (neighborhood) policy without contexts raises TypeError."""
    history_decisions = [1, 1, 1]
    history_rewards = [0, 0, 0]
    mab = MAB([1, 2, 3],
              LearningPolicy.EpsilonGreedy(epsilon=0),
              NeighborhoodPolicy.Radius(2))
    with self.assertRaises(TypeError):
        mab.fit(history_decisions, history_rewards)
def test_invalid_add_arm_scaler(self):
    """add_arm with an unfitted scaler raises TypeError (compare the fitted-scaler happy path)."""
    base_scaler = StandardScaler()  # note: never fitted — presumably the cause of the TypeError
    per_arm_scalers = {arm: deepcopy(base_scaler) for arm in (0, 1)}
    mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=per_arm_scalers))
    with self.assertRaises(TypeError):
        mab.add_arm(2, scaler=deepcopy(base_scaler))
def test_invalid_log_format(self):
    """Non-string log_format values (int, None) are rejected with TypeError."""
    rng = np.random.RandomState(seed=7)

    def make_simulator(log_format):
        # Draw fresh data on every attempt so the rng call sequence
        # matches one full set of draws per Simulator construction.
        return Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                         decisions=[rng.randint(0, 2) for _ in range(10)],
                         rewards=[rng.randint(0, 100) for _ in range(10)],
                         contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                         scaler=StandardScaler(),
                         test_size=0.4,
                         batch_size=0,
                         is_ordered=True,
                         seed=7,
                         log_format=log_format)

    with self.assertRaises(TypeError):
        make_simulator(7)
    with self.assertRaises(TypeError):
        make_simulator(None)
def test_simulator_mixed(self):
    """Simulator accepts a mix of contextual and context-free bandits and produces stats."""
    size = 100
    # Draw order matters for reproducibility with the module-level random state.
    decisions = [random.randint(0, 2) for _ in range(size)]
    rewards = [random.randint(0, 1000) for _ in range(size)]
    contexts = [[random.random() for _ in range(50)] for _ in range(size)]
    n_jobs = 1
    bandits = [
        ('RandomRadius', MAB([0, 1], LearningPolicy.Random(),
                             NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
        ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
    ]
    sim = Simulator(bandits, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5,
                    is_ordered=False, batch_size=0, seed=123456)
    sim.run()
    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_add_arm_scaler(self):
    """add_arm succeeds when the supplied scaler has already been fitted."""
    fitted_scaler = StandardScaler()
    fitted_scaler.fit(np.array([[1, 2, 3, 4, 5],
                                [5, 4, 3, 2, 1]]).astype('float64'))
    per_arm_scalers = {arm: deepcopy(fitted_scaler) for arm in (0, 1)}
    mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=per_arm_scalers))
    # No exception expected: the new arm gets its own fitted scaler copy.
    mab.add_arm(2, scaler=deepcopy(fitted_scaler))
def test_rewards_inf_df(self):
    """An infinite reward inside a DataFrame column is rejected with TypeError."""
    history = pd.DataFrame({
        'decision': [1, 1, 1, 2, 2, 2, 3, 3, 3],
        'reward': [0, 0, 0, 0, 0, 0, 1, 1, np.inf],
    })
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    with self.assertRaises(TypeError):
        mab.fit(history['decision'], history['reward'])
def test_invalid_add_arm(self):
    """add_arm rejects None/NaN/inf and duplicates of an existing arm."""
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    # None, NaN and inf are not valid arm identifiers.
    for bad_arm in (None, np.nan, np.inf):
        with self.assertRaises(ValueError):
            mab.add_arm(bad_arm)
    # Arm 3 already exists.
    with self.assertRaises(ValueError):
        mab.add_arm(3)
def test_simulator_contextual(self):
    """Simulator runs a suite of contextual bandits end-to-end and populates results."""
    size = 100
    decisions = [random.randint(0, 2) for _ in range(size)]
    rewards = [random.randint(0, 1000) for _ in range(size)]
    contexts = [[random.random() for _ in range(50)] for _ in range(size)]

    def binarize(decision, reward):
        # Arm-dependent threshold used by ThompsonSampling to binarize rewards.
        return reward <= 50 if decision == 0 else reward >= 220

    n_jobs = 1
    radius = NeighborhoodPolicy.Radius(10)
    contextual_mabs = [
        ('Random', MAB([0, 1], LearningPolicy.Random(), radius, n_jobs=n_jobs)),
        ('UCB1', MAB([0, 1], LearningPolicy.UCB1(1), radius, n_jobs=n_jobs)),
        ('ThompsonSampling', MAB([0, 1], LearningPolicy.ThompsonSampling(binarize),
                                 radius, n_jobs=n_jobs)),
        ('EpsilonGreedy', MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=.15),
                              radius, n_jobs=n_jobs)),
        ('Softmax', MAB([0, 1], LearningPolicy.Softmax(), radius, n_jobs=n_jobs)),
    ]
    sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5,
                    is_ordered=False, batch_size=0, seed=123456)
    sim.run()
    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_invalid_lp_arg(self):
    """Each learning policy rejects keyword arguments it does not declare."""
    arms = ['a', 'b']
    with self.assertRaises(TypeError):
        MAB(arms, LearningPolicy.UCB1(epsilon=2))
    with self.assertRaises(TypeError):
        MAB(arms, LearningPolicy.EpsilonGreedy(alpha=2))
    with self.assertRaises(TypeError):
        MAB(arms, LearningPolicy.ThompsonSampling(alpha=2))
    with self.assertRaises(TypeError):
        MAB(arms, LearningPolicy.Softmax(alpha=2))
    with self.assertRaises(TypeError):
        MAB(arms, LearningPolicy.LinUCB(tau=1))
def test_simulator_hyper_parameter(self):
    """Simulator supports quick hyper-parameter sweeps over neighborhood radii."""
    size = 100
    decisions = [random.randint(0, 2) for _ in range(size)]
    rewards = [random.randint(0, 1000) for _ in range(size)]
    contexts = [[random.random() for _ in range(50)] for _ in range(size)]
    n_jobs = 1
    candidates = [
        ('Radius' + str(radius),
         MAB([0, 1], LearningPolicy.UCB1(1),
             NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
        for radius in range(6, 10)
    ]
    sim = Simulator(candidates, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5,
                    is_ordered=False, batch_size=0, seed=123456,
                    is_quick=True)
    sim.run()
    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_tables(self):
    """LSHNearest hash tables start empty and are populated deterministically by fit."""
    seed = 11
    n_dimensions = 5
    n_tables = 5
    rng = np.random.RandomState(seed)
    contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards = np.array([rng.rand() for _ in range(10)])
    lsh = MAB(arms=[0, 1],
              learning_policy=LearningPolicy.Softmax(),
              neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
              seed=seed)
    # Planes are lazily created: all tables are empty before fit.
    for table in range(n_tables):
        self.assertListEqual([], lsh._imp.table_to_plane[table])
    lsh.fit(decisions, rewards, contexts)
    # Fixed seed pins the projection plane and hash bucket contents.
    self.assertListAlmostEqual(
        list(lsh._imp.table_to_plane[0][0]),
        [1.74945474, -0.286073, -0.48456513, -2.65331856, -0.00828463])
    self.assertListEqual(list(lsh._imp.table_to_hash_to_index[0].keys()),
                         [1, 4, 5, 12, 13, 14, 15])
    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][1], [3])
    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][14], [0, 4, 8])
def test_popularity(self):
    """Popularity policy favors the arm with the higher mean reward."""
    arms = ['Arm1', 'Arm2']
    decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
    rewards = [20, 17, 25, 9]
    mab = MAB(arms, LearningPolicy.Popularity())
    mab.fit(decisions, rewards)
    # NOTE: this extra predict() call is deliberate — it may advance internal
    # random state before the asserted calls, so it is kept as-is.
    mab.predict()
    self.assertEqual("Arm2", mab.predict())
    # Expectations are the reward means normalized to sum to one.
    self.assertDictEqual({'Arm1': 0.38016528925619836,
                          'Arm2': 0.6198347107438016},
                         mab.predict_expectations())
def test_incomplete_learning_policy_implementation(self):
    """Accessing learning_policy on an unrecognized BaseMAB subclass raises NotImplementedError."""

    class StubMAB(BaseMAB):
        # Minimal concrete BaseMAB: every abstract hook is a no-op.
        def __init__(self):
            super().__init__(np.random.RandomState(7), [0, 1], 1, None)

        def _fit_arm(self, arm, decisions, rewards, contexts=None):
            pass

        def _predict_contexts(self, contexts, is_predict, seeds=None, start_index=None):
            pass

        def _uptake_new_arm(self, arm, binarizer=None, scaler=None):
            pass

        def fit(self, decisions, rewards, contexts=None):
            pass

        def partial_fit(self, decisions, rewards, contexts=None):
            pass

        def predict(self, contexts=None):
            pass

        def predict_expectations(self, contexts=None):
            pass

    mab = MAB([0, 1], learning_policy=LearningPolicy.EpsilonGreedy())
    # Swap in an implementation MAB does not know how to map back to a policy.
    mab._imp = StubMAB()
    with self.assertRaises(NotImplementedError):
        mab.learning_policy
def test_invalid_plot_args_metric_value(self):
    """plot() rejects an unsupported metric name with ValueError."""
    rng = np.random.RandomState(seed=7)
    sim = Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                    decisions=[rng.randint(0, 2) for _ in range(10)],
                    rewards=[rng.randint(0, 100) for _ in range(10)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)
    sim.run()
    with self.assertRaises(ValueError):
        sim.plot('mean')
def predict(arms: List[Arm],
            decisions: Union[List, np.ndarray, pd.Series],
            rewards: Union[List, np.ndarray, pd.Series],
            learning_policy: Union[LearningPolicy.EpsilonGreedy, LearningPolicy.Random,
                                   LearningPolicy.Softmax, LearningPolicy.ThompsonSampling,
                                   LearningPolicy.UCB1, LearningPolicy.LinTS,
                                   LearningPolicy.LinUCB],
            neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                       NeighborhoodPolicy.Radius,
                                       NeighborhoodPolicy.KNearest] = None,
            context_history: Union[None, List[Num], List[List[Num]],
                                   np.ndarray, pd.DataFrame, pd.Series] = None,
            contexts: Union[None, List[Num], List[List[Num]],
                            np.ndarray, pd.DataFrame, pd.Series] = None,
            seed: Optional[int] = 123456,
            num_run: Optional[int] = 1,
            is_predict: Optional[bool] = True,
            n_jobs: Optional[int] = 1,
            backend: Optional[str] = None
            ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
    """Set up a MAB with the given configuration, train it, and query it.

    Builds the model, fits it on (decisions, rewards, context_history), then
    calls predict (when is_predict is True) or predict_expectations (when
    False) num_run times.

    Returns a 2-tuple: the single result when num_run == 1 (otherwise the
    list of results), and the fitted MAB instance.
    """
    # Build and train the model.
    mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)
    mab.fit(decisions, rewards, context_history)

    # Query num_run times with the chosen method.
    if is_predict:
        results = [mab.predict(contexts) for _ in range(num_run)]
    else:
        results = [mab.predict_expectations(contexts) for _ in range(num_run)]

    # Unwrap the single-run case for caller convenience.
    return (results[0] if num_run == 1 else results), mab
def test_invalid_simulator_stats_scope(self):
    """_set_stats rejects an unknown scope name ('validation') with ValueError."""
    rng = np.random.RandomState(seed=7)
    decisions = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards = np.array([rng.randint(0, 100) for _ in range(10)])
    sim = Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                    decisions=decisions,
                    rewards=rewards,
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)
    with self.assertRaises(ValueError):
        sim._set_stats('validation', decisions, rewards)
def test_invalid_get_arm_stats(self):
    """get_arm_stats rejects non-numeric rewards with TypeError."""
    rng = np.random.RandomState(seed=9)
    decisions = np.array([rng.randint(0, 2) for _ in range(5)])
    rewards = np.array([rng.randint(0, 100) for _ in range(5)])
    bad_rewards = np.array(['h', 'e', 'l', 'l', 'o'])
    sim = Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                    decisions=decisions,
                    rewards=rewards,
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(5)],
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)
    with self.assertRaises(TypeError):
        sim.get_arm_stats(decisions, bad_rewards)
def test_partial_fit_indices(self):
    """partial_fit appends new row indices into the existing LSH hash buckets."""
    seed = 11
    n_dimensions = 5
    n_tables = 5
    rng = np.random.RandomState(seed)

    # Initial training batch (rows 0..9).
    contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards = np.array([rng.rand() for _ in range(10)])
    lsh = MAB(arms=[0, 1],
              learning_policy=LearningPolicy.Softmax(),
              neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
              seed=seed)
    lsh.fit(decisions, rewards, contexts)

    # Incremental batch (rows 10..19) hashed into the same tables.
    contexts2 = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions2 = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards2 = np.array([rng.rand() for _ in range(10)])
    lsh.partial_fit(decisions2, rewards2, contexts2)

    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][4], [1, 15, 16])
    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][12], [9, 10, 11, 19])
def test_invalid_seed(self):
    """A non-integer seed (a list) is rejected with TypeError."""
    with self.assertRaises(TypeError):
        MAB([0, 1], LearningPolicy.EpsilonGreedy(0), seed=[0, 1])
def test_invalid_rewards_to_binary_type(self):
    """ThompsonSampling threshold dict with keys that don't match the arms raises TypeError."""
    mismatched_thresholds = {1: 1, 'b': 1}
    with self.assertRaises(TypeError):
        MAB(['a', 'b'], LearningPolicy.ThompsonSampling(mismatched_thresholds))
def test_invalid_epsilon_value(self):
    """Epsilon outside [0, 1] is rejected with ValueError."""
    with self.assertRaises(ValueError):
        MAB(['a', 'b'], LearningPolicy.EpsilonGreedy(epsilon=2))
def test_invalid_epsilon_type(self):
    """A non-numeric epsilon is rejected with TypeError."""
    with self.assertRaises(TypeError):
        MAB(['a', 'b'], LearningPolicy.EpsilonGreedy(epsilon="one"))
def test_convert_matrix_invalid(self):
    """_convert_matrix raises NotImplementedError for an unsupported input type (plain dict)."""
    unsupported = {'one': [1, 1, 1, 1, 1]}
    with self.assertRaises(NotImplementedError):
        MAB._convert_matrix(unsupported)
def test_rewards_null_list(self):
    """A None reward in a plain list is rejected with TypeError."""
    decisions = [1, 1, 1, 2, 2, 2, 3, 3, 3]
    rewards = [0, 0, 0, 0, 0, 0, 1, 1, None]
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    with self.assertRaises(TypeError):
        mab.fit(decisions, rewards)
def test_convert_array_invalid(self):
    """_convert_array raises NotImplementedError for an unsupported input type (DataFrame)."""
    frame = pd.DataFrame({'a': [1, 1, 1, 1, 1]})
    with self.assertRaises(NotImplementedError):
        MAB._convert_array(frame)
def test_invalid_decisions_rewards_length(self):
    """fit rejects decisions (7) and rewards (9) of different lengths with ValueError."""
    decisions = [1, 1, 2, 2, 2, 3, 3]
    rewards = [0, 0, 0, 0, 0, 0, 1, 1, 1]
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    with self.assertRaises(ValueError):
        mab.fit(decisions, rewards)
def test_predict_without_fit(self):
    """Calling predict before fit raises for every policy combination.

    Covers: context-free LPs, parametric LPs, neighborhood policies over
    context-free LPs, and neighborhood policies over parametric LPs.
    """
    # Context-free learning policies: predict with no contexts.
    for lp in BaseTest.lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], lp)
            mab.predict()
    # Parametric learning policies: predict requires contexts.
    for para_lp in BaseTest.para_lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], para_lp)
            mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
    # Neighborhood policies over context-free learning policies.
    for cp in BaseTest.nps:
        for lp in BaseTest.lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], lp, cp)
                mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
    # Neighborhood policies over parametric learning policies.
    # BUG FIX: the original iterated BaseTest.lps here even though the loop
    # variable is para_lp, so the parametric-LP + neighborhood combination
    # was never exercised; iterate BaseTest.para_lps as clearly intended.
    for cp in BaseTest.nps:
        for para_lp in BaseTest.para_lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], para_lp, cp)
                mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
def test_rewards_inf_array(self):
    """An infinite reward inside a numpy array is rejected with TypeError."""
    decisions = np.asarray([1, 1, 1, 2, 2, 2, 3, 3, 3])
    rewards = np.asarray([0, 0, 0, 0, 0, 0, 1, 1, np.inf])
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
    with self.assertRaises(TypeError):
        mab.fit(decisions, rewards)
def test_predict_with_no_fit(self):
    """predict_expectations before fit raises for every policy combination."""
    contexts = [[0, 1, 1, 2]]
    # Context-free learning policies: no contexts needed.
    for lp in InvalidTest.lps:
        mab = MAB([1, 2], lp)
        with self.assertRaises(Exception):
            mab.predict_expectations()
    # Parametric learning policies: expectations require contexts.
    for lp in InvalidTest.para_lps:
        mab = MAB([1, 2], lp)
        with self.assertRaises(Exception):
            mab.predict_expectations(contexts)
    # Neighborhood policies over context-free learning policies.
    for cp in InvalidTest.nps:
        for lp in InvalidTest.lps:
            mab = MAB([1, 2], lp, cp)
            with self.assertRaises(Exception):
                mab.predict_expectations(contexts)