def test_greedy0_n3(self):
    """Greedy policy (epsilon=0) over three clusters predicts arm 2 for both contexts."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    three_clusters = NeighborhoodPolicy.Clusters(3)

    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 0, 0, 1, 1, 0, 0, 1, 1, 1],
                             learning_policy=greedy,
                             neighborhood_policy=three_clusters,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    # One prediction per query context
    self.assertListEqual(arms, [2, 2])
    # The fitted k-means model carries the requested number of clusters
    self.assertEqual(mab._imp.kmeans.n_clusters, 3)
def test_greedy0_k2_exps(self):
    """Greedy policy (epsilon=0) with 2 nearest neighbors yields the expected per-arm expectations."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    two_nearest = NeighborhoodPolicy.KNearest(2)

    # is_predict=False requests expectations instead of arm predictions
    exps, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=greedy,
                             neighborhood_policy=two_nearest,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=False)

    first_expected = {1: 0.0, 2: 0.0, 3: 0, 4: 0}
    second_expected = {1: 1.0, 2: 0.0, 3: 0, 4: 0}
    self.assertDictEqual(exps[0], first_expected)
    self.assertDictEqual(exps[1], second_expected)
def test_alpha0_nearest5(self):
    """LinUCB (alpha=0) with 5 nearest neighbors predicts arm 3 for both contexts on each of 3 runs."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    linucb = LearningPolicy.LinUCB(alpha=0)
    five_nearest = NeighborhoodPolicy.KNearest(k=5)

    arm, mab = self.predict(arms=[1, 2, 3],
                            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
                            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                            learning_policy=linucb,
                            neighborhood_policy=five_nearest,
                            context_history=history,
                            contexts=queries,
                            seed=123456,
                            num_run=3,
                            is_predict=True)

    # One list of predictions per run
    self.assertEqual(len(arm), 3)
    self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
def test_greedy0_n2_mini(self):
    """Greedy policy (epsilon=0) over two clusters built with the mini-batch option."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    # Second positional argument turns on the mini-batch variant
    mini_clusters = NeighborhoodPolicy.Clusters(2, True)

    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=greedy,
                             neighborhood_policy=mini_clusters,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    self.assertListEqual(arms, [3, 1])
    # The underlying clustering model must be the mini-batch implementation
    self.assertTrue(isinstance(mab._imp.kmeans, MiniBatchKMeans))
def test_greedy0_no_nhood_predict_random(self):
    """Predictions for two runs when the LSH neighborhood uses n_dimensions=25 and default arm weights."""
    # Arm 1 only has positive rewards, arm 2 only negative ones, arm 3 was never played.
    # Per the original test note, the setup is meant to leave the test contexts
    # without any neighborhood, so the prediction falls back to the no-neighborhood case.
    history = [[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]]
    queries = [[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    lsh_policy = NeighborhoodPolicy.LSHNearest(n_dimensions=25)

    arms, mab = self.predict(arms=[1, 2, 3],
                             decisions=[1, 1, 1, 2, 2, 2],
                             rewards=[10, 10, 10, -10, -10, -10],
                             learning_policy=greedy,
                             neighborhood_policy=lsh_policy,
                             context_history=history,
                             contexts=queries,
                             seed=7,
                             num_run=2,
                             is_predict=True)

    # Expected arms for this seed: the never-played arm 3 shows up in the first run
    self.assertListEqual(arms[0], [2, 3])
    self.assertListEqual(arms[1], [2, 1])
def test_partial_fit_indices(self):
    """After fit + partial_fit, LSH hash buckets hold row indices from both batches."""
    seed = 11
    n_dimensions = 5
    n_tables = 5
    rng = np.random.RandomState(seed)

    def draw_batch():
        # Draw order (contexts, then decisions, then rewards) is kept so the
        # random stream matches the expected bucket contents asserted below.
        batch_contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        batch_decisions = np.array([rng.randint(0, 2) for _ in range(10)])
        batch_rewards = np.array([rng.rand() for _ in range(10)])
        return batch_decisions, batch_rewards, batch_contexts

    decisions, rewards, contexts = draw_batch()

    lsh = MAB(arms=[0, 1],
              learning_policy=LearningPolicy.Softmax(),
              neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
              seed=seed)
    lsh.fit(decisions, rewards, contexts)

    decisions2, rewards2, contexts2 = draw_batch()
    lsh.partial_fit(decisions2, rewards2, contexts2)

    # Indices >= 10 come from the second (partial_fit) batch
    first_table = lsh._imp.table_to_hash_to_index[0]
    self.assertListEqual(first_table[4], [1, 15, 16])
    self.assertListEqual(first_table[12], [9, 10, 11, 19])
def test_greedy0_no_nhood_expectation_nan(self):
    """Expectations are nan for every arm when the test contexts have no neighborhood."""
    # Per the original test note, n_dimensions=25 is chosen so that no
    # neighborhood is found for the test contexts.
    history = [[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]]
    queries = [[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    lsh_policy = NeighborhoodPolicy.LSHNearest(n_dimensions=25)

    exps, mab = self.predict(arms=[1, 2, 3],
                             decisions=[1, 1, 1, 2, 2, 2],
                             rewards=[10, 10, 10, -10, -10, -10],
                             learning_policy=greedy,
                             neighborhood_policy=lsh_policy,
                             context_history=history,
                             contexts=queries,
                             seed=7,
                             num_run=1,
                             is_predict=False)

    # Both contexts are expected to map every arm to nan
    all_nan = {1: np.nan, 2: np.nan, 3: np.nan}
    for position in (0, 1):
        self.assertDictEqual(exps[position], all_nan)
def test_partial_fit_thompson_thresholds(self):
    """Thompson Sampling with a per-arm binarizer keeps stored rewards binary across fit and partial_fit.

    Checks the prediction, the sizes and dimensionality of the stored training
    data, and that the stored rewards only contain the values 0 and 1 both
    after the initial fit and after an online update.
    """
    arm_to_threshold = {1: 1, 2: 5, 3: 2, 4: 3}

    def binarize(arm, reward):
        # A reward counts as a success when it reaches the arm's threshold
        return reward >= arm_to_threshold[arm]

    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 1, 7, 0, 1, 9, 0, 2, 6, 11],
        learning_policy=LearningPolicy.ThompsonSampling(binarize),
        neighborhood_policy=NeighborhoodPolicy.KNearest(2),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                         [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                         [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=True)

    self.assertTrue(mab._imp.lp.is_contextual_binarized)
    self.assertListEqual(arms, [4, 4])
    self.assertEqual(len(mab._imp.decisions), 10)
    self.assertEqual(len(mab._imp.rewards), 10)
    self.assertEqual(len(mab._imp.contexts), 10)
    self.assertEqual(np.ndim(mab._imp.decisions), 1)
    # sorted() makes the check independent of set iteration order
    self.assertListEqual(sorted(set(mab._imp.rewards)), [0, 1])

    # Online update with three more observations
    decisions2 = [1, 2, 3]
    rewards2 = [11, 1, 6]
    context_history2 = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
    mab.partial_fit(decisions2, rewards2, context_history2)

    self.assertEqual(len(mab._imp.decisions), 13)
    self.assertEqual(len(mab._imp.rewards), 13)
    self.assertEqual(len(mab._imp.contexts), 13)
    self.assertEqual(np.ndim(mab._imp.decisions), 1)

    arm = mab.predict([[0, 1, 2, 3, 5]])
    self.assertEqual(arm, 3)
    # Rewards must still be binary after the update
    self.assertListEqual(sorted(set(mab._imp.rewards)), [0, 1])
def test_tables(self):
    """LSH tables start with empty planes and are populated with planes and hash buckets by fit."""
    seed = 11
    n_dimensions = 5
    n_tables = 5
    rng = np.random.RandomState(seed)

    # Draw order (contexts, decisions, rewards) fixes the random stream
    contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards = np.array([rng.rand() for _ in range(10)])

    lsh_policy = NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables)
    lsh = MAB(arms=[0, 1],
              learning_policy=LearningPolicy.Softmax(),
              neighborhood_policy=lsh_policy,
              seed=seed)

    # Before fitting, every table has an empty plane list
    for table in range(n_tables):
        self.assertListEqual([], lsh._imp.table_to_plane[table])

    lsh.fit(decisions, rewards, contexts)

    expected_plane = [1.74945474, -0.286073, -0.48456513, -2.65331856, -0.00828463]
    self.assertListAlmostEqual(list(lsh._imp.table_to_plane[0][0]), expected_plane)

    first_table = lsh._imp.table_to_hash_to_index[0]
    self.assertListEqual(list(first_table.keys()), [1, 4, 5, 12, 13, 14, 15])
    self.assertListEqual(first_table[1], [3])
    self.assertListEqual(first_table[14], [0, 4, 8])
def test_greedy0_no_nhood_predict_weighted(self):
    """Predictions for two runs when no-neighborhood arm weights are [0, 0.8, 0.2]."""
    # Per the original test note, n_dimensions=25 is chosen so that no
    # neighborhood is found for the test contexts and the arm weights apply.
    history = [[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]]
    queries = [[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    weighted_lsh = NeighborhoodPolicy.LSHNearest(n_dimensions=25,
                                                 no_nhood_prob_of_arm=[0, 0.8, 0.2])

    arms, mab = self.predict(arms=[1, 2, 3],
                             decisions=[1, 1, 1, 2, 2, 2],
                             rewards=[10, 10, 10, -10, -10, -10],
                             learning_policy=greedy,
                             neighborhood_policy=weighted_lsh,
                             context_history=history,
                             contexts=queries,
                             seed=7,
                             num_run=2,
                             is_predict=True)

    # With this seed the highly weighted arm 2 is chosen for every context in both runs
    for run in (0, 1):
        self.assertListEqual(arms[run], [2, 2])
def test_add_arm(self):
    """Adding an arm to an LSHNearest bandit registers it on the MAB, its implementer and the learning policy."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    lsh_policy = NeighborhoodPolicy.LSHNearest(n_dimensions=2)

    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=greedy,
                             neighborhood_policy=lsh_policy,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    new_arm = 5
    mab.add_arm(new_arm)

    # The new arm must be visible at every level that tracks arms
    self.assertTrue(new_arm in mab.arms)
    self.assertTrue(new_arm in mab._imp.arms)
    self.assertTrue(new_arm in mab._imp.lp.arms)
    self.assertTrue(new_arm in mab._imp.lp.arm_to_expectation.keys())
def test_lints_knearest(self):
    """LinTS (alpha=1) with 4 nearest neighbors on standardized ad data predicts ads 1 and 2."""
    # Historical data: ad shown, revenue obtained and the customer features
    train_df = pd.DataFrame({
        'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
        'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
        'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
        'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                       0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
        'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
    })

    # Customers to predict for
    test_df = pd.DataFrame({
        'age': [37, 52],
        'click_rate': [0.5, 0.6],
        'subscriber': [0, 1]
    })

    # Fit the scaler on the training features and reuse it for the test features
    feature_columns = ['age', 'click_rate', 'subscriber']
    scaler = StandardScaler()
    train_features = np.asarray(train_df[feature_columns], dtype='float64')
    train = scaler.fit_transform(train_features)
    test_features = np.asarray(test_df, dtype='float64')
    test = scaler.transform(test_features)

    arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                             decisions=train_df['ad'],
                             rewards=train_df['revenues'],
                             learning_policy=LearningPolicy.LinTS(alpha=1),
                             neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
                             context_history=train,
                             contexts=test,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    self.assertEqual(arms, [1, 2])
def test_copy_arms(self):
    """Per-cluster learning policies share the very same arms object as the MAB, before and after add_arm."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
    two_clusters = NeighborhoodPolicy.Clusters(2)

    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=greedy,
                             neighborhood_policy=two_clusters,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    # Identity (not equality) check for each of the two cluster policies
    for cluster in (0, 1):
        self.assertTrue(mab.arms is mab._imp.lp_list[cluster].arms)

    mab.add_arm(5)

    # The shared reference must survive adding an arm
    for cluster in (0, 1):
        self.assertTrue(mab.arms is mab._imp.lp_list[cluster].arms)
def test_add_arm(self):
    """Adding an arm to a Clusters bandit with Popularity registers it on the MAB, its implementer and the first cluster policy."""
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
    popularity = LearningPolicy.Popularity()
    two_clusters = NeighborhoodPolicy.Clusters(2)

    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=popularity,
                             neighborhood_policy=two_clusters,
                             context_history=history,
                             contexts=queries,
                             seed=123456,
                             num_run=1,
                             is_predict=True)

    new_arm = 5
    mab.add_arm(new_arm)

    # The new arm must be visible at every level that tracks arms
    first_cluster_policy = mab._imp.lp_list[0]
    self.assertTrue(new_arm in mab.arms)
    self.assertTrue(new_arm in mab._imp.arms)
    self.assertTrue(new_arm in first_cluster_policy.arms)
    self.assertTrue(new_arm in first_cluster_policy.arm_to_expectation.keys())
# Revenues observed for the test customers
test_df_revenue = pd.Series([7, 13])

# Standardize the features: fit the scaler on training data, reuse it for test data
scaler = StandardScaler()
train_features = train_df[['age', 'click_rate', 'subscriber']].values.astype('float64')
train = scaler.fit_transform(train_features)
test = scaler.transform(test_df.values.astype('float64'))

########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Neighborhood of radius 5 combined with UCB1 learning (alpha 1.25)
radius = MAB(
    arms=ads,
    learning_policy=LearningPolicy.UCB1(alpha=1.25),
    neighborhood_policy=NeighborhoodPolicy.Radius(radius=5),
)

# Train on the ads previously shown and the revenues they generated
radius.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)

# Best next ad for each test customer
prediction = radius.predict(test)

# Per-ad expectations learned from past revenues
expectations = radius.predict_expectations(test)

# Report and verify the outcome
print("Radius: ", prediction, " ", expectations)
assert prediction == [4, 4]
def test_invalid_n_tables_type(self):
    """A string n_tables for LSHNearest is expected to raise TypeError."""
    with self.assertRaises(TypeError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.LSHNearest(n_tables='string')
        MAB([0, 1], greedy, bad_policy)
def test_invalid_learning_policy(self):
    """Passing a neighborhood policy in the learning-policy position is expected to raise TypeError."""
    with self.assertRaises(TypeError):
        not_a_learning_policy = NeighborhoodPolicy.Radius(radius=12)
        MAB([0, 1], not_a_learning_policy)
def test_invalid_clusters_num(self):
    """A Clusters policy with n_clusters=1 is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.Clusters(n_clusters=1)
        MAB([0, 1], greedy, bad_policy)
def test_invalid_k(self):
    """A KNearest policy with k=0 is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.KNearest(k=0)
        MAB([0, 1], greedy, bad_policy)
def test_invalid_minibatch(self):
    """A non-boolean is_minibatch for Clusters is expected to raise TypeError."""
    with self.assertRaises(TypeError):
        # The keyword is `is_minibatch`; a misspelled `minibatch` keyword would
        # presumably be rejected by the Clusters(...) call itself, so the MAB
        # type validation would never be exercised.
        MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
            NeighborhoodPolicy.Clusters(is_minibatch=0))
def test_invalid_radius(self):
    """A Radius policy with a negative radius is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.Radius(radius=-1)
        MAB([0, 1], greedy, bad_policy)
def test_invalid_radius_no_nhood_sum(self):
    """A Radius policy with no_nhood_prob_of_arm=[0, 0] is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.Radius(radius=1, no_nhood_prob_of_arm=[0, 0])
        MAB([0, 1], greedy, bad_policy)
class BaseTest(unittest.TestCase):
    """Common fixtures and helpers for the MAB test cases.

    Provides lists of valid policy configurations, a ``predict`` helper that
    builds, trains and queries a MAB in one call, and an approximate list
    comparison assertion.
    """

    # A list of valid learning policies
    lps = (
        [LearningPolicy.EpsilonGreedy()]
        + [LearningPolicy.EpsilonGreedy(epsilon=eps) for eps in (0, 0.0, 0.5, 1, 1.0)]
        + [LearningPolicy.Random(), LearningPolicy.Softmax()]
        + [LearningPolicy.Softmax(tau=tau) for tau in (0.1, 0.5, 1, 1.0, 5.0)]
        + [LearningPolicy.ThompsonSampling(), LearningPolicy.UCB1()]
        + [LearningPolicy.UCB1(alpha=alpha) for alpha in (0, 0.0, 0.5, 1, 1.0, 5)]
    )

    # LinTS and LinUCB configurations: every alpha for l2_lambda=1, then for l2_lambda=0.5
    para_lps = (
        [LearningPolicy.LinTS(alpha=alpha, l2_lambda=l2)
         for l2 in (1, 0.5) for alpha in (0.00001, 0.5, 1)]
        + [LearningPolicy.LinUCB(alpha=alpha, l2_lambda=l2)
           for l2 in (1, 0.5) for alpha in (0, 0.5, 1)]
    )

    # A list of valid context policies
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5),
    ]

    # Cluster-based context policies, with and without mini-batching
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True),
    ]

    @staticmethod
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy, LearningPolicy.Random,
                               LearningPolicy.Softmax, LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]],
                               np.ndarray, pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]],
                        np.ndarray, pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Build a MAB, train it, and query it ``num_run`` times.

        The model is created from the given arms, policies, seed, n_jobs and
        backend, then fitted on ``decisions``, ``rewards`` and
        ``context_history``.

        When ``is_predict`` is true the model's ``predict`` is called on
        ``contexts``; otherwise ``predict_expectations`` is called.

        Returns a tuple ``(result, mab)``. ``result`` is the single query
        result when ``num_run == 1`` and the list of all query results
        otherwise.
        """
        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test: choose the query method once, then repeat it num_run times
        query = mab.predict if is_predict else mab.predict_expectations
        outcomes = [query(contexts) for _ in range(num_run)]

        if num_run == 1:
            return outcomes[0], mab
        return outcomes, mab

    def assertListAlmostEqual(self, list1, list2):
        """Assert that two sequences have equal length and pairwise almost-equal values."""
        left = list1 if isinstance(list1, list) else list(list1)
        right = list2 if isinstance(list2, list) else list(list2)

        self.assertEqual(len(left), len(right))

        # Lengths match at this point, so zip visits every pair
        for left_val, right_val in zip(left, right):
            self.assertAlmostEqual(left_val, right_val)
def test_invalid_metric(self):
    """A Radius policy with metric='linear' is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.Radius(metric='linear')
        MAB([0, 1], greedy, bad_policy)
####################################
# Different Bandits for Simulation
####################################

print('Starting simulation 1\n')


def binarize(decision, reward):
    """Turn a reward into a success flag: <= 50 for decision 0, >= 220 for any other decision."""
    if decision == 0:
        return reward <= 50
    return reward >= 220


def _named_learning_policies():
    """Yield fresh (name, learning policy) pairs, one at a time, for the simulated bandits."""
    yield 'Random', LearningPolicy.Random()
    yield 'UCB1', LearningPolicy.UCB1(1)
    yield 'ThompsonSampling', LearningPolicy.ThompsonSampling(binarize)
    yield 'EpsilonGreedy', LearningPolicy.EpsilonGreedy(epsilon=.15)
    yield 'Softmax', LearningPolicy.Softmax()


n_jobs = 2

# Every learning policy paired with a radius-10 neighborhood
contextual_mabs = [
    (label, MAB([0, 1], policy, NeighborhoodPolicy.Radius(10), n_jobs=n_jobs))
    for label, policy in _named_learning_policies()
]

# The same learning policies without any neighborhood policy
context_free_mabs = [
    (label, MAB([0, 1], policy, n_jobs=n_jobs))
    for label, policy in _named_learning_policies()
]

# One contextual and one context-free random bandit side by side
mixed = [
    ('RandomRadius', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
    ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
]
def test_invalid_radius_no_nhood_type_ann(self):
    """An LSHNearest policy with a dict as no_nhood_prob_of_arm is expected to raise TypeError."""
    with self.assertRaises(TypeError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.LSHNearest(no_nhood_prob_of_arm={})
        MAB([0, 1], greedy, bad_policy)
def test_invalid_n_dimensions_value(self):
    """An LSHNearest policy with n_dimensions=0 is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        greedy = LearningPolicy.EpsilonGreedy(epsilon=0)
        bad_policy = NeighborhoodPolicy.LSHNearest(n_dimensions=0)
        MAB([0, 1], greedy, bad_policy)
# Online update of the model with the latest predictions and their revenues
linucb.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Make a new arm available to the model
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Neighborhood of radius 1 combined with LinUCB learning (alpha 1)
radius = MAB(
    arms=ads,
    learning_policy=LearningPolicy.LinUCB(alpha=1),
    neighborhood_policy=NeighborhoodPolicy.Radius(radius=1),
)

# Train on the ads previously shown and the revenues they generated
radius.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)

# Best next ad for each test customer
prediction = radius.predict(test)

# Per-ad expectations learned from past revenues
expectations = radius.predict_expectations(test)

# Report and verify the outcome
print("Radius: ", prediction, " ", expectations)
assert prediction == [1, 2]