def test_numpy_rewards(self):
    """Popularity expectations accept a numpy rewards array and normalize to 1."""
    exp, mab = self.predict(arms=["one", "two"],
                            decisions=["one", "one", "one", "two", "two", "two"],
                            rewards=np.array([1, 1, 1, 0, 1, 1]),
                            learning_policy=LearningPolicy.Popularity(),
                            seed=123456,
                            num_run=1,
                            is_predict=False)

    # Expectations behave like probabilities: they sum to one.
    self.assertAlmostEqual(1.0, exp["one"] + exp["two"])
    self.assertAlmostEqual(exp["one"], 0.6)
    self.assertAlmostEqual(exp["two"], 0.4)
def test_3arm_equal_prob(self):
    """Three arms with identical rewards get equal (1/3) normalized expectations."""
    history = [1, 1, 1, 2, 2, 2, 3, 3, 3]
    payoffs = [1, 1, 1, 1, 1, 1, 1, 1, 1]

    arm, mab = self.predict(arms=[1, 2, 3],
                            decisions=history,
                            rewards=payoffs,
                            learning_policy=LearningPolicy.Popularity(),
                            seed=123456,
                            num_run=5,
                            is_predict=True)
    self.assertEqual(arm, [1, 3, 1, 3, 2])

    exp, mab = self.predict(arms=[1, 2, 3],
                            decisions=history,
                            rewards=payoffs,
                            learning_policy=LearningPolicy.Popularity(),
                            seed=123456,
                            num_run=1,
                            is_predict=False)

    # Normalized probabilities are 1/3 each and sum to 1.0.
    self.assertAlmostEqual(1.0, exp[1] + exp[2] + exp[3])
    self.assertAlmostEqual(exp[1], exp[2])
    self.assertAlmostEqual(exp[2], exp[3])
    self.assertAlmostEqual(exp[3], exp[1])
def test_unused_arm(self):
    """An arm with no training data gets zero expectation under Popularity."""
    exp, mab = self.predict(arms=[1, 2, 3],
                            decisions=[1, 1, 1, 2, 2, 2],
                            rewards=[1, 1, 1, 0, 1, 1],
                            learning_policy=LearningPolicy.Popularity(),
                            seed=123456,
                            num_run=1,
                            is_predict=False)

    # Probabilities sum to one; arm 3 was never played so it stays at 0.
    self.assertAlmostEqual(1.0, exp[1] + exp[2] + exp[3])
    self.assertAlmostEqual(exp[1], 0.6)
    self.assertAlmostEqual(exp[2], 0.4)
    self.assertAlmostEqual(exp[3], 0.0)
def test_tau1_expectations(self):
    """Softmax with tau=1 produces the expected per-arm expectation values."""
    expectations, mab = self.predict(arms=[1, 2, 3],
                                     decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                     rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                     learning_policy=LearningPolicy.Softmax(tau=1),
                                     seed=123456,
                                     num_run=1,
                                     is_predict=False)

    # Exact softmax probabilities for the trained history above.
    self.assertDictEqual(expectations, {1: 0.4083425853583662,
                                        2: 0.20965007375301267,
                                        3: 0.3820073408886212})
def test_ucb_t5(self):
    """UCB1(alpha=1) consistently picks 'three' for this history and seed."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        learning_policy=LearningPolicy.UCB1(alpha=1),
        seed=23,
        num_run=4,
        is_predict=True)

    self.assertEqual(len(predictions), 4)
    self.assertEqual(predictions, ['three', 'three', 'three', 'three'])
def test_greedy_t6(self):
    """Epsilon-greedy (epsilon=0.5) with seed 17 yields a mixed arm sequence."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[2, 7, 7, 9, 1, 3, 1, 2, 6, 4],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
        seed=17,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, ['three', 'one', 'three', 'one'])
def test_greedy_t5(self):
    """Epsilon-greedy (epsilon=0.25) with seed 123456 reproduces known picks."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, ['three', 'one', 'one', 'one'])
def test_thompson_t5(self):
    """Thompson sampling with seed 123456 reproduces a known arm sequence."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        learning_policy=LearningPolicy.ThompsonSampling(),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, ['one', 'one', 'one', 'three'])
def test_add_arm(self):
    """Adding an arm after fit registers it in the mab and its implementation."""
    predictions, mab = self.predict(
        arms=[1, 2, 3],
        decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3],
        rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
        seed=123456,
        num_run=4,
        is_predict=True)

    mab.add_arm(4)

    # The new arm is visible everywhere and starts with zero accumulated reward.
    self.assertTrue(4 in mab.arms)
    self.assertTrue(4 in mab._imp.arms)
    self.assertTrue(4 in mab._imp.arm_to_expectation.keys())
    self.assertTrue(mab._imp.arm_to_sum[4] == 0)
def test_seed_epsilon50(self):
    """Different seeds give different epsilon-greedy prediction sequences."""
    history = [1, 1, 1, 2, 2, 2, 3, 3, 3]
    payoffs = [0, 0, 0, 0, 0, 0, 1, 1, 1]

    predictions, mab = self.predict(
        arms=[1, 2, 3],
        decisions=history,
        rewards=payoffs,
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
        seed=123456,
        num_run=5,
        is_predict=True)
    self.assertListEqual(predictions, [3, 3, 3, 3, 3])

    # Change the seed and assert a different result.
    predictions, mab = self.predict(
        arms=[1, 2, 3],
        decisions=history,
        rewards=payoffs,
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
        seed=123,
        num_run=5,
        is_predict=True)
    self.assertListEqual(predictions, [3, 1, 3, 3, 2])
def test_softmax_t5(self):
    """Softmax with tau=1.5 and seed 123456 reproduces a known arm sequence."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        learning_policy=LearningPolicy.Softmax(tau=1.5),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, ['one', 'three', 'one', 'three'])
def test_approximate(self):
    """LSH-nearest neighborhood policy: predict, partial_fit, then add an arm."""
    train_df = pd.DataFrame({
        'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
        'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
        'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
        'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                       0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
        'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
    })

    # Test data for a new prediction.
    test_df = pd.DataFrame({
        'age': [37, 52],
        'click_rate': [0.5, 0.6],
        'subscriber': [0, 1]
    })
    test_df_revenue = pd.Series([7, 13])

    # Scale the context features before fitting/predicting.
    scaler = StandardScaler()
    train = scaler.fit_transform(
        np.asarray(train_df[['age', 'click_rate', 'subscriber']], dtype='float64'))
    test = scaler.transform(np.asarray(test_df, dtype='float64'))

    arms, mab = self.predict(
        arms=[1, 2, 3, 4, 5],
        decisions=train_df['ad'],
        rewards=train_df['revenues'],
        learning_policy=LearningPolicy.UCB1(alpha=1.25),
        neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_tables=5, n_dimensions=5),
        context_history=train,
        contexts=test,
        seed=123456,
        num_run=1,
        is_predict=True)
    self.assertEqual(arms, [1, 1])

    # Online update with the observed revenue for the predicted arms.
    mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

    # A brand-new arm can still be added after partial_fit.
    mab.add_arm(6)
    self.assertTrue(6 in mab.arms)
    self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
def test_ucb_t6(self):
    """UCB1(alpha=1.25) consistently picks 'three' for this history and seed."""
    predictions, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two', 'three',
                   'one', 'three', 'two'],
        rewards=[2, 7, 7, 9, 1, 3, 1, 2, 6, 4],
        learning_policy=LearningPolicy.UCB1(alpha=1.25),
        seed=17,
        num_run=4,
        is_predict=True)

    self.assertEqual(len(predictions), 4)
    self.assertEqual(predictions, ['three', 'three', 'three', 'three'])
def test_softmax_t9(self):
    """Softmax works with datetime objects used as arm labels."""
    # Dates used as arms.
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    predictions, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a],
        rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
        learning_policy=LearningPolicy.Softmax(tau=1.25),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [a, c, a, c])
def test_add_arm(self):
    """A newly added arm under Softmax starts cold and shares the cold expectation."""
    exp, mab = self.predict(arms=[1, 2, 3, 4],
                            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                            learning_policy=LearningPolicy.Softmax(tau=1),
                            seed=123456,
                            num_run=1,
                            is_predict=False)

    mab.add_arm(5)

    # New arm is registered with zero mean; its expectation matches the other
    # untrained arm (4).
    self.assertTrue(5 in mab.arms)
    self.assertTrue(5 in mab._imp.arms)
    self.assertTrue(5 in mab._imp.arm_to_expectation.keys())
    self.assertTrue(mab._imp.arm_to_mean[5] == 0)
    self.assertTrue(mab._imp.arm_to_expectation[4] == mab._imp.arm_to_expectation[5])
def test_greedy_t10(self):
    """Epsilon-greedy works with datetime objects used as arm labels."""
    # Dates used as arms.
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    predictions, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
        rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.33),
        seed=7,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [b, a, a, c])
def test_softmax_t10(self):
    """Softmax with a low tau (0.33) and datetime arms reproduces known picks."""
    # Dates used as arms.
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    predictions, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
        rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
        learning_policy=LearningPolicy.Softmax(tau=0.33),
        seed=7,
        num_run=5,
        is_predict=True)

    self.assertEqual(predictions, [b, b, b, b, c])
def test_ts_series(self):
    """Decisions and rewards can be supplied as pandas Series columns."""
    df = pd.DataFrame({
        'layouts': [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1],
        'revenues': [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]
    })

    arm, mab = self.predict(
        arms=[1, 2],
        decisions=df['layouts'],
        rewards=df['revenues'],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
        seed=123456,
        num_run=1,
        is_predict=True)

    self.assertEqual(arm, 1)
def test_thompson_df_list(self):
    """Thompson sampling accepts a Series of decisions with a plain-list rewards."""
    df = pd.DataFrame({"decisions": [1, 1, 1, 2, 2, 2, 3, 3, 3],
                       "rewards": [0, 0, 0, 0, 0, 0, 1, 1, 1]})

    # NOTE: rewards are deliberately passed as a literal list (not df["rewards"])
    # to exercise mixed input types.
    predictions, mab = self.predict(
        arms=[1, 2, 3],
        decisions=df["decisions"],
        rewards=[0, 0, 1, 0, 1, 0, 1, 1, 1],
        learning_policy=LearningPolicy.ThompsonSampling(),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [2, 3, 3, 3])
def test_thompson_t4(self):
    """Thompson sampling with a custom reward binarizer reproduces known picks."""
    dec_to_threshold = {1: 5, 2: 5, 4: 5}

    def binarize(dec, reward):
        # Rewards at or above the per-arm threshold count as success.
        return reward >= dec_to_threshold[dec]

    predictions, mab = self.predict(
        arms=[1, 2, 4],
        decisions=[1, 1, 4, 4, 2, 2, 1, 1, 4, 2, 1, 4, 1, 2, 4],
        rewards=[7, 9, 10, 20, 2, 5, 8, 15, 17, 11, 0, 5, 2, 9, 3],
        learning_policy=LearningPolicy.ThompsonSampling(binarize),
        seed=23,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [2, 2, 4, 2])
def test_greedy_t9(self):
    """Epsilon-greedy with datetime arms converges on the best date arm."""
    # Dates used as arms.
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    predictions, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a],
        rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
        seed=123456,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [c, c, c, c])
def test_thompson_t8(self):
    """Thompson sampling with a string-arm binarizer consistently picks 'c'."""
    dec_to_threshold = {'a': 1, 'b': 1, 'c': 1}

    def binarize(dec, reward):
        # All arms share a threshold of 1.
        return reward >= dec_to_threshold[dec]

    predictions, mab = self.predict(
        arms=['a', 'b', 'c'],
        decisions=['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a'],
        rewards=[-1.25, 0.7, 12, 10, 12, 9.2, -1, -10, 4, 0],
        learning_policy=LearningPolicy.ThompsonSampling(binarize),
        seed=9,
        num_run=5,
        is_predict=True)

    self.assertEqual(predictions, ['c', 'c', 'c', 'c', 'c'])
def test_add_arm_new_function(self):
    """add_arm can install a replacement binarizer covering the new arm."""

    def bin1(dec, reward):
        # Per-arm binarization; decisions other than 0/1 fall through (None).
        if dec == 0:
            return 1 if reward > 50 else 0
        elif dec == 1:
            return 1 if reward < 20 else 0

    arm, mab = self.predict(
        arms=[0, 1],
        decisions=[1, 0, 1, 1, 0],
        rewards=[10, 4, 3, 70, 6],
        learning_policy=LearningPolicy.ThompsonSampling(bin1),
        seed=123456,
        num_run=1,
        is_predict=True)

    self.assertIs(mab._imp.binarizer, bin1)

    def bin2(dec, reward):
        # Same as bin1 but also handles the new arm 2.
        if dec == 0:
            return 1 if reward > 50 else 0
        elif dec == 1:
            return 1 if reward < 20 else 0
        elif dec == 2:
            return 1 if reward >= 1 else 0

    mab.add_arm(2, bin2)

    # New arm starts with the Beta(1, 1) prior counts and bin2 is installed.
    self.assertTrue(mab._imp.arm_to_fail_count[2] == 1)
    self.assertTrue(mab._imp.arm_to_success_count[2] == 1)
    self.assertIs(mab._imp.binarizer, bin2)
def test_ucb_t10(self):
    """UCB1 works with datetime objects used as arm labels."""
    # Dates used as arms.
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    predictions, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
        rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
        learning_policy=LearningPolicy.UCB1(alpha=1),
        seed=7,
        num_run=4,
        is_predict=True)

    self.assertEqual(len(predictions), 4)
    self.assertEqual(predictions, [b, b, b, b])
def test_epsilon25_df_list(self):
    """Epsilon-greedy accepts a Series of decisions alongside plain-list rewards."""
    df = pd.DataFrame({"decisions": [1, 1, 1, 2, 2, 2, 3, 3, 3],
                       "rewards": [0, 0, 0, 0, 0, 0, 1, 1, 1]})

    # Rewards deliberately passed as a literal list to exercise mixed inputs.
    predictions, mab = self.predict(
        arms=[1, 2, 3],
        decisions=df["decisions"],
        rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
        seed=7,
        num_run=4,
        is_predict=True)

    self.assertEqual(predictions, [2, 3, 3, 3])
def test_greedy0_d2(self):
    """Pure-greedy (epsilon=0) with LSH neighborhoods over 2 hash dimensions."""
    contexts_seen = [[0, 1, 2, 3, 5],
                     [1, 1, 1, 1, 1],
                     [0, 0, 1, 0, 0],
                     [0, 2, 2, 3, 5],
                     [1, 3, 1, 1, 1],
                     [0, 0, 0, 0, 0],
                     [0, 1, 4, 3, 5],
                     [0, 1, 2, 4, 5],
                     [1, 2, 1, 1, 3],
                     [0, 2, 1, 0, 0]]

    predictions, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
        neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=2),
        context_history=contexts_seen,
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=True)

    self.assertListEqual(predictions, [3, 1])