Example #1
 def test_invalid_no_context_history(self):
     decisions = [1, 1, 1]
     rewards = [0, 0, 0]
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0),
               NeighborhoodPolicy.Radius(2))
     with self.assertRaises(TypeError):
         mab.fit(decisions, rewards)
Example #2
 def test_invalid_no_context_policy(self):
     decisions = [1, 1, 1]
     rewards = [0, 0, 0]
     context_history = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(decisions, rewards, context_history)
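Together with Example #1, this pins down the valid pairing: a neighborhood policy requires a context history at fit time, and a context history requires a neighborhood policy. A minimal sketch of the configuration that fits without raising (illustrative values only):

decisions = [1, 1, 1]
rewards = [0, 0, 0]
context_history = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0),
          NeighborhoodPolicy.Radius(2))
mab.fit(decisions, rewards, context_history)  # neighborhood policy and contexts supplied together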
Example #3
 def test_rewards_inf_df(self):
     history = pd.DataFrame({
         'decision': [1, 1, 1, 2, 2, 2, 3, 3, 3],
         'reward': [0, 0, 0, 0, 0, 0, 1, 1, np.inf]
     })
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(history['decision'], history['reward'])
Example #4
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Sets up a MAB model and runs the given configuration.

        Returns the prediction (or list of predictions) and the mab instance when is_predict is True.
        Returns the expectation (or list of expectations) and the mab instance when is_predict is False.

        Calls the predict or predict_expectations method num_run times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab

        else:

            # Return: expectation(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab
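A minimal usage sketch of this helper, assuming it can be invoked as the plain function defined above; the data values below are illustrative only:

prediction, mab = predict(arms=[1, 2, 3],
                          decisions=[1, 1, 2, 2, 3, 3],
                          rewards=[0, 1, 0, 0, 1, 1],
                          learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15))
# With the defaults num_run=1 and is_predict=True, this returns a single
# prediction together with the fitted MAB instance.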
Example #5
    def test_popularity(self):

        list_of_arms = ['Arm1', 'Arm2']
        decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
        rewards = [20, 17, 25, 9]
        mab = MAB(list_of_arms, LearningPolicy.Popularity())
        mab.fit(decisions, rewards)
        mab.predict()
        self.assertEqual("Arm2", mab.predict())
        self.assertDictEqual(
            {
                'Arm1': 0.38016528925619836,
                'Arm2': 0.6198347107438016
            }, mab.predict_expectations())
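The expectation values asserted above can be reproduced by hand: they are consistent with Popularity normalizing each arm's mean reward (a quick arithmetic check, not library code):

arm1_mean = (20 + 17 + 9) / 3   # mean reward of Arm1
arm2_mean = 25 / 1              # mean reward of Arm2
total = arm1_mean + arm2_mean
print(arm1_mean / total)        # ~0.3802, matching the asserted expectation for Arm1
print(arm2_mean / total)        # ~0.6198, matching the asserted expectation for Arm2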
Example #6
    def test_partial_fit_indices(self):
        seed = 11
        n_dimensions = 5
        n_tables = 5
        rng = np.random.RandomState(seed)
        contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards = np.array([rng.rand() for _ in range(10)])
        lsh = MAB(arms=[0, 1], learning_policy=LearningPolicy.Softmax(),
                  neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
                  seed=seed)
        lsh.fit(decisions, rewards, contexts)
        contexts2 = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions2 = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards2 = np.array([rng.rand() for _ in range(10)])
        lsh.partial_fit(decisions2, rewards2, contexts2)

        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][4], [1, 15, 16])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][12], [9, 10, 11, 19])
Example #7
    def test_tables(self):
        seed = 11
        n_dimensions = 5
        n_tables = 5
        rng = np.random.RandomState(seed)
        contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards = np.array([rng.rand() for _ in range(10)])
        lsh = MAB(arms=[0, 1], learning_policy=LearningPolicy.Softmax(),
                  neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
                  seed=seed)
        for i in range(n_tables):
            self.assertListEqual([], lsh._imp.table_to_plane[i])

        lsh.fit(decisions, rewards, contexts)
        self.assertListAlmostEqual(list(lsh._imp.table_to_plane[0][0]),
                                   [1.74945474, -0.286073, -0.48456513, -2.65331856, -0.00828463])
        self.assertListEqual(list(lsh._imp.table_to_hash_to_index[0].keys()), [1, 4, 5, 12, 13, 14, 15])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][1], [3])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][14], [0, 4, 8])
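The internals checked above (table_to_plane, table_to_hash_to_index) reflect random-hyperplane locality-sensitive hashing: each table holds a random projection of the contexts, every context is reduced to a small integer hash built from the signs of its projections, and contexts that share a hash fall into the same candidate bucket. A minimal sketch of that idea, assuming sign-based bit packing; the hash_context helper and the exact plane shapes are illustrative, not the library's implementation:

import numpy as np

def hash_context(context, planes):
    # planes: (context_dim, n_dimensions) random projection for one table
    bits = (context @ planes) > 0                        # sign of each projection
    return int(np.dot(bits, 2 ** np.arange(len(bits))))  # pack the sign bits into an integer key

rng = np.random.RandomState(11)
planes = rng.standard_normal((7, 5))  # 5 hyperplanes for 7-dimensional contexts (one table)
context = rng.rand(7)
print(hash_context(context, planes))  # bucket key in [0, 2**5)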
Example #8
 def test_rewards_inf_array(self):
     decisions = np.asarray([1, 1, 1, 2, 2, 2, 3, 3, 3])
     rewards = np.asarray([0, 0, 0, 0, 0, 0, 1, 1, np.inf])
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(decisions, rewards)
Example #9
 def test_rewards_null_list(self):
     decisions = [1, 1, 1, 2, 2, 2, 3, 3, 3]
     rewards = [0, 0, 0, 0, 0, 0, 1, 1, None]
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(decisions, rewards)
Example #10
 def test_invalid_decisions_rewards_length(self):
     decisions = [1, 1, 2, 2, 2, 3, 3]
     rewards = [0, 0, 0, 0, 0, 0, 1, 1, 1]
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(ValueError):
         mab.fit(decisions, rewards)
Example #11
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate',
                                       'subscriber']].values.astype('float64'))
test = scaler.transform(test_df.values.astype('float64'))

########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Radius contextual policy with radius equal to 5 and UCB1 learning with alpha 1.25
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.UCB1(alpha=1.25),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=5))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])

# Online update of model
radius.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)
Example #12
# Historical data of layout decisions and corresponding rewards
layouts = [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1]
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

###################################
# Epsilon Greedy Learning Policy
###################################

# Epsilon Greedy learning policy with random exploration set to 15%
greedy = MAB(arms=options,
             learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
             seed=123456)

# Learn from previous layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
prediction = greedy.predict()

# Expected revenue of each layout, learnt from the historical data under the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert (prediction == 1)

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]
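The online update announced by the comment above would then be a single call, following the same partial_fit pattern used in the other examples (sketch):

# Online update of the model with the newly observed layouts and revenues
greedy.partial_fit(decisions=additional_layouts, rewards=additional_revenues)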
Example #13
arm_to_scaler = {}
for arm in arms:
    # Get indices for arm
    indices = np.where(decisions_train == arm)

    # Fit standard scaler
    scaler = StandardScaler()
    scaler.fit(contexts_train[indices])
    arm_to_scaler[arm] = scaler

########################################################
# LinUCB Learning Policy
########################################################

# LinUCB learning policy with alpha 1.25 and n_jobs = -1 (maximum available cores)
linucb = MAB(arms=arms,
             learning_policy=LearningPolicy.LinUCB(
                 alpha=1.25, arm_to_scaler=arm_to_scaler),
             n_jobs=-1)

# Learn from playlists shown and observed click rewards for each arm
linucb.fit(decisions=decisions_train,
           rewards=rewards_train,
           contexts=contexts_train)

# Predict the next best playlist to recommend
prediction = linucb.predict(contexts_test)

# Results
print("LinUCB: ", prediction[:10])
Example #14
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate',
                                       'subscriber']].values.astype('float64'))
test = scaler.transform(test_df.values.astype('float64'))

##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB learning policy with alpha 1.25 and l2_lambda 1
linucb = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1))

# Learn from previous ads shown and revenues generated
linucb.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert (prediction == [5, 2])

# Online update of model
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)