Example #1
    def test_invalid_add_arm_scaler(self):
        scaler = StandardScaler()
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))

        # The scaler above was never fitted, so adding a new arm with it should be rejected
        with self.assertRaises(TypeError):
            mab.add_arm(2, scaler=deepcopy(scaler))
Example #2
    def test_add_arm_scaler(self):
        scaler = StandardScaler()
        scaler.fit(
            np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]).astype('float64'))
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
        mab.add_arm(2, scaler=deepcopy(scaler))
Example #3
    def test_invalid_add_arm(self):
        mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
        with self.assertRaises(ValueError):
            mab.add_arm(None)
        with self.assertRaises(ValueError):
            mab.add_arm(np.nan)
        with self.assertRaises(ValueError):
            mab.add_arm(np.inf)
        # Arm 3 already exists, so adding it again is invalid
        with self.assertRaises(ValueError):
            mab.add_arm(3)
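
By contrast, a new, previously unseen arm is accepted. A minimal sketch of the valid flow, assuming the standard MABWiser import path and using hypothetical decision/reward values:

from mabwiser.mab import MAB, LearningPolicy

mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
mab.fit(decisions=[1, 2, 3], rewards=[0, 1, 1])    # hypothetical historical data
mab.add_arm(4)                                     # a new, valid arm is accepted
mab.partial_fit(decisions=[4, 4], rewards=[0, 1])  # hypothetical observations for the new arm
prediction = mab.predict()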
Example #4
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])

# Online update of model
radius.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with the new arm
radius.add_arm(6)

########################################################
# KNearest Neighborhood Policy with UCB1 Learning Policy
########################################################

# KNearest context policy with k equal to 5 and UCB1 learning with alpha of 1.25
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))

# Learn from previous ads shown and revenues generated
knearest.fit(decisions=train_df['ad'],
             rewards=train_df['revenues'],
             contexts=train)
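
The fitted KNearest model can then be queried in the same way as the Radius model above. A minimal usage sketch, assuming `test` still holds the scaled test contexts and `test_df_revenue` the observed revenues from the earlier steps:

# Predict the next best ad within each test context's 5 nearest neighbors
prediction = knearest.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = knearest.predict_expectations(test)

print("KNearest: ", prediction, " ", expectations)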
Example #5
# Expected revenue of each layout learnt from historical data based on the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert (prediction == 1)

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online updating of the model
greedy.partial_fit(additional_layouts, additional_revenues)

# Adding a new layout option
greedy.add_arm(3)

#################################################
# Randomized Popularity Learning Policy
#################################################

# Randomized Popularity learning policy that selects arms
# with weighted probability based on the mean reward for each arm
popularity = MAB(arms=options,
                 learning_policy=LearningPolicy.Popularity(),
                 seed=123456)

# Learn from previous layouts decisions and revenues generated
popularity.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
prediction = popularity.predict()
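
The weighted-probability selection described in the comment above can be illustrated outside the library. A rough sketch of the idea, assuming (for illustration only) that selection probabilities are the per-arm mean rewards normalized to sum to one, with hypothetical layout/revenue data:

import numpy as np

# Hypothetical historical data (illustration only, not the library's internals)
layouts = [1, 1, 1, 2, 2, 2]
revenues = [10, 17, 22, 9, 4, 0]

# Mean observed reward per arm
arms = sorted(set(layouts))
mean_reward = {arm: np.mean([r for d, r in zip(layouts, revenues) if d == arm])
               for arm in arms}

# Normalize mean rewards into selection probabilities and sample an arm
total = sum(mean_reward.values())
probabilities = [mean_reward[arm] / total for arm in arms]
rng = np.random.default_rng(123456)
print(mean_reward, probabilities, rng.choice(arms, p=probabilities))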
Example #6
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert (prediction == [5, 2])

# Online update of model
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with the new arm
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius context policy with radius equal to 1 and LinUCB learning with alpha of 1
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)
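
Conceptually, the Radius policy applies the LinUCB learner only to historical observations whose contexts lie within the given radius of the incoming context. A rough sketch of that neighborhood selection with hypothetical scaled contexts (an illustration of the idea, not the library's implementation):

import numpy as np

# Hypothetical scaled training contexts and one incoming context
train_contexts = np.array([[0.10, 0.20],
                           [2.00, 2.00],
                           [0.15, 0.25]])
new_context = np.array([0.12, 0.22])

# Keep only the rows within Euclidean distance 1 of the new context
distances = np.linalg.norm(train_contexts - new_context, axis=1)
neighborhood = distances <= 1.0
print(neighborhood)  # [ True False  True]; the learning policy sees only these rows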