Example #1
 def test_invalid_arm_not_list(self):
     with self.assertRaises(TypeError):
         MAB(1, LearningPolicy.EpsilonGreedy(epsilon=0))
Example #2
 def test_invalid_clusters_num(self):
     with self.assertRaises(ValueError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.Clusters(n_clusters=1))
Example #3
 def test_invalid_seed(self):
     with self.assertRaises(TypeError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0), seed=[0, 1])
Example #4
 def test_invalid_minibatch(self):
     with self.assertRaises(TypeError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.Clusters(minibatch=0))
Example #5
 def test_invalid_arm_list(self):
     with self.assertRaises(ValueError):
         MAB([0], LearningPolicy.EpsilonGreedy(epsilon=0))
Example #6
 def test_invalid_epsilon_type(self):
     with self.assertRaises(TypeError):
         MAB(['a', 'b'], LearningPolicy.EpsilonGreedy(epsilon="one"))
Example #7
# (snippet truncated: ads, train_df, and test_df are defined earlier in the full example)
test_df_revenue = pd.Series([7, 13])

# Scale the training and test data
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate',
                                       'subscriber']].values.astype('float64'))
test = scaler.transform(test_df.values.astype('float64'))

########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Radius contextual policy with radius equal to 5 and UCB1 learning with alpha 1.25
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.UCB1(alpha=1.25),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=5))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
Example #8
 def test_invalid_metric(self):
     with self.assertRaises(ValueError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.Radius(metric='linear'))
Example #9
 def test_invalid_radius(self):
     with self.assertRaises(ValueError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.Radius(radius=-1))
Example #10
 def test_invalid_tau_value(self):
     with self.assertRaises(ValueError):
         MAB(['a', 'b'], LearningPolicy.Softmax(tau=0))
Example #11
 def test_invalid_context_policy(self):
     with self.assertRaises(TypeError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             LearningPolicy.EpsilonGreedy(epsilon=0))
Example #12
 def test_invalid_tau_type(self):
     with self.assertRaises(TypeError):
         MAB(['a', 'b'], LearningPolicy.Softmax(tau="one"))
Example #13
 def test_invalid_ucb_alpha_value(self):
     with self.assertRaises(ValueError):
         MAB(['a', 'b'], LearningPolicy.UCB1(alpha=-2))
Example #14
from mabwiser.mab import MAB, LearningPolicy

######################################################################################

# Arms
options = [1, 2]

# Historical data of layout decisions and the revenues they generated
layouts = [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1]
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

###################################
# Epsilon Greedy Learning Policy
###################################

# Epsilon Greedy learning policy with random exploration set to 15%
greedy = MAB(arms=options,
             learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
             seed=123456)

# Learn from previous layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout
prediction = greedy.predict()

# Expected revenue of each layout, learned from historical data under the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert(prediction == 1)
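MABWiser bandits can also be updated incrementally after the initial fit. Below is a minimal sketch using the library's partial_fit method; the follow-up decision and revenue values are hypothetical, chosen only for illustration.

# Sketch: update the fitted bandit online with newly observed data
# (these decision/revenue values are hypothetical)
greedy.partial_fit(decisions=[1, 2], rewards=[13, 6])

# Re-query the bandit after the online update
print("After partial_fit: ", greedy.predict())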
Example #15
 def test_invalid_learning_policy(self):
     with self.assertRaises(TypeError):
         MAB([0, 1], NeighborhoodPolicy.Radius(radius=12))
Example #16
 def test_invalid_radius_no_nhood_sum(self):
     with self.assertRaises(ValueError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.Radius(radius=1,
                                       no_nhood_prob_of_arm=[0, 0]))
Example #17
 def test_invalid_decisions_rewards_length(self):
     decisions = [1, 1, 2, 2, 2, 3, 3]
     rewards = [0, 0, 0, 0, 0, 0, 1, 1, 1]
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(ValueError):
         mab.fit(decisions, rewards)
Example #18
 def test_invalid_k(self):
     with self.assertRaises(ValueError):
         MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
             NeighborhoodPolicy.KNearest(k=0))
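For contrast, a minimal sketch of a configuration that passes this validation; k must be a positive integer, and k=3 here is an illustrative choice.

# Sketch: a valid KNearest configuration for comparison
mab = MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
          NeighborhoodPolicy.KNearest(k=3))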
Example #19
 def test_rewards_null_array(self):
     decisions = np.asarray([1, 1, 1, 2, 2, 2, 3, 3, 3])
     rewards = np.asarray([0, 0, 0, 0, 0, 0, 1, 1, None])
     mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
     with self.assertRaises(TypeError):
         mab.fit(decisions, rewards)
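Since fit rejects rewards containing None, below is a minimal sketch of one way to drop incomplete observations before training; the filtering step is an assumption for illustration, not something MABWiser does on its own.

# Sketch: keep only observations whose reward is present, then fit
mask = np.asarray([r is not None for r in rewards])
mab.fit(decisions[mask], rewards[mask].astype(float))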
Example #20
# (snippet truncated: ads, train_df, and the start of the test_df definition appear earlier in the full example)
    'subscriber': [0, 1]
})
test_df_revenue = pd.Series([7, 13])

# Scale the training and test data
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate',
                                       'subscriber']].values.astype('float64'))
test = scaler.transform(test_df.values.astype('float64'))

##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB learning policy with alpha 1.25 and l2_lambda 1
linucb = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1))

# Learn from previous ads shown and revenues generated
linucb.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert(prediction == [5, 2])
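MABWiser also provides a linear Thompson sampling learning policy. Below is a minimal sketch of swapping it into the same contextual workflow; LearningPolicy.LinTS is the library's API, but the alpha value here is an illustrative choice.

# Sketch: the same contextual workflow with Linear Thompson Sampling
lints = MAB(arms=ads,
            learning_policy=LearningPolicy.LinTS(alpha=1.0))
lints.fit(decisions=train_df['ad'],
          rewards=train_df['revenues'],
          contexts=train)
print("LinTS: ", lints.predict(test))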