Example #1
    def test_invalid_add_arm_scaler(self):
        scaler = StandardScaler()
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))

        # The scaler above was never fitted, so adding a new arm with it should be rejected
        with self.assertRaises(TypeError):
            mab.add_arm(2, scaler=deepcopy(scaler))
Example #2
    def test_add_arm_scaler(self):
        scaler = StandardScaler()
        scaler.fit(
            np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]).astype('float64'))
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
        mab.add_arm(2, scaler=deepcopy(scaler))
Example #3
    def test_invalid_add_arm(self):
        mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
        with self.assertRaises(ValueError):
            mab.add_arm(None)
        with self.assertRaises(ValueError):
            mab.add_arm(np.nan)
        with self.assertRaises(ValueError):
            mab.add_arm(np.inf)
        # Arm 3 already exists, so adding it again is invalid
        with self.assertRaises(ValueError):
            mab.add_arm(3)
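
By contrast, a new, previously unseen arm is accepted. A minimal sketch of the valid flow, assuming the standard MABWiser import path and using hypothetical decision/reward values:

from mabwiser.mab import MAB, LearningPolicy

mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
mab.fit(decisions=[1, 2, 3], rewards=[0, 1, 1])    # hypothetical historical data
mab.add_arm(4)                                     # a new, valid arm is accepted
mab.partial_fit(decisions=[4, 4], rewards=[0, 1])  # hypothetical observations for the new arm
prediction = mab.predict()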
Example #4
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])

# Online update of model
radius.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with the new arm
radius.add_arm(6)

########################################################
# KNearest Neighborhood Policy with UCB1 Learning Policy
########################################################

# KNearest context policy with k equal to 5 and UCB1 learning with alpha of 1.25
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))

# Learn from previous ads shown and revenues generated
knearest.fit(decisions=train_df['ad'],
             rewards=train_df['revenues'],
             contexts=train)
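
The fitted KNearest model can then be queried in the same way as the Radius model above. A minimal usage sketch, assuming `test` still holds the scaled test contexts and `test_df_revenue` the observed revenues from the earlier steps:

# Predict the next best ad within each test context's 5 nearest neighbors
prediction = knearest.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = knearest.predict_expectations(test)

print("KNearest: ", prediction, " ", expectations)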
Example #5
# Expected revenue of each layout learnt from historical data based on the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert (prediction == 1)

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online updating of the model
greedy.partial_fit(additional_layouts, additional_revenues)

# Adding a new layout option
greedy.add_arm(3)

#################################################
# Randomized Popularity Learning Policy
#################################################

# Randomized Popularity learning policy that selects arms
# with weighted probability based on the mean reward for each arm
popularity = MAB(arms=options,
                 learning_policy=LearningPolicy.Popularity(),
                 seed=123456)

# Learn from previous layouts decisions and revenues generated
popularity.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
prediction = popularity.predict()
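
The weighted-probability selection described in the comment above can be illustrated outside the library. A rough sketch of the idea, assuming (for illustration only) that selection probabilities are the per-arm mean rewards normalized to sum to one, with hypothetical layout/revenue data:

import numpy as np

# Hypothetical historical data (illustration only, not the library's internals)
layouts = [1, 1, 1, 2, 2, 2]
revenues = [10, 17, 22, 9, 4, 0]

# Mean observed reward per arm
arms = sorted(set(layouts))
mean_reward = {arm: np.mean([r for d, r in zip(layouts, revenues) if d == arm])
               for arm in arms}

# Normalize mean rewards into selection probabilities and sample an arm
total = sum(mean_reward.values())
probabilities = [mean_reward[arm] / total for arm in arms]
rng = np.random.default_rng(123456)
print(mean_reward, probabilities, rng.choice(arms, p=probabilities))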
Example #6
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert (prediction == [5, 2])

# Online update of model
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with the new arm
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius context policy with radius equal to 1 and LinUCB learning with alpha of 1
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)
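
Conceptually, the Radius policy applies the LinUCB learner only to historical observations whose contexts lie within the given radius of the incoming context. A rough sketch of that neighborhood selection with hypothetical scaled contexts (an illustration of the idea, not the library's implementation):

import numpy as np

# Hypothetical scaled training contexts and one incoming context
train_contexts = np.array([[0.10, 0.20],
                           [2.00, 2.00],
                           [0.15, 0.25]])
new_context = np.array([0.12, 0.22])

# Keep only the rows within Euclidean distance 1 of the new context
distances = np.linalg.norm(train_contexts - new_context, axis=1)
neighborhood = distances <= 1.0
print(neighborhood)  # [ True False  True]; the learning policy sees only these rows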