Пример #1
0
    def test_create_args(self):
        """Check ``_create_format`` when the learner is built from a raw argument string.

        Each case pairs the argument string handed to ``VowpalLearner`` with the
        string ``_create_format([1, 2, 3])`` is expected to return:

        * ``--cb_explore_adf`` strings are expected back unchanged, with or
          without ``--random_seed``.
        * ``--cb_explore`` strings (with a count of 10 or with no count) are
          expected back with the count set to 3, the number of actions passed.
        """
        adf_with_seed = "--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1"
        adf_without_seed = "--cb_explore_adf --interactions sa --ignore_linear s --bag 2"
        explore_three = "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1"

        # (arguments given to the learner, expected formatted arguments)
        cases = (
            (adf_with_seed, adf_with_seed),
            ("--cb_explore 10 --interactions sa --ignore_linear s --bag 2 --random_seed 1", explore_three),
            ("--cb_explore --interactions sa --ignore_linear s --bag 2 --random_seed 1", explore_three),
            (adf_without_seed, adf_without_seed),
        )

        for given_args, expected in cases:
            actual = VowpalLearner(given_args)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Пример #2
0
    def test_create_cover(self):
        """Check the argument string produced for a ``cover=2`` learner.

        Covers three seed settings: the default (expects ``--random_seed 1``),
        an explicit ``seed=10`` and ``seed=None`` (expects no seed flag).
        """
        base = "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2"

        # (constructor keyword arguments, expected formatted arguments)
        cases = [
            ({"cover": 2}, base + " --random_seed 1"),
            ({"cover": 2, "seed": 10}, base + " --random_seed 10"),
            ({"cover": 2, "seed": None}, base),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Пример #3
0
    def test_create_softmax(self):
        """Check the argument string produced for a ``softmax=0.5`` learner.

        Covers three seed settings: the default (expects ``--random_seed 1``),
        an explicit ``seed=10`` and ``seed=None`` (expects no seed flag).
        """
        base = "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5"

        # (constructor keyword arguments, expected formatted arguments)
        cases = [
            ({"softmax": 0.5}, base + " --random_seed 1"),
            ({"softmax": 0.5, "seed": 10}, base + " --random_seed 10"),
            ({"softmax": 0.5, "seed": None}, base),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Пример #4
0
    def test_predict_epsilon_dict_context_adf(self):
        """Expect uniform probabilities from a fresh ADF epsilon learner given a dict context."""
        dict_context = {1: 10.2, 2: 3.5}
        learner = VowpalLearner(epsilon=0.05, adf=True, seed=20)

        predicted = learner.predict(1, dict_context, [1, 2, 3, 4])  #type: ignore

        self.assertEqual([0.25] * 4, predicted)
Пример #5
0
    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int, flags: str):
        """Wrap a ``VowpalLearner`` and record the delay/batch configuration.

        Args:
            delay: Stored as ``self.delay``; must be a multiple of ``batchsize``.
            batchsize: Stored as ``self.batchsize``.
            epsilon: Forwarded to ``VowpalLearner`` and stored.
            seed: Forwarded to ``VowpalLearner`` and stored.
            flags: Forwarded to ``VowpalLearner`` and stored.

        Raises:
            AssertionError: If ``delay`` is not a multiple of ``batchsize``.

        NOTE(review): the divisibility check is an ``assert`` and is therefore
        skipped when Python runs with ``-O``; consider an explicit raise.
        """
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)

        self.delay = delay
        self.batchsize = batchsize

        self.epsilon = epsilon
        self.seed = seed
        self.flags = flags

        # Starts empty; how it is filled is up to the other methods of the class.
        self.mem = {}

        leftover = self.delay % self.batchsize
        assert leftover == 0
Пример #6
0
    def test_create_bag(self):
        """Check the argument string produced for a ``bag=2`` learner.

        Covers the default seed, ``seed=10``, ``seed=None`` (no seed flag
        expected) and ``adf=False`` (``--cb_explore 3`` expected instead of
        ``--cb_explore_adf``).
        """
        shared = "--interactions ssa --interactions sa --ignore_linear s --bag 2"
        adf_base = "--cb_explore_adf " + shared
        non_adf_base = "--cb_explore 3 " + shared

        # (constructor keyword arguments, expected formatted arguments)
        cases = [
            ({"bag": 2}, adf_base + " --random_seed 1"),
            ({"bag": 2, "seed": 10}, adf_base + " --random_seed 10"),
            ({"bag": 2, "seed": None}, adf_base),
            ({"bag": 2, "adf": False}, non_adf_base + " --random_seed 1"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Пример #7
0
    def test_predict_epsilon_not_adf_args_error_2(self):
        """Expect an error when a ``--cb_explore`` learner sees a different action count.

        The learner is created from a raw argument string without an action
        count. The first ``predict`` call (four actions) must return the
        epsilon-greedy probabilities; a second call with three actions must
        raise, and the error message must mention ``--cb_explore_adf``.
        """
        learner = VowpalLearner("--cb_explore --epsilon 0.75 --random_seed 20")
        self.assertEqual(
            [0.25 + 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75],
            learner.predict(1, None, [1, 2, 3, 4]))

        # Only the call expected to raise belongs inside the context manager.
        # Wrapping it in assertEqual would let an AssertionError from a value
        # mismatch satisfy assertRaises(Exception) and hide a missing error.
        with self.assertRaises(Exception) as e:
            learner.predict(1, None, [1, 2, 3])

        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn("--cb_explore_adf", str(e.exception))
Пример #8
0
    def test_create_epsilon(self):
        """Check the argument string produced for an ``epsilon=0.1`` learner.

        Covers the default seed, ``seed=10``, ``seed=None`` (no seed flag
        expected) and ``adf=False`` (``--cb_explore 3`` expected instead of
        ``--cb_explore_adf``).
        """
        shared = "--interactions ssa --interactions sa --ignore_linear s --epsilon 0.1"
        adf_base = "--cb_explore_adf " + shared
        non_adf_base = "--cb_explore 3 " + shared

        # (constructor keyword arguments, expected formatted arguments)
        cases = [
            ({"epsilon": 0.1}, adf_base + " --random_seed 1"),
            ({"epsilon": 0.1, "seed": 10}, adf_base + " --random_seed 10"),
            ({"epsilon": 0.1, "seed": None}, adf_base),
            ({"epsilon": 0.1, "adf": False}, non_adf_base + " --random_seed 1"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Пример #9
0
    def test_predict_epsilon_not_adf(self):
        """Expect epsilon-greedy probabilities from a fresh non-ADF learner with epsilon=0.75."""
        # Each of the four actions is expected to get 0.25 * 0.75; the first
        # action is expected to get an additional 0.25 on top of that.
        explore_share = 0.25*0.75
        expected = [0.25+explore_share, explore_share, explore_share, explore_share]

        learner = VowpalLearner(epsilon=0.75, is_adf=False, seed=30)
        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, predicted)
Пример #10
0
    def test_predict_epsilon_adf(self):
        """Expect uniform probabilities from a fresh ADF epsilon learner with no context."""
        learner = VowpalLearner(epsilon=0.05, is_adf=True, seed=20)

        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual([0.25] * 4, predicted)
Пример #11
0
    def test_predict_cover_not_adf(self):
        """Expect a fresh ``cover=5`` learner to put all probability on the first action."""
        learner = VowpalLearner(cover=5, seed=30)

        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual([1, 0, 0, 0], predicted)
Пример #12
0
    def test_predict_bag_not_adf(self):
        """Expect a fresh non-ADF ``bag=5`` learner to put all probability on the first action."""
        learner = VowpalLearner(bag=5, is_adf=False, seed=30)

        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual([1, 0, 0, 0], predicted)
Пример #13
0
    def test_predict_bag_adf(self):
        """Expect uniform probabilities from a fresh ADF ``bag=5`` learner."""
        learner = VowpalLearner(bag=5, is_adf=True, seed=30)

        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual([0.25] * 4, predicted)
Пример #14
0
"""
This is an example script that creates a Benchmark that matches the bandit bakeoff paper.
This script requires that the matplotlib and vowpalwabbit packages be installed.
"""

from coba.learners import RandomLearner, EpsilonLearner, VowpalLearner, UcbTunedLearner, CorralLearner
from coba.benchmarks import Benchmark

if __name__ == '__main__':
    # Load the benchmark definition from the example JSON file.
    benchmark = Benchmark.from_file("./examples/benchmark_short.json")

    # Build the learners one by one, in the order they are evaluated.
    random_learner = RandomLearner()
    epsilon_learner = EpsilonLearner(epsilon=0.025)
    ucb_learner = UcbTunedLearner()
    vowpal_learner = VowpalLearner(bag=5, seed=10)

    # The corral learner gets its own, separate instances of its base learners.
    corral_learner = CorralLearner(
        [VowpalLearner(bag=5, seed=10), UcbTunedLearner()],
        eta=.075,
        T=40000,
        seed=10,
    )

    learners = [
        random_learner,
        epsilon_learner,
        ucb_learner,
        vowpal_learner,
        corral_learner,
    ]

    # Evaluate every learner, passing the log path and seed, then plot the result.
    result = benchmark.evaluate(learners, './examples/bakeoff.log', seed=10)
    result.standard_plot()
Пример #15
0
This is an example script that creates a Benchmark.
This script requires that the matplotlib and vowpalwabbit packages be installed.
"""

from coba.learners import RandomLearner, EpsilonBanditLearner, VowpalLearner
from coba.simulations import ValidationSimulation
from coba.benchmarks import Benchmark

#this line is required by Python in order to use multi-processing
if __name__ == '__main__':

    # Learners to compare. VowpalLearner requires that VowpalWabbit be installed.
    random_learner = RandomLearner()
    epsilon_learner = EpsilonBanditLearner(epsilon=0.025)
    vowpal_learner = VowpalLearner(epsilon=.1)
    learners = [random_learner, epsilon_learner, vowpal_learner]

    # Simulations the learners are tested on: a single validation simulation.
    validation = ValidationSimulation(
        300,
        context_features=True,
        action_features=False,
        seed=1000,
    )
    simulations = [validation]

    # Seeds used to shuffle the simulations: 0, 1, 2 and 3.
    seeds = list(range(4))

    # Combine the simulations and the shuffle seeds into the benchmark.
    benchmark = Benchmark(simulations, shuffle=seeds)
Пример #16
0
def baseLearner():
    """Return a new ``VowpalLearner`` with seed 10, epsilon 0.1 and the ``--coin`` flag."""
    # Imported here rather than at module level, as in the original code.
    from coba.learners import VowpalLearner

    settings = {"seed": 10, "epsilon": 0.1, "flags": '--coin'}
    return VowpalLearner(**settings)
Пример #17
0
    def test_predict_bag_adf(self):
        """Expect uniform probabilities from a fresh ADF ``bag=5`` learner given string actions."""
        string_actions = ['1', '2', '3', '4']
        learner = VowpalLearner(bag=5, adf=True, seed=30)

        predicted = learner.predict(1, None, string_actions)

        self.assertEqual([0.25] * 4, predicted)
Пример #18
0
 def test_predict_epsilon_not_adf_args(self):
     """Expect epsilon-greedy probabilities from a learner built from ``--cb_explore 20`` arguments."""
     # Each of the four actions is expected to get 0.25 * 0.75; the first
     # action is expected to get an additional 0.25 on top of that.
     explore_share = 0.25 * 0.75
     expected = [0.25 + explore_share, explore_share, explore_share, explore_share]

     learner = VowpalLearner("--cb_explore 20 --epsilon 0.75 --random_seed 20")
     predicted = learner.predict(1, None, [1, 2, 3, 4])

     self.assertEqual(expected, predicted)
Пример #19
0
    def test_predict_epsilon_tuple_context_adf(self):
        """Expect uniform probabilities from a fresh ADF epsilon learner given a tuple-pair context."""
        tuple_context = ((1, 2), (10.2, 3.5))
        learner = VowpalLearner(epsilon=0.05, adf=True, seed=20)

        predicted = learner.predict(1, tuple_context, [1, 2, 3, 4])  #type: ignore

        self.assertEqual([0.25] * 4, predicted)
Пример #20
0
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_1,
                         seed=10),
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_2,
                         seed=10),
        LambdaSimulation(2000, contexts, actions, polynomial_reward_1,
                         seed=10),
    ]

    #define a benchmark over the simulations above with batch_size=1 and seeds 0..4
    #NOTE(review): the original comment said the simulation is replayed 15 times;
    #presumably 3 simulations x 5 seeds -- confirm against Benchmark's semantics
    benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5)))

    #create the learner factories; every learner is given seed=10
    learner_factories = [
        RandomLearner(seed=10),
        EpsilonLearner(epsilon=0.025, seed=10),
        UcbTunedLearner(seed=10),
        VowpalLearner(epsilon=0.025, seed=10),
        VowpalLearner(epsilon=0.025, is_adf=False, seed=10),
        VowpalLearner(bag=5, seed=10),
    ]

    #evaluate all learners on the benchmark and plot the result
    benchmark.evaluate(learner_factories).standard_plot()
Пример #21
0
    #report the reference performance levels computed earlier in this scope
    print(f'Random perfomance: {random_perf}')
    print(f'Best performance: {best_perf}')
    epsilon = 0.2
    #blend of best and random performance, weighted by (1 - epsilon) and epsilon
    print(
        f'Best performance with {epsilon} exploration: {best_perf * (1 - epsilon) + random_perf * epsilon}'
    )

    #built once from means and reused below
    actions_objects = get_actions(means)

    #callables handed to LambdaSimulation; t is forwarded to get_context,
    #while the same actions_objects is returned for every t
    contexts = lambda t: get_context(means, t)
    actions = lambda t: actions_objects

    #reward callable taking (c, a) and forwarding them to get_reward
    rewards = lambda c, a: get_reward(means, c, a)

    #define a simulation
    simulations = [
        LambdaSimulation(10000, contexts, actions, rewards, seed=10),
    ]

    #define a benchmark over the single simulation with batch_size=1 and shuffle seeds 0..4
    #NOTE(review): the original comment said "15 times", which does not match
    #1 simulation and 5 seeds; presumably one replay per seed -- confirm
    benchmark = Benchmark(simulations,
                          batch_size=1,
                          shuffle_seeds=list(range(5)))

    #create the learner factories
    learner_factories = [
        RandomLearner(seed=10),
        VowpalLearner(epsilon=0.2, seed=10),
    ]

    #evaluate both learners on the benchmark and plot the result
    benchmark.evaluate(learner_factories).standard_plot()