示例#1
0
def test_ucb_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke-test ``UCBPolicy`` by running it through ``simulate_cb``.

    The arguments are presumably supplied by pytest fixtures defined
    elsewhere (TODO confirm). ``context_dim`` is accepted but not used
    by this test.
    """
    # Build the single policy under test.
    policy = UCBPolicy(n_actions=n_actions, lr=0.01)

    # The simulation takes a list of policies; results are indexed in
    # the same way, so entry 0 is read back below.
    outcome = simulate_cb(dataset, n_samples, [policy])
    regret = outcome[0]["simple_regret"]

    # Sanity bound only: the run completed without an operational error.
    assert regret > -1.0
示例#2
0
def test_epsilon_greedy_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke-test ``EpsilonGreedyPolicy`` by running it through ``simulate_cb``.

    The arguments are presumably supplied by pytest fixtures defined
    elsewhere (TODO confirm). ``context_dim`` is accepted but not used
    by this test.
    """
    # Build the single policy under test.
    policy = EpsilonGreedyPolicy(n_actions=n_actions, lr=0.1, epsilon=0.1)

    # The simulation takes a list of policies; entry 0 of the results
    # is the one inspected below.
    outcome = simulate_cb(dataset, n_samples, [policy])
    regret = outcome[0]["simple_regret"]

    # Sanity bound only: the run completed without an operational error.
    assert regret > -1.0
示例#3
0
def test_linucb_policy(n_samples, n_actions, context_dim, dataset):
    """Run one ``LinUCBPolicy`` through ``simulate_cb`` and sanity-check it.

    The arguments are presumably supplied by pytest fixtures defined
    elsewhere (TODO confirm).
    """
    # Constructor settings for the policy under test.
    settings = {
        "n_actions": n_actions,
        "context_dim": context_dim,
        "delta": 0.25,
        "train_starts_at": 500,
        "train_freq": 50,
    }
    policy = LinUCBPolicy(**settings)

    outcome = simulate_cb(dataset, n_samples, [policy])
    regret = outcome[0]["simple_regret"]

    # Original author's note: the policy "must avoid getting stuck at no
    # eating"; they were unsure whether that holds for synthetic data.
    # The check itself is only a loose lower bound on the simple regret.
    assert regret > -1.0
示例#4
0
def test_linear_gaussian_thompson_sampling_policy(n_samples, n_actions,
                                                  context_dim, dataset):
    """Run one ``LinearGaussianThompsonSamplingPolicy`` through ``simulate_cb``.

    The arguments are presumably supplied by pytest fixtures defined
    elsewhere (TODO confirm).
    """
    # Constructor settings for the policy under test.
    settings = {
        "n_actions": n_actions,
        "context_dim": context_dim,
        "eta_prior": 6.0,
        "lambda_prior": 0.25,
        "train_starts_at": 500,
        "posterior_update_freq": 50,
    }
    policy = LinearGaussianThompsonSamplingPolicy(**settings)

    outcome = simulate_cb(dataset, n_samples, [policy])
    regret = outcome[0]["simple_regret"]

    # Original author's note: the policy "must avoid getting stuck at no
    # eating"; they were unsure whether that holds for synthetic data.
    # The check itself is only a loose lower bound on the simple regret.
    assert regret > -1.0