Пример #1
0
def test_creps_variance():
    """Run C-REPS with a large initial variance and check the best result.

    The optimizer starts from a one-element zero vector with
    ``variance=100.0`` and a fixed seed. The maximum of the values returned
    by ``eval_loop`` must be greater than -1e-10.

    Returns
    -------
    The unmodified result of ``eval_loop``.
    """
    start = np.zeros(1)
    optimizer = CREPSOptimizer(start, variance=100.0, random_state=0)
    # init(1, 1): presumably (n_params, n_context_dims) -- TODO confirm
    # against the optimizer API.
    optimizer.init(1, 1)

    results = eval_loop(start, optimizer)
    best = results.max()
    assert_less(-1e-10, best)
    return results
Пример #2
0
def test_creps_baseline():
    """Run C-REPS with quadratic context features and a quadratic baseline.

    ``eval_loop`` receives ``baseline_fct`` mapping its argument to its
    square. The maximum of the values returned by ``eval_loop`` must be
    greater than -1e-7.

    Returns
    -------
    The unmodified result of ``eval_loop``.
    """
    def squared(x):
        # Baseline handed to eval_loop: the square of its argument.
        return x**2

    start = np.zeros(1)
    optimizer = CREPSOptimizer(
        start, context_features="quadratic", random_state=0)
    # init(1, 1): presumably (n_params, n_context_dims) -- TODO confirm.
    optimizer.init(1, 1)

    results = eval_loop(start, optimizer, baseline_fct=squared)
    best = results.max()
    assert_less(-1e-7, best)
    return results
Пример #3
0
def test_creps():
    """Run C-REPS with default settings and check the best result.

    Only the seed is fixed. The maximum of the values returned by
    ``eval_loop`` must be greater than -1e-8.

    Returns
    -------
    The unmodified result of ``eval_loop``.
    """
    start = np.zeros(1)
    optimizer = CREPSOptimizer(start, random_state=0)
    # init(1, 1): presumably (n_params, n_context_dims) -- TODO confirm.
    optimizer.init(1, 1)

    results = eval_loop(start, optimizer)
    best = results.max()
    assert_less(-1e-8, best)
    return results
Пример #4
0
def test_record_feedbacks():
    """Recorded feedbacks must equal the values returned by ``learn()``.

    The controller is built with ``record_feedbacks=True`` and
    ``accumulate_feedbacks=False``; afterwards ``ctrl.feedbacks_`` is
    compared element-wise with the result of ``ctrl.learn()``.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)

    ctrl = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        accumulate_feedbacks=False,
        record_feedbacks=True,
    )

    learned = ctrl.learn()
    assert_array_equal(learned, ctrl.feedbacks_)
Пример #5
0
def test_record_outputs():
    """Check the number of returns and the shape of the recorded outputs.

    With ``record_outputs=True`` the controller is expected to yield 10
    returns from ``learn()`` and to store outputs that form an array of
    shape ``(10, 1, 0)``.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)

    ctrl = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        record_outputs=True,
    )

    learned = ctrl.learn()
    assert_equal(len(learned), 10)

    recorded_shape = np.array(ctrl.outputs_).shape
    assert_equal(recorded_shape, (10, 1, 0))
Пример #6
0
def test_learn_controller_cmaes_sphere():
    """Learn for 200 episodes and check the results on 11 test contexts.

    Test contexts are 11 evenly spaced values in [-5, 5], arranged as a
    column. Every entry of the last element of ``ctrl.test_results_`` must
    be greater than -1e-5.

    NOTE(review): despite "cmaes" in the name, the optimizer used here is
    ``CREPSOptimizer``.
    """
    contexts = np.linspace(-5, 5, 11).reshape(-1, 1)

    optimizer = CREPSOptimizer(initial_params=np.zeros(1), random_state=0)
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)
    ctrl = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        n_episodes=200,
        n_episodes_before_test=200,
        test_contexts=contexts,
    )
    ctrl.learn()

    final_results = ctrl.test_results_[-1]
    for result in final_results:
        assert_greater(result, -1e-5)
Пример #7
0
def test_context_cannot_be_set():
    """``learn()`` must raise when the environment ignores the context.

    The environment below overrides ``request_context`` so that it always
    draws a random context instead of using the requested one. Running the
    controller with test contexts is expected to raise an exception whose
    message matches "could not set context".
    """
    class EnvironmentWithRandomContext(ContextualObjectiveFunction):
        """Objective function that answers every request with a random context."""

        def request_context(self, _):
            # The requested context (second argument) is deliberately unused.
            drawn = np.random.randn(self.n_context_dims)
            self.context = drawn
            return drawn

    contexts = np.linspace(-5, 5, 11).reshape(-1, 1)

    optimizer = CREPSOptimizer(initial_params=np.zeros(1), random_state=0)
    environment = EnvironmentWithRandomContext()
    behavior_search = JustContextualOptimizer(optimizer)
    ctrl = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        n_episodes=2,
        n_episodes_before_test=1,
        test_contexts=contexts,
    )

    assert_raises_regexp(Exception, "could not set context", ctrl.learn)

# --- Settings shared by both optimizers --------------------------------
random_state = np.random.RandomState(0)
initial_params = 4.0 * np.ones(1)
n_samples_per_update = 30
variance = 0.03
context_features = "quadratic"

# --- Plot layout --------------------------------------------------------
# n_generations and n_rows determine the figure size below; n_generations
# is also the bound of the loop that follows this setup.
n_generations = 16
n_rows = 4
colors = {"C-CMA-ES": "r", "C-REPS": "g"}
test_contexts = np.arange(-6, 6, 0.1)

# --- Optimizers under comparison (both seeded with 0) -------------------
ccmaes = CCMAESOptimizer(
    initial_params=initial_params,
    n_samples_per_update=n_samples_per_update,
    variance=variance,
    context_features=context_features,
    random_state=0,
)
creps = CREPSOptimizer(
    initial_params=initial_params,
    n_samples_per_update=n_samples_per_update,
    train_freq=n_samples_per_update,
    variance=variance,
    epsilon=2.0,
    context_features=context_features,
    random_state=0,
)
opts = {"C-CMA-ES": ccmaes, "C-REPS": creps}
for opt in opts.values():
    # init(1, 1): presumably (n_params, n_context_dims) -- TODO confirm.
    opt.init(1, 1)

# Buffer for a single parameter vector and one reward list per optimizer.
params = np.empty(1)
rewards = {name: [] for name in opts}

plt.figure(figsize=(n_generations * 3 / n_rows, 3 * n_rows))
for it in range(n_generations):
Пример #9
0
def test_controller_creps_objective():
    """A controller with default settings must yield 10 returns.

    The controller wraps a ``ContextualObjectiveFunction`` and a C-REPS
    optimizer; only the length of the result of ``learn()`` is checked.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)

    ctrl = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
    )

    learned = ctrl.learn()
    assert_equal(len(learned), 10)