def test_creps_variance():
    """Run C-REPS with a large initial variance on a one-element problem.

    The optimizer starts from a zero parameter vector with ``variance=100.0``
    and a fixed seed. The test passes when the maximum of the values
    returned by ``eval_loop`` is greater than ``-1e-10``.

    Returns
    -------
    The object returned by ``eval_loop``.
    """
    initial_params = np.zeros(1)
    optimizer = CREPSOptimizer(initial_params, variance=100.0, random_state=0)
    # presumably (n_params, n_context_dims) -- confirm against the optimizer
    optimizer.init(1, 1)

    rewards = eval_loop(initial_params, optimizer)

    best_reward = rewards.max()
    assert_less(-1e-10, best_reward)
    return rewards
def test_creps_baseline():
    """Run C-REPS with quadratic context features and a baseline function.

    ``eval_loop`` receives ``baseline_fct``, a function that squares its
    argument. The test passes when the maximum of the values returned by
    ``eval_loop`` is greater than ``-1e-7``.

    Returns
    -------
    The object returned by ``eval_loop``.
    """
    initial_params = np.zeros(1)
    optimizer = CREPSOptimizer(
        initial_params,
        context_features="quadratic",
        random_state=0,
    )
    # presumably (n_params, n_context_dims) -- confirm against the optimizer
    optimizer.init(1, 1)

    rewards = eval_loop(
        initial_params,
        optimizer,
        baseline_fct=lambda x: x**2,
    )

    best_reward = rewards.max()
    assert_less(-1e-7, best_reward)
    return rewards
def test_creps():
    """Run C-REPS with default settings (apart from a fixed seed).

    The test passes when the maximum of the values returned by
    ``eval_loop`` is greater than ``-1e-8``.

    Returns
    -------
    The object returned by ``eval_loop``.
    """
    initial_params = np.zeros(1)
    optimizer = CREPSOptimizer(initial_params, random_state=0)
    # presumably (n_params, n_context_dims) -- confirm against the optimizer
    optimizer.init(1, 1)

    rewards = eval_loop(initial_params, optimizer)

    best_reward = rewards.max()
    assert_less(-1e-8, best_reward)
    return rewards
def test_record_feedbacks():
    """Check that recorded feedbacks match the values returned by ``learn``.

    The controller is built with ``accumulate_feedbacks=False`` and
    ``record_feedbacks=True``; afterwards ``ctrl.feedbacks_`` must be
    array-equal to the result of ``ctrl.learn()``.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)
    controller = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        accumulate_feedbacks=False,
        record_feedbacks=True,
    )

    learned_returns = controller.learn()

    assert_array_equal(learned_returns, controller.feedbacks_)
def test_record_outputs():
    """Check the number of returns and the shape of the recorded outputs.

    With ``record_outputs=True`` the controller must return 10 values from
    ``learn`` and ``ctrl.outputs_`` must convert to an array of shape
    ``(10, 1, 0)``.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)
    controller = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        record_outputs=True,
    )

    learned_returns = controller.learn()

    assert_equal(len(learned_returns), 10)
    recorded_shape = np.array(controller.outputs_).shape
    assert_equal(recorded_shape, (10, 1, 0))
def test_learn_controller_cmaes_sphere():
    """Learn for 200 episodes and check the last test results.

    Eleven evenly spaced test contexts in [-5, 5] are handed to the
    controller. After learning, every entry of the last element of
    ``ctrl.test_results_`` must be greater than ``-1e-5``.

    NOTE(review): despite "cmaes" in the name, the optimizer constructed
    here is ``CREPSOptimizer``.
    """
    # Column vector of contexts: shape (11, 1).
    test_contexts = np.linspace(-5, 5, 11)[:, np.newaxis]

    optimizer = CREPSOptimizer(initial_params=np.zeros(1), random_state=0)
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)
    controller = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        n_episodes=200,
        n_episodes_before_test=200,
        test_contexts=test_contexts,
    )
    controller.learn()

    final_test_results = controller.test_results_[-1]
    for result in final_test_results:
        assert_greater(result, -1e-5)
def test_context_cannot_be_set():
    """Expect ``learn`` to fail when the environment ignores the context.

    The local environment subclass overrides ``request_context`` so that it
    discards the requested context and draws a random one instead. Calling
    ``ctrl.learn`` must then raise an exception whose message matches
    "could not set context".
    """

    class EnvironmentWithRandomContext(ContextualObjectiveFunction):
        """Environment that replaces any requested context by a random one."""

        def request_context(self, _):
            # The requested context is deliberately ignored.
            self.context = np.random.randn(self.n_context_dims)
            return self.context

    # Column vector of contexts: shape (11, 1).
    test_contexts = np.linspace(-5, 5, 11)[:, np.newaxis]

    optimizer = CREPSOptimizer(initial_params=np.zeros(1), random_state=0)
    environment = EnvironmentWithRandomContext()
    behavior_search = JustContextualOptimizer(optimizer)
    controller = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
        n_episodes=2,
        n_episodes_before_test=1,
        test_contexts=test_contexts,
    )

    assert_raises_regexp(
        Exception,
        "could not set context",
        controller.learn,
    )
# Script fragment: sets up a C-CMA-ES and a C-REPS optimizer with matching
# settings, prepares bookkeeping containers and opens a matplotlib figure.

# NOTE(review): random_state is not referenced in the visible part of this
# script; both optimizers below are seeded with the literal 0 instead.
random_state = np.random.RandomState(0)
initial_params = 4.0 * np.ones(1)
n_samples_per_update = 30
variance = 0.03
context_features = "quadratic"

# Both optimizers share initial parameters, samples per update, variance,
# context features and seed; C-REPS additionally gets train_freq and epsilon.
ccmaes = CCMAESOptimizer(initial_params=initial_params,
                         n_samples_per_update=n_samples_per_update,
                         variance=variance,
                         context_features=context_features,
                         random_state=0)
creps = CREPSOptimizer(initial_params=initial_params,
                       n_samples_per_update=n_samples_per_update,
                       train_freq=n_samples_per_update,
                       variance=variance, epsilon=2.0,
                       context_features=context_features,
                       random_state=0)
opts = {"C-CMA-ES": ccmaes, "C-REPS": creps}
for opt in opts.values():
    # presumably (n_params, n_context_dims) -- confirm against the optimizers
    opt.init(1, 1)

n_generations = 16
n_rows = 4
# NOTE(review): params is allocated uninitialized and is not used in the
# visible part of this script.
params = np.empty(1)
# One empty list per optimizer name.
rewards = dict([(k, []) for k in opts.keys()])
test_contexts = np.arange(-6, 6, 0.1)
# Matplotlib color code per optimizer name.
colors = {"C-CMA-ES": "r", "C-REPS": "g"}
# Figure size is derived from the number of generations and rows
# (presumably one subplot per generation laid out in n_rows rows -- confirm).
plt.figure(figsize=(n_generations * 3 / n_rows, 3 * n_rows))
# NOTE(review): the body of the following loop is not visible in this excerpt.
for it in range(n_generations):
def test_controller_creps_objective():
    """Check that a default contextual controller returns 10 values.

    A C-REPS optimizer is wrapped in ``JustContextualOptimizer`` and run
    against ``ContextualObjectiveFunction`` with the controller's default
    settings; ``learn`` must return a sequence of length 10.
    """
    optimizer = CREPSOptimizer(initial_params=np.zeros(1))
    environment = ContextualObjectiveFunction()
    behavior_search = JustContextualOptimizer(optimizer)
    controller = ContextualController(
        environment=environment,
        behavior_search=behavior_search,
    )

    learned_returns = controller.learn()

    assert_equal(len(learned_returns), 10)