Example #1
def test_cmaes_get_best_params_best():
    opt = CMAESOptimizer()
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
    opt.set_evaluation_feedback(np.array([0.0]))
    best_params = opt.get_best_parameters(method="best")
    assert_array_almost_equal(params, best_params)
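These snippets exercise CMAESOptimizer's ask/tell-style interface: init(n_params) fixes the dimensionality, get_next_parameters(params) writes a candidate into a preallocated array, set_evaluation_feedback(...) reports its reward, and get_best_parameters() returns the incumbent. A minimal self-contained sketch of that loop (the sphere objective and the 300-evaluation budget are illustrative assumptions, not taken from the snippets):

import numpy as np
from bolero.optimizer import CMAESOptimizer

def sphere(x):
    # Reward-style feedback: higher is better, optimum 0 at the origin.
    return -np.sum(x ** 2)

opt = CMAESOptimizer(variance=1.0, random_state=0)
opt.init(2)
params = np.empty(2)
for _ in range(300):  # illustrative evaluation budget
    opt.get_next_parameters(params)
    opt.set_evaluation_feedback([sphere(params)])
best_params = opt.get_best_parameters(method="best")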
Example #2
def test_cmaes_stop_fitness_variance():
    opt = CMAESOptimizer(n_samples_per_update=5)
    opt.init(2)
    params = np.empty(2)
    it = 0
    while not opt.is_behavior_learning_done():
        opt.get_next_parameters(params)
        opt.set_evaluation_feedback([0.0])
        it += 1
    assert_equal(it, 6)
Example #3
def test_record_feedbacks():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt),
                      record_feedbacks=True, accumulate_feedbacks=False)
    returns = ctrl.learn()
    assert_array_equal(returns, ctrl.feedbacks_)
Example #4
def learn(name, setup_fun, run):
    ik, beh, mp_keys, mp_values = setup_fun(cfg.x0, cfg.g, cfg.execution_time,
                                            cfg.dt)
    env = ViaPointEnvironment(ik,
                              cfg.x0,
                              cfg.via_points,
                              cfg.execution_time,
                              cfg.dt,
                              cfg.qlo,
                              cfg.qhi,
                              penalty_vel=cfg.penalty_vel,
                              penalty_acc=cfg.penalty_acc,
                              penalty_via_point=cfg.penalty_via_point)

    opt = CMAESOptimizer(variance=cfg.variance[name], random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env,
                            behavior_search=bs,
                            n_episodes=n_episodes,
                            verbose=0)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    reward = controller.episode_with(best)

    return name, rewards, reward.sum()
Example #5
def learn(setup_fun, variance):
    #ik, beh, mp_keys, mp_values = cfg.make_approx_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    #ik, beh, mp_keys, mp_values = cfg.make_exact_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    ik, beh, mp_keys, mp_values = cfg.make_joint_dmp(cfg.x0, cfg.g,
                                                     cfg.execution_time,
                                                     cfg.dt)

    env = Pendulum(x0=cfg.x0,
                   g=cfg.g,
                   execution_time=cfg.execution_time,
                   dt=cfg.dt)

    opt = CMAESOptimizer(variance=variance, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env,
                            behavior_search=bs,
                            n_episodes=n_episodes,
                            verbose=2)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    best_params = best.get_params()
    np.save("best_params_pendulum_joint.npy", best_params)
    reward = controller.episode_with(best)

    ax = env.plot()
    plt.show()
Example #6
def test_noncontextual_behavior_search():
    opt = CMAESOptimizer(initial_params=np.zeros(1))
    assert_raises_regexp(TypeError,
                         "requires contextual behavior search",
                         ContextualController,
                         environment=ContextualObjectiveFunction(),
                         behavior_search=JustOptimizer(opt))
Example #7
def test_cmaes_stop_conditioning():
    def objective(x):
        return -1e10 * x[1] ** 2

    opt = CMAESOptimizer(random_state=0)
    opt.init(2)
    params = np.empty(2)
    it = 0
    while not opt.is_behavior_learning_done():
        opt.get_next_parameters(params)
        opt.set_evaluation_feedback(objective(params))
        it += 1
    assert_less(it, 600)
Example #8
def test_learn_controller_cmaes_sphere():
    opt = CMAESOptimizer(initial_params=np.zeros(2), random_state=0)
    ctrl = Controller(environment=ObjectiveFunction(random_state=0),
                      behavior_search=JustOptimizer(opt),
                      n_episodes=200)
    returns = ctrl.learn()
    dist_to_maximum = returns.max() - ctrl.environment.get_maximum_feedback()
    assert_greater(dist_to_maximum, -1e-5)
Example #9
def test_record_inputs():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt),
                      record_inputs=True)
    returns = ctrl.learn()
    assert_equal(len(returns), 10)
    assert_equal(np.array(ctrl.inputs_).shape, (10, 1, 2))
Example #10
def test_record_test_results():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt),
                      n_episodes_before_test=10, n_episodes=100)
    ctrl.learn()
    results = np.array(ctrl.test_results_)
    assert_equal(results.shape[0], 10)
    assert_true(np.all(results[:-1] <= results[1:]))
Example #11
def test_cmaes_respects_bounds():
    opt = CMAESOptimizer(bounds=[[-5, 4], [10, 20]], variance=10000.0,
                         random_state=0)
    opt.init(2)
    params = np.empty(2)
    opt.get_next_parameters(params)
    assert_true(np.all(params >= np.array([-5, 10])))
    assert_true(np.all(params <= np.array([4, 20])))
Example #12
def learn(name, run, setup_fun, variance):
    ik, beh, mp_keys, mp_values = setup_fun(
            cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    env = Pendulum(
        x0=cfg.x0, g=cfg.g,
        execution_time=cfg.execution_time, dt=cfg.dt
    )

    opt = CMAESOptimizer(variance=variance, random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    reward = controller.episode_with(best)

    return name, rewards, reward.sum()
Example #13
def test_cmaes_no_initial_params():
    opt = CMAESOptimizer()
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
Example #14
def test_controller_cmaes_sphere():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt))
    returns = ctrl.learn()
    assert_equal(len(returns), 10)
Example #15
import numpy as np
from bolero.wrapper import CppBLLoader
from bolero.controller import Controller
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from dmp_behavior import DMPBehavior
from first_feedback import FirstFeedback


if __name__ == "__main__":
    environment_name = "throwing_environment"
    bll = CppBLLoader()
    bll.load_library(environment_name)
    env = bll.acquire_contextual_environment(environment_name)
    env = FirstFeedback(env, random_state=0)
    env.request_context(np.array([1.5, 1.0]))

    beh = DMPBehavior(dt=0.01, execution_time=0.5, n_features=5)

    opt = CMAESOptimizer(variance=200.0 ** 2, active=True, random_state=0)

    bs = BlackBoxSearch(beh, opt)

    ctrl = Controller(environment=env, behavior_search=bs, n_episodes=200,
                      verbose=2)
    meta_params = [np.array([0.0, -0.8, -0.7]), np.array([0.5, 0.5, 0.5]),
                   np.array([0.0, 0.5, 0.5])]
    print(ctrl.learn(["x0", "g", "gd"], meta_params))
Example #16
import os
import numpy as np
import matplotlib.pyplot as plt
from throw_environment import ThrowEnvironment
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.controller import Controller
import throw_config as cfg


ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_approx_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
#ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_exact_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
#ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_joint_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)


env = ThrowEnvironment(start=cfg.x0j, random_state=0, verbose=1)
opt = CMAESOptimizer(initial_params=initial_params, variance=var,
                     random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=800,
                        verbose=2)
rewards = controller.learn(mp_keys, mp_values)

best = bs.get_best_behavior()
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
plt.show()
Example #17
beh.set_meta_parameters(["g", "x0"], [g, x0])
beh.imitate(np.tile(np.linspace(0, 1, 101), 2).reshape((2, 101, -1)))

env = OptimumTrajectory(x0,
                        g,
                        execution_time,
                        dt,
                        obstacles,
                        penalty_goal_dist=10000.0,
                        penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0,
                        penalty_length=10.,
                        hide_acc_from_interface=True,
                        use_covar=True)
opt = CMAESOptimizer(variance=0.1**2,
                     random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=n_episodes,
                        record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)

plt.figure(figsize=(8, 5))
ax = plt.subplot(121)
ax.set_title("Optimization progress")
Example #18
to solve the problem, and policy search algorithms usually work very well in
this domain.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import LinearBehavior
from bolero.controller import Controller

beh = LinearBehavior()
env = OpenAiGym("CartPole-v0", render=False, seed=0)
opt = CMAESOptimizer(variance=10.0**2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=300)

rewards = controller.learn()
controller.episode_with(bs.get_best_behavior())

plt.figure()
ax = plt.subplot(111)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_ylim(-10, 210)
plt.show()
Example #19
    e = Ellipse(xy=mean, width=width, height=height, angle=np.degrees(angle),
                ec=color, fc="none", lw=3, ls="dashed")
    plt.gca().add_artist(e)


n_generations = 20
n_samples_per_update = 20
n_params = 2


for objective_name in ["Sphere", "SchaffersF7"]:
    objective = FUNCTIONS[objective_name](0, n_params)

    initial_params = 4.0 * np.ones(n_params)
    cmaes = CMAESOptimizer(
        initial_params=initial_params, variance=0.1, active=True,
        n_samples_per_update=n_samples_per_update,
        bounds=np.array([[-5, 5], [-5, 5]]), random_state=0)
    cmaes.init(n_params)

    n_rows = 4
    plt.figure(figsize=(n_generations * 3 / n_rows, 3 * n_rows))
    path = []
    for it in range(n_generations):
        plt.subplot(n_rows, int(n_generations / n_rows), it + 1)
        plot_objective()
        last_mean = cmaes.mean.copy()
        path.append(last_mean)
        last_cov = cmaes.var * cmaes.cov

        X = np.empty((n_samples_per_update, n_params))
        F = np.empty((n_samples_per_update, 1))
Example #20
def test_cmaes_dimensions_mismatch():
    opt = CMAESOptimizer(initial_params=np.zeros(5))
    assert_raises_regexp(ValueError, "Number of dimensions", opt.init, 10)
Example #21
def test_cmaes_diagonal_cov():
    opt = CMAESOptimizer(covariance=np.zeros(10))
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
Example #22
def test_acmaes():
    x = np.zeros(n_dims)
    opt = CMAESOptimizer(x, active=True, random_state=0, log_to_stdout=False)
    opt.init(n_dims)
    r = eval_loop(x, opt, n_dims)
    assert_greater(r.max(), -1e-5)
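Here eval_loop and n_dims are defined elsewhere in the test module this snippet was taken from; Example #24 below shows the tail of such a loop. A plausible sketch under that assumption (the objective and the evaluation budget are guesses, not the original code):

import numpy as np
from bolero.environment.objective_functions import FUNCTIONS  # cf. Example #19

def eval_loop(x, opt, n_dims, n_evals=1000):
    # Constructor takes (random_state, n_dims), as in Example #19.
    objective = FUNCTIONS["Sphere"](0, n_dims)
    results = np.empty(n_evals)
    for i in range(n_evals):
        opt.get_next_parameters(x)
        results[i] = objective.feedback(x)
        opt.set_evaluation_feedback(results[i])
    # Shift by the optimum so the best reachable value is 0 (cf. Example #24).
    return results - objective.f_opt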
Example #23
                          cfg.via_points,
                          cfg.execution_time,
                          cfg.dt,
                          cfg.qlo,
                          cfg.qhi,
                          penalty_vel=cfg.penalty_vel,
                          penalty_acc=cfg.penalty_acc,
                          penalty_via_point=cfg.penalty_via_point,
                          log_to_stdout=True)

if os.path.exists("initial_params.txt"):
    initial_params = np.loadtxt("initial_params.txt")
else:
    initial_params = None
opt = CMAESOptimizer(initial_params=initial_params,
                     variance=cfg.variance["approxik"],
                     random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=1000,
                        verbose=2)
rewards = controller.learn(mp_keys, mp_values)

best = bs.get_best_behavior()
best_params = best.get_params()
np.save("best_params_viapoint_joint.npy", best_params)
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
Example #24
        results[i] = objective.feedback(x)
        opt.set_evaluation_feedback(results[i])
    return results - objective.f_opt


n_dims = 2
n_iter = 800
x = np.zeros(n_dims)

optimizers = {
    "None":
    NoOptimizer(x),
    "Random":
    RandomOptimizer(x, random_state=0),
    "CMA-ES":
    CMAESOptimizer(x, bounds=np.array([[-5, 5]]), random_state=0),
    "aCMA-ES":
    CMAESOptimizer(x, bounds=np.array([[-5, 5]]), active=True, random_state=0),
    "REPS":
    REPSOptimizer(x, random_state=0),
    "ACM-ES":
    ACMESOptimizer(x, random_state=0),
    "XNES":
    XNESOptimizer(x, random_state=0),
    "CEM":
    CEMOptimizer(x, random_state=0)
}

plt.figure(figsize=(12, 8))
plt.xlabel("Function evaluations")
plt.ylabel("$f(x)$")