Example #1
def test_smoke():
    dmp_seq, _ = create_dmp_seq(n_task_dims=1)

    controller = Controller({"Controller":
                             {"record_inputs": True},
                             "Environment":
                             {"type": "bolero.environment.OptimumTrajectory",
                              "x0": np.zeros(1),
                              "g": np.ones(1),
                              "execution_time": 1.0,
                              "dt": 0.01}})
    controller.episode_with(dmp_seq)

    params = np.random.randn(dmp_seq.get_n_params())
    dmp_seq.set_params(params)
    assert_equal(dmp_seq.get_n_params(), dmp_seq.get_params().size)
    assert_equal(dmp_seq.n_weights, 15)
    assert_equal(dmp_seq.get_n_params(), 21)
    params_copy = dmp_seq.get_params()
    assert_array_almost_equal(params, params_copy)
    new_subgoal = [0.5]
    dmp_seq.set_subgoal(2, new_subgoal)
    subgoal = dmp_seq.get_subgoal(2)
    assert_array_almost_equal(new_subgoal, subgoal)
    new_subgoal_velocity = [0.0]
    dmp_seq.set_subgoal_velocity(2, new_subgoal_velocity)
    subgoal_velocity = dmp_seq.get_subgoal_velocity(2)
    assert_array_almost_equal(new_subgoal_velocity, subgoal_velocity)

    X = np.array(controller.inputs_[0])
    assert_equal(len(X), 101)
    assert_almost_equal(X[0, 0], 0.0)
    assert_almost_equal(X[20, 0], 0.5, places=2)
    assert_almost_equal(X[50, 0], 1.0, places=2)
    assert_almost_equal(X[100, 0], 2.0, places=2)
Example #2
def test_missing_behavior_search():
    ctrl = Controller(environment=ObjectiveFunction())
    beh = DummyBehavior(initial_params=np.array([0.0, 0.0]))
    beh.init(0, 2)
    feedback = ctrl.episode_with(beh)
    assert_equal(len(feedback), 1)
    assert_less(feedback[0], ctrl.environment.get_maximum_feedback())
Example #3
def learn(setup_fun, variance):
    #ik, beh, mp_keys, mp_values = cfg.make_approx_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    #ik, beh, mp_keys, mp_values = cfg.make_exact_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    ik, beh, mp_keys, mp_values = cfg.make_joint_dmp(cfg.x0, cfg.g,
                                                     cfg.execution_time,
                                                     cfg.dt)

    env = Pendulum(x0=cfg.x0,
                   g=cfg.g,
                   execution_time=cfg.execution_time,
                   dt=cfg.dt)

    opt = CMAESOptimizer(variance=variance, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env,
                            behavior_search=bs,
                            n_episodes=n_episodes,
                            verbose=2)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    best_params = best.get_params()
    np.save("best_params_pendulum_joint.npy", best_params)
    reward = controller.episode_with(best)

    ax = env.plot()
    plt.show()
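
The parameters saved above can later be reloaded and replayed without repeating the CMA-ES search. The following sketch is hypothetical and untested; it assumes the same cfg module, Pendulum environment, and imports as in the script above, and simply mirrors the replay pattern of Example #10 (load, set_params, reset, episode_with):

# Hypothetical replay of the saved pendulum parameters (not part of the
# original script); follows the pattern used in Example #10.
ik, beh, mp_keys, mp_values = cfg.make_joint_dmp(cfg.x0, cfg.g,
                                                 cfg.execution_time, cfg.dt)
env = Pendulum(x0=cfg.x0, g=cfg.g,
               execution_time=cfg.execution_time, dt=cfg.dt)
controller = Controller(environment=env, verbose=2)

# Assumption: as in Example #10, set_params can be called on the freshly
# created behavior before running the episode.
beh.set_params(np.load("best_params_pendulum_joint.npy"))
beh.reset()
rewards = controller.episode_with(beh, mp_keys, mp_values)
print(rewards.sum())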
Example #4
def learn(name, setup_fun, run):
    ik, beh, mp_keys, mp_values = setup_fun(cfg.x0, cfg.g, cfg.execution_time,
                                            cfg.dt)
    env = ViaPointEnvironment(ik,
                              cfg.x0,
                              cfg.via_points,
                              cfg.execution_time,
                              cfg.dt,
                              cfg.qlo,
                              cfg.qhi,
                              penalty_vel=cfg.penalty_vel,
                              penalty_acc=cfg.penalty_acc,
                              penalty_via_point=cfg.penalty_via_point)

    opt = CMAESOptimizer(variance=cfg.variance[name], random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env,
                            behavior_search=bs,
                            n_episodes=n_episodes,
                            verbose=0)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    reward = controller.episode_with(best)

    return name, rewards, reward.sum()
Example #5
def test_mc_rl():
    env = OpenAiGym("FrozenLake-v0", render=False, seed=1)
    try:
        env.init()
    except ImportError:
        raise SkipTest("gym is not installed")
    bs = MonteCarloRL(env.get_discrete_action_space(), random_state=1)
    ctrl = Controller(environment=env, behavior_search=bs, n_episodes=10000,
                      finish_after_convergence=True)
    returns = ctrl.learn()
    assert_less(len(returns), 1000)
    beh = bs.get_best_behavior()
    rewards = ctrl.episode_with(beh)
    assert_equal(sum(rewards), 1.0)
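
Beyond the single replay in the test, the learned tabular policy could be evaluated over many episodes to estimate its success rate (FrozenLake returns a reward of 1.0 only when the goal is reached). A hypothetical evaluation loop, reusing ctrl and beh from above:

# Hypothetical evaluation loop (not part of the test): replay the best
# behavior repeatedly and report how often the goal is reached.
n_eval = 100
n_successes = 0
for _ in range(n_eval):
    rewards = ctrl.episode_with(beh)
    n_successes += int(sum(rewards) >= 1.0)
print("success rate: %.2f" % (n_successes / float(n_eval)))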
Example #6
def learn(name, run, setup_fun, variance):
    ik, beh, mp_keys, mp_values = setup_fun(
            cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    env = Pendulum(
        x0=cfg.x0, g=cfg.g,
        execution_time=cfg.execution_time, dt=cfg.dt
    )

    opt = CMAESOptimizer(variance=variance, random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)

    best = bs.get_best_behavior()
    reward = controller.episode_with(best)

    return name, rewards, reward.sum()
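
A function with this signature is typically called from a small driver that sweeps setups and random seeds. The sketch below is hypothetical; the setup name, setup function, and variance value are placeholders, not taken from the original script:

# Hypothetical driver for learn(): repeat one setup with several CMA-ES
# seeds and collect the learning curves and final returns.
results = []
for run in range(5):
    results.append(learn("joint", run, cfg.make_joint_dmp, variance=0.01))

for name, rewards, final_return in results:
    print("%s: return of best behavior = %g" % (name, final_return))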
Example #7
                          log_to_stdout=True)

if os.path.exists("initial_params.txt"):
    initial_params = np.loadtxt("initial_params.txt")
else:
    initial_params = None
opt = CMAESOptimizer(initial_params=initial_params,
                     variance=cfg.variance["approxik"],
                     random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=1000,
                        verbose=2)
rewards = controller.learn(mp_keys, mp_values)

best = bs.get_best_behavior()
best_params = best.get_params()
np.save("best_params_viapoint_joint.npy", best_params)
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
ax = env.plot()
ax.view_init(azim=-110, elev=30)
ax.set_xticks((-0.3, 0.0, 0.3))
ax.set_yticks((0.0, -0.3, -0.6))
ax.set_zticks((0.3, 0.6, 0.9))
plt.savefig("viapoints.pdf")
plt.show()
Example #8
to solve the problem, and policy search algorithms usually work very well in
this domain.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import LinearBehavior
from bolero.controller import Controller

beh = LinearBehavior()
env = OpenAiGym("CartPole-v0", render=False, seed=0)
opt = CMAESOptimizer(variance=10.0**2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=300)

rewards = controller.learn()
controller.episode_with(bs.get_best_behavior())

plt.figure()
ax = plt.subplot(111)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_ylim(-10, 210)
plt.show()
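
If the learned CartPole policy should be kept for later use, the parameters of the best behavior can be saved in the same way as in Example #7. This addition is hypothetical and the file names below are arbitrary:

# Optional, hypothetical addition: persist the learning curve and the
# parameters of the best linear policy, following the np.save pattern
# from Example #7.
best = bs.get_best_behavior()
np.save("best_params_cartpole_linear.npy", best.get_params())
np.savetxt("cartpole_rewards.txt", rewards)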
Example #9
                        penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0,
                        penalty_length=10.0,
                        hide_acc_from_interface=True,
                        use_covar=True)
opt = CMAESOptimizer(variance=0.1**2,
                     random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=n_episodes,
                        record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)

plt.figure(figsize=(8, 5))
ax = plt.subplot(121)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")

ax = plt.subplot(122, aspect="equal")
ax.set_title("Learned trajectory")
plot_covariance(ax, X[:, :2], np.array(X[:, 4:]).reshape(-1, 4, 4))

env.plot(ax)
Example #10
    best_params = np.load("best_params_viapoint_%s.npy" % name)
    beh.set_params(best_params)

    controller = Controller(environment=env, verbose=2)

    initial_param_value = best_params[modified_parameter]
    offset = np.linspace(-param_range / 2.0, param_range / 2.0, n_steps)

    returns = np.empty(n_steps)
    for i in range(n_steps):
        params = np.copy(best_params)
        params[modified_parameter] = initial_param_value + offset[i]
        beh.set_params(params)
        beh.reset()

        rewards = controller.episode_with(beh, mp_keys, mp_values)
        returns[i] = np.sum(rewards)

    plt.plot(offset,
             returns,
             label=labels[name],
             ls=linestyles[name],
             lw=2,
             c="k")

plt.xlabel("Weight offset")
plt.ylabel("Reward")
plt.xticks(np.linspace(-param_range / 2.0, param_range / 2.0, 5))
plt.yticks(np.linspace(-125, 0, 6))
plt.ylim((-125, 0))
plt.legend(loc="best")
Example #11
    for n_task_dims in test_task_dims:
        dmp_seq, subgoals = create_dmp_seq(n_task_dims=n_task_dims)
        dmp_seq.set_params(dmp_seq.get_params())
        for i in range(len(subgoals)):
            assert_equal(len(dmp_seq.get_subgoal_velocity(i)), n_task_dims)


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    dmp_seq, _ = create_dmp_seq(2)

    controller = Controller({"Controller":
                             {"record_outputs": True},
                             "Environment":
                             {"type": "bolero.environment.OptimumTrajectory",
                              "x0": np.zeros(2),
                              "g": np.ones(2),
                              "execution_time": 1.0,
                              "dt": 0.01}},
                            record_inputs=True)
    controller.episode_with(dmp_seq)
    X = np.array(controller.outputs_)[0]

    plt.figure()
    plt.plot(X[:, 0], X[:, 1])
    plt.figure()
    plt.plot(X[:, 2])
    plt.figure()
    plt.plot(X[:, 3])
    plt.show()