Example #1
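# Full-horizon trajectory optimization on the pendulum: a single 300-step MBGPS
# solve, after which the optimized controller is given a small fixed covariance
# and rolled out once stochastically. The snippets on this page appear to be
# extracted function bodies and presumably rely on module-level imports such as
# `import gym`, `import numpy as np` and `from trajopt.gps import MBGPS`.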
def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.02
    env.unwrapped.umax = np.array([2.5])
    env.unwrapped.periodic = False

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    state = env.reset()
    init_state = tuple([state, 1e-4 * np.eye(dm_state)])
    solver = MBGPS(env,
                   init_state=init_state,
                   init_action_sigma=25.,
                   nb_steps=300,
                   kl_bound=.1,
                   action_penalty=1e-3,
                   activation={
                       'shift': 250,
                       'mult': 0.5
                   })

    solver.run(nb_iter=100, verbose=False)

    solver.ctl.sigma = np.dstack([1e-1 * np.eye(dm_act)] * 300)
    data = solver.rollout(nb_episodes=1, stoch=True, init=state)

    obs, act = np.squeeze(data['x'], axis=-1).T, np.squeeze(data['u'],
                                                            axis=-1).T
    return obs, act
Example #2
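# Receding-horizon (MPC-style) control of the cartpole: at each of the 500 time
# steps a fresh 50-step MBGPS problem is solved from the current state and only
# the first nominal action is applied to the environment.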
def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # cartpole env
    env = gym.make('Cartpole-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped._dt = 0.01

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 50, 500

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        solver = MBGPS(env,
                       init_state=tuple(
                           [state[:, t], 1e-16 * np.eye(dm_state)]),
                       init_action_sigma=5.,
                       nb_steps=horizon,
                       kl_bound=0.1)
        trace = solver.run(nb_iter=10, verbose=False)

        _nominal_action = solver.udist.mu

        action[:, t] = _nominal_action[:, 0]
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T
Example #3
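# Receding-horizon control of the pendulum (Pendulum-TO-v1): a 15-step MBGPS
# problem is re-solved at each of the 150 time steps and the deterministic
# action, clipped to the control limits, is applied.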
def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v1')
    env._max_episode_steps = 10000
    env.unwrapped._dt = 0.05

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 15, 150

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        solver = MBGPS(env,
                       init_state=tuple([state[:, t],
                                         1e-4 * np.eye(dm_state)]),
                       init_action_sigma=1.,
                       nb_steps=horizon,
                       kl_bound=2.)
        trace = solver.run(nb_iter=10, verbose=False)

        _act = solver.ctl.sample(state[:, t], 0, stoch=False)
        action[:, t] = np.clip(_act, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T
Example #4
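# Receding-horizon control of the periodic pendulum: five MBGPS iterations over
# a 15-step horizon at every time step; a stochastic action is then sampled
# from the controller (with a small fixed covariance) and clipped to the
# control limits before being applied.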
def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.02
    env.unwrapped.umax = np.array([2.5])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 15, 100

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        init_state = tuple([state[:, t], 1e-4 * np.eye(dm_state)])
        solver = MBGPS(env,
                       init_state=init_state,
                       init_action_sigma=2.5,
                       nb_steps=horizon,
                       kl_bound=1.,
                       action_penalty=1e-3)
        trace = solver.run(nb_iter=5, verbose=False)

        solver.ctl.sigma = np.dstack([1e-2 * np.eye(dm_act)] * horizon)
        u = solver.ctl.sample(state[:, t], 0, stoch=True)
        action[:, t] = np.clip(u, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        # print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T
Example #5
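# Receding-horizon control of the cartpole, using the environment's own noise
# covariance as the initial state covariance of every MBGPS sub-problem; the
# deterministic action is clipped to the control limits and applied.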
def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # cartpole env
    env = gym.make('Cartpole-TO-v0')
    env._max_episode_steps = 10000
    env.unwrapped.dt = 0.05
    env.unwrapped.umax = np.array([5.])
    env.unwrapped.periodic = True

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 20, 100

    env_sigma = env.unwrapped.sigma

    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    for t in range(nb_steps):
        solver = MBGPS(env,
                       init_state=tuple([state[:, t], env_sigma]),
                       init_action_sigma=1.,
                       nb_steps=horizon,
                       kl_bound=2.,
                       action_penalty=1e-5)
        trace = solver.run(nb_iter=10, verbose=False)

        _act = solver.ctl.sample(state[:, t], 0, stoch=False)
        action[:, t] = np.clip(_act, -env.ulim, env.ulim)
        state[:, t + 1], _, _, _ = env.step(action[:, t])

        print('Time Step:', t, 'Cost:', trace[-1])

    return state[:, :-1].T, action.T
Example #6
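# Single full-horizon MBGPS solve (500 steps) on the QQube (Quanser Qube) task,
# followed by plots of the optimized distributions, the objective trace and one
# sampled trajectory. Note this snippet uses the keyword `init_ctl_sigma` where
# other examples use `init_action_sigma`.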
import warnings
warnings.filterwarnings("ignore")


# qube (rotary pendulum) env
env = gym.make('QQube-TO-v1')
env._max_episode_steps = 500

alg = MBGPS(env, nb_steps=500,
            kl_bound=0.01,
            init_ctl_sigma=100.,
            activation={'shift': 450, 'mult': 0.5})

# run gps
trace = alg.run(nb_iter=10, verbose=True)

# plot dists
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()

# sample and plot one trajectory
data = alg.sample(nb_episodes=1, stoch=True)

plt.figure()
Example #7
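# Full-horizon MBGPS solve on the pendulum with an action penalty and an
# `activation` schedule, followed by plots of the optimized distributions and
# the objective trace.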
# pendulum env
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

env.seed(1337)

solver = MBGPS(env,
               nb_steps=100,
               init_state=env.init(),
               init_action_sigma=5.0,
               kl_bound=1e1,
               slew_rate=False,
               action_penalty=1,
               activation={
                   'mult': 1.,
                   'shift': 80
               })

trace = solver.run(nb_iter=25, verbose=True)

# plot dists
solver.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #8
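# Receding-horizon control loop extracted from the middle of a script: `env`,
# `dm_state` and `dm_act` are defined earlier in the source file. At each of
# the 500 steps a 100-step MBGPS problem is solved, the first deterministic
# action is applied, and the first two state dimensions are plotted at the end.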
horizon, nb_steps = 100, 500

env_sigma = env.unwrapped._sigma

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = MBGPS(env,
                   init_state=tuple([state[:, t], env_sigma]),
                   init_action_sigma=1.,
                   nb_steps=horizon,
                   kl_bound=2.)
    trace = solver.run(nb_iter=10, verbose=False)

    _act = solver.ctl.sample(state[:, t], 0, stoch=False)
    action[:, t] = np.clip(_act, -env.ulim, env.ulim)
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()

plt.subplot(7, 1, 1)
plt.plot(state[0, :], '-b')
plt.subplot(7, 1, 2)
plt.plot(state[1, :], '-b')
Example #9
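# LRGPS (from trajopt.rgps, presumably a robust GPS variant) versus standard
# MBGPS on a linear LQR task; the two resulting controllers are compared at
# the end.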
import gym
from trajopt.rgps import LRGPS
from trajopt.gps import MBGPS

import warnings
warnings.filterwarnings("ignore")

# lqr task
env = gym.make('LQR-TO-v1')
env.env._max_episode_steps = 100

# mass-damper
rgps = LRGPS(env,
             nb_steps=100,
             policy_kl_bound=0.25,
             param_kl_bound=25e2,
             init_state=env.init(),
             init_action_sigma=100.)

rgps.run(nb_iter=50, verbose=True)

gps = MBGPS(env,
            nb_steps=100,
            init_state=env.init(),
            init_action_sigma=100.,
            kl_bound=0.25)

gps.run(nb_iter=100, verbose=True)

rgps.compare(std_ctl=gps.ctl)
Example #10
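# Full-horizon MBGPS solve on the double cartpole with
# `activation=range(150, 200)` (presumably activating the cost only over the
# last 50 of the 200 steps), followed by plots of the distributions and the
# objective trace.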
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.gps import MBGPS


# double cartpole env
env = gym.make('DoubleCartpole-TO-v0')
env._max_episode_steps = 200

alg = MBGPS(env, nb_steps=200,
            kl_bound=10.,
            init_ctl_sigma=5.0,
            activation=range(150, 200))

# run gps
trace = alg.run()

# plot dists
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #11
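# MBGPS compared against the exact Riccati (LQR) solution on a linear task:
# both are rolled out from identical seeds and their average total costs are
# printed. The snippet starts mid-file and presumably relies on module-level
# imports (warnings, numpy, gym, MBGPS and the Riccati solver from trajopt).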
warnings.filterwarnings("ignore")

# lqr task
env = gym.make('LQR-TO-v0')
env.env._max_episode_steps = 100

dm_state = env.unwrapped.dm_state
dm_act = env.unwrapped.dm_act

mbgps = MBGPS(env,
              nb_steps=100,
              init_state=env.init(),
              init_action_sigma=100.,
              kl_bound=5.)

mbgps.run(nb_iter=15, verbose=True)

riccati = Riccati(env, nb_steps=100, init_state=env.init())

riccati.run()

np.random.seed(1337)
env.seed(1337)
gps_data = mbgps.rollout(250, stoch=False)

np.random.seed(1337)
env.seed(1337)
riccati_data = riccati.rollout(250)

print('GPS Cost: ', np.mean(np.sum(gps_data['c'], axis=0)), ', Riccati Cost',
      np.mean(np.sum(riccati_data['c'], axis=0)))
Example #12
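# Full-horizon MBGPS solve on the cartpole (Cartpole-TO-v1) with
# `activation=range(150, 200)`, followed by plots of the distributions and the
# objective trace.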
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.gps import MBGPS


# cartpole env
env = gym.make('Cartpole-TO-v1')
env._max_episode_steps = 501

alg = MBGPS(env, nb_steps=200,
            kl_bound=1.,
            init_ctl_sigma=5.0,
            activation=range(150, 200))

# run gps
trace = alg.run(nb_iter=50)

# plot dists
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()