def create_job(kwargs):
    """Run receding-horizon (MPC-style) iLQR on the 'Pendulum-TO-v1' env.

    The argument is unused; presumably it exists so a parallel job runner
    can dispatch this function with per-job kwargs — confirm at call site.

    Returns:
        (states, actions): arrays of shape (nb_steps, dm_state) and
        (nb_steps, dm_act) — the executed closed-loop trajectory.
    """
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v1')
    env._max_episode_steps = 100000
    env.unwrapped.dt = 0.05

    nb_xdim = env.observation_space.shape[0]
    nb_udim = env.action_space.shape[0]

    horizon, nb_steps = 25, 100

    xs = np.zeros((nb_xdim, nb_steps + 1))
    us = np.zeros((nb_udim, nb_steps))

    xs[:, 0] = env.reset()
    for step in range(nb_steps):
        # re-plan a short-horizon trajectory from the current state
        mpc = iLQR(env, init_state=xs[:, step],
                   nb_steps=horizon,
                   action_penalty=np.array([1e-5]))
        mpc.run(nb_iter=10, verbose=False)

        # execute only the first action of the optimized sequence
        us[:, step] = mpc.uref[:, 0]
        xs[:, step + 1], _, _, _ = env.step(us[:, step])

    # drop the terminal state so states and actions align step-for-step
    return xs[:, :-1].T, us.T
warnings.filterwarnings("ignore") # lqr task env = gym.make('LQR-TO-v0') env._max_episode_steps = 100000 dm_state = env.observation_space.shape[0] dm_act = env.action_space.shape[0] horizon, nb_steps = 25, 100 state = np.zeros((dm_state, nb_steps + 1)) action = np.zeros((dm_act, nb_steps)) state[:, 0] = env.reset() for t in range(nb_steps): solver = iLQR(env, init_state=state[:, t], nb_steps=horizon) trace = solver.run(nb_iter=5, verbose=False) action[:, t] = solver.uref[:, 0] state[:, t + 1], _, _, _ = env.step(action[:, t]) print('Time Step:', t, 'Cost:', trace[-1]) import matplotlib.pyplot as plt plt.figure() plt.subplot(3, 1, 1) plt.plot(state[0, :], '-b') plt.subplot(3, 1, 2) plt.plot(state[1, :], '-b')
# NOTE(review): fragment — `env`, `gym`, `np` and `iLQR` are defined/imported
# earlier in the file; the plotting section is truncated at the chunk boundary.
env._max_episode_steps = 100000
env.unwrapped._dt = 0.05

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 10, 150

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

# shifted warm-start buffer for the next MPC solve
init_action = np.zeros((dm_act, horizon))

nb_iter = 5

state[:, 0] = env.reset()
for t in range(nb_steps):
    # NOTE(review): `init_action` is rebuilt every step below but the solver
    # is passed init_action=None, so the warm start is never used — looks
    # like a bug; confirm whether iLQR should receive `init_action` here.
    solver = iLQR(env, init_state=state[:, t],
                  init_action=None, nb_steps=horizon)
    trace = solver.run(nb_iter=nb_iter, verbose=False)

    _nominal_state = solver.xref
    _nominal_action = solver.uref

    # execute only the first optimized action (MPC)
    action[:, t] = _nominal_action[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    # shift the optimized sequence by one step and pad with zeros
    init_action = np.hstack((_nominal_action[:, 1:], np.zeros((dm_act, 1))))

    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()
# pendulum env env = gym.make('Pendulum-TO-v0') env._max_episode_steps = 100000 env.unwrapped.dt = 0.05 dm_state = env.observation_space.shape[0] dm_act = env.action_space.shape[0] horizon, nb_steps = 25, 100 state = np.zeros((dm_state, nb_steps + 1)) action = np.zeros((dm_act, nb_steps)) state[:, 0] = env.reset() for t in range(nb_steps): solver = iLQR(env, init_state=state[:, t], nb_steps=horizon, action_penalty=np.array([1e-5])) trace = solver.run(nb_iter=10, verbose=False) action[:, t] = solver.uref[:, 0] state[:, t + 1], _, _, _ = env.step(action[:, t]) print('Time Step:', t, 'Cost:', trace[-1]) import matplotlib.pyplot as plt plt.figure() plt.subplot(3, 1, 1) plt.plot(state[0, :], '-b') plt.subplot(3, 1, 2) plt.plot(state[1, :], '-b')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: pendulum.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR


# pendulum trajectory-optimization task
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 150

# activation=range(100, 150): presumably restricts the cost to the final
# 50 steps of the horizon — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=150, activation=range(100, 150))

# optimize the trajectory
objective = solver.run(nb_iter=25)

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()
import gym
from trajopt.ilqr import iLQR


# Quanser cartpole trajectory-optimization task
env = gym.make('Quanser-Cartpole-TO-v0')
env._max_episode_steps = 500

# activation={'shift': 450, 'mult': 2.}: presumably a soft cost schedule
# centered at step 450 — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=500, activation={'shift': 450, 'mult': 2.})

# optimize the trajectory
objective = solver.run()

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR


# Quanser qube trajectory-optimization task
# NOTE(review): the @Filename header above says cartpole.py but the env is
# the qube — likely a copy-paste leftover
env = gym.make('Quanser-Qube-TO-v0')
env._max_episode_steps = 200

# activation=range(150, 200): presumably limits the cost to the final
# 50 steps — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=200, activation=range(150, 200))

# optimize the trajectory
objective = solver.run()

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR


# cartpole trajectory-optimization task
env = gym.make('Cartpole-TO-v0')
env._max_episode_steps = 700

# activation=range(600, 700): presumably limits the cost to the final
# 100 steps — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=700, activation=range(600, 700))

# optimize the trajectory
objective = solver.run()

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR


# Quanser cartpole trajectory-optimization task
env = gym.make('Quanser-Cartpole-TO-v0')
env._max_episode_steps = 250

# activation=range(200, 250): presumably limits the cost to the final
# 50 steps — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=250, activation=range(200, 250))

# optimize the trajectory
objective = solver.run()

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()
import gym
from trajopt.ilqr import iLQR

import warnings
warnings.filterwarnings("ignore")

# NOTE(review): the plotting section at the end of this script appears
# truncated at the chunk boundary (only the first of five subplots).

# qube env
env = gym.make('QQube-TO-v1')
env._max_episode_steps = 500

# activation={'shift': 250, 'mult': 0.01}: presumably a soft cost schedule
# centered at step 250 — confirm against the iLQR implementation
alg = iLQR(env, nb_steps=500, activation={'shift': 250, 'mult': 0.01})

# run iLQR (comment said "gps" — copy-paste leftover)
trace = alg.run(nb_iter=5, verbose=True)

# plot the optimized trajectories
alg.plot()

# plot objective
import matplotlib.pyplot as plt
plt.figure()
plt.plot(trace)
plt.show()

# roll out the optimized controller once more (alpha=1. full step)
state, action, _ = alg.forward_pass(ctl=alg.ctl, alpha=1.)

plt.figure()

plt.subplot(5, 1, 1)
plt.plot(state[0, :], '-b')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: lqr.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR


# linear-quadratic trajectory-optimization task
env = gym.make('LQR-TO-v0')
env._max_episode_steps = 100

# activation=range(60): presumably the cost is active over the whole
# 60-step horizon — confirm against the iLQR implementation
solver = iLQR(env, nb_steps=60, activation=range(60))

# optimize the trajectory
objective = solver.run()

# visualize the resulting forward pass
solver.plot()

# visualize the objective across iterations
import matplotlib.pyplot as plt

plt.figure()
plt.plot(objective)
plt.show()