def main():
    """
    Train and evaluate the agent.

    This function mirrors the checker that evaluates your agent. You can use
    it to debug your agent and visualize what it does.
    """
    import numpy as np
    import torch
    from lunar_lander import LunarLander
    from gym.wrappers.monitoring.video_recorder import VideoRecorder

    env = LunarLander()
    agent = Agent(env)
    agent.train()

    rec = VideoRecorder(env, "policy.mp4")
    episode_length = 300
    n_eval = 100
    returns = []
    print("Evaluating agent...")

    for i in range(n_eval):
        print(f"Testing policy: episode {i+1}/{n_eval}")
        state = env.reset()
        cumulative_return = 0
        # The environment will set terminal to True if an episode is done.
        terminal = False
        for t in range(episode_length):
            # if i <= 10:
            #     rec.capture_frame()
            # Take an action in the environment.
            action = agent.get_action(
                torch.as_tensor(state, dtype=torch.float32))
            state, reward, terminal = env.transition(action)
            cumulative_return += reward
            if terminal:
                break
        returns.append(cumulative_return)
        print(f"Achieved {cumulative_return:.2f} return.")
        # if i == 10:
        #     rec.close()
        #     print("Saved video of 10 episodes to 'policy.mp4'.")

    env.close()
    print(f"Average return: {np.mean(returns):.2f}")
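
# `Agent` is assumed to be defined elsewhere in the assignment. A minimal
# sketch of just the interface main() actually calls -- __init__(env),
# train(), and get_action(state) -- follows; the policy shown is a
# placeholder, not the intended solution.
import torch


class Agent:
    def __init__(self, env):
        self.env = env

    def train(self):
        # Placeholder: the real agent would run its training loop here.
        pass

    def get_action(self, state: torch.Tensor) -> int:
        # Placeholder policy: always fire the main engine (action 2).
        return 2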
"action": [], "terminal": [], } env = LunarLander() env.render() env.viewer.window.on_key_press = key_press env.viewer.window.on_key_release = key_release a = np.array([0]) episode_rewards = [] steps = 0 while True: episode_reward = 0 state = env.reset() state_img = env.render( mode="rgb_array")[::4, ::4, :] # downsampling (every 4th pixel). while True: next_state, r, done, info = env.step(a[0]) next_state_img = env.render(mode="rgb_array")[::4, ::4, :] episode_reward += r samples["state"].append(state) # state has shape (8,) samples["state_img"].append( state_img) # state_img has shape (100, 150, 3) samples["action"].append(np.array(a)) samples["next_state"].append(next_state)
def plot_io_bounds(x, y, vx, vy, theta, omega, a, steps, discrete=True):
    import matplotlib.pyplot as plt

    statebox = [x, y, vx, vy, theta, omega]
    centerstate = [box[0] + .5 * (box[1] - box[0]) for box in statebox]
    envstate = [i for i in centerstate]

    # Zero-order hold on actions if needed.
    if discrete and isinstance(a, int):
        a = a * np.ones(steps, dtype=np.int32)
    elif not discrete:
        a = [np.array(a) for i in range(steps)]

    # System-IDed model trajectory.
    centerstatehist = [centerstate]
    for i in range(steps):
        centerstate = lander_dynamics(*centerstate, a=a[i], discrete=discrete)
        centerstatehist.append(centerstate)

    # Actual OpenAI Gym model trajectory.
    envstatehist = [envstate]
    if discrete:
        from lunar_lander import LunarLander
        env = LunarLander()
    else:
        from lunar_lander import LunarLanderContinuous
        env = LunarLanderContinuous()
    s = env.reset(envstate)
    for i in range(steps):
        s, _, _, _ = env.step(a[i])
        envstatehist.append(s[0:6])

    # Overapproximated trajectory.
    stateboxhist = [statebox]
    for i in range(steps):
        statebox = lander_box_dynamics(*statebox, a=a[i], steps=1,
                                       discrete=discrete)
        stateboxhist.append(statebox)

    centerstatehist = np.array(centerstatehist)
    envstatehist = np.array(envstatehist)
    stateboxhist = np.array(stateboxhist)

    t = np.linspace(0, steps, steps + 1)
    fig, axs = plt.subplots(6, 1, figsize=(4, 9))
    # fig.set_size_inches(5, 7, forward=True)
    limits = [[-1, 1], [0, 1], [-1, 1], [-1, 1],
              [-np.pi / 3, np.pi / 3], [-.5, .5]]
    for i in range(6):
        axs[i].fill_between(t, stateboxhist[:, i, 0], stateboxhist[:, i, 1],
                            alpha=0.3)
        axs[i].plot(centerstatehist[:, i], 'r')
        axs[i].plot(envstatehist[:, i], 'b.')
        axs[i].set_ylim(bottom=limits[i][0], top=limits[i][1])
        axs[i].set_yticks(np.linspace(limits[i][0], limits[i][1], 17),
                          minor=True)
        axs[i].grid(which='minor', alpha=.4)

    axs[0].set_title('Action {0}'.format(a))
    plt.show()
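
# plot_io_bounds relies on lander_dynamics and lander_box_dynamics defined
# elsewhere: a system-identified point model and its interval (box)
# overapproximation. The stubs below only document the expected signatures;
# identity dynamics stand in for the real models.
def lander_dynamics(x, y, vx, vy, theta, omega, a, discrete=True):
    # Expected: one step of the identified point dynamics, returning the
    # next 6D state [x, y, vx, vy, theta, omega].
    return [x, y, vx, vy, theta, omega]


def lander_box_dynamics(xbox, ybox, vxbox, vybox, thetabox, omegabox,
                        a, steps, discrete=True):
    # Expected: interval dynamics mapping per-dimension [lo, hi] boxes to
    # overapproximated next-state boxes after `steps` steps.
    return [xbox, ybox, vxbox, vybox, thetabox, omegabox]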
from lunar_lander import LunarLander, FPS
import random
import time
import numpy as np

np.set_printoptions(precision=3, suppress=True)

if __name__ == "__main__":
    env = LunarLander()
    dt = 1.0 / FPS
    obs = env.reset()
    env.render()

    done = False
    t = 0
    t0 = time.time()
    while not done and t < 500:
        # Sample a random discrete action and step the environment.
        a = random.randint(0, 3)
        s1, r, done, info = env.step(a)
        print(s1, r, info)
        # time.sleep(dt * 10)
        env.render()
        t += 1
    print("rate:", t / (time.time() - t0))
from lunar_lander import demo_heuristic_lander, LunarLander

total_reward_array = []
myLunarLander = LunarLander()
dorender = True
num_iters = 100
isdumb = True

for i in range(num_iters):
    end_reward = demo_heuristic_lander(myLunarLander, render=dorender,
                                       dumb=isdumb)
    total_reward_array.append(end_reward)
    myLunarLander.reset()
    print("Iteration: " + str(i))

print("Average Rewards Over " + str(num_iters) + " trials")
average_reward = sum(total_reward_array) / len(total_reward_array)
print(average_reward)