def trainer(fargs):
    """Run one independent training job and return its training history.

    Args:
        fargs: A two-item sequence ``(trainer_id, args)``. ``trainer_id``
            labels this run in the log output and is forwarded to
            ``train`` as ``train_run_id``. ``args`` must expose ``env``,
            ``episode_steps`` and ``train_episodes``.
            NOTE(review): the single packed argument presumably exists so
            the function can be used with a pool ``map`` - confirm against
            the caller.

    Returns:
        Whatever ``train`` returns for this run.
    """
    run_id, options = fargs
    print("Trainer id", run_id, "started")

    # Build the environment; only override the episode length on request.
    environment = gym.make(options.env)
    step_limit = options.episode_steps
    if step_limit is not None:
        environment._max_episode_steps = step_limit

    # Size the policy from the environment's action/observation spaces.
    n_actions = get_space_dim(environment.action_space)
    n_observations = get_space_dim(environment.observation_space)
    learner = Agent(Policy(n_observations, n_actions))

    history = train(
        learner,
        environment,
        options.train_episodes,
        silent=True,
        train_run_id=run_id,
        early_stop=False,
    )

    print("Trainer id", run_id, "finished")
    return history
def main(args):
    """Train a policy from scratch, or load a saved one and test it.

    When ``args.test`` is ``None`` the agent is trained, the policy weights
    are saved to a timestamped ``.mdl`` file, and the reward history is
    plotted, saved as a timestamped ``.jpg`` and shown. Otherwise the state
    dict stored at ``args.test`` is loaded into the policy and ``test`` is
    run.

    Args:
        args: Parsed options. Reads ``env``, ``test``, ``position``,
            ``train_episodes``, ``render_training`` and ``render_test``.
    """
    # Create a Gym environment
    env = gym.make(args.env)

    # Exercise 1
    # TODO: For CartPole-v0 - maximum episode length
    env._max_episode_steps = 1000

    # Get dimensionalities of actions and observations
    action_space_dim = get_space_dim(env.action_space)
    observation_space_dim = get_space_dim(env.observation_space)

    # Instantiate agent and its policy
    policy = Policy(observation_space_dim, action_space_dim)
    agent = Agent(policy)

    # Print some stuff
    print("Environment:", args.env)
    print("Training device:", agent.train_device)
    print("Observation space dimensions:", observation_space_dim)
    print("Action space dimensions:", action_space_dim)

    # If no model was passed, train a policy from scratch.
    # Otherwise load the policy from the file and go directly to testing.
    if args.test is None:
        training_history = train(args.position, agent, env,
                                 args.train_episodes, False,
                                 args.render_training)

        # Take ONE clock reading for the tag. Reading the clock separately
        # for date, hour and minute could yield an inconsistent tag when
        # the calls straddle a minute/hour/day boundary. The tag format
        # itself ("<date>-<hour>-<minute>", no zero padding) is unchanged.
        now = datetime.datetime.now()
        tt = "-".join(str(part) for part in (now.date(), now.hour, now.minute))

        # Save the model
        model_file = "%s_params.mdl" % (args.env + tt + "vel")
        torch.save(policy.state_dict(), model_file)
        print("Model saved to", model_file)

        # Plot rewards
        sns.lineplot(x="episode", y="reward", data=training_history)
        sns.lineplot(x="episode", y="mean_reward", data=training_history)
        plt.legend(["Reward", "100-episode average"])
        plt.title("Reward history (%s)" % args.env)
        # The plot file carries the same time tag as the model file.
        plt.savefig("train_history" + tt + "vel" + ".jpg")
        plt.show()
        print("Training finished.")
    else:
        print("Loading model from", args.test, "...")
        # NOTE(review): torch.load unpickles the file; only pass model
        # files from a trusted source.
        state_dict = torch.load(args.test)
        policy.load_state_dict(state_dict)
        print("Testing...")
        test(args.position, agent, env, args.train_episodes, args.render_test)
def main(args):
    """Entry point: test a saved policy if one is given, else train one.

    With ``args.test`` set, the state dict at that path is loaded into a
    fresh policy and ``test`` is run. Without it, the agent is trained,
    the policy weights are written to ``"<env>_params.mdl"`` and the
    reward history is plotted.

    Args:
        args: Parsed options. Reads ``env``, ``episode_length``, ``test``,
            ``train_episodes``, ``render_training``, ``render_test`` and
            ``x0``; the whole object is also forwarded to ``train``.
    """
    # Environment setup (Exercise 1: episode length comes from the options).
    environment = gym.make(args.env)
    environment._max_episode_steps = args.episode_length

    # Policy/agent sized from the environment's spaces.
    n_actions = get_space_dim(environment.action_space)
    n_observations = get_space_dim(environment.observation_space)
    policy = Policy(n_observations, n_actions)
    agent = Agent(policy)

    # Summary of the run configuration.
    print("Environment:", args.env)
    print("Training device:", agent.train_device)
    print("Observation space dimensions:", n_observations)
    print("Action space dimensions:", n_actions)

    # A model file was supplied: skip training and go straight to testing.
    if args.test is not None:
        print("Loading model from", args.test, "...")
        # NOTE(review): torch.load unpickles the file; only pass model
        # files from a trusted source.
        saved_weights = torch.load(args.test)
        policy.load_state_dict(saved_weights)
        print("Testing...")
        test(agent, environment, args.train_episodes, args.render_test,
             x0=args.x0)
        return

    # No model supplied: train from scratch.
    history = train(
        agent,
        environment,
        args.train_episodes,
        False,
        args.render_training,
        x0=args.x0,
        args=args,
        policy=policy,
    )

    # Persist the learned weights.
    model_file = "%s_params.mdl" % args.env
    torch.save(policy.state_dict(), model_file)
    print("Model saved to", model_file)

    # Plot the per-episode reward and its running mean, in that order.
    for column in ("reward", "mean_reward"):
        sns.lineplot(x="episode", y=column, data=history)
    plt.legend(["Reward", "100-episode average"])
    plt.title("Reward history (%s)" % args.env)
    plt.show()
    print("Training finished.")
# Let the agent do its magic (update the policy) agent.episode_finished(episode_number) # Training is finished - plot rewards plt.plot(reward_history) plt.plot(average_reward_history) plt.legend(["Reward", "100-episode average"]) plt.title("Reward history") plt.show() print("Training finished.") # Create a Gym environment env = CartPoleEnv() # For CartPole - maximum episode length env._max_episode_steps = 1000 # Get dimensionalities of actions and observations action_space_dim = get_space_dim(env.action_space) observation_space_dim = get_space_dim(env.observation_space) # Create the agent and the policy policy = Policy(observation_space_dim, action_space_dim) agent = Agent(policy) # Train train(args.train_episodes, agent) model_file = "%s_params.mdl" % args.env torch.save(policy.state_dict(), model_file) print("Model saved to", model_file)