def run_train(args):
    save_path = os.path.join(args.logdir, args.agent + '@cps' + str(args.stage))
    args.save_path = init_trial_path(save_path)
    with open(os.path.join(args.save_path, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    print(args)

    config = init_config(args)
    agent = get_agent(args.agent)
    trainer = Trainer(config, [agent])
    results = trainer.run_games_for_agents()

    agent_name = agent.agent_name
    res_age = np.array(results[agent_name][0][0])  # per-episode rewards for this agent
    # Score combines the best single episode with the mean of the last 10 episodes.
    score = (max(res_age) + np.mean(res_age[-10:])) / 2
    # First episode index at which the reward crosses 0 and 100 respectively (-1 if never).
    converge_step_1 = np.where(res_age > 0)[0]
    converge_step_2 = np.where(res_age > 100)[0]
    converge_step_1 = converge_step_1[0] if len(converge_step_1) > 0 else -1
    converge_step_2 = converge_step_2[0] if len(converge_step_2) > 0 else -1

    print("Train Done")
    print(f"Agent={agent_name}, Score={score}, Path={args.save_path}")
    print(f"Final Reward={np.mean(res_age[-10:])}, "
          f"Converge Step 0={converge_step_1}, Converge Step 100={converge_step_2}")

    agent = trainer.trained_agents[0][0]
    return agent
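run_train relies on init_trial_path, which is defined elsewhere in the repo. A minimal sketch of what such a helper might do, assuming it allocates a fresh numbered trial directory under save_path (this is an illustration, not the repo's implementation):

import os

def init_trial_path(save_path):
    # Hypothetical sketch: create save_path if needed, then return a new
    # numbered subdirectory (save_path/1, save_path/2, ...) for this trial.
    os.makedirs(save_path, exist_ok=True)
    trial_id = 1
    while os.path.exists(os.path.join(save_path, str(trial_id))):
        trial_id += 1
    trial_path = os.path.join(save_path, str(trial_id))
    os.makedirs(trial_path)
    return trial_path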
def test_agent_solve_bit_flipping_game():
    AGENTS = [PPO, DDQN, DQN_With_Fixed_Q_Targets,
              DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER]
    trainer = Trainer(config, AGENTS)
    results = trainer.train()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, agent_results)
def test_agent_solve_bit_flipping_game():
    AGENTS = [DQN]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, agent_results)
def test_agents_can_play_games_of_different_dimensions():
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3

    # Discrete-action environment
    AGENTS = [A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets,
              DDQN_With_Prioritised_Experience_Replay, DQN]
    trainer = Trainer(config, AGENTS)
    config.environment = gym.make("CartPole-v0")
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    # Continuous-action environment
    AGENTS = [SAC, TD3, PPO, DDPG]
    config.environment = gym.make("MountainCarContinuous-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    # Custom grid-world environment
    AGENTS = [DDQN, SNN_HRL]
    config.environment = Four_Rooms_Environment(
        15, 15, stochastic_actions_probability=0.25,
        random_start_user_place=True, random_goal_place=False)
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [np.mean([1.0, 5.0, 2.5]), np.mean([2.0, -33.0, 2.5]),
                    np.mean([3.0, 55.0, 2.5])]
    std_results = [np.std([1.0, 5.0, 2.5]), np.std([2.0, -33.0, 2.5]),
                   np.std([3.0, 55.0, 2.5])]
    mean_minus_1_std = [mean - std_val for mean, std_val in zip(mean_results, std_results)]
    mean_plus_1_std = [mean + std_val for mean, std_val in zip(mean_results, std_results)]

    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = \
        trainer.get_mean_and_standard_deviation_difference_results(results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = \
        trainer.get_mean_and_standard_deviation_difference_results(results)
    mean_plus_3_std = [mean + 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    mean_minus_3_std = [mean - 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
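As a worked check of the first episode column above (rewards 1.0, 5.0, 2.5 across the three runs):

# mean = (1.0 + 5.0 + 2.5) / 3 ≈ 2.833
# std  = sqrt(((1.0 - 2.833)**2 + (5.0 - 2.833)**2 + (2.5 - 2.833)**2) / 3) ≈ 1.650
# so the 1-std band is roughly (1.183, 4.483). Note np.std defaults to the
# population standard deviation (ddof=0), which is what this hand computation uses.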
"batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 400, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, #for O-H noise "theta": 0.15, #for O-H noise "sigma": 0.25, #for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True } } if __name__ == "__main__": # turn it on if you want to test specific network saved test = False dictPath = None AGENTS = [SAC_Discrete] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents(test, dictPath)
"batch_norm": False, "tau": 0.01, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [400, 300], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5 }, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.2, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "clip_rewards": False } } if __name__ == "__main__": AGENTS = [DDPG, HIRO] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents()
"discount_rate": 0.99, "tau": 0.1, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.4, "incremental_td_error": 1e-8, "update_every_n_steps": 3, "linear_hidden_units": [20, 20, 20], "final_layer_activation": "None", "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8, "clip_rewards": False } } trainer = Trainer(config, [DQN_HER]) config.hyperparameters = config.hyperparameters["DQN_Agents"] agent = DQN_HER(config) agent.reset_game() def test_initiation(): """Tests whether DQN_HER initiates correctly""" config.hyperparameters["batch_size"] = 64 agent = DQN_HER(config) agent.reset_game() assert agent.ordinary_buffer_batch_size == int(0.2 * 64) assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64) assert agent.q_network_local.input_dim == 8
"batch_size": 128, "buffer_size": 100000, "epsilon": 1.0, "epsilon_decay_rate_denominator": 150, "discount_rate": 0.999, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.1, "incremental_td_error": 1e-8, "update_every_n_steps": 15, "tau": 1e-2, "linear_hidden_units": [256, 256], "final_layer_activation": "softmax", # "y_range": (-1, 14), "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8, "learning_iterations": 1, "clip_rewards": False } } config.model = FCNN() if __name__== '__main__': AGENTS = [DQN, DRQN, ]#DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay] trainer = Trainer(config, AGENTS) trainer.train()
"final_layer_activation": None, "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 400, "batch_size": 256, "discount_rate": 0.99, "mu": 0.0, #for O-H noise "theta": 0.15, #for O-H noise "sigma": 0.25, #for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True } } if __name__ == "__main__": AGENTS = [DQN] trainer = Trainer(config, AGENTS) trainer.render_games_for_pretrained_agent(DQN, 'Models\\DQN_local_network.pt', False)
config.hyperparameters = dict(
    # y_range=(-1, 14),
    HER_sample_proportion=0.8,
    alpha_prioritised_replay=0.6,
    batch_norm=False,
    batch_size=64,
    beta_prioritised_replay=0.1,
    buffer_size=1_000_000,
    clip_rewards=False,
    discount_rate=0.999,
    epsilon=1.0,
    epsilon_decay_rate_denominator=(config.num_episodes_to_run * 0.01) // (1 - 0.01),
    final_layer_activation="softmax",
    gradient_clipping_norm=5,
    incremental_td_error=1e-8,
    learning_iterations=1,
    learning_rate=0.01,
    random_episodes_to_run=0,
    tau=1e-2,
    update_every_n_steps=15,
    num_hidden_layers=2,
    hidden_layer_size=256,
    linear_hidden_units=None,  # Either set this, or the previous two.
)

if __name__ == '__main__':
    trainer = Trainer(config, [DQN])
    trainer.train()
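The two network-shape specs at the end of this dict appear interchangeable: num_hidden_layers=2 with hidden_layer_size=256 describes the same shape as the linear_hidden_units=[256, 256] used in the DQN config above. A minimal sketch of the mapping (helper name hypothetical):

def to_linear_hidden_units(num_hidden_layers, hidden_layer_size):
    # Hypothetical helper: expand a (depth, width) spec into an explicit units list.
    return [hidden_layer_size] * num_hidden_layers

assert to_linear_hidden_units(2, 256) == [256, 256]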
"Actor": { "learning_rate": 0.0003, "linear_hidden_units": [128, 128, 32], "final_layer_activation": None, "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, "Critic": { "learning_rate": 0.0003, "linear_hidden_units": [128, 128, 32], "final_layer_activation": None, "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, } } if __name__ == "__main__": print('rerun with -h flag to see possible args or check the read me file') trainer = Trainer(config, AGENTS) if config.eval: trainer.eval_model(config.num_episodes_to_run) else: trainer.run_games_for_agents()
import argparse
import sys
from os.path import dirname, abspath

sys.path.append(dirname(dirname(abspath(__file__))))

from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', help='path of pkl data file', required=True)
    parser.add_argument('--save_path', help='path of saved result', required=True)
    parser.add_argument('--title', help='title of result image', default='Result')
    args = parser.parse_args()

    config = Config()
    trainer = Trainer(config=config, agents=None)
    trainer.visualise_preexisting_results(save_image_path=args.save_path,
                                          data_path=args.data_path,
                                          title=args.title)
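Example invocation (the script filename and paths are assumptions; the flags come from the argparse setup above):

#   python visualise_results.py --data_path results/run1.pkl --save_path results/run1.png --title "DQN on CartPole"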