def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output.

    The expected mean and standard deviation are computed with numpy position
    by position across the inner lists of ``results``. The trainer is then
    asked for (mean - x * std, mean, mean + x * std) with
    ``config.standard_deviation_results`` set to x = 1.0 and x = 3.0.
    """
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]

    # zip(*results) regroups the values by position: (1.0, 5.0, 2.5), (2.0, -33.0, 2.5), ...
    values_by_position = list(zip(*results))
    mean_results = [np.mean(values) for values in values_by_position]
    std_results = [np.std(values) for values in values_by_position]

    config = Config()
    for num_std in (1.0, 3.0):
        config.standard_deviation_results = num_std
        # A fresh Trainer is built for every setting, after the config has been updated.
        trainer = Trainer(config, [])
        mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = (
            trainer.get_mean_and_standard_deviation_difference_results(results))

        mean_minus_x_std = [
            mean - num_std * std_val
            for mean, std_val in zip(mean_results, std_results)
        ]
        mean_plus_x_std = [
            mean + num_std * std_val
            for mean, std_val in zip(mean_results, std_results)
        ]

        comparisons = (
            (mean_results, mean_results_guess),
            (mean_minus_x_std, mean_minus_x_std_guess),
            (mean_plus_x_std, mean_plus_x_std_guess),
        )
        for expected, guess in comparisons:
            # Explicit length check so a scalar or short result cannot pass via broadcasting.
            assert len(guess) == len(expected)
            # Compare with a tolerance: exact float equality would fail on
            # harmless last-bit rounding differences in the implementation.
            np.testing.assert_allclose(guess, expected, rtol=1e-7, atol=1e-12)
def test_add_default_hyperparameters_if_not_overriden():
    """Tests that add_default_hyperparameters_if_not_overriden function works.

    Each scenario stores a partially specified hyperparameter dictionary on
    the config, passes it through the Trainer method, and checks the returned
    dictionary against the expected, fully populated one.
    """
    defaults = {
        'output_activation': 'None',
        'hidden_activations': 'relu',
        'dropout': 0.0,
        'initialiser': 'default',
        'batch_norm': False,
        'columns_of_data_to_be_embedded': [],
        'embedding_dimensions': [],
        'y_range': (),
    }
    # Same as the defaults except for one overridden value and one extra key.
    with_overrides = dict(defaults)
    with_overrides.update({'output_activation': "YESSS!!", "helo": 20})

    scenarios = [
        # (hyperparameters supplied, hyperparameters expected back)
        ({"DQN_Agents": {}},
         {"DQN_Agents": defaults}),
        ({"DQN_Agents": {}, "Test": {}},
         {"DQN_Agents": defaults, "Test": defaults}),
        ({"DQN_Agents": {"helo": 20, "output_activation": "YESSS!!"}},
         {"DQN_Agents": with_overrides}),
    ]

    config = Config()
    for supplied, expected in scenarios:
        config.hyperparameters = supplied
        # The Trainer is constructed only after the config carries the supplied set.
        trainer = Trainer(config, [])
        config.hyperparameters = trainer.add_default_hyperparameters_if_not_overriden(
            config.hyperparameters)
        assert config.hyperparameters == expected
"""Tests for the hierarchical RL agent HIRO""" import copy import gym import random import numpy as np import torch from Hierarchical_Agents.HIRO import HIRO from Utilities.Data_Structures.Config import Config random.seed(1) np.random.seed(1) torch.manual_seed(1) config = Config() config.seed = 1 config.environment = gym.make("Pendulum-v0") config.num_episodes_to_run = 1500 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 1 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False
from Hierarchical_Agents.SNN_HRL import SNN_HRL from Agents.Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN import DQN from Agents.Hierarchical_Agents.h_DQN import h_DQN from Environments.Long_Corridor_Environment import Long_Corridor_Environment config = Config() config.seed = 1 config.env_parameters = {"stochasticity_of_action_right": 0.5} config.environment = Long_Corridor_Environment( stochasticity_of_action_right=config. env_parameters["stochasticity_of_action_right"]) config.num_episodes_to_run = 10000 config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl" config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png" config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 3 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False config.hyperparameters = { "h_DQN": { "CONTROLLER": { "batch_size": 256,
from Agents.DQN_Agents.DQN_HER_Agent import DQN_HER_Agent from Environments.Other_Enrivonments.Bit_Flipping_Environment import Bit_Flipping_Environment from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN_Agent import DQN_Agent from Utilities.Utility_Functions import run_games_for_agents config = Config() config.seed = 100 config.environment = Bit_Flipping_Environment(14) config.max_episodes_to_run = 6000 config.file_to_save_data_results = "Results_Data.pkl" config.file_to_save_data_results_graph = "Results_Graph.png" config.visualise_individual_results = True config.visualise_overall_results = True config.runs_per_agent = 3 config.use_GPU = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.001, "batch_size": 128, "buffer_size": 100000, "epsilon": 0.1, "epsilon_decay_rate_denominator": 500, "discount_rate": 0.98, "tau": 0.1, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.4, "incremental_td_error": 1e-8, "update_every_n_steps": 1, "nn_layers": 2,
import gym from A3C import A3C from Agents.Policy_Gradient_Agents.PPO import PPO from Agents.Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DDQN import DDQN from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay from Agents.DQN_Agents.DQN import DQN from Agents.DQN_Agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets config = Config() config.seed = 1 config.environment = gym.make("CartPole-v0") config.num_episodes_to_run = 1500 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 1 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 128,
from Agents.DQN_Agents.DDQN import DDQN from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay from Agents.DQN_Agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets from Environments.Bit_Flipping_Environment import Bit_Flipping_Environment from Agents.Policy_Gradient_Agents.PPO import PPO from Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN import DQN import numpy as np import torch random.seed(1) np.random.seed(1) torch.manual_seed(1) config = Config() config.seed = 1 config.environment = Bit_Flipping_Environment(4) config.num_episodes_to_run = 1 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.visualise_individual_results = False config.visualise_overall_agent_results = False config.randomise_random_seed = False config.runs_per_agent = 1 config.use_GPU = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005,
from Environments.Bit_Flipping_Environment import Bit_Flipping_Environment from Agents.Policy_Gradient_Agents.PPO import PPO from Four_Rooms_Environment import Four_Rooms_Environment from Hierarchical_Agents.SNN_HRL import SNN_HRL from TD3 import TD3 from Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN import DQN import numpy as np import torch random.seed(1) np.random.seed(1) torch.manual_seed(1) config = Config() config.seed = 1 config.environment = Bit_Flipping_Environment(4) config.num_episodes_to_run = 2000 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.visualise_individual_results = False config.visualise_overall_agent_results = False config.randomise_random_seed = False config.runs_per_agent = 1 config.use_GPU = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 64, "buffer_size": 40000,
import gym from Agents.Policy_Gradient_Agents.PPO import PPO from Agents.Actor_Critic_Agents.DDPG import DDPG from TD3 import TD3 from Agents.Trainer import Trainer from Utilities.Data_Structures.Config import Config config = Config() config.seed = 1 config.environment = gym.make("MountainCarContinuous-v0") config.num_episodes_to_run = 600 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 3 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False config.hyperparameters = { "Policy_Gradient_Agents": { "learning_rate": 0.05, "linear_hidden_units": [30, 15], "final_layer_activation": "TANH",
from A3C import A3C
from Agents.DQN_Agents.DQN_HER import DQN_HER
from DDQN import DDQN
from Environments.Four_Rooms_Environment import Four_Rooms_Environment
from Hierarchical_Agents.SNN_HRL import SNN_HRL
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN

# Configuration for the Four Rooms experiment.
config = Config()
config.seed = 1
config.environment = Four_Rooms_Environment(
    15,
    15,
    stochastic_actions_probability=0.25,
    random_start_user_place=True,
    random_goal_place=False,
)

# The remaining settings are plain constants, applied in their original order.
for _setting, _value in (
    ("num_episodes_to_run", 1000),
    ("file_to_save_data_results", "Data_and_Graphs/Four_Rooms.pkl"),
    ("file_to_save_results_graph", "Data_and_Graphs/Four_Rooms.png"),
    ("show_solution_score", False),
    ("visualise_individual_results", False),
    ("visualise_overall_agent_results", True),
    ("standard_deviation_results", 1.0),
    ("runs_per_agent", 3),
    ("use_GPU", False),
    ("overwrite_existing_results_file", False),
    ("randomise_random_seed", True),
    ("save_model", False),
):
    setattr(config, _setting, _value)
from Agents.Trainer import Trainer from DDPG import DDPG from Hierarchical_Agents.HIRO import HIRO from Utilities.Data_Structures.Config import Config import gym config = Config() config.seed = 1 config.environment = gym.make("Hopper-v2") config.num_episodes_to_run = 1500 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 1 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False DDPG_hyperparameters = { # hyperparameters taken from https://arxiv.org/pdf/1802.09477.pdf "Actor": { "learning_rate": 0.001, "linear_hidden_units": [30, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.01, "gradient_clipping_norm": 5 },
import gym from Hierarchical_Agents.SNN_HRL import SNN_HRL from Agents.Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN import DQN from Agents.Hierarchical_Agents.h_DQN import h_DQN config = Config() config.seed = 1 config.environment = gym.make("Taxi-v2") config.env_parameters = {} config.num_episodes_to_run = 10000 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 3 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False config.hyperparameters = { "h_DQN": { "CONTROLLER": { "batch_size": 256, "learning_rate":
from Agents.Policy_Gradient_Agents.PPO_Agent import PPO_Agent from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DDQN_Agent import DDQN_Agent from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay from Agents.DQN_Agents.DQN_Agent import DQN_Agent from Agents.DQN_Agents.DQN_Agent_With_Fixed_Q_Targets import DQN_Agent_With_Fixed_Q_Targets from Environments.Open_AI_Gym_Environments.Cart_Pole_Environment import Cart_Pole_Environment from Agents.Policy_Gradient_Agents.REINFORCE_Agent import REINFORCE_Agent from Agents.Stochastic_Policy_Search_Agents.Genetic_Agent import Genetic_Agent from Agents.Stochastic_Policy_Search_Agents.Hill_Climbing_Agent import Hill_Climbing_Agent from Utilities.Utility_Functions import run_games_for_agents config = Config() config.seed = 1 config.environment = Cart_Pole_Environment() config.max_episodes_to_run = 2000 config.file_to_save_data_results = "Results_Data.pkl" config.file_to_save_data_results_graph = "Results_Graph.png" config.visualise_individual_results = True config.visualise_overall_results = True config.runs_per_agent = 1 config.use_GPU = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 256, "buffer_size": 40000, "epsilon": 0.1, "epsilon_decay_rate_denominator": 200, "discount_rate": 0.99,
from gym.wrappers import FlattenDictWrapper from Agents.DQN_Agents.DQN_HER import DQN_HER from Bit_Flipping_Environment import Bit_Flipping_Environment from Agents.Trainer import Trainer from Utilities.Data_Structures.Config import Config from Agents.DQN_Agents.DQN import DQN config = Config() config.seed = 1 config.environment = Bit_Flipping_Environment(14) config.num_episodes_to_run = 4500 config.file_to_save_data_results = None #"Data_and_Graphs/Bit_Flipping_Results_Data.pkl" config.file_to_save_results_graph = None #"Data_and_Graphs/Bit_Flipping_Results_Graph.png" config.show_solution_score = False config.visualise_individual_results = False config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 3 config.use_GPU = False config.overwrite_existing_results_file = False config.randomise_random_seed = True config.save_model = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.001, "batch_size": 128, "buffer_size": 100000, "epsilon_decay_rate_denominator": 150, "discount_rate": 0.999,