# Experiment script: configure a run of agents on the continuous Lunar Lander
# environment (project-local wrappers; presumably consumed by
# run_games_for_agents — TODO confirm, the call is not visible in this chunk).
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# TRUNCATED — the hyperparameters dict is never closed and the script's tail
# (including any call to run_games_for_agents) is missing.
from Actor_Critic_Agents.DDPG_Agent import DDPG_Agent
from Data_Structures.Config import Config
from Lunar_Lander_Continuous import Lunar_Lander_Continuous
from Open_AI_Gym_Environments.Mountain_Car_Continuous_Environment import Mountain_Car_Continuous_Environment
from PPO_Agent import PPO_Agent
from Utility_Functions import run_games_for_agents

# Top-level run settings. Config is a project-declared container; the fields
# set below are plain attribute assignments (their semantics live in the
# consumers, not here).
config = Config()
config.seed = 200
config.environment = Lunar_Lander_Continuous()
config.max_episodes_to_run = 3000
config.file_to_save_data_results = "Results_Data.pkl"
config.file_to_save_data_results_graph = "Results_Graph2.png"
config.visualise_individual_results = False
config.visualise_overall_results = True
config.runs_per_agent = 10

# Per-agent-family hyperparameters. Only the Policy_Gradient_Agents entry is
# visible in this chunk.
config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.02,
        "nn_layers": 2,
        "nn_start_units": 20,
        "nn_unit_decay": 1.0,
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.2,  # presumably PPO's clipping epsilon — verify against PPO_Agent
        "episodes_per_learning_round": 7,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        # TRUNCATED in SOURCE: remaining entries, closing braces, and the
        # rest of the script are missing from this chunk.
# Experiment script: configure a DDPG / DDPG+HER run on the project's
# Fetch_Reach_Environment wrapper.
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# TRUNCATED — the hyperparameters dict is cut off inside the "Critic"
# sub-dict; the script's tail (e.g. a run call) is missing.
from Actor_Critic_Agents.DDPG_Agent import DDPG_Agent
from DDPG_HER_Agent import DDPG_HER_Agent
from Data_Structures.Config import Config
from Fetch_Reach_Environment import Fetch_Reach_Environment
from Utility_Functions import run_games_for_agents

# Top-level run settings (plain attribute assignments on the project Config).
config = Config()
config.seed = 100
config.environment = Fetch_Reach_Environment()
config.max_episodes_to_run = 2000
config.file_to_save_data_results = "Results_Data.pkl"
config.file_to_save_data_results_graph = "Results_Graph.png"
config.visualise_individual_results = True
config.visualise_overall_results = True
config.runs_per_agent = 1
config.use_GPU = False

# Actor-critic hyperparameters, split per network. Note the Critic's learning
# rate (0.01) is 10x the Actor's (0.001) as far as this chunk shows.
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "nn_layers": 5,
            "nn_start_units": 50,
            "nn_unit_decay": 1.0,
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,  # presumably the soft target-network update rate — TODO confirm
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            # TRUNCATED in SOURCE: remaining Critic entries, closing braces,
            # and the rest of the script are missing from this chunk.
# Experiment script: configure DDPG / DDPG-HER on the real Gym
# "FetchReach-v1" environment (created via gym.make, unlike the project
# wrapper used in sibling scripts); presumably driven by Agents.Trainer —
# TODO confirm, the Trainer invocation is not visible in this chunk.
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# TRUNCATED — the hyperparameters dict is cut off inside the "Actor" sub-dict.
import gym
from Actor_Critic_Agents.DDPG import DDPG
from Agents.Actor_Critic_Agents.DDPG_HER import DDPG_HER
from Data_Structures.Config import Config
from Agents.Trainer import Trainer

# Top-level run settings. This script uses a different field set from the
# sibling scripts (num_episodes_to_run vs max_episodes_to_run, etc.),
# suggesting a newer Config schema.
config = Config()
config.seed = 1
config.environment = gym.make("FetchReach-v1")
config.num_episodes_to_run = 2
config.file_to_save_data_results = None  # None: results are not persisted to disk
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Actor-critic hyperparameters; hidden sizes are given explicitly here
# ([50, 50]) instead of the nn_layers/nn_start_units scheme used elsewhere.
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,  # presumably the soft target-network update rate — TODO confirm
            # TRUNCATED in SOURCE: remaining Actor entries, the Critic
            # sub-dict, closing braces, and the rest of the script are
            # missing from this chunk.
# Experiment script: configure a multi-agent comparison (DQN variants,
# REINFORCE, PPO, genetic / hill-climbing baselines) on the project's
# Cart_Pole_Environment wrapper; presumably consumed by
# run_games_for_agents — TODO confirm, the call is not visible in this chunk.
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# TRUNCATED — the hyperparameters dict is cut off inside the "DQN_Agents"
# entry; the script's tail is missing.
from PPO_Agent import PPO_Agent
from Data_Structures.Config import Config
from Agents.DQN_Agents.DDQN_Agent import DDQN_Agent
from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from Agents.DQN_Agents.DQN_Agent import DQN_Agent
from Agents.DQN_Agents.DQN_Agent_With_Fixed_Q_Targets import DQN_Agent_With_Fixed_Q_Targets
from Environments.Open_AI_Gym_Environments.Cart_Pole_Environment import Cart_Pole_Environment
from Agents.Policy_Gradient_Agents.REINFORCE_Agent import REINFORCE_Agent
from Agents.Stochastic_Policy_Search_Agents.Genetic_Agent import Genetic_Agent
from Agents.Stochastic_Policy_Search_Agents.Hill_Climbing_Agent import Hill_Climbing_Agent
from Utilities.Utility_Functions import run_games_for_agents

# Top-level run settings (plain attribute assignments on the project Config).
config = Config()
config.seed = 100
config.environment = Cart_Pole_Environment()
config.max_episodes_to_run = 2000
config.file_to_save_data_results = "Results_Data.pkl"
config.file_to_save_data_results_graph = "Results_Graph.png"
config.visualise_individual_results = True
config.visualise_overall_results = True
config.runs_per_agent = 1

# Per-agent-family hyperparameters. Only the DQN_Agents entry is visible in
# this chunk.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 256,
        "buffer_size": 40000,  # presumably the replay-buffer capacity — TODO confirm
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        # TRUNCATED in SOURCE: remaining entries, other agent families,
        # closing braces, and the rest of the script are missing from this
        # chunk.