# from agents.policy_gradient_agents.PPO import PPO # from environments.Four_Rooms_Environment import Four_Rooms_Environment # from agents.hierarchical_agents.SNN_HRL import SNN_HRL # from agents.actor_critic_agents.TD3 import TD3 from agents.Trainer import Trainer from utilities.data_structures.Config import Config from agents.DQN_agents.DQN import DQN import numpy as np import torch random.seed(1) np.random.seed(1) torch.manual_seed(1) config = Config() config.seed = 1 config.environment = Bit_Flipping_Environment(4) config.num_episodes_to_run = 2000 config.file_to_save_data_results = None config.file_to_save_results_graph = None config.visualise_individual_results = False config.visualise_overall_agent_results = False config.randomise_random_seed = False config.runs_per_agent = 1 config.use_GPU = False config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 64, "buffer_size": 40000, "epsilon": 0.1,
# Tail of the algorithm-name -> agent-class lookup table; the dict literal
# opens before this chunk (presumably `str_to_obj = {` — confirm upstream).
'SAC': SAC, 'DDQN': DDQN, 'SAC_Discrete': SAC_Discrete, 'DIAYN': DIAYN,
'DBH': DBH
}

# Choose which agents to run: a fixed suite when --rts is given, otherwise
# the algorithm names passed on the command line, resolved via the table.
if args.rts:
    # NOTE(review): presumably applies an RTS-specific preset to the Config —
    # confirm against the Config class definition.
    config.rts()
    AGENTS = [DDQN, SAC_Discrete, DIAYN, DBH]
else:
    AGENTS = [str_to_obj[i] for i in args.algorithms]

# Copy command-line arguments onto the shared experiment Config object.
config.environment_name = args.environment
config.environment = gym.make(config.environment_name)
config.eval = args.evaluate
config.seed = args.seed
config.num_episodes_to_run = args.num_episodes
config.runs_per_agent = args.n_trials
config.use_GPU = args.use_GPU
config.save_results = args.save_results
config.run_prefix = args.run_prefix
config.train_existing_model = args.tem

# Results go under results/<run_prefix>; create the directory if missing.
config.save_directory = 'results/{}'.format(config.run_prefix)
if not os.path.exists(config.save_directory):
    os.makedirs(config.save_directory)

config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0

# Loose hyperparameter values — presumably consumed by an agent
# hyperparameters dict later in the file; confirm downstream usage.
linear_hidden_units = [128, 128, 32]
learning_rate = 0.01
buffer_size = 100000