# Experiment configuration: hierarchical agents (h_DQN / SNN-HRL) vs. DQN on the
# Long Corridor environment.  This chunk only sets up the Config object; the
# hyperparameter dict (and presumably the Trainer invocation) continue beyond
# the visible portion of the file.
# NOTE(review): SNN_HRL, Trainer, DQN and h_DQN are imported but not referenced
# in the visible portion — presumably used after the truncation point; confirm.
from hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
# Environment stochasticity: probability parameter passed straight through to
# the Long Corridor constructor below.
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
# Where results data / the results graph get written.
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
# Each agent is run 3 times (results presumably averaged by the Trainer — confirm).
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
# Per-agent hyperparameters.  h_DQN uses a two-level controller/meta-controller
# scheme; only the start of the CONTROLLER settings is visible here.
config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            # NOTE(review): dict literal is truncated at this point — the
            # remainder of the hyperparameters is not visible in this chunk.
# Experiment configuration: DQN and DQN+HER on a 14-bit Bit Flipping
# environment.  Only the start of the hyperparameter dict is visible; the rest
# of the script continues beyond this chunk.
# NOTE(review): FlattenDictWrapper, DQN_HER, Trainer and DQN are imported but
# not referenced in the visible portion — presumably used after the truncation
# point; confirm.
from gym.wrappers import FlattenDictWrapper
from agents.DQN_agents.DQN_HER import DQN_HER
from Bit_Flipping_Environment import Bit_Flipping_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
# 14 bits — the environment's state/goal size.
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
# Result persistence disabled; the commented strings show the paths that were
# used when saving was on.
config.file_to_save_data_results = None  # "Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  # "Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
# DQN-family hyperparameters (shared by DQN and DQN_HER — confirm how the
# Trainer dispatches this key).
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        # Epsilon-greedy exploration decays with episode / this denominator —
        # exact schedule lives in the agent code; confirm.
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
        # NOTE(review): dict literal is truncated at this point — the remainder
        # of the hyperparameters is not visible in this chunk.
    # NOTE(review): this chunk starts mid-file — the opening of the dict below
    # (a name -> agent-class lookup, apparently `str_to_obj`) is outside the
    # visible portion, as are the imports and the argparse setup for `args`.
    'DDQN': DDQN,
    'SAC_Discrete': SAC_Discrete,
    'DIAYN': DIAYN,
    'DBH': DBH
}
# Choose which agents to run: either the fixed "rts" suite, or whatever was
# named on the command line (looked up through the dict above).
if args.rts:
    # NOTE(review): grouping reconstructed from a collapsed one-line source —
    # assumed both statements belong to the if-branch; confirm against the
    # original file.
    config.rts()
    AGENTS = [DDQN, SAC_Discrete, DIAYN, DBH]
else:
    AGENTS = [str_to_obj[i] for i in args.algorithms]
# Map command-line arguments onto the shared Config object.
config.environment_name = args.environment
config.environment = gym.make(config.environment_name)
config.eval = args.evaluate
config.seed = args.seed
config.num_episodes_to_run = args.num_episodes
config.runs_per_agent = args.n_trials
config.use_GPU = args.use_GPU
config.save_results = args.save_results
config.run_prefix = args.run_prefix
config.train_existing_model = args.tem
# Results are grouped per run under results/<run_prefix>; create the directory
# if this is the first run with that prefix.
config.save_directory = 'results/{}'.format(config.run_prefix)
if not os.path.exists(config.save_directory):
    os.makedirs(config.save_directory)
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
# Shared network/optimizer settings — presumably folded into a hyperparameter
# dict after the truncation point; confirm.
linear_hidden_units = [128, 128, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
# Experiment configuration: DQN-family agents on the SimpleISC (Illinois Solar
# Car) environment, with Weights & Biases logging.  The script presumably
# continues (hyperparameters, Trainer) beyond this chunk.
# NOTE(review): Trainer and the DQN-family classes are imported but not
# referenced in the visible portion — presumably used after the truncation
# point; confirm.
from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer
from agents.DQN_agents import DQN, DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay, DRQN
import wandb
from gym.core import Wrapper
from torch.cuda import is_available

config = Config()
# Discrete-action variant of the environment, wrapped in gym's base Wrapper.
# NOTE(review): a bare gym.core.Wrapper adds no behavior — confirm it is needed
# here (e.g. for isinstance checks elsewhere).
config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 5_000
config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.runs_per_agent = 1
# Use the GPU whenever CUDA reports one available.
config.use_GPU = is_available()
config.overwrite_existing_results_file = True
# Fixed seed (randomisation off) for reproducible runs.
config.randomise_random_seed = False
config.save_model = False
config.model = None
config.seed = 0
config.debug_mode = True
# Weights & Biases experiment-tracking settings.
config.wandb_log = True
config.wandb_job_type = "testing"
config.wandb_entity = "rafael_piacsek"
config.wandb_tags = ["initial testing"]
# How often (in steps/episodes — confirm which, in the Trainer) the model is
# logged to wandb.
config.wandb_model_log_freq = 1_000
# Experiment configuration: A2C / A3C on the gym_boxworld "RandomSmall"
# environment.  Only the start of the Actor-Critic hyperparameter dict is
# visible; the rest of the script continues beyond this chunk.
# NOTE(review): `sys` is used below but no `import sys` is visible in this
# chunk — confirm it exists above the visible portion, or add it.
from os.path import dirname, abspath
# Make the repository root importable when this script is run directly.
sys.path.append(dirname(dirname(abspath(__file__))))
import gym
# NOTE(review): A2C, A3C and Trainer are imported but not referenced in the
# visible portion — presumably used after the truncation point; confirm.
from agents.actor_critic_agents.A2C import A2C
from agents.actor_critic_agents.A3C import A3C
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
# Environment id includes its providing package ("gym_boxworld:").
config.environment = gym.make("gym_boxworld:boxworldRandomSmall-v0")
config.num_episodes_to_run = int(1e3)
# Where results data / the results graph get written.
config.file_to_save_data_results = "results/data_and_graphs/Boxworld_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Boxworld_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True
# Hyperparameters shared by the actor-critic agents; only the first entry is
# visible here.
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.0001,
        # NOTE(review): dict literal is truncated at this point — the remainder
        # of the hyperparameters is not visible in this chunk.