from agents.hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size":
            256,
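
# Hedged usage sketch (not part of the original example): in this framework a
# Config is normally passed to Trainer along with a list of agent classes; the
# agent list below is illustrative, built from the imports above.
AGENTS = [SNN_HRL, DQN, h_DQN]
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()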

Example #2

from gym.wrappers import FlattenDictWrapper
from agents.DQN_agents.DQN_HER import DQN_HER
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
config.file_to_save_data_results = None  #"Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  #"Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,

Example #3

# This example begins mid-script in the source; the argparse setup, Config
# creation, and agent-class imports are omitted. os and gym are imported here
# because the visible code uses them.
import os
import gym

str_to_obj = {
    'DDQN': DDQN,
    'SAC_Discrete': SAC_Discrete,
    'DIAYN': DIAYN,
    'DBH': DBH
}
if args.rts:
    config.rts()
    AGENTS = [DDQN, SAC_Discrete, DIAYN, DBH]

else:
    AGENTS = [str_to_obj[i] for i in args.algorithms]
    config.environment_name = args.environment
    config.environment = gym.make(config.environment_name)
    config.eval = args.evaluate
    config.seed = args.seed
    config.num_episodes_to_run = args.num_episodes
    config.runs_per_agent = args.n_trials
    config.use_GPU = args.use_GPU
    config.save_results = args.save_results
    config.run_prefix = args.run_prefix
    config.train_existing_model = args.tem
    config.save_directory = 'results/{}'.format(config.run_prefix)
    if not os.path.exists(config.save_directory):
        os.makedirs(config.save_directory)
    config.visualise_overall_agent_results = True
    config.standard_deviation_results = 1.0
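
# Hedged sketch of the typical continuation (not in the original snippet):
# hand the selected agents to Trainer, assuming Trainer is imported as in the
# other examples in this document.
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()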

Example #4

linear_hidden_units = [128, 128, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer

from agents.DQN_agents import DQN, DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay, DRQN

import wandb
from gym.core import Wrapper
from torch.cuda import is_available

config = Config()

config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 5_000

config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.runs_per_agent = 1
config.use_GPU = is_available()
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
config.model = None
config.seed = 0

config.debug_mode = True
config.wandb_log = True
config.wandb_job_type = "testing"
config.wandb_entity = "rafael_piacsek"
config.wandb_tags = ["initial testing"]
config.wandb_model_log_freq = 1_000
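
# Hedged sketch (not in the original example) showing how the variables defined
# above the imports are typically wired into the config and run. The
# hyperparameter keys and the agent list are assumptions modeled on the other
# examples in this document.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "linear_hidden_units": linear_hidden_units,
    }
}
trainer = Trainer(config, [DQN, DDQN])
trainer.run_games_for_agents()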

Example #5

import sys
from os.path import dirname, abspath

sys.path.append(dirname(dirname(abspath(__file__))))

import gym

from agents.actor_critic_agents.A2C import A2C
from agents.actor_critic_agents.A3C import A3C

from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("gym_boxworld:boxworldRandomSmall-v0")
config.num_episodes_to_run = int(1e3)
config.file_to_save_data_results = "results/data_and_graphs/Boxworld_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Boxworld_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.0001,