import gym

from agents.policy_gradient_agents.PPO import PPO
from agents.actor_critic_agents.DDPG import DDPG
from agents.actor_critic_agents.TD3 import TD3
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.9,
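        # (remaining "Policy_Gradient_Agents" keys are truncated in this snippet)
    },
}

# A minimal sketch, not part of the original file: assuming the repo's usual
# Trainer entry point, the imported agents would be run against this config as:
AGENTS = [TD3, PPO, DDPG]
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()
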
from agents.hierarchical_agents.SNN_HRL import SNN_HRL
from agents.policy_gradient_agents.PPO import PPO
from agents.policy_gradient_agents.REINFORCE import REINFORCE
from environments.FaceDiscreete import FaceEnvironementDiscreete
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = FaceEnvironementDiscreete(
    "../weights/blg_small_12_5e-06_5e-05_2_8_small_big_noisy_first_True_512")
config.num_episodes_to_run = 5
config.file_to_save_data_results = "Data_and_Graphs/FaceDiscreete.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/FaceDiscreete.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 2
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
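        # (remaining "Actor" keys and the "Critic" block are truncated here)
    },
}

# Sketch only, not in the source: once config.hyperparameters is assembled from
# the dict above, the repo's standard run loop for the imported agents would be:
trainer = Trainer(config, [SNN_HRL, PPO, REINFORCE])
trainer.run_games_for_agents()
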
from environments.Long_Corridor_Environment import Long_Corridor_Environment
from models.DQN_agents.DQN import DQN
from models.Trainer import Trainer
from models.hierarchical_agents.SNN_HRL import SNN_HRL
from models.hierarchical_agents.h_DQN import h_DQN
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
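            # (remaining CONTROLLER keys and the META_CONTROLLER block are truncated)
        },
    },
}

# Hedged sketch, not from the source: the usual entry point for comparing the
# imported agents on the long-corridor task would be:
trainer = Trainer(config, [DQN, h_DQN, SNN_HRL])
trainer.run_games_for_agents()
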
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from environments.DMP_Env_1D_dynamic import deep_mobile_printing_1d1r

config = Config()
config.seed = 1
# Plan choices: 0: sin, 1: gaussian, 2: step
config.environment = deep_mobile_printing_1d1r()
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "results/data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = True
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
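        # (remaining "Actor_Critic_Agents" keys are truncated in this snippet)
    },
}

# Sketch, not in the source: the discrete-action SAC variant imported above
# would typically be run via the repo's Trainer:
trainer = Trainer(config, [SAC_Discrete])
trainer.run_games_for_agents()
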
import sys
from os.path import abspath, dirname

sys.path.append(dirname(dirname(abspath(__file__))))

import gym

from agents.actor_critic_agents.A2C import A2C
from agents.actor_critic_agents.A3C import A3C
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("gym_boxworld:boxworldRandomSmall-v0")
config.num_episodes_to_run = int(1e3)
config.file_to_save_data_results = "results/data_and_graphs/Boxworld_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Boxworld_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.0001,
        # "linear_hidden_units": [20, 10],
import gym

from agents.DQN_agents.DDQN import DDQN
from agents.hierarchical_agents.HRL.HRL import HRL
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from agents.hierarchical_agents.h_DQN import h_DQN

config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 1000
config.file_to_save_data_results = "data_and_graphs/Taxi_data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Taxi_graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Shared settings used by several of the agents below.
linear_hidden_units = [32, 32]
learning_rate = 0.001
buffer_size = 100000
batch_size = 256
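# Sketch (assumption, not from the source): the shared values above are
# typically spliced into the per-agent-family hyperparameters dict, e.g.:
config.hyperparameters = {
    "DQN_Agents": {
        "linear_hidden_units": linear_hidden_units,
        "learning_rate": learning_rate,
        "buffer_size": buffer_size,
        "batch_size": batch_size,
    },
}
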
config.seed = 1
config.environment = deep_mobile_printing_3d1r(plan_choose=PLAN_CHOICE)
config.num_episodes_to_run = 5000
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.GPU = "cuda:1"
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False

OUT_FILE_NAME = "SAC_3d_" + PLAN_NAME + "_seed_" + str(config.seed)
config.save_model_path = "/mnt/NAS/home/WenyuHan/SNAC/SAC/3D/dynamic/" + OUT_FILE_NAME + "/"
config.file_to_save_data_results = config.save_model_path + "Results_Data.pkl"
config.file_to_save_results_graph = config.save_model_path + "Results_Graph.png"
if not os.path.exists(config.save_model_path):
    os.makedirs(config.save_model_path)

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.0005,
        "linear_hidden_units": [64, 256, 64],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 1.0,
        "normalise_rewards": False,
        "exploration_worker_difference": 2.0,
        "clip_rewards": True,
if __name__ == '__main__':
    import gym

    from utilities.data_structures.Config import Config

    ## envs import ##
    from environments.carla_enviroments import env_v1_ObstacleAvoidance

    # net = q_network_toa(n_action=4)
    # net.to('cuda')
    # input = torch.rand(size=(10, 3, 224, 224)).to('cuda')
    # q1, q2 = net(input)

    config = Config()
    config.seed = 1
    config.environment = gym.make("ObstacleAvoidance-v0")
    config.num_episodes_to_run = 2000
    config.file_to_save_data_results = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/carla_obstacle_avoidance/data.pkl"
    config.file_to_save_results_graph = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/carla_obstacle_avoidance/data.png"
    config.show_solution_score = False
    config.visualise_individual_results = True
    config.visualise_overall_agent_results = True
    config.standard_deviation_results = 1.0
    config.runs_per_agent = 1
    config.use_GPU = True
    config.overwrite_existing_results_file = False
    config.randomise_random_seed = True
    config.save_model = True
    config.resume = True
    config.resume_path = ''
    config.backbone_pretrain = True
from gym.wrappers import FlattenDictWrapper

from agents.DQN_agents.DQN_HER import DQN_HER
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
config.file_to_save_data_results = None  # "Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  # "Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
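        # (remaining "DQN_Agents" keys are truncated in this snippet)
    },
}

# Sketch, not in the source: the repo's HER demo compares plain DQN against
# DQN with Hindsight Experience Replay on the same config, along these lines:
trainer = Trainer(config, [DQN_HER, DQN])
trainer.run_games_for_agents()
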
import gym

from agents.DQN_agents.DDQN import DDQN
from agents.hierarchical_agents.HRL.HRL import HRL
from agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Cart_Pole_data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Cart_Pole.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.load_model = False

# Note: the loss may not be drawing a random sample; otherwise it wouldn't
# jump around this much.
linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 for Taxi
from agents.DQN_agents.DQN_HER import DQN_HER
from environments.j2n6s300.HER_env_tf import j2n6s300_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from datetime import datetime

now = datetime.now()  # current date and time

num_episodes_to_run = 500
eps_decay_rate_denom = round(num_episodes_to_run / 6)

config = Config()
config.seed = 1
config.environment = j2n6s300_Environment()
config.num_episodes_to_run = num_episodes_to_run
config.file_to_save_data_results = "Data_and_Graphs/{}jaco.pkl".format(now.strftime("%Y-%m-%d_%H-%M-%S_"))
config.file_to_save_results_graph = "Data_and_Graphs/{}jaco.png".format(now.strftime("%Y-%m-%d_%H-%M-%S_"))
config.show_solution_score = False
config.visualise_results_while_training = True
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.load_model = False
config.load_model_path = "Models/.pt"
config.save_model = False
# config.save_model_path = "Models/{}model.pt".format(now.strftime("%Y-%m-%d_%H-%M-%S_"))
config.save_model_path = "Models/DQN_HER_demo_curr.pt"
import gym

from agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.actor_critic_agents.A3C import A3C
from agents.policy_gradient_agents.PPO import PPO
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DDQN import DDQN
from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from agents.DQN_agents.DQN import DQN
from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.01,
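        # (remaining "DQN_Agents" keys are truncated in this snippet)
    },
}

# Sketch, not in the source: the repo's CartPole comparison typically hands the
# imported DQN-family agents to Trainer in one call:
AGENTS = [DQN, DQN_With_Fixed_Q_Targets, DDQN,
          DDQN_With_Prioritised_Experience_Replay, Dueling_DDQN]
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()
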
config.resume = False
config.resume_path = r'E:\reinforcement-learning-based-driving-decision-in-Carla\results\Models\ObstacleAvoidance-v0\DDQN with Prioritised Replay\20200611150242\rolling_score_68.0417.model'
config.backbone_pretrain = False
config.force_explore_mode = True
# When the std of the rolling score over the last 10-episode window is smaller
# than this value, start explore mode.
config.force_explore_stare_e = 0.2
# Force exploration only when the current score is bigger than
# 0.8 * max(rolling score[-10:]).
config.force_explore_rate = 0.95

## data and graphs save dir ##
data_results_root = os.path.join(
    os.path.dirname(__file__) + "/data_and_graphs/carla_obstacle_avoidance",
    config.log_base)
while os.path.exists(data_results_root):
    data_results_root += '_'
os.makedirs(data_results_root)
config.file_to_save_data_results = os.path.join(data_results_root, "data.pkl")
config.file_to_save_results_graph = os.path.join(data_results_root, "data.png")

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 1e-1,
        "batch_size": 256,
        "buffer_size": 20000,
        "epsilon": 1.0,
        "epsilon_decay_rate_denominator": 1.,
        "discount_rate": 0.9,
        "tau": 0.01,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.1,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
height = 15
width = 15
random_goal_place = False
# The state pairs agent position with goal position when the goal is
# randomised, so the state count is squared in that case.
num_possible_states = (height * width) ** (1 + 1 * random_goal_place)
embedding_dimensions = [[num_possible_states, 20]]
print("Num possible states ", num_possible_states)

config.environment = Four_Rooms_Environment(
    height, width, stochastic_actions_probability=0.0,
    random_start_user_place=True, random_goal_place=random_goal_place)
config.num_episodes_to_run = 1000
config.file_to_save_data_results = "data_and_graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "data_and_graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "linear_hidden_units": [30, 10],
        "learning_rate": 0.01,
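        # (remaining "DQN_Agents" keys are truncated in this snippet)
    },
}

# Sketch only: assuming the truncated header of this file imports an agent such
# as DQN along with Trainer, the standard run would be:
trainer = Trainer(config, [DQN])
trainer.run_games_for_agents()
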
from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer
from agents.actor_critic_agents import A2C, A3C, DDPG, DDPG_HER
from gym.core import Wrapper
from torch.cuda import is_available

config = Config()
config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 5
config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = is_available()
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
config.seed = 0
config.debug_mode = True
config.wandb_log = True

config.hyperparameters = {
from environments.Atari_Environment import make_atari_game
from models.DQN_agents.DDQN import DDQN
from models.Trainer import Trainer
from models.hierarchical_agents.HRL.HRL import HRL
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Note: the loss may not be drawing a random sample; otherwise it wouldn't
# jump around this much.
linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 for Taxi
buffer_size = 1000000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer
from datetime import datetime
import os

# Assumption: the environment class comes from the same module as in the
# companion jaco script; the import was missing from this snippet.
from environments.j2n6s300.HER_env_tf import j2n6s300_Environment

now = datetime.now()  # current date and time
date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
path = 'Data_and_Graphs/results_' + date_str + '/'
os.makedirs(path, exist_ok=True)

config = Config()
config.seed = 1
config.environment = j2n6s300_Environment(proxyID='Env1')
config.num_episodes_to_run = 1
config.file_to_save_config = path + "config.json"
config.file_to_save_data_results = path + "jaco_DDPG-HER.pkl"
config.file_to_save_results_graph = path + "jaco_DDPG-HER.png"
config.show_solution_score = False
config.visualise_results_while_training = True
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.load_model = False
config.load_model_path = "Models/model.pt"
config.save_model = True
config.save_model_path = "Models/{}model.pt".format(
    now.strftime("%Y-%m-%d_%H-%M-%S_"))
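# Sketch, not in the source: given the DDPG-HER result filenames above, the
# script presumably ends by running that agent (assuming DDPG_HER is imported
# in the truncated part of this file):
trainer = Trainer(config, [DDPG_HER])
trainer.run_games_for_agents()
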
import gym

from agents.actor_critic_agents.DDPG import DDPG
from agents.actor_critic_agents.SAC import SAC
from agents.actor_critic_agents.TD3 import TD3
from agents.hierarchical_agents.DIAYN import DIAYN
from agents.policy_gradient_agents.PPO import PPO
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
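        # (remaining "Actor" keys and the "Critic" block are truncated here)
    },
}

# Hedged sketch of the usual completion, not from the source: nest the
# per-network dicts under the "Actor_Critic_Agents" family key, then run the
# imported continuous-control agents via Trainer.
config.hyperparameters = {"Actor_Critic_Agents": actor_critic_agent_hyperparameters}
trainer = Trainer(config, [SAC, TD3, DDPG])
trainer.run_games_for_agents()
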
print(symbol)
gym_anytrading.register_new_kzz(symbol)
config.environment = gym.make('kzz-v1')
# config.environment.update_df()

# column_list = ['turn', 'pctChg']
column_list = ["test2"]
# column_list = ["turn", "pctChg", "peTTM", "psTTM", "pcfNcfTTM", "pbMRQ"]
column_list_str = "_".join(column_list)
# config.environment.update_df(fn=None, column_list=column_list)
config.environment.update_df(fn=None, column_list=None)
# config.environment.update_df(fn=lambda df: df.head(100), column_list=column_list)

# config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 50
# config.num_episodes_to_run = 450
config.file_to_save_data_results = "results/data_and_graphs/stocks_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/stocks_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.model_path = r'drive/My Drive/l_gym/Models/%s' % column_list_str
config.save_model = False
config.load_model = True
config.run_test = True
config.run_test_path = r"drive/My Drive/l_gym/data_and_graphs/%s/%s/{}_run_test.png" % (
    symbol, column_list_str)