Example No. 1
import gym

from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.environment = gym.make("Taxi-v2")
config.seed = 1
config.env_parameters = {}
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

linear_hidden_units = [32, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
gradient_clipping_norm = 5
update_every_n_steps = 1
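
Example No. 1 stops before its loose hyperparameter variables are attached to the config and training is started. The continuation below is a minimal sketch only: the exact hyperparameter key names and the Trainer(config, AGENTS).run_games_for_agents() call are assumed from the pattern of the other examples, not taken from this one.

config.hyperparameters = {
    "HRL": {
        "linear_hidden_units": linear_hidden_units,
        "learning_rate": learning_rate,
        "buffer_size": buffer_size,
        "batch_size": batch_size,
        "batch_norm": batch_norm,
        "embedding_dimensionality": embedding_dimensionality,
        "gradient_clipping_norm": gradient_clipping_norm,
        "update_every_n_steps": update_every_n_steps,
    }
}

# Run the HRL agent for the configured number of episodes.
trainer = Trainer(config, [HRL])
trainer.run_games_for_agents()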
Example No. 2
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
# Assumed import path for the environment used below, following the environments package pattern above.
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
Example No. 3
import gym

from drl.agents.actor_critic_agents.A2C import A2C
from drl.agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from drl.agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from drl.agents.actor_critic_agents.A3C import A3C
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = "results/data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
Example No. 4
import gym
from environments.Atari_Environment import make_atari_game
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Note: the loss does not appear to be computed on a random sample; otherwise it would not jump around this much.

linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 was used for Taxi
buffer_size = 1000000
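
The comment in Example No. 4 suspects that the loss is not being computed on a uniformly random minibatch. For reference, a minimal self-contained sketch of what uniform random sampling from a replay buffer looks like is given below; this is an illustration of the expected behaviour, not the library's own Replay_Buffer implementation.

import random
from collections import deque

class MinimalReplayBuffer:
    """Illustrative buffer that returns uniformly random minibatches of stored transitions."""

    def __init__(self, capacity, batch_size):
        self.memory = deque(maxlen=capacity)
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self):
        # random.sample draws without replacement, so each minibatch is a fresh random
        # subset of the whole buffer rather than just the most recent transitions.
        return random.sample(self.memory, self.batch_size)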
Example No. 5
import os
import sys
from os.path import dirname, abspath
sys.path.append(dirname(dirname(abspath(__file__))))
import gym
from drl.agents.Trainer import Trainer
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.hierarchical_agents.HIRO import HIRO
from drl.utilities.data_structures.Config import Config
config = Config()
config.seed = 1
config.environment = gym.make("Reacher-v2") #  Reacher-v2 "InvertedPendulum-v2") #Pendulum-v0
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "HIRO": {
Example No. 6
import gym
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.agents.hierarchical_agents.DIAYN import DIAYN
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
Example No. 7
import gym

from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.DDPG_HER import DDPG_HER
from drl.utilities.data_structures.Config import Config
from drl.agents.Trainer import Trainer

config = Config()
config.seed = 1
config.environment = gym.make("FetchReach-v1")
config.num_episodes_to_run = 1000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
Example No. 8
import gym
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.agents.hierarchical_agents.DIAYN import DIAYN
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Hopper-v2")
config.num_episodes_to_run = 1000
config.file_to_save_data_results = "data_and_graphs/Hopper_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Hopper_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
Example No. 9
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
Example No. 10
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1

height = 15
width = 15
random_goal_place = False
# The state counts the agent's cell, and also the goal's cell when the goal location is randomised.
num_possible_states = (height * width)**(1 + 1 * random_goal_place)
embedding_dimensions = [[num_possible_states, 20]]
print("Num possible states ", num_possible_states)

config.environment = Four_Rooms_Environment(
    height,
    width,
    stochastic_actions_probability=0.0,
    random_start_user_place=True,
    random_goal_place=random_goal_place)

config.num_episodes_to_run = 1000
config.file_to_save_data_results = "Data_and_Graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
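
Example No. 10 ends before the hyperparameters and the training call, so it never uses the embedding_dimensions computed above. The continuation below is an assumed sketch only: the DQN agent choice, the "DQN_Agents" key, and the specific values are illustrative, patterned on the other examples, with embedding_dimensions wired in.

from drl.agents.DQN_agents.DQN import DQN

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.01,
        "batch_size": 256,
        "buffer_size": 40000,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "linear_hidden_units": [32, 32],
        "embedding_dimensions": embedding_dimensions,
        "update_every_n_steps": 1,
    }
}

trainer = Trainer(config, [DQN])
trainer.run_games_for_agents()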
Example No. 11
import gym

from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,