Example #1
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
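
The test above pins down the expected behaviour of the Trainer method: scores are grouped episode by episode across runs, averaged, and then offset by config.standard_deviation_results standard deviations. As a minimal illustrative sketch only (the real implementation lives in Agents.Trainer and may differ in detail), the logic the assertions check is roughly:

import numpy as np

def mean_and_std_difference_sketch(results, x):
    """Hypothetical stand-in for get_mean_and_standard_deviation_difference_results.
    results: one list of per-episode scores per run; x: config.standard_deviation_results."""
    per_episode = list(zip(*results))  # group the i-th score of every run together
    mean_results = [np.mean(scores) for scores in per_episode]
    std_results = [np.std(scores) for scores in per_episode]
    mean_minus_x_std = [m - x * s for m, s in zip(mean_results, std_results)]
    mean_plus_x_std = [m + x * s for m, s in zip(mean_results, std_results)]
    return mean_minus_x_std, mean_results, mean_plus_x_std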
Example #2
def test_add_default_hyperparameters_if_not_overriden():
    """Tests that add_default_hyperparameters_if_not_overriden function works"""
    config = Config()
    default_hyperparameter_set = {
        'output_activation': 'None', 'hidden_activations': 'relu', 'dropout': 0.0,
        'initialiser': 'default', 'batch_norm': False, 'columns_of_data_to_be_embedded': [],
        'embedding_dimensions': [], 'y_range': ()
    }
    alternative_hyperparameter_set = {
        'output_activation': "YESSS!!", 'hidden_activations': 'relu', 'dropout': 0.0,
        'initialiser': 'default', 'batch_norm': False, 'columns_of_data_to_be_embedded': [],
        'embedding_dimensions': [], 'y_range': (),
        "helo": 20
    }

    config.hyperparameters = {"DQN_Agents": {}}
    config.hyperparameters = Trainer(config, []).add_default_hyperparameters_if_not_overriden(config.hyperparameters)
    assert config.hyperparameters == {"DQN_Agents": default_hyperparameter_set}

    config.hyperparameters = {"DQN_Agents": {}, "Test": {}}
    config.hyperparameters = Trainer(config, []).add_default_hyperparameters_if_not_overriden(config.hyperparameters)
    assert config.hyperparameters == {"DQN_Agents": default_hyperparameter_set, "Test": default_hyperparameter_set}

    config.hyperparameters = {"DQN_Agents": {"helo": 20,  "output_activation": "YESSS!!"}}
    config.hyperparameters = Trainer(config, []).add_default_hyperparameters_if_not_overriden(config.hyperparameters)
    assert config.hyperparameters == {"DQN_Agents": alternative_hyperparmater_set}
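
The assertions above define the merge rule exactly: each agent group gains the default value for every hyperparameter it leaves unset, while values the user already supplied (including extra keys such as "helo") are kept. A minimal sketch of that rule, assuming the defaults are available as a flat dictionary; the real Trainer method may organise its defaults differently:

def fill_in_defaults_sketch(hyperparameters, defaults):
    """Hypothetical stand-in for add_default_hyperparameters_if_not_overriden."""
    for agent_group, overrides in hyperparameters.items():
        merged = dict(defaults)   # start from the default hyperparameter set
        merged.update(overrides)  # user-specified values always win
        hyperparameters[agent_group] = merged
    return hyperparameters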
"""Tests for the hierarchical RL agent HIRO"""
import copy

import gym
import random
import numpy as np
import torch

from Hierarchical_Agents.HIRO import HIRO
from Utilities.Data_Structures.Config import Config

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = gym.make("Pendulum-v0")
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
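
The excerpt above stops after building the Config; the code that actually launches training is cut off. Purely for orientation, a hypothetical continuation is sketched below: the Trainer(config, agents) constructor appears in the tests earlier in this collection, but the agent list and the run_games_for_agents() call are assumptions, not part of the excerpt.

# Hypothetical continuation, not part of the original file: the Trainer constructor is
# taken from the tests above, while the agent list and run_games_for_agents() call are
# assumptions about how such a config is typically consumed.
from Agents.Trainer import Trainer

AGENTS = [HIRO]
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()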
from Hierarchical_Agents.SNN_HRL import SNN_HRL
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN
from Agents.Hierarchical_Agents.h_DQN import h_DQN
from Environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size":
            256,
Example #5
from Agents.DQN_Agents.DQN_HER_Agent import DQN_HER_Agent
from Environments.Other_Enrivonments.Bit_Flipping_Environment import Bit_Flipping_Environment
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN_Agent import DQN_Agent
from Utilities.Utility_Functions import run_games_for_agents

config = Config()
config.seed = 100
config.environment = Bit_Flipping_Environment(14)
config.max_episodes_to_run = 6000
config.file_to_save_data_results = "Results_Data.pkl"
config.file_to_save_data_results_graph = "Results_Graph.png"
config.visualise_individual_results = True
config.visualise_overall_results = True
config.runs_per_agent = 3
config.use_GPU = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 500,
        "discount_rate": 0.98,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "nn_layers": 2,
Example #6
import gym

from A3C import A3C
from Agents.Policy_Gradient_Agents.PPO import PPO
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DDQN import DDQN
from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from Agents.DQN_Agents.DQN import DQN
from Agents.DQN_Agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 128,
from Agents.DQN_Agents.DDQN import DDQN
from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from Agents.DQN_Agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from Environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from Agents.Policy_Gradient_Agents.PPO import PPO
from Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 1
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
Example #8
from Environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from Agents.Policy_Gradient_Agents.PPO import PPO
from Four_Rooms_Environment import Four_Rooms_Environment
from Hierarchical_Agents.SNN_HRL import SNN_HRL
from TD3 import TD3
from Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
Example #9
import gym

from Agents.Policy_Gradient_Agents.PPO import PPO
from Agents.Actor_Critic_Agents.DDPG import DDPG
from TD3 import TD3
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config


config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 600
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False


config.hyperparameters = {
    "Policy_Gradient_Agents": {
            "learning_rate": 0.05,
            "linear_hidden_units": [30, 15],
            "final_layer_activation": "TANH",
Example #10
from A3C import A3C
from Agents.DQN_Agents.DQN_HER import DQN_HER
from DDQN import DDQN
from Environments.Four_Rooms_Environment import Four_Rooms_Environment
from Hierarchical_Agents.SNN_HRL import SNN_HRL
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Four_Rooms_Environment(
    15,
    15,
    stochastic_actions_probability=0.25,
    random_start_user_place=True,
    random_goal_place=False)

config.num_episodes_to_run = 1000
config.file_to_save_data_results = "Data_and_Graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
Example #11
from Agents.Trainer import Trainer
from DDPG import DDPG
from Hierarchical_Agents.HIRO import HIRO
from Utilities.Data_Structures.Config import Config
import gym

config = Config()
config.seed = 1
config.environment = gym.make("Hopper-v2")
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

DDPG_hyperparameters = {  # hyperparameters taken from https://arxiv.org/pdf/1802.09477.pdf
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [30, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
import gym

from Hierarchical_Agents.SNN_HRL import SNN_HRL
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN
from Agents.Hierarchical_Agents.h_DQN import h_DQN

config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 10000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size":
            256,
            "learning_rate":
Example #13
from Agents.Policy_Gradient_Agents.PPO_Agent import PPO_Agent
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DDQN_Agent import DDQN_Agent
from Agents.DQN_Agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from Agents.DQN_Agents.DQN_Agent import DQN_Agent
from Agents.DQN_Agents.DQN_Agent_With_Fixed_Q_Targets import DQN_Agent_With_Fixed_Q_Targets
from Environments.Open_AI_Gym_Environments.Cart_Pole_Environment import Cart_Pole_Environment
from Agents.Policy_Gradient_Agents.REINFORCE_Agent import REINFORCE_Agent
from Agents.Stochastic_Policy_Search_Agents.Genetic_Agent import Genetic_Agent
from Agents.Stochastic_Policy_Search_Agents.Hill_Climbing_Agent import Hill_Climbing_Agent
from Utilities.Utility_Functions import run_games_for_agents

config = Config()
config.seed = 1
config.environment = Cart_Pole_Environment()
config.max_episodes_to_run = 2000
config.file_to_save_data_results = "Results_Data.pkl"
config.file_to_save_data_results_graph = "Results_Graph.png"
config.visualise_individual_results = True
config.visualise_overall_results = True
config.runs_per_agent = 1
config.use_GPU = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 256,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
Example #14
from gym.wrappers import FlattenDictWrapper
from Agents.DQN_Agents.DQN_HER import DQN_HER
from Bit_Flipping_Environment import Bit_Flipping_Environment
from Agents.Trainer import Trainer
from Utilities.Data_Structures.Config import Config
from Agents.DQN_Agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
config.file_to_save_data_results = None #"Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None #"Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False


config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,