Example #1
    def __init__(self, config):
        Base_Agent.__init__(self, config)

        self.max_sub_policy_timesteps = config.hyperparameters["LOWER_LEVEL"][
            "max_lower_level_timesteps"]
        # Fill in Trainer defaults for any hyperparameters not explicitly overridden
        self.config.hyperparameters = Trainer.add_default_hyperparameters_if_not_overriden(
            {"OPEN": self.config.hyperparameters})
        self.config.hyperparameters = self.config.hyperparameters["OPEN"]

        self.higher_level_state = None  #true state of environment
        self.higher_level_next_state = None

        self.higher_level_reward = None
        self.lower_level_reward = None

        self.higher_level_done = False
        self.lower_level_done = False

        self.goal = None

        self.lower_level_state = None  #state of environment with goal appended
        self.lower_level_next_state = None

        # Build the lower-level agent: a DDPG agent acting in a wrapped environment
        # that appends the current goal to the state
        self.lower_level_agent_config = copy.deepcopy(config)
        self.lower_level_agent_config.hyperparameters = self.lower_level_agent_config.hyperparameters[
            "LOWER_LEVEL"]

        self.lower_level_agent_config.environment = Lower_Level_Agent_Environment_Wrapper(
            self.environment, self, self.max_sub_policy_timesteps)
        self.lower_level_agent = DDPG(self.lower_level_agent_config)

        # Infinite win threshold so the lower-level agent never halts training on its own
        self.lower_level_agent.average_score_required_to_win = float("inf")

        print("LOWER LEVEL actor {} to {}".format(
            self.lower_level_agent.actor_local.input_dim,
            self.lower_level_agent.actor_local.output_dim))

        # Build the higher-level agent: a DDPG variant that sets goals for the lower-level agent
        self.higher_level_agent_config = copy.deepcopy(config)
        self.higher_level_agent_config.hyperparameters = self.higher_level_agent_config.hyperparameters[
            "HIGHER_LEVEL"]
        self.higher_level_agent_config.environment = Higher_Level_Agent_Environment_Wrapper(
            self.environment, self)
        self.higher_level_agent = HIRO_Higher_Level_DDPG_Agent(
            self.higher_level_agent_config, self.lower_level_agent.actor_local)

        print("HIGHER LEVEL actor {} to {}".format(
            self.higher_level_agent.actor_local.input_dim,
            self.higher_level_agent.actor_local.output_dim))

        # Buffers tracking the lower-level states and actions seen at each step
        self.step_lower_level_states = []
        self.step_lower_level_action_seen = []
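
For orientation, below is a minimal sketch of the nested hyperparameter layout this constructor reads: a "LOWER_LEVEL" block (which must contain "max_lower_level_timesteps" alongside the lower-level DDPG settings) and a "HIGHER_LEVEL" block for the higher-level agent. Only the keys the constructor touches directly are shown; the value and the elided DDPG settings are placeholders, not the repository's defaults.

# Sketch of the hyperparameter structure expected by the constructor above.
# Only keys accessed directly in __init__ are shown; each block would also
# carry the DDPG settings for its sub-agent (placeholders here).
hyperparameters = {
    "LOWER_LEVEL": {
        "max_lower_level_timesteps": 5,  # placeholder value
        # ... lower-level DDPG settings ...
    },
    "HIGHER_LEVEL": {
        # ... higher-level DDPG settings ...
    },
}

# Mirrors the access pattern in __init__
max_sub_policy_timesteps = hyperparameters["LOWER_LEVEL"]["max_lower_level_timesteps"]
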
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8
    }
}


trainer = Trainer(config, [DQN_HER])
config.hyperparameters = trainer.add_default_hyperparameters_if_not_overriden(config.hyperparameters)
config.hyperparameters = config.hyperparameters["DQN_Agents"]
agent = DQN_HER(config)
agent.reset_game()

def test_initiation():
    """Tests whether DQN_HER initiates correctly"""
    config.hyperparameters["batch_size"] = 64
    agent = DQN_HER(config)
    agent.reset_game()


    # With batch_size 64 and HER_sample_proportion 0.8, int(0.2 * 64) = 12 transitions
    # come from the ordinary buffer and the remaining 52 from the HER buffer
    assert agent.ordinary_buffer_batch_size == int(0.2 * 64)
    assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64)

    assert agent.q_network_local.input_dim == 8