def __init__(self, config):
    """Builds a two-level hierarchical agent: a lower-level DDPG policy that
    pursues goals, and a higher-level agent that proposes those goals.

    Args:
        config: experiment configuration object whose ``hyperparameters``
            dict must contain "LOWER_LEVEL" and "HIGHER_LEVEL" sub-dicts.
    """
    Base_Agent.__init__(self, config)
    self.max_sub_policy_timesteps = \
        config.hyperparameters["LOWER_LEVEL"]["max_lower_level_timesteps"]

    # Fill in any hyperparameters the caller did not explicitly override.
    self.config.hyperparameters = Trainer.add_default_hyperparameters_if_not_overriden(
        {"OPEN": self.config.hyperparameters})
    self.config.hyperparameters = self.config.hyperparameters["OPEN"]

    # Higher-level transition bookkeeping (true state of the environment).
    self.higher_level_state = None
    self.higher_level_next_state = None
    self.higher_level_reward = None
    self.higher_level_done = False

    # Lower-level transition bookkeeping (environment state with the goal appended).
    self.lower_level_state = None
    self.lower_level_next_state = None
    self.lower_level_reward = None
    self.lower_level_done = False

    # Goal currently assigned by the higher level to the lower level.
    self.goal = None

    # --- Lower-level agent: a DDPG learner acting inside a wrapped
    # environment that exposes the goal-augmented state. ---
    self.lower_level_agent_config = copy.deepcopy(config)
    self.lower_level_agent_config.hyperparameters = \
        self.lower_level_agent_config.hyperparameters["LOWER_LEVEL"]
    self.lower_level_agent_config.environment = Lower_Level_Agent_Environment_Wrapper(
        self.environment, self, self.max_sub_policy_timesteps)
    self.lower_level_agent = DDPG(self.lower_level_agent_config)
    # Never let the sub-policy's own score terminate training.
    self.lower_level_agent.average_score_required_to_win = float("inf")
    print("LOWER LEVEL actor {} to {}".format(
        self.lower_level_agent.actor_local.input_dim,
        self.lower_level_agent.actor_local.output_dim))

    # --- Higher-level agent: proposes goals. It receives the lower-level
    # actor network (presumably for goal relabelling per HIRO — see
    # HIRO_Higher_Level_DDPG_Agent for the actual use). ---
    self.higher_level_agent_config = copy.deepcopy(config)
    self.higher_level_agent_config.hyperparameters = \
        self.higher_level_agent_config.hyperparameters["HIGHER_LEVEL"]
    self.higher_level_agent_config.environment = Higher_Level_Agent_Environment_Wrapper(
        self.environment, self)
    self.higher_level_agent = HIRO_Higher_Level_DDPG_Agent(
        self.higher_level_agent_config, self.lower_level_agent.actor_local)
    print("HIGHER LEVEL actor {} to {}".format(
        self.higher_level_agent.actor_local.input_dim,
        self.higher_level_agent.actor_local.output_dim))

    # Buffers of lower-level states / actions seen within the current
    # higher-level step.
    self.step_lower_level_states = []
    self.step_lower_level_action_seen = []
"tau": 0.1, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.4, "incremental_td_error": 1e-8, "update_every_n_steps": 3, "linear_hidden_units": [20, 20, 20], "final_layer_activation": "None", "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8 } } trainer = Trainer(config, [DQN_HER]) config.hyperparameters = trainer.add_default_hyperparameters_if_not_overriden(config.hyperparameters) config.hyperparameters = config.hyperparameters["DQN_Agents"] agent = DQN_HER(config) agent.reset_game() def test_initiation(): """Tests whether DQN_HER initiates correctly""" config.hyperparameters["batch_size"] = 64 agent = DQN_HER(config) agent.reset_game() assert agent.ordinary_buffer_batch_size == int(0.2 * 64) assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64) assert agent.q_network_local.input_dim == 8