def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
    env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                     visualization_parameters=self.visualization_parameters)

    for agent_params in self.agents_params:
        agent_params.task_parameters = task_parameters

    # we need to build the hierarchy in reverse order (from the bottom up) in order for the spaces of each level
    # to be known
    level_managers = []
    current_env = env
    # out_action_space = env.action_space
    for level_idx, agent_params in reversed(list(enumerate(self.agents_params))):
        agent_params.name = "agent_{}".format(level_idx)
        agent_params.is_a_highest_level_agent = level_idx == 0
        agent_params.is_a_lowest_level_agent = level_idx == len(self.agents_params) - 1

        agent = short_dynamic_import(agent_params.path)(agent_params)

        level_manager = LevelManager(
            agents=agent,
            environment=current_env,
            real_environment=env,
            steps_limit=EnvironmentSteps(1) if level_idx == 0 else self.consecutive_steps_to_run_non_top_levels,
            should_reset_agent_state_after_time_limit_passes=level_idx > 0,
            name="level_{}".format(level_idx)
        )
        current_env = level_manager
        level_managers.insert(0, level_manager)

    return level_managers, [env]
def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
    if self.env_params:
        # environment loading
        self.env_params.seed = task_parameters.seed
        self.env_params.experiment_path = task_parameters.experiment_path
        env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                         visualization_parameters=self.visualization_parameters)
    else:
        env = None

    # Only DQN variants and NEC are supported at this point.
    assert isinstance(self.agent_params, (DQNAgentParameters, NECAgentParameters))
    # Only episodic memories are supported, for evaluating the sequential doubly robust estimator.
    assert isinstance(self.agent_params.memory, EpisodicExperienceReplayParameters)

    # agent loading
    self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
    self.agent_params.name = "agent"
    self.agent_params.is_batch_rl_training = True
    self.agent_params.network_wrappers['main'].should_get_softmax_probabilities = True

    if 'reward_model' not in self.agent_params.network_wrappers:
        # The user hasn't defined params for the reward model, so reuse the params of the 'main' network.
        self.agent_params.network_wrappers['reward_model'] = deepcopy(self.agent_params.network_wrappers['main'])

    self.agent = short_dynamic_import(self.agent_params.path)(self.agent_params)
    agents = {'agent': self.agent}

    if not self.is_collecting_random_dataset:
        self.experience_generating_agent_params.visualization.dump_csv = False
        self.experience_generating_agent_params.task_parameters = task_parameters
        self.experience_generating_agent_params.name = "experience_gen_agent"
        self.experience_generating_agent_params.network_wrappers['main'].should_get_softmax_probabilities = True

        # These need to be set manually, as they are normally set automatically only for the default agent.
        self.experience_generating_agent_params.input_filter = self.agent_params.input_filter
        self.experience_generating_agent_params.output_filter = self.agent_params.output_filter

        self.experience_generating_agent = short_dynamic_import(
            self.experience_generating_agent_params.path)(self.experience_generating_agent_params)
        agents['experience_generating_agent'] = self.experience_generating_agent

    if not env and not self.agent_params.memory.load_memory_from_file_path:
        screen.warning("A BatchRLGraph requires setting a dataset to load into the agent's memory or alternatively "
                       "using an environment to create a (random) dataset from. This agent should only be used for "
                       "inference. ")

    # Set up the level manager. Although each agent will be used separately, both agents have to be
    # initialized together with the LevelManager so that they are both properly initialized.
    level_manager = LevelManager(agents=agents, environment=env, name="main_level",
                                 spaces_definition=self.spaces_definition)

    if env:
        return [level_manager], [env]
    else:
        return [level_manager], []
def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
    if self.env_params:
        # environment loading
        self.env_params.seed = task_parameters.seed
        self.env_params.experiment_path = task_parameters.experiment_path
        env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                         visualization_parameters=self.visualization_parameters)
    else:
        env = None

    # Only DQN variants and NEC are supported at this point.
    assert isinstance(self.agent_params, (DQNAgentParameters, NECAgentParameters))
    # Only episodic memories are supported, for evaluating the sequential doubly robust estimator.
    assert isinstance(self.agent_params.memory, EpisodicExperienceReplayParameters)

    # agent loading
    self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
    self.agent_params.name = "agent"
    self.agent_params.is_batch_rl_training = True

    if 'reward_model' not in self.agent_params.network_wrappers:
        # The user hasn't defined params for the reward model, so reuse the params of the 'main' network.
        self.agent_params.network_wrappers['reward_model'] = deepcopy(self.agent_params.network_wrappers['main'])

    agent = short_dynamic_import(self.agent_params.path)(self.agent_params)

    if not env and not self.agent_params.memory.load_memory_from_file_path:
        screen.warning("A BatchRLGraph requires setting a dataset to load into the agent's memory or alternatively "
                       "using an environment to create a (random) dataset from. This agent should only be used for "
                       "inference. ")

    # set level manager
    level_manager = LevelManager(agents=agent, environment=env, name="main_level",
                                 spaces_definition=self.spaces_definition)

    if env:
        return [level_manager], [env]
    else:
        return [level_manager], []
def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
    self.env_params.seed = task_parameters.seed
    env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                     visualization_parameters=self.visualization_parameters)

    for agent_params in self.agents_params:
        agent_params.task_parameters = task_parameters

    # we need to build the hierarchy in reverse order (from the bottom up) in order for the spaces of each level
    # to be known
    level_managers = []
    current_env = env
    # out_action_space = env.action_space
    for level_idx, agent_params in reversed(list(enumerate(self.agents_params))):
        # TODO: the code below is specific for HRL on observation scale in action space
        # if level_idx == 0:
        #     # top level agents do not get directives
        #     in_action_space = None
        # else:
        #     pass
        #     attention_size = (env.state_space['observation'].shape - 1)//4
        #     in_action_space = AttentionActionSpace(shape=2, low=0, high=env.state_space['observation'].shape - 1,
        #                                            forced_attention_size=attention_size)
        #     agent_params.output_filter.action_filters['masking'].set_masking(0, attention_size)

        agent_params.name = "agent_{}".format(level_idx)
        agent_params.is_a_highest_level_agent = level_idx == 0

        agent = short_dynamic_import(agent_params.path)(agent_params)

        level_manager = LevelManager(
            agents=agent,
            environment=current_env,
            real_environment=env,
            steps_limit=self.consecutive_steps_to_run_each_level[level_idx],
            should_reset_agent_state_after_time_limit_passes=level_idx > 0,
            name="level_{}".format(level_idx)
        )
        current_env = level_manager
        level_managers.insert(0, level_manager)

        # out_action_space = in_action_space

    return level_managers, [env]
def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
    # environment loading
    self.env_params.seed = task_parameters.seed
    env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                     visualization_parameters=self.visualization_parameters)

    # agent loading
    self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
    self.agent_params.name = "agent"
    agent = short_dynamic_import(self.agent_params.path)(self.agent_params)

    # set level manager
    level_manager = LevelManager(agents=agent, environment=env, name="main_level")

    return [level_manager], [env]
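# ---------------------------------------------------------------------------
# Usage sketch (assumption): the methods above look like the _create_graph()
# hooks of rl_coach-style graph managers. The example below shows how such a
# manager is typically driven. BasicRLGraphManager, GymVectorEnvironment,
# SimpleSchedule, TaskParameters, create_graph() and improve() are assumed
# names from that style of API; they are not defined in the snippets above.
# ---------------------------------------------------------------------------
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.base_parameters import TaskParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

# Build a single-level graph: one DQN agent interacting with a Gym environment.
graph_manager = BasicRLGraphManager(
    agent_params=DQNAgentParameters(),
    env_params=GymVectorEnvironment(level='CartPole-v0'),
    schedule_params=SimpleSchedule()
)

# create_graph() calls _create_graph() internally, wiring the agent, the
# environment and the LevelManager together before training starts.
graph_manager.create_graph(TaskParameters(experiment_path='./experiments/cartpole_dqn'))
graph_manager.improve()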