Example #1
    def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
        env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                         visualization_parameters=self.visualization_parameters)

        for agent_params in self.agents_params:
            agent_params.task_parameters = task_parameters

        # we need to build the hierarchy in reverse order (from the bottom up) in order for the spaces of each level
        # to be known
        level_managers = []
        current_env = env
        # out_action_space = env.action_space
        for level_idx, agent_params in reversed(list(enumerate(self.agents_params))):
            agent_params.name = "agent_{}".format(level_idx)
            agent_params.is_a_highest_level_agent = level_idx == 0
            agent_params.is_a_lowest_level_agent = level_idx == len(self.agents_params) - 1

            agent = short_dynamic_import(agent_params.path)(agent_params)

            level_manager = LevelManager(
                agents=agent,
                environment=current_env,
                real_environment=env,
                steps_limit=EnvironmentSteps(1) if level_idx == 0
                            else self.consecutive_steps_to_run_non_top_levels,
                should_reset_agent_state_after_time_limit_passes=level_idx > 0,
                name="level_{}".format(level_idx)
            )
            current_env = level_manager
            level_managers.insert(0, level_manager)

        return level_managers, [env]
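
The hierarchy above is assembled bottom-up: each LevelManager becomes the environment of the level above it, and inserting at index 0 keeps the returned list ordered from the top level down. Below is a minimal pure-Python sketch of that ordering only; the Level class is a hypothetical stand-in, not part of the example above.

# Stand-in illustration of the reversed build order; `Level` is hypothetical.
class Level:
    def __init__(self, idx, env):
        self.idx, self.env = idx, env

agents = ["top", "middle", "bottom"]      # index 0 is the highest level
levels = []
current_env = "real_env"
for idx, _ in reversed(list(enumerate(agents))):
    level = Level(idx, current_env)       # the lowest level wraps the real environment
    current_env = level                   # the next (higher) level wraps this one
    levels.insert(0, level)               # keep the list ordered top level first

assert [lvl.idx for lvl in levels] == [0, 1, 2]
assert levels[2].env == "real_env"        # bottom level acts on the real environment
assert levels[0].env is levels[1]         # top level treats level 1 as its environment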
Example #2
    def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
        if self.env_params:
            # environment loading
            self.env_params.seed = task_parameters.seed
            self.env_params.experiment_path = task_parameters.experiment_path
            env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                             visualization_parameters=self.visualization_parameters)
        else:
            env = None

        # Only DQN variants and NEC are supported at this point.
        assert(isinstance(self.agent_params, DQNAgentParameters) or isinstance(self.agent_params, NECAgentParameters))
        # Only Episodic memories are supported,
        # for evaluating the sequential doubly robust estimator
        assert(isinstance(self.agent_params.memory, EpisodicExperienceReplayParameters))

        # agent loading
        self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
        self.agent_params.name = "agent"
        self.agent_params.is_batch_rl_training = True
        self.agent_params.network_wrappers['main'].should_get_softmax_probabilities = True

        if 'reward_model' not in self.agent_params.network_wrappers:
            # user hasn't defined params for the reward model. we will use the same params as used for the 'main'
            # network.
            self.agent_params.network_wrappers['reward_model'] = deepcopy(self.agent_params.network_wrappers['main'])

        self.agent = short_dynamic_import(self.agent_params.path)(self.agent_params)
        agents = {'agent': self.agent}

        if not self.is_collecting_random_dataset:
            self.experience_generating_agent_params.visualization.dump_csv = False
            self.experience_generating_agent_params.task_parameters = task_parameters
            self.experience_generating_agent_params.name = "experience_gen_agent"
            self.experience_generating_agent_params.network_wrappers['main'].should_get_softmax_probabilities = True

            # we need to set these manually, as they are usually set for us only for the default agent
            self.experience_generating_agent_params.input_filter = self.agent_params.input_filter
            self.experience_generating_agent_params.output_filter = self.agent_params.output_filter

            self.experience_generating_agent = short_dynamic_import(
                self.experience_generating_agent_params.path)(self.experience_generating_agent_params)

            agents['experience_generating_agent'] = self.experience_generating_agent

        if not env and not self.agent_params.memory.load_memory_from_file_path:
            screen.warning("A BatchRLGraph requires setting a dataset to load into the agent's memory or alternatively "
                           "using an environment to create a (random) dataset from. This agent should only be used for "
                           "inference. ")
        # set level manager
        # - although each agent will be used separately, both agents have to be initialized together with the
        #   LevelManager so that they are both set up properly
        level_manager = LevelManager(agents=agents,
                                     environment=env, name="main_level",
                                     spaces_definition=self.spaces_definition)
        if env:
            return [level_manager], [env]
        else:
            return [level_manager], []
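
One detail worth calling out in the example above is the 'reward_model' fallback: when no reward-model network is configured, the 'main' network parameters are deep-copied so the two networks can later be tuned independently. A minimal sketch of that pattern with plain dictionaries follows; the parameter values are made up for illustration.

from copy import deepcopy

# Hypothetical network-wrapper configuration; only the copy semantics matter here.
network_wrappers = {'main': {'learning_rate': 0.00025, 'batch_size': 32}}

if 'reward_model' not in network_wrappers:
    # Fall back to the 'main' settings, but as an independent copy.
    network_wrappers['reward_model'] = deepcopy(network_wrappers['main'])

network_wrappers['reward_model']['learning_rate'] = 0.001
assert network_wrappers['main']['learning_rate'] == 0.00025   # 'main' is unchanged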
Example #3
    def _create_graph(
        self, task_parameters: TaskParameters
    ) -> Tuple[List[LevelManager], List[Environment]]:
        if self.env_params:
            # environment loading
            self.env_params.seed = task_parameters.seed
            self.env_params.experiment_path = task_parameters.experiment_path
            env = short_dynamic_import(self.env_params.path)(
                **self.env_params.__dict__,
                visualization_parameters=self.visualization_parameters)
        else:
            env = None

        # Only DQN variants and NEC are supported at this point.
        assert (isinstance(self.agent_params, DQNAgentParameters)
                or isinstance(self.agent_params, NECAgentParameters))
        # Only Episodic memories are supported,
        # for evaluating the sequential doubly robust estimator
        assert (isinstance(self.agent_params.memory,
                           EpisodicExperienceReplayParameters))

        # agent loading
        self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
        self.agent_params.name = "agent"
        self.agent_params.is_batch_rl_training = True

        if 'reward_model' not in self.agent_params.network_wrappers:
            # user hasn't defined params for the reward model. we will use the same params as used for the 'main'
            # network.
            self.agent_params.network_wrappers['reward_model'] = deepcopy(
                self.agent_params.network_wrappers['main'])

        agent = short_dynamic_import(self.agent_params.path)(self.agent_params)

        if not env and not self.agent_params.memory.load_memory_from_file_path:
            screen.warning(
                "A BatchRLGraph requires setting a dataset to load into the agent's memory or alternatively "
                "using an environment to create a (random) dataset from. This agent should only be used for "
                "inference. ")
        # set level manager
        level_manager = LevelManager(agents=agent,
                                     environment=env,
                                     name="main_level",
                                     spaces_definition=self.spaces_definition)

        if env:
            return [level_manager], [env]
        else:
            return [level_manager], []
Example #4
    def _create_graph(
        self, task_parameters: TaskParameters
    ) -> Tuple[List[LevelManager], List[Environment]]:
        self.env_params.seed = task_parameters.seed
        env = short_dynamic_import(self.env_params.path)(
            **self.env_params.__dict__,
            visualization_parameters=self.visualization_parameters)

        for agent_params in self.agents_params:
            agent_params.task_parameters = task_parameters

        # we need to build the hierarchy in reverse order (from the bottom up) in order for the spaces of each level
        # to be known
        level_managers = []
        current_env = env
        # out_action_space = env.action_space
        for level_idx, agent_params in reversed(
                list(enumerate(self.agents_params))):
            # TODO: the code below is specific for HRL on observation scale
            # in action space
            # if level_idx == 0:
            #     # top level agents do not get directives
            #     in_action_space = None
            # else:
            #     pass

            # attention_size = (env.state_space['observation'].shape - 1)//4
            # in_action_space = AttentionActionSpace(shape=2, low=0, high=env.state_space['observation'].shape - 1,
            #                             forced_attention_size=attention_size)
            # agent_params.output_filter.action_filters['masking'].set_masking(0, attention_size)

            agent_params.name = "agent_{}".format(level_idx)
            agent_params.is_a_highest_level_agent = level_idx == 0
            agent = short_dynamic_import(agent_params.path)(agent_params)

            level_manager = LevelManager(
                agents=agent,
                environment=current_env,
                real_environment=env,
                steps_limit=self.consecutive_steps_to_run_each_level[level_idx],
                should_reset_agent_state_after_time_limit_passes=level_idx > 0,
                name="level_{}".format(level_idx))
            current_env = level_manager
            level_managers.insert(0, level_manager)

            # out_action_space = in_action_space

        return level_managers, [env]
Example #5
    def _create_graph(self, task_parameters: TaskParameters) -> Tuple[List[LevelManager], List[Environment]]:
        # environment loading
        self.env_params.seed = task_parameters.seed
        env = short_dynamic_import(self.env_params.path)(**self.env_params.__dict__,
                                                         visualization_parameters=self.visualization_parameters)

        # agent loading
        self.agent_params.task_parameters = task_parameters  # TODO: this should probably be passed in a different way
        self.agent_params.name = "agent"
        agent = short_dynamic_import(self.agent_params.path)(self.agent_params)

        # set level manager
        level_manager = LevelManager(agents=agent, environment=env, name="main_level")

        return [level_manager], [env]
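
_create_graph is an internal hook; in Coach it is normally reached through the graph manager's public create_graph(task_parameters) call, as configured in a preset. The sketch below shows a typical CartPole DQN setup; the module paths, the SimpleSchedule default and the TaskParameters arguments follow the Coach presets and are assumptions here, not part of the examples above.

from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.base_parameters import TaskParameters, VisualizationParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

# Preset-style construction of a basic (single-level) graph manager.
graph_manager = BasicRLGraphManager(agent_params=DQNAgentParameters(),
                                    env_params=GymVectorEnvironment(level='CartPole-v0'),
                                    schedule_params=SimpleSchedule(),
                                    vis_params=VisualizationParameters())

# create_graph() calls _create_graph() internally and wires the agent, the
# environment and the level manager together before training starts.
graph_manager.create_graph(TaskParameters(experiment_path='./experiments/cartpole_dqn'))
graph_manager.improve()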