Python _unpack_obs示例

编程语言: Python

命名空间/包名称: ray.rllib.models.model

方法/功能: _unpack_obs

hotexamples.com的示例: 4

Python _unpack_obs - 共找到4个示例。以下是从开源项目中提取的、评价最高的 ray.rllib.models.model._unpack_obs 真实 Python 示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

    def _unpack_observation(self, obs_batch):
        """Unpack a grouped-agent observation batch into obs and action mask.

        The batch is converted with ``np.array`` and passed to ``_unpack_obs``
        together with ``self.observation_space.original_space``.

        Args:
            obs_batch: batch of grouped observations; must support ``len()``.

        Returns:
            A ``(obs, action_mask)`` pair, with ``B = len(obs_batch)``:
            obs: array reshaped to [B, n_agents, obs_size]
            action_mask: array reshaped to [B, n_agents, n_actions]; filled
                with ones when ``self.has_action_mask`` is false.
        """
        per_agent = _unpack_obs(
            np.array(obs_batch),
            self.observation_space.original_space,
            tensorlib=np)
        batch_size = len(obs_batch)

        if not self.has_action_mask:
            # The unpacked entries are concatenated as-is, and the mask is
            # simply filled with ones since none was provided.
            stacked_obs = np.concatenate(per_agent, axis=1)
            obs = stacked_obs.reshape(
                [batch_size, self.n_agents, self.obs_size])
            action_mask = np.ones(
                [batch_size, self.n_agents, self.n_actions])
            return obs, action_mask

        # Each unpacked entry is indexed by "obs" and "action_mask".
        stacked_obs = np.concatenate(
            [entry["obs"] for entry in per_agent], axis=1)
        obs = stacked_obs.reshape([batch_size, self.n_agents, self.obs_size])
        stacked_mask = np.concatenate(
            [entry["action_mask"] for entry in per_agent], axis=1)
        action_mask = stacked_mask.reshape(
            [batch_size, self.n_agents, self.n_actions])
        return obs, action_mask

示例#2

显示文件

文件： qmix_policy_graph.py 项目： robertnishihara/ray

    def _unpack_observation(self, obs_batch):
        """Unpacks the action mask / tuple obs from agent grouping.

        Args:
            obs_batch: batch of grouped observations; it is converted with
                ``np.array`` before being passed to ``_unpack_obs``.

        Returns:
            obs: array reshaped to [B, n_agents, obs_size], B = len(obs_batch)
            mask: array reshaped to [B, n_agents, n_actions]; an all-ones
                array when ``self.has_action_mask`` is false
        """
        groups = _unpack_obs(
            np.array(obs_batch),
            self.observation_space.original_space,
            tensorlib=np)

        def _stack(parts, last_dim):
            # Join the parts along axis 1, then view the result as
            # [B, n_agents, last_dim].
            joined = np.concatenate(parts, axis=1)
            return joined.reshape([len(obs_batch), self.n_agents, last_dim])

        if self.has_action_mask:
            # Entries carry both an "obs" and an "action_mask" item.
            obs = _stack([g["obs"] for g in groups], self.obs_size)
            action_mask = _stack(
                [g["action_mask"] for g in groups], self.n_actions)
        else:
            obs = _stack(groups, self.obs_size)
            action_mask = np.ones(
                [len(obs_batch), self.n_agents, self.n_actions])
        return obs, action_mask

示例#3

显示文件

文件： qmix_policy_graph.py 项目： wym42/ray

 def _unpack_observation(self, obs_batch):
     """Unpack a grouped observation batch into (obs, action_mask).

     ``obs_batch`` is converted with ``np.array`` and unpacked by
     ``_unpack_obs`` against ``self.observation_space.original_space``.

     Returns:
         obs: array reshaped to [len(obs_batch), n_agents, obs_size]
         action_mask: array reshaped to [len(obs_batch), n_agents,
             n_actions]; all ones when ``self.has_action_mask`` is false
     """
     unpacked_groups = _unpack_obs(
         np.array(obs_batch),
         self.observation_space.original_space,
         tensorlib=np)
     use_mask = self.has_action_mask

     # With a mask present, each entry is indexed by "obs"; otherwise the
     # unpacked entries are concatenated directly.
     obs_parts = ([g["obs"] for g in unpacked_groups]
                  if use_mask else unpacked_groups)
     obs = np.concatenate(obs_parts, axis=1)
     obs = obs.reshape([len(obs_batch), self.n_agents, self.obs_size])

     if use_mask:
         action_mask = np.concatenate(
             [g["action_mask"] for g in unpacked_groups], axis=1)
         action_mask = action_mask.reshape(
             [len(obs_batch), self.n_agents, self.n_actions])
     else:
         action_mask = np.ones(
             [len(obs_batch), self.n_agents, self.n_actions])
     return obs, action_mask

示例#4

显示文件

    def _unpack_observation(self, obs_batch):
        """Unpack obs, action mask and (optionally) state from agent grouping.

        ``obs_batch`` is converted to a float32 array and unpacked by
        ``_unpack_obs`` against ``self.observation_space.original_space``.

        Args:
            obs_batch: batch of grouped observations; must support ``len()``.

        Returns:
            A ``(obs, action_mask, state)`` triple, with B = len(obs_batch):
            obs (np.ndarray): reshaped to [B, n_agents, obs_size]
            action_mask (np.ndarray): reshaped to [B, n_agents, n_actions];
                a float32 all-ones array when ``self.has_action_mask`` is
                false
            state: the ``ENV_STATE`` item of the first unpacked entry when
                ``self.has_env_global_state`` is true, otherwise None
        """
        groups = _unpack_obs(
            np.array(obs_batch, dtype=np.float32),
            self.observation_space.original_space,
            tensorlib=np)
        n_rows = len(obs_batch)
        masked = self.has_action_mask

        # Entries are indexed by "obs" when a mask is present or when they
        # are dicts; the short-circuit keeps groups[0] untouched in the
        # masked case, as before.
        if masked or isinstance(groups[0], dict):
            obs_parts = [g["obs"] for g in groups]
        else:
            obs_parts = groups
        obs = np.concatenate(obs_parts, axis=1).reshape(
            [n_rows, self.n_agents, self.obs_size])

        mask_shape = [n_rows, self.n_agents, self.n_actions]
        if masked:
            action_mask = np.concatenate(
                [g["action_mask"] for g in groups], axis=1).reshape(mask_shape)
        else:
            action_mask = np.ones(mask_shape, dtype=np.float32)

        state = groups[0][ENV_STATE] if self.has_env_global_state else None
        return obs, action_mask, state