Пример #1
0
    def step_async(self, actions):
        """Dispatch a ``step`` command to every sub-environment without waiting.

        The batch is split with ``iterate`` over :attr:`action_space`, and each
        piece is sent as a ``("step", action)`` message through the matching
        entry of :attr:`parent_pipes`. The environment is then marked as
        waiting for step results.

        Parameters
        ----------
        actions : element of :attr:`~VectorEnv.action_space`
            Batch of actions, paired in order with :attr:`parent_pipes`.

        Raises
        ------
        ClosedEnvironmentError
            If the environment was closed (if :meth:`close` was previously called).

        AlreadyPendingCallError
            If another asynchronous call (e.g. :meth:`reset_async`, or an
            earlier :meth:`step_async` with no :meth:`step_wait` in between) is
            still pending.
        """
        self._assert_is_running()

        # Only one asynchronous call may be in flight at a time.
        if self._state != AsyncState.DEFAULT:
            raise AlreadyPendingCallError(
                f"Calling `step_async` while waiting for a pending call to `{self._state.value}` to complete.",
                self._state.value,
            )

        per_env_actions = iterate(self.action_space, actions)
        for pipe, action in zip(self.parent_pipes, per_env_actions):
            command = ("step", action)
            pipe.send(command)

        self._state = AsyncState.WAITING_STEP
Пример #2
0
def write_observations(vec_env, env_start_idx, shared_obs, obs):
    """Write a batch of observations into ``shared_obs``, one slot per sub-environment.

    ``obs`` is split into individual observations with ``iterate`` over
    ``vec_env.observation_space``. Observation ``i`` is then passed to
    ``write_to_shared_memory`` with index ``env_start_idx + i``.

    Parameters
    ----------
    vec_env
        Object providing ``observation_space`` and ``num_envs``.
    env_start_idx
        Index used for the first observation; later ones follow consecutively.
    shared_obs
        Destination handed unchanged to ``write_to_shared_memory``.
    obs
        Batched observations; must yield at least ``vec_env.num_envs`` items.
    """
    space = vec_env.observation_space
    per_env_obs = list(iterate(space, obs))
    # Index explicitly (rather than zip) so a too-short batch fails loudly.
    for offset in range(vec_env.num_envs):
        slot = env_start_idx + offset
        write_to_shared_memory(space, slot, per_env_obs[offset], shared_obs)
Пример #3
0
 def concat_obs(self, observations):
     """Merge several observation batches into a single batch.

     Each entry of ``observations`` is split into individual observations
     with ``iterate``. The resulting flat list is passed to ``concatenate``
     together with a new array from ``create_empty_array`` sized for
     ``self.num_envs`` entries.

     Parameters
     ----------
     observations
         Iterable of observation batches, each compatible with
         ``self.observation_space``.

     Returns
     -------
     The value returned by ``concatenate``.
     """
     flattened = []
     for batch in observations:
         flattened.extend(iterate(self.observation_space, batch))
     out = create_empty_array(self.observation_space, n=self.num_envs)
     return concatenate(self.observation_space, flattened, out)
Пример #4
0
 def step(self, actions):
     """Step every wrapped vector environment with its share of ``actions``.

     The batch is split into individual actions. Each environment in
     ``self.vec_envs`` receives the next ``venv.num_envs`` of them, re-batched
     through ``self.concatenate_actions``, and the per-environment results
     are merged into one result.

     Parameters
     ----------
     actions
         Batched actions covering all wrapped environments, in order.

     Returns
     -------
     tuple
         ``(observations, rewards, dones, infos)``. Observations are merged
         with :meth:`concat_obs`, rewards and dones are concatenated along
         axis 0, and the per-environment info lists are joined into one list.
     """
     flat_actions = list(iterate(self.action_space, actions))

     results = []
     offset = 0
     for venv in self.vec_envs:
         count = venv.num_envs
         chunk = flat_actions[offset : offset + count]
         batched = self.concatenate_actions(chunk, count)
         results.append(venv.step(batched))
         offset += count

     # NOTE(review): `transpose` is defined elsewhere; it presumably regroups
     # the per-environment result tuples into four per-field sequences.
     observations, rewards, dones, infos = transpose(results)

     merged_obs = self.concat_obs(observations)
     merged_rewards = np.concatenate(rewards, axis=0)
     merged_dones = np.concatenate(dones, axis=0)
     merged_infos = sum(infos, [])
     return merged_obs, merged_rewards, merged_dones, merged_infos
Пример #5
0
    def step(self, actions):
        """Step the wrapped parallel environment using a batched action.

        The batch is split into one action per entry of
        ``self.par_env.possible_agents``. Only agents currently listed in
        ``self.par_env.agents`` are forwarded to ``self.par_env.step``.

        Parameters
        ----------
        actions
            Batched actions, ordered like ``self.par_env.possible_agents``.

        Returns
        -------
        tuple
            ``(observations, rewards, dones, infos)``. Rewards are a
            ``float32`` array and dones a ``uint8`` array, both ordered by
            ``possible_agents``. Agents missing from the environment's
            results get reward ``0``, done ``False`` and an empty info dict.
            If every agent is done, the environment is reset and the reset
            observations are returned. The final observations are then
            available under ``infos[agent]["terminal_observation"]``.

        Raises
        ------
        AssertionError
            If ``black_death`` is false and the active agents differ from
            ``possible_agents`` after the step.
        """
        per_agent_actions = list(iterate(self.action_space, actions))
        live_agents = set(self.par_env.agents)
        act_dict = {}
        for slot, agent in enumerate(self.par_env.possible_agents):
            if agent in live_agents:
                act_dict[agent] = per_agent_actions[slot]

        observations, rewards, dones, infos = self.par_env.step(act_dict)

        episode_over = all(dones.values())
        if episode_over:
            # Keep the last observation reachable, since it is replaced by
            # the reset observation below.
            for agent, final_obs in observations.items():
                infos[agent]["terminal_observation"] = final_obs

        roster = self.par_env.possible_agents
        rews = np.array(
            [rewards.get(agent, 0) for agent in roster], dtype=np.float32
        )
        dns = np.array(
            [dones.get(agent, False) for agent in roster], dtype=np.uint8
        )
        infs = [infos.get(agent, {}) for agent in roster]

        observations = (
            self.reset() if episode_over else self.concat_obs(observations)
        )

        assert (
            self.black_death
            or self.par_env.agents == self.par_env.possible_agents
        ), "MarkovVectorEnv does not support environments with varying numbers of active agents unless black_death is set to True"
        return observations, rews, dns, infs
Пример #6
0
 def step_async(self, actions):
     """Store ``actions`` on ``self._actions`` as an iterable of per-environment actions.

     The batch is converted with ``iterate`` over ``self.action_space``.
     Nothing is executed here; the stored value is only recorded.
     """
     per_env_actions = iterate(self.action_space, actions)
     self._actions = per_env_actions
Пример #7
0
 def step_async(self, actions):
     """Record the batched ``actions`` for a later :meth:`step_wait`.

     The batch is turned into an iterable of individual actions with
     ``iterate`` over :attr:`action_space` and stored in :attr:`_actions`.
     """
     per_env_actions = iterate(self.action_space, actions)
     self._actions = per_env_actions
Пример #8
0
 def step_async(self, actions):
     """Send each pipe its contiguous slice of the batched ``actions``.

     The batch is split into individual actions. Pipe ``i`` receives the
     message ``("step", actions[start:end])``, where ``start`` and ``end``
     are ``self.idx_starts[i]`` and ``self.idx_starts[i + 1]``.

     Parameters
     ----------
     actions
         Batched actions covering every pipe's share, in order.
     """
     flat_actions = list(iterate(self.action_space, actions))
     for worker, pipe in enumerate(self.pipes):
         # Consecutive entries of idx_starts bound this pipe's slice.
         lo, hi = self.idx_starts[worker:worker + 2]
         message = ("step", flat_actions[lo:hi])
         pipe.send(message)