Exemplo n.º 1
0
 def step_wait(self) -> VecEnvStepReturn:
     """Step every environment with its stored action and return the batch.

     For each environment in ``self.envs`` the action at the matching index
     of ``self.actions`` is applied, and the resulting reward, done flags and
     info are written into the per-environment buffers. If any done flag
     stored for that environment is truthy, the last observation is kept in
     the info entry under ``"terminal_observation"`` and the environment is
     reset, so the saved observation is the first one of the new episode.

     :return: a tuple ``(observations, rewards, dones, infos)`` built from
         copies of the internal buffers.
     """
     for env_idx, sub_env in enumerate(self.envs):
         obs, reward, done, info = sub_env.step(self.actions[env_idx])
         self.buf_rews[env_idx] = reward
         self.buf_dones[env_idx] = done
         self.buf_infos[env_idx] = info

         # The done entry is read back from the buffer (not the raw step
         # output) because ``.tolist()`` is called on the stored value.
         episode_over = any(self.buf_dones[env_idx].tolist())
         if episode_over:
             # Keep the final observation reachable for the caller before
             # it is replaced by the post-reset observation.
             self.buf_infos[env_idx]["terminal_observation"] = obs
             obs = sub_env.reset()

         self._save_obs(env_idx, obs)

     # Hand out copies so callers cannot mutate the internal buffers.
     batched_obs = dict_to_obs(self.observation_space,
                               copy_obs_dict(self.buf_obs))
     rewards = np.copy(self.buf_rews)
     dones = np.copy(self.buf_dones)
     infos = copy.deepcopy(self.buf_infos)
     return batched_obs, rewards, dones, infos
Exemplo n.º 2
0
 def _obs_from_buf(self) -> VecEnvObs:
     """Build the observation to return from a copy of ``self.buf_obs``."""
     buffered_copy = copy_obs_dict(self.buf_obs)
     return dict_to_obs(self.observation_space, buffered_copy)
Exemplo n.º 3
0
 def reset(self) -> VecEnvObs:
     """Reset every environment and return the resulting observations.

     Each environment in ``self.envs`` is reset in order and its initial
     observation is stored via ``self._save_obs`` at the matching index.

     :return: the observations built from a copy of ``self.buf_obs``.
     """
     for position, sub_env in enumerate(self.envs):
         initial_obs = sub_env.reset()
         self._save_obs(position, initial_obs)

     buffered_copy = copy_obs_dict(self.buf_obs)
     return dict_to_obs(self.observation_space, buffered_copy)