示例#1
0
    def step(self,
             action: Tensor) -> Tuple[Tensor, Tensor, Tensor, List[Dict]]:
        """Step the wrapped env with a tensor action and return tensor outputs.

        The incoming action is converted with ``from_tensor`` using
        ``self.action_space`` and must be contained in
        ``self.env.action_space``. The observation is converted with
        ``to_tensor`` using ``self.observation_space``; the reward is
        converted with ``to_tensor`` when this object has a ``reward_space``
        attribute and with ``torch.as_tensor`` otherwise; ``done`` is
        converted with ``torch.as_tensor``. All conversions target
        ``self.device``. ``info`` is passed through unchanged.

        Returns:
            An object of the same type as the wrapped env's step result,
            built from the list ``[observation, reward, done, info]``.

        Raises:
            AssertionError: if the converted action is not in
                ``self.env.action_space``.
        """
        env_action = from_tensor(self.action_space, action)
        assert env_action in self.env.action_space, (env_action, self.env.action_space)

        step_output = self.env.step(env_action)
        raw_obs, raw_reward, raw_done, info = step_output

        obs_tensor = to_tensor(self.observation_space, raw_obs, self.device)
        # Only use the space-aware conversion when a reward space is defined.
        reward_tensor = (
            to_tensor(self.reward_space, raw_reward, self.device)
            if hasattr(self, "reward_space")
            else torch.as_tensor(raw_reward, device=self.device)
        )
        done_tensor = torch.as_tensor(raw_done, device=self.device)

        # Rebuild with the result's own type so callers get the same kind of
        # container the wrapped env produced; `info` is left unconverted.
        return type(step_output)(
            [obs_tensor, reward_tensor, done_tensor, info]
        )
示例#2
0
 def _contains(x: Union[Tensor, Any]) -> bool:
     """Run ``contains`` on ``x`` after converting it with ``from_tensor``.

     ``space`` and ``contains`` are not defined in this function; they are
     presumably captured from the enclosing scope (TODO confirm).
     """
     return contains(from_tensor(space, x))
示例#3
0
 def action(self, action):
     """Return ``action`` converted by ``from_tensor`` for ``self.action_space``."""
     converted_action = from_tensor(self.action_space, action)
     return converted_action