def step(self, action: Tensor) -> Tuple[Tensor, Tensor, Tensor, List[Dict]]:
    """Step the wrapped env with a tensor action, returning tensor results.

    Converts `action` from tensor form into the wrapped env's native action
    format, steps the env, then converts observation/reward/done back into
    tensors on `self.device`. `info` is passed through unchanged.

    Raises:
        AssertionError: if the converted action is not contained in the
            wrapped env's action space.
    """
    action = from_tensor(self.action_space, action)
    # Explicit check instead of a bare `assert`: still validates when Python
    # runs with -O (asserts stripped). Same exception type as before, so
    # existing callers are unaffected.
    if action not in self.env.action_space:
        raise AssertionError((action, self.env.action_space))
    result = self.env.step(action)
    # NOTE(review): assumes the classic 4-tuple gym step API; a 5-tuple
    # (terminated/truncated) env would fail this unpack — confirm gym version.
    observation, reward, done, info = result
    observation = to_tensor(self.observation_space, observation, self.device)
    if hasattr(self, "reward_space"):
        # A declared reward space knows how to build the tensor itself.
        reward = to_tensor(self.reward_space, reward, self.device)
    else:
        reward = torch.as_tensor(reward, device=self.device)
    done = torch.as_tensor(done, device=self.device)
    # Preserve the concrete tuple (sub)class of the env's return value.
    return type(result)([observation, reward, done, info])
def _contains(x: Union[Tensor, Any]) -> bool:
    """Return whether `x`, converted out of tensor form, lies in the space.

    Relies on `space` and `contains` captured from the enclosing scope.
    """
    native = from_tensor(space, x)
    return contains(native)
def action(self, action):
    """Convert a tensor action back into the wrapped env's native format."""
    native_action = from_tensor(self.action_space, action)
    return native_action