示例#1
0
 def act(self, obs: rlt.FeatureData) -> rlt.ActorOutput:
     """Draw one random action per batch row from ``self.dist``.

     The observation values themselves are never inspected; only the size
     of the leading dimension of ``obs.float_features`` is used, to decide
     how many samples to draw.

     Args:
         obs: Feature container whose ``float_features`` tensor must have
             at least two dimensions.

     Returns:
         An ``rlt.ActorOutput`` holding the sampled actions and the
         log-probability of each sample under ``self.dist``.

     Raises:
         AssertionError: If ``float_features`` has fewer than 2 dimensions.
     """
     obs: torch.Tensor = obs.float_features
     assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"
     # One draw per row of the leading dimension.
     sample_shape = (obs.shape[0],)
     # pyre-fixme[6]: Expected `Union[torch.Size, torch.Tensor]` for 1st param
     #  but got `Tuple[int]`.
     sampled = self.dist.sample(sample_shape)
     return rlt.ActorOutput(
         action=sampled,
         log_prob=self.dist.log_prob(sampled),
     )
示例#2
0
 def act(
     self, obs: rlt.FeatureData, possible_actions_mask: Optional[np.ndarray] = None
 ) -> rlt.ActorOutput:
     """Draw one random action per batch row from ``self.dist``.

     The observation values themselves are never inspected; only the size
     of the leading dimension of ``obs.float_features`` is used, to decide
     how many samples to draw.

     Args:
         obs: Feature container whose ``float_features`` tensor must have
             at least two dimensions.
         possible_actions_mask: Accepted but not read by this method.

     Returns:
         An ``rlt.ActorOutput`` holding the sampled actions and, for each
         sample, its log-probability under ``self.dist`` summed over dim 1.

     Raises:
         AssertionError: If ``float_features`` has fewer than 2 dimensions.
     """
     obs: torch.Tensor = obs.float_features
     assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"
     # One draw per row of the leading dimension.
     sample_shape = (obs.shape[0],)
     # pyre-fixme[6]: Expected `Union[torch.Size, torch.Tensor]` for 1st param
     #  but got `Tuple[int]`.
     sampled = self.dist.sample(sample_shape)
     # Collapse the per-coordinate log-probabilities into one value per
     # sample (the coordinates are assumed i.i.d., so their logs add up).
     per_coordinate = self.dist.log_prob(sampled)
     return rlt.ActorOutput(action=sampled, log_prob=per_coordinate.sum(dim=1))