def encode_obs(self, obs):
     """Encode every frame of ``obs`` and average the encodings per sample.

     The first two axes of ``obs`` are merged into one effective batch axis,
     the third axis is treated as the frame axis, and the remaining axes are
     passed to ``self.convolutional_encoder`` as a single image.

     Returns:
         A ``(h_t, effective_batch_size)`` pair, where ``h_t`` is the mean
         over the frame axis of the flattened encoder outputs.
     """
     shape = obs.shape
     image_dims = shape[3:]
     effective_batch_size = get_product_of_iterable(shape[:2])
     frame_count = shape[2]
     # Fold all leading axes into one so the encoder only ever sees images.
     flat_images = obs.view(-1, *image_dims)
     encoded = self.convolutional_encoder(flat_images)
     # Restore the (sample, frame) layout with one feature vector per frame.
     per_frame = encoded.view(effective_batch_size, frame_count, -1)
     return torch.mean(per_frame, dim=1), effective_batch_size
 def get_weights_dict(self):
     """Build the linear projections used for the action and the latent.

     Returns:
         A ``torch.nn.ModuleDict`` with two entries, each a single-layer
         ``Sequential``: ``"w_action"`` maps the action size to the latent
         size and ``"w_z"`` maps the latent size to itself.
     """
     latent_dim = self.config.model.imagination_model.latent_size
     action_dim = get_product_of_iterable(
         self.config.env.action_space["shape"])
     # Created in the same order as the dictionary keys below.
     action_projection = torch.nn.Sequential(
         nn.Linear(action_dim, latent_dim))
     latent_projection = torch.nn.Sequential(
         nn.Linear(latent_dim, latent_dim))
     return torch.nn.ModuleDict({
         "w_action": action_projection,
         "w_z": latent_projection,
     })
示例#3
0
 def __init__(self, config):
     """Set up the policy network and its loss criterion.

     The network input width is the sum of the imagination model's
     ``hidden_state_size`` and ``latent_size``; the output width is
     ``get_product_of_iterable`` applied to the action-space shape. The
     policy is a four-layer MLP whose hidden widths are the input width,
     half of it, and a quarter of it (each truncated to an int).
     """
     super().__init__(config=config)
     imagination_cfg = self.config.model.imagination_model
     # Note that the hidden state size corresponds to the encoding of the
     # observation in the pixel space.
     self._input_size = (imagination_cfg.hidden_state_size
                         + imagination_cfg.latent_size)
     self._output_size = get_product_of_iterable(
         self.config.env.action_space.shape)

     full_width = self._input_size
     half_width = int(full_width / 2)
     quarter_width = int(full_width / 4)
     layers = [
         nn.Linear(full_width, full_width),
         nn.ReLU(),
         nn.Linear(full_width, half_width),
         nn.ReLU(),
         nn.Linear(half_width, quarter_width),
         nn.ReLU(),
         nn.Linear(quarter_width, self._output_size),
     ]
     self.policy = nn.Sequential(*layers)
     self.criteria = nn.MSELoss()
示例#4
0
def get_action_space_size(config):
    """Return ``get_product_of_iterable`` of the configured action shape.

    The shape is read from ``config.env.action_space.shape``.
    """
    action_shape = config.env.action_space.shape
    return get_product_of_iterable(action_shape)
 def loss(self, output, x):
     """Scaled mean-squared error between ``output`` and the target frame.

     The target is ``x.next_obs`` taken at index 3 of its third axis. The
     MSE is multiplied by 255 and divided by
     ``get_product_of_iterable(output.shape)``.
     """
     target = x.next_obs[:, :, 3, :, :, :]
     # Note that we have to divide manually because of an issue in PyTorch.
     # The fix was available only in master when this was written.
     scaled_error = F.mse_loss(target, output) * 255
     element_count = get_product_of_iterable(output.shape)
     return scaled_error / element_count
示例#6
0
def move_data_to_device(data, device):
    """Move a tensor, or a sequence of tensors, onto ``device``.

    Args:
        data: Either a single object exposing ``.to(device)`` and ``len()``,
            or a list/tuple (subclasses such as namedtuples included) whose
            items each expose ``.to(device)``.
        device: Target forwarded unchanged to every ``.to`` call.

    Returns:
        A ``(moved, size)`` pair. For a sequence, ``moved`` is a list of the
        moved items and ``size`` is
        ``get_product_of_iterable(data[1].shape[:2])``. Otherwise ``moved``
        is ``data.to(device)`` and ``size`` is ``len(data)``.
    """
    # isinstance instead of an exact type() match, so list/tuple subclasses
    # (e.g. namedtuple batches) are handled as sequences rather than falling
    # through to the single-tensor branch and failing on ``.to``.
    if isinstance(data, (list, tuple)):
        moved = [item.to(device) for item in data]
        # The size is derived from the first two dims of the second element.
        return moved, get_product_of_iterable(data[1].shape[:2])
    return data.to(device), len(data)