def __init__(self,
                 obs_dim,
                 action_dim,
                 shared_layers,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Builds a shared-trunk actor with twin Q-value critics.

        The trunk produced by ``construct_linear_layers`` feeds both the
        policy head (``action_dim * 2`` outputs — presumably mean and raw
        std per action dimension; TODO confirm against the forward pass)
        and the two critics, which additionally consume the action.
        """
        super().__init__(obs_dim=obs_dim,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)
        self._eps = eps
        self._action_dim = action_dim
        self._flatten = Flatten()

        # Trunk shared by the policy and both critics.
        self._shared_network = construct_linear_layers(shared_layers)
        feature_dim = shared_layers[-1][1]

        self._policy = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, action_dim * 2),
        )

        def _make_critic():
            # Each critic scores the concatenated (features, action) pair.
            return nn.Sequential(
                nn.Linear(feature_dim + action_dim, 256),
                nn.ReLU(),
                nn.Linear(256, 1),
            )

        self._q1 = _make_critic()
        self._q2 = _make_critic()

        self.to(self.device)
# Exemplo n.º 2
    def __init__(self,
                 obs_dim,
                 hidden_state_dim,
                 action_dim,
                 shared_layers,
                 initial_alpha=1.,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Recurrent shared-trunk actor with twin critics.

        Observations flow through the linear trunk, then an LSTM whose
        hidden state feeds the policy head (``action_dim * 2`` outputs)
        and both critics (hidden state concatenated with the action).
        """
        # BUGFIX: the super() call previously hard-coded
        # `normalize_value=False`, silently discarding the caller's
        # `normalize_value` argument. Forward it as the signature promises.
        # (If value normalization is deliberately unsupported for the
        # recurrent variant, reject it explicitly instead of ignoring it.)
        super().__init__(obs_dim=obs_dim,
                         hidden_state_dim=hidden_state_dim,
                         initial_alpha=initial_alpha,
                         eps=eps,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)
        self._action_dim = action_dim
        self._flatten = Flatten()

        self._shared_network = construct_linear_layers(shared_layers)
        # batch_first=True: inputs are (batch, seq, feature).
        self.lstm_layer = nn.LSTM(input_size=shared_layers[-1][1],
                                  hidden_size=self.hidden_state_dim,
                                  batch_first=True)
        self._policy = nn.Sequential(nn.Linear(self.hidden_state_dim, 256),
                                     nn.ReLU(), nn.Linear(256, action_dim * 2))
        self._q1 = nn.Sequential(
            nn.Linear(self.hidden_state_dim + action_dim, 256), nn.ReLU(),
            nn.Linear(256, 1))
        self._q2 = nn.Sequential(
            nn.Linear(self.hidden_state_dim + action_dim, 256), nn.ReLU(),
            nn.Linear(256, 1))

        self.to(self.device)
    def __init__(self,
                 obs_dim,
                 action_dim,
                 shared_layers,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Constructs fully separate policy and twin-critic networks.

        NOTE: Separate architecture grants stable learning for GRAC.
        ``shared_layers`` is accepted for signature compatibility with the
        shared-trunk variants but is not used to build a trunk here.
        """
        super().__init__(obs_dim=obs_dim,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)
        self._eps = eps
        self._action_dim = action_dim
        self._flatten = Flatten()

        hidden = 256
        # Policy consumes raw observations directly (no shared trunk).
        self._policy = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, action_dim * 2))

        critic_in = obs_dim + action_dim
        # Twin critics, each its own two-hidden-layer MLP.
        self._q1 = nn.Sequential(
            nn.Linear(critic_in, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 1))
        self._q2 = nn.Sequential(
            nn.Linear(critic_in, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 1))

        self.to(self.device)
# Exemplo n.º 4
 def __init__(self,
              obs_dim,
              action_dim,
              initial_alpha=1.,
              eps=1e-7,
              device=torch.device(CPU),
              normalize_obs=False,
              normalize_value=False,
              **kwargs):
      """Separate-network actor and twin critics operating on flat obs.

      Extra keyword arguments are accepted and ignored so configs with
      surplus keys still construct the model.
      """
      super().__init__(obs_dim=obs_dim,
                       initial_alpha=initial_alpha,
                       eps=eps,
                       norm_dim=(0, ),
                       device=device,
                       normalize_obs=normalize_obs,
                       normalize_value=normalize_value)
      self._action_dim = action_dim
      self._flatten = Flatten()

      width = 256
      # Policy head: mean and raw std per action dimension.
      self._policy = nn.Sequential(
          nn.Linear(obs_dim, width), nn.ReLU(),
          nn.Linear(width, width), nn.ReLU(),
          nn.Linear(width, action_dim * 2))

      q_in = obs_dim + action_dim
      self._q1 = nn.Sequential(
          nn.Linear(q_in, width), nn.ReLU(),
          nn.Linear(width, width), nn.ReLU(),
          nn.Linear(width, 1))
      self._q2 = nn.Sequential(
          nn.Linear(q_in, width), nn.ReLU(),
          nn.Linear(width, width), nn.ReLU(),
          nn.Linear(width, 1))
      self.to(self.device)
# Exemplo n.º 5
    def __init__(self,
                 rec_dim,
                 batch_size,
                 encoder,
                 decoder,
                 dynamics,
                 opt,
                 buffer,
                 algo_params,
                 reduction=c.SUM,
                 loss_coef=1.,
                 device=torch.device(c.CPU),
                 **kwargs):
        """Reconstruction/dynamics training module.

        Stores the encoder/decoder/dynamics networks, their optimizer and
        replay buffer, and sets up an MSE reconstruction loss.

        :param rec_dim: image dimensions (num_images, num_frames, height, width)
        :param batch_size: minibatch size drawn from ``buffer``
        :param reduction: reduction mode forwarded to ``torch.nn.MSELoss``
        :param loss_coef: scalar weight applied to the loss (presumably by
            the training step; TODO confirm usage)
        """
        # Image dim: (num_images, num_frames, height, width)
        assert len(rec_dim) == 4
        super().__init__()
        self._flat = Flatten()

        self._rec_dim = rec_dim
        # BUGFIX: np.product is deprecated and removed in NumPy 2.0;
        # np.prod is the supported equivalent.
        self._flatten_dim = int(np.prod(rec_dim))
        self._batch_size = batch_size

        self._buffer = buffer
        self._encoder = encoder
        self._decoder = decoder
        self._dynamics = dynamics
        self._opt = opt

        self._loss_coef = loss_coef
        self._mse = torch.nn.MSELoss(reduction=reduction)

        self.device = device
        self.algo_params = algo_params
        self.train_preprocessing = algo_params[c.TRAIN_PREPROCESSING]
# Exemplo n.º 6
    def __init__(self, input_dim, output_dim, layers, device=torch.device(CPU)):
        """MLP that emits diagonal-Gaussian parameters for its output.

        The final linear layer produces ``2 * output_dim`` values —
        presumably a mean and a scale per dimension (TODO confirm in the
        forward pass). Dimensions are treated as independent.
        """
        super().__init__()
        self.device = device

        self._input_dim = input_dim
        self._output_dim = output_dim

        self._flatten = Flatten()
        self.fc_layers = construct_linear_layers(layers)

        # Assume independence between dimensions.
        final_width = layers[-1][1]
        self.gaussian_parameters = nn.Linear(final_width, 2 * output_dim)

        self.to(device)
    def __init__(self,
                 obs_dim,
                 action_dim,
                 shared_layers,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Shared-trunk actor-critic with linear action and value heads."""
        super().__init__(obs_dim=obs_dim,
                         norm_dim=(0,),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)

        self._flatten = Flatten()
        self.shared_network = construct_linear_layers(shared_layers)

        # Both heads read the trunk's final feature width.
        trunk_out = shared_layers[-1][1]
        self.action = nn.Linear(trunk_out, action_dim)
        self.value = nn.Linear(trunk_out, 1)

        self.to(self.device)
    def __init__(self,
                 obs_dim,
                 action_dim,
                 output_dim,
                 layers,
                 device=torch.device(CPU)):
        """MLP with a single linear output head of ``output_dim`` units."""
        super().__init__()
        self.device = device

        self._obs_dim = obs_dim
        self._action_dim = action_dim
        self._output_dim = output_dim

        self._flatten = Flatten()
        self.fc_layers = construct_linear_layers(layers)

        # Head width follows the last configured layer.
        self.output = nn.Linear(layers[-1][1], output_dim)

        self.to(device)
# Exemplo n.º 9
    def __init__(self,
                 obs_dim,
                 action_dim,
                 task_dim,
                 shared_layers,
                 initial_alpha=1.,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Multitask shared-trunk actor-critic with per-task temperature.

        The policy head emits ``task_dim * action_dim * 2`` outputs and
        each critic returns one Q-value per task. Value normalization is
        handled here with a per-task ``RunningMeanStd`` rather than by the
        base class — hence ``normalize_value=False`` in the super() call.
        """
        super().__init__(obs_dim=obs_dim,
                         initial_alpha=initial_alpha,
                         eps=eps,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=False)
        self._task_dim = task_dim
        self._action_dim = action_dim
        self._flatten = Flatten()

        self._shared_network = construct_linear_layers(shared_layers)
        trunk_out = shared_layers[-1][1]
        self._policy = nn.Sequential(
            nn.Linear(trunk_out, 256), nn.ReLU(),
            nn.Linear(256, task_dim * action_dim * 2))
        self._q1 = nn.Sequential(
            nn.Linear(trunk_out + action_dim, 256), nn.ReLU(),
            nn.Linear(256, task_dim))
        self._q2 = nn.Sequential(
            nn.Linear(trunk_out + action_dim, 256), nn.ReLU(),
            nn.Linear(256, task_dim))

        # One temperature per task, parameterized as log(alpha).
        log_alpha0 = torch.log(torch.tensor(initial_alpha))
        self._log_alpha = nn.Parameter(torch.ones(task_dim) * log_alpha0)

        self.to(self.device)

        if normalize_value:
            # Per-task value statistics, normalized over the batch axis.
            self.value_rms = RunningMeanStd(shape=(self._task_dim, ),
                                            norm_dim=(0, ))
    def __init__(self,
                 obs_dim,
                 hidden_state_dim,
                 action_dim,
                 shared_layers,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Recurrent actor-critic: trunk -> LSTM -> linear heads."""
        super().__init__(obs_dim=obs_dim,
                         hidden_state_dim=hidden_state_dim,
                         norm_dim=(0,),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)

        self._flatten = Flatten()
        self.shared_network = construct_linear_layers(shared_layers)

        # batch_first=True: LSTM inputs are (batch, seq, feature).
        self.lstm_layer = nn.LSTM(input_size=shared_layers[-1][1],
                                  hidden_size=self.hidden_state_dim,
                                  batch_first=True)

        # Gaussian policy head (mean + raw std) and scalar value head.
        width = self.hidden_state_dim
        self.action_mean = nn.Linear(width, action_dim)
        self.action_raw_std = nn.Linear(width, action_dim)
        self.value = nn.Linear(width, 1)

        self.to(self.device)