Пример #1
0
    def __init__(
            self,
            obs_dim,
            hidden_state_dim,
            action_dim,
            shared_layers,
            initial_alpha=1.,
            eps=1e-7,
            device=torch.device(CPU),
            normalize_obs=False,
            normalize_value=False):
        """Build the shared layers, an LSTM, a policy head and two Q heads.

        Args:
            obs_dim: Forwarded unchanged to the parent constructor.
            hidden_state_dim: Forwarded to the parent constructor. The value
                later read back as ``self.hidden_state_dim`` sizes the LSTM
                hidden state and the input of every head.
            action_dim: Stored as ``self._action_dim``. The policy head emits
                ``action_dim * 2`` values; each Q head's input width is
                ``self.hidden_state_dim + action_dim``.
            shared_layers: Specification handed to
                ``construct_linear_layers``; ``shared_layers[-1][1]`` is used
                as the LSTM input size.
            initial_alpha: Forwarded unchanged to the parent constructor.
            eps: Forwarded unchanged to the parent constructor.
            device: Forwarded to the parent constructor; the finished module
                is moved to ``self.device``.
            normalize_obs: Forwarded unchanged to the parent constructor.
            normalize_value: Accepted but not forwarded (see note below).
        """
        # NOTE(review): the parent always receives normalize_value=False, so
        # the ``normalize_value`` argument has no effect here -- confirm that
        # this is intentional.
        parent_kwargs = dict(
            obs_dim=obs_dim,
            hidden_state_dim=hidden_state_dim,
            initial_alpha=initial_alpha,
            eps=eps,
            norm_dim=(0, ),
            device=device,
            normalize_obs=normalize_obs,
            normalize_value=False,
        )
        super().__init__(**parent_kwargs)

        self._action_dim = action_dim
        self._flatten = Flatten()

        # Submodules are created in a fixed order (shared, LSTM, policy, q1,
        # q2) so parameter registration and random initialisation stay stable.
        self._shared_network = construct_linear_layers(shared_layers)
        self.lstm_layer = nn.LSTM(
            input_size=shared_layers[-1][1],
            hidden_size=self.hidden_state_dim,
            batch_first=True,
        )

        head_width = 256

        def build_q_head():
            # Each Q head consumes a vector of hidden-state plus action width.
            return nn.Sequential(
                nn.Linear(self.hidden_state_dim + action_dim, head_width),
                nn.ReLU(),
                nn.Linear(head_width, 1),
            )

        self._policy = nn.Sequential(
            nn.Linear(self.hidden_state_dim, head_width),
            nn.ReLU(),
            nn.Linear(head_width, action_dim * 2),
        )
        self._q1 = build_q_head()
        self._q2 = build_q_head()

        self.to(self.device)
    def __init__(
            self,
            obs_dim,
            action_dim,
            shared_layers,
            eps=1e-7,
            device=torch.device(CPU),
            normalize_obs=False,
            normalize_value=False):
        """Build the shared layers, a policy head and two Q heads.

        Args:
            obs_dim: Forwarded unchanged to the parent constructor.
            action_dim: Stored as ``self._action_dim``. The policy head emits
                ``action_dim * 2`` values; each Q head's input width is
                ``shared_layers[-1][1] + action_dim``.
            shared_layers: Specification handed to
                ``construct_linear_layers``; ``shared_layers[-1][1]`` is used
                as the input width of the heads.
            eps: Stored as ``self._eps``.
            device: Forwarded to the parent constructor; the finished module
                is moved to ``self.device``.
            normalize_obs: Forwarded unchanged to the parent constructor.
            normalize_value: Forwarded unchanged to the parent constructor.
        """
        super().__init__(
            obs_dim=obs_dim,
            norm_dim=(0, ),
            device=device,
            normalize_obs=normalize_obs,
            normalize_value=normalize_value,
        )
        self._eps, self._action_dim = eps, action_dim
        self._flatten = Flatten()

        # Submodules are created in a fixed order (shared, policy, q1, q2) so
        # parameter registration and random initialisation stay stable.
        self._shared_network = construct_linear_layers(shared_layers)

        feature_width = shared_layers[-1][1]
        head_width = 256

        def build_q_head():
            # Each Q head consumes a vector of feature plus action width.
            return nn.Sequential(
                nn.Linear(feature_width + action_dim, head_width),
                nn.ReLU(),
                nn.Linear(head_width, 1),
            )

        self._policy = nn.Sequential(
            nn.Linear(feature_width, head_width),
            nn.ReLU(),
            nn.Linear(head_width, action_dim * 2),
        )
        self._q1 = build_q_head()
        self._q2 = build_q_head()

        self.to(self.device)
Пример #3
0
    def __init__(self, input_dim, output_dim, layers, device=torch.device(CPU)):
        """Build fully connected layers followed by a Gaussian-parameter head.

        Args:
            input_dim: Stored as ``self._input_dim``.
            output_dim: Stored as ``self._output_dim``; the final linear layer
                emits ``2 * output_dim`` values.
            layers: Specification handed to ``construct_linear_layers``;
                ``layers[-1][1]`` is used as the input width of the final
                linear layer.
            device: Stored as ``self.device``; the finished module is moved
                to it.
        """
        super().__init__()
        self.device = device
        self._input_dim, self._output_dim = input_dim, output_dim

        self._flatten = Flatten()
        self.fc_layers = construct_linear_layers(layers)

        # Dimensions are assumed independent, hence 2 * output_dim outputs
        # (presumably two parameters per dimension -- confirm in forward).
        last_width = layers[-1][1]
        self.gaussian_parameters = nn.Linear(last_width, 2 * output_dim)

        self.to(device)
    def __init__(
            self,
            obs_dim,
            action_dim,
            shared_layers,
            device=torch.device(CPU),
            normalize_obs=False,
            normalize_value=False):
        """Build the shared layers plus linear ``action`` and ``value`` heads.

        Args:
            obs_dim: Forwarded unchanged to the parent constructor.
            action_dim: Output width of the ``action`` head.
            shared_layers: Specification handed to
                ``construct_linear_layers``; ``shared_layers[-1][1]`` is used
                as the input width of both heads.
            device: Forwarded to the parent constructor; the finished module
                is moved to ``self.device``.
            normalize_obs: Forwarded unchanged to the parent constructor.
            normalize_value: Forwarded unchanged to the parent constructor.
        """
        super().__init__(
            obs_dim=obs_dim,
            norm_dim=(0,),
            device=device,
            normalize_obs=normalize_obs,
            normalize_value=normalize_value,
        )

        self._flatten = Flatten()
        self.shared_network = construct_linear_layers(shared_layers)

        # Both heads read the output of the shared layers; ``value`` emits a
        # single number.
        feature_width = shared_layers[-1][1]
        self.action = nn.Linear(feature_width, action_dim)
        self.value = nn.Linear(feature_width, 1)

        self.to(self.device)
    def __init__(
            self,
            obs_dim,
            action_dim,
            output_dim,
            layers,
            device=torch.device(CPU)):
        """Build fully connected layers followed by a linear output layer.

        Args:
            obs_dim: Stored as ``self._obs_dim``.
            action_dim: Stored as ``self._action_dim``.
            output_dim: Stored as ``self._output_dim``; also the width of the
                ``output`` layer.
            layers: Specification handed to ``construct_linear_layers``;
                ``layers[-1][1]`` is used as the input width of ``output``.
            device: Stored as ``self.device``; the finished module is moved
                to it.
        """
        super().__init__()
        self.device = device

        # Keep the dimensions around as plain attributes.
        self._obs_dim, self._action_dim, self._output_dim = (
            obs_dim, action_dim, output_dim)

        self._flatten = Flatten()
        self.fc_layers = construct_linear_layers(layers)

        last_width = layers[-1][1]
        self.output = nn.Linear(last_width, output_dim)

        self.to(device)
Пример #6
0
    def __init__(self,
                 obs_dim,
                 action_dim,
                 task_dim,
                 shared_layers,
                 initial_alpha=1.,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        """Build the multitask model: shared layers, policy and two Q heads.

        Args:
            obs_dim: Forwarded unchanged to the parent constructor.
            action_dim: Stored as ``self._action_dim``. The policy head emits
                ``task_dim * action_dim * 2`` values; each Q head's input
                width is ``shared_layers[-1][1] + action_dim``.
            task_dim: Stored as ``self._task_dim``. Sets the output width of
                each Q head, the length of ``self._log_alpha`` and the shape
                of the optional ``value_rms``.
            shared_layers: Specification handed to
                ``construct_linear_layers``; ``shared_layers[-1][1]`` is used
                as the input width of the heads.
            initial_alpha: Forwarded to the parent constructor; its log also
                initialises every entry of ``self._log_alpha``.
            eps: Forwarded unchanged to the parent constructor.
            device: Forwarded to the parent constructor; the finished module
                is moved to ``self.device``.
            normalize_obs: Forwarded unchanged to the parent constructor.
            normalize_value: When true, a ``RunningMeanStd`` of shape
                ``(task_dim,)`` is stored as ``self.value_rms``. The parent
                always receives ``normalize_value=False``.
        """
        # The parent is told not to normalise values; the statistics with one
        # entry per task are created below instead.
        super().__init__(obs_dim=obs_dim,
                         initial_alpha=initial_alpha,
                         eps=eps,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=False)
        self._task_dim = task_dim
        self._action_dim = action_dim
        self._flatten = Flatten()

        self._shared_network = construct_linear_layers(shared_layers)
        self._policy = nn.Sequential(nn.Linear(shared_layers[-1][1], 256),
                                     nn.ReLU(),
                                     nn.Linear(256, task_dim * action_dim * 2))
        self._q1 = nn.Sequential(
            nn.Linear(shared_layers[-1][1] + action_dim, 256), nn.ReLU(),
            nn.Linear(256, task_dim))
        self._q2 = nn.Sequential(
            nn.Linear(shared_layers[-1][1] + action_dim, 256), nn.ReLU(),
            nn.Linear(256, task_dim))
        # One log-alpha entry per task, all initialised to log(initial_alpha).
        self._log_alpha = nn.Parameter(
            torch.ones(task_dim) * torch.log(torch.tensor(initial_alpha)))

        # Create the value statistics BEFORE moving the module: ``Module.to``
        # only relocates submodules registered at call time, so anything
        # attached afterwards would stay on the default device.
        # NOTE(review): this matters only if RunningMeanStd is an nn.Module
        # holding tensors -- confirm; otherwise the reordering is a no-op.
        if normalize_value:
            self.value_rms = RunningMeanStd(shape=(self._task_dim, ),
                                            norm_dim=(0, ))

        self.to(self.device)
    def __init__(
            self,
            obs_dim,
            hidden_state_dim,
            action_dim,
            shared_layers,
            device=torch.device(CPU),
            normalize_obs=False,
            normalize_value=False):
        """Build the shared layers, an LSTM and three linear heads.

        Args:
            obs_dim: Forwarded unchanged to the parent constructor.
            hidden_state_dim: Forwarded to the parent constructor. The value
                later read back as ``self.hidden_state_dim`` sizes the LSTM
                hidden state and the input of every head.
            action_dim: Output width of ``action_mean`` and
                ``action_raw_std``.
            shared_layers: Specification handed to
                ``construct_linear_layers``; ``shared_layers[-1][1]`` is used
                as the LSTM input size.
            device: Forwarded to the parent constructor; the finished module
                is moved to ``self.device``.
            normalize_obs: Forwarded unchanged to the parent constructor.
            normalize_value: Forwarded unchanged to the parent constructor.
        """
        super().__init__(
            obs_dim=obs_dim,
            hidden_state_dim=hidden_state_dim,
            norm_dim=(0,),
            device=device,
            normalize_obs=normalize_obs,
            normalize_value=normalize_value,
        )

        self._flatten = Flatten()
        self.shared_network = construct_linear_layers(shared_layers)
        self.lstm_layer = nn.LSTM(
            input_size=shared_layers[-1][1],
            hidden_size=self.hidden_state_dim,
            batch_first=True,
        )

        # Every head reads a vector of hidden-state width; ``value`` emits a
        # single number.
        hidden_width = self.hidden_state_dim
        self.action_mean = nn.Linear(hidden_width, action_dim)
        self.action_raw_std = nn.Linear(hidden_width, action_dim)
        self.value = nn.Linear(hidden_width, 1)

        self.to(self.device)