Exemplo n.º 1
0
    def make_networks(self, config):
        """Create the feature network and the heads attached to it.

        Sets ``self.feature_network`` (an ``RNN`` when
        ``config['network.recurrent']`` is truthy, an ``NN`` otherwise), then
        ``self.action_head`` depending on ``self.env_spec.control_type``, and
        finally ``self.V_head`` unless ``config['network.independent_V']`` is
        truthy.

        Args:
            config: Configuration indexed by string keys. Always read:
                ``'network.recurrent'``, ``'network.hidden_sizes'`` and
                ``'network.independent_V'``. Read only for ``'Continuous'``
                control: ``'agent.min_std'``, ``'agent.std_style'``,
                ``'agent.constant_std'``, ``'agent.std_state_dependent'``
                and ``'agent.init_std'``.
        """
        network_cls = RNN if config['network.recurrent'] else NN
        self.feature_network = network_cls(config,
                                           self.device,
                                           env_spec=self.env_spec)

        # The last entry of the hidden sizes is handed to every head as its
        # feature dimension.
        feature_dim = config['network.hidden_sizes'][-1]

        control_type = self.env_spec.control_type
        if control_type == 'Discrete':
            self.action_head = CategoricalHead(config, self.device,
                                               feature_dim, self.env_spec)
        elif control_type == 'Continuous':
            std_options = {
                'min_std': config['agent.min_std'],
                'std_style': config['agent.std_style'],
                'constant_std': config['agent.constant_std'],
                'std_state_dependent': config['agent.std_state_dependent'],
                'init_std': config['agent.init_std'],
            }
            self.action_head = DiagGaussianHead(config, self.device,
                                                feature_dim, self.env_spec,
                                                **std_options)
        # NOTE: any other control type leaves ``self.action_head`` unset.

        needs_value_head = not config['network.independent_V']
        if needs_value_head:
            self.V_head = StateValueHead(config, self.device, feature_dim)
Exemplo n.º 2
0
 def make_networks(self, config):
     """Create the featurizer MLP plus the action and state-value heads.

     Sets ``self.featurize`` to three linear layers (input -> 64 -> 64 -> 50)
     with a ReLU after each of the first two, then sets ``self.action_head``
     (a ``CategoricalHead``) and ``self.V_head`` (a ``StateValueHead``), both
     constructed with a feature dimension of 50.

     Args:
         config: Passed through unchanged to both heads.
     """
     hidden_dim = 64
     feature_dim = 50
     input_dim = self.env_spec.observation_space.flat_dim

     # Layers are created in the same order they are applied.
     layers = [
         nn.Linear(input_dim, hidden_dim),
         nn.ReLU(),
         nn.Linear(hidden_dim, hidden_dim),
         nn.ReLU(),
         nn.Linear(hidden_dim, feature_dim),
     ]
     self.featurize = nn.Sequential(*layers)

     self.action_head = CategoricalHead(
         config, self.device, feature_dim, self.env_spec)
     self.V_head = StateValueHead(config, self.device, feature_dim)
Exemplo n.º 3
0
 def make_params(self, config):
     """Create the feature layers, their layer norms and the output head.

     Sets ``self.feature_layers`` via ``make_fc``, ``self.layer_norms`` to a
     ``ModuleList`` holding one ``LayerNorm`` per entry of
     ``config['network.hidden_sizes']``, and ``self.output_layer`` to a
     ``StateValueHead`` built with the last hidden size.

     Args:
         config: Configuration indexed by string keys; only
             ``'network.hidden_sizes'`` is read here, and the object itself
             is forwarded to ``StateValueHead``.
     """
     hidden_sizes = config['network.hidden_sizes']
     input_dim = self.env_spec.observation_space.flat_dim

     self.feature_layers = make_fc(input_dim, hidden_sizes)

     norms = [nn.LayerNorm(width) for width in hidden_sizes]
     self.layer_norms = nn.ModuleList(norms)

     self.output_layer = StateValueHead(
         config, self.device, hidden_sizes[-1])
Exemplo n.º 4
0
 def make_params(self, config):
     """Run the parent's ``make_params`` and then set the output head.

     ``self.output_layer`` is assigned a ``StateValueHead`` built with the
     last entry of ``config['network.hidden_sizes']``.

     Args:
         config: Forwarded to the parent implementation and to
             ``StateValueHead``; ``'network.hidden_sizes'`` is read here.
     """
     # Parent setup runs first; the head is assigned afterwards.
     super().make_params(config)

     last_hidden_size = config['network.hidden_sizes'][-1]
     self.output_layer = StateValueHead(
         config, self.device, last_hidden_size)
Exemplo n.º 5
0
 def make_params(self, config):
     """Run the parent's ``make_params`` and then set the output head.

     ``self.output_layer`` is assigned a ``StateValueHead`` built with
     ``self.rnn.hidden_size``.

     Args:
         config: Forwarded to the parent implementation and to
             ``StateValueHead``.
     """
     # ``self.rnn`` is read only after the parent call (presumably the
     # parent creates it; confirm against the base class).
     super().make_params(config)

     rnn_hidden_size = self.rnn.hidden_size
     self.output_layer = StateValueHead(
         config, self.device, rnn_hidden_size)
Exemplo n.º 6
0
def test_state_value_head():
    """Check StateValueHead's type, stored feature_dim and output shape."""
    batch_size, feature_dim = 3, 30

    head = StateValueHead(None, None, feature_dim)
    assert isinstance(head, BaseNetwork)
    assert head.feature_dim == feature_dim

    # A (3, 30) input is expected to produce a (3, 1) output.
    features = torch.randn(batch_size, feature_dim)
    values = head(features)
    assert list(values.shape) == [batch_size, 1]