Exemplo n.º 1
0
    def test_mlp(self):
        """
        Check the layout and output shapes of networks returned by ``mlp``.

        Two networks are built from the same size list, one with the default
        arguments and one with ``sac=True``. For each, the test verifies the
        number of contained modules, that every even-indexed module is an
        ``nn.Linear``, and the shape produced for a 2-element input.
        """
        layer_sizes = [2, 3, 3, 2]
        plain_net = mlp(layer_sizes)
        sac_net = mlp(layer_sizes, sac=True)
        n_sizes = len(layer_sizes)

        # Default network: two modules per consecutive pair of sizes.
        assert len(plain_net) == 2 * (n_sizes - 1)
        for idx in (0, 2, 4):
            assert isinstance(plain_net[idx], nn.Linear)

        # sac=True network: one pair of modules fewer than the default one.
        assert len(sac_net) == 2 * (n_sizes - 2)
        for idx in (0, 2):
            assert isinstance(sac_net[idx], nn.Linear)

        sample = torch.randn((2,))
        assert plain_net(sample).shape == (2,)
        assert sac_net(sample).shape == (3,)
Exemplo n.º 2
0
    def __init__(self, *args, **kwargs):
        """
        Build the convolutional stage and the fully connected head.

        All positional and keyword arguments are handed unchanged to the
        parent constructor. Afterwards ``state_dim``, ``activation``,
        ``fc_layers`` and ``action_dim`` are read from ``self``.

        The pair returned by ``cnn`` is stored as ``self.conv`` and
        ``self.output_size``; ``self.model`` is the ``mlp`` built from
        ``output_size``, the entries of ``fc_layers`` and ``action_dim``.
        """
        super(CnnValue, self).__init__(*args, **kwargs)

        channels = (self.state_dim, 16, 32)
        conv_stack, conv_out_size = cnn(channels, activation=self.activation)
        self.conv = conv_stack
        self.output_size = conv_out_size

        # Head sizes: conv output size, then the configured layers, then actions.
        head_sizes = [self.output_size] + list(self.fc_layers) + [self.action_dim]
        self.model = mlp(head_sizes)
Exemplo n.º 3
0
    def __init__(self,
                 framestack: int,
                 action_dim: int,
                 hidden: Tuple = (32, 32),
                 discrete: bool = True,
                 *args,
                 **kwargs):
        """
        Build the convolutional stage and the fully connected head.

        Args:
            framestack: First entry of the channel tuple given to ``cnn``;
                also forwarded to the parent constructor.
            action_dim: Final entry of the size list given to ``mlp``;
                also forwarded to the parent constructor.
            hidden: Sizes placed between the ``cnn`` output size and
                ``action_dim`` in the ``mlp`` size list.
            discrete: Forwarded to the parent constructor.
            *args: Accepted but not used or forwarded by this constructor.
            **kwargs: Forwarded to the parent constructor.

        ``self.sac`` is read after the parent constructor has run and is
        passed to ``mlp`` as its ``sac`` argument.
        """
        # NOTE: only the named arguments and **kwargs reach the parent; any
        # extra positional arguments in *args are dropped here.
        super(CNNPolicy, self).__init__(
            framestack, action_dim, hidden, discrete, **kwargs
        )

        conv_channels = (framestack, 16, 32)
        conv_stack, conv_out_size = cnn(conv_channels)
        self.conv = conv_stack

        fc_sizes = [conv_out_size, *hidden, action_dim]
        self.fc = mlp(fc_sizes, sac=self.sac)
Exemplo n.º 4
0
    def __init__(self,
                 state_dim: int,
                 action_dim: int,
                 hidden: Tuple = (32, 32),
                 discrete: bool = True,
                 *args,
                 **kwargs):
        """
        Build the fully connected network stored as ``self.model``.

        Args:
            state_dim: First entry of the size list given to ``mlp``;
                also forwarded to the parent constructor.
            action_dim: Final entry of the size list given to ``mlp``;
                also forwarded to the parent constructor.
            hidden: Sizes placed between ``state_dim`` and ``action_dim``.
            discrete: Forwarded to the parent constructor.
            *args: Accepted but not used or forwarded by this constructor.
            **kwargs: Forwarded to the parent constructor. The optional
                ``"activation"`` entry selects the activation passed to
                ``mlp``; ``"relu"`` is used when it is absent.

        ``self.sac`` is read after the parent constructor has run and is
        passed to ``mlp`` as its ``sac`` argument.
        """
        # NOTE: only the named arguments and **kwargs reach the parent; any
        # extra positional arguments in *args are dropped here.
        super(MlpPolicy, self).__init__(
            state_dim, action_dim, hidden, discrete, **kwargs
        )
        self.activation = kwargs.get("activation", "relu")

        layer_sizes = [state_dim, *hidden, action_dim]
        self.model = mlp(layer_sizes, activation=self.activation, sac=self.sac)
Exemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """
     Add the two ``mlp`` heads ``self.advantage`` and ``self.value``.

     All arguments are handed unchanged to the parent constructor.
     Afterwards ``output_size``, ``fc_layers`` and ``action_dim`` are read
     from ``self``. Both heads use ``output_size`` followed by the entries
     of ``fc_layers``; the advantage head ends with ``action_dim`` and the
     value head ends with a single output.
     """
     super(CnnDuelingValue, self).__init__(*args, **kwargs)

     advantage_sizes = [self.output_size] + list(self.fc_layers) + [self.action_dim]
     self.advantage = mlp(advantage_sizes)

     value_sizes = [self.output_size] + list(self.fc_layers) + [1]
     self.value = mlp(value_sizes)
Exemplo n.º 6
0
 def __init__(self, *args, **kwargs):
     """
     Add ``self.feature``, ``self.advantage`` and ``self.value`` networks.

     All arguments are handed unchanged to the parent constructor.
     Afterwards ``state_dim``, ``fc_layers`` and ``action_dim`` are read
     from ``self``. The feature network is built from ``state_dim``
     followed by every entry of ``fc_layers`` except the last. Both heads
     take ``fc_layers[-1]`` as their first size; the advantage head ends
     with ``action_dim`` and the value head with a single output.
     """
     super(MlpDuelingValue, self).__init__(*args, **kwargs)

     # NOTE(review): this size list ends at fc_layers[-2] (or at state_dim
     # when fc_layers has one entry), while both heads below start at
     # fc_layers[-1]. If the heads consume the feature output, the widths
     # only agree when those values are equal - confirm against forward().
     feature_sizes = [self.state_dim] + list(self.fc_layers[:-1])
     self.feature = mlp(feature_sizes)

     head_input = self.fc_layers[-1]
     self.advantage = mlp([head_input, self.action_dim])
     self.value = mlp([head_input, 1])