Exemplo n.º 1
0
    def __init__(
            self,
            action_dim: int,
            history_length: int = 4,
            fc_layers: Tuple = (256, ),
    ):
        """Assemble the convolutional trunk and the two dueling heads.

        Args:
            action_dim: Width of the last layer of the advantage head.
            history_length: First entry of the shape tuple handed to
                ``cnn`` (presumably the number of stacked frames --
                confirm against ``cnn``).
            fc_layers: Hidden layer widths used by both heads.
        """
        super(DuelingDQNValueCNN, self).__init__()

        self.action_dim = action_dim

        # ``cnn`` returns the conv module plus the size that the heads
        # below take as their input width.
        conv_net, feature_dim = cnn((history_length, 16, 32))
        self.conv = conv_net

        hidden_sizes = list(fc_layers)
        # Advantage head ends in ``action_dim`` units, value head in one.
        self.advantage = mlp([feature_dim, *hidden_sizes, action_dim])
        self.value = mlp([feature_dim, *hidden_sizes, 1])
Exemplo n.º 2
0
    def __init__(self,
                 action_dim: int,
                 history_length: int = 4,
                 fc_layers: Tuple = (128, ),
                 noisy_layers: Tuple = (128, 128)):
        """Build the conv trunk and the ``noisy_mlp`` model on top of it.

        Args:
            action_dim: Appended as the last size of the noisy layer list.
            history_length: First entry of the shape tuple handed to
                ``cnn`` (presumably the number of stacked frames --
                confirm against ``cnn``).
            fc_layers: Sizes that follow the conv output size in the first
                list passed to ``noisy_mlp``.
            noisy_layers: Sizes that precede ``action_dim`` in the second
                list passed to ``noisy_mlp``.
        """
        super(NoisyDQNValueCNN, self).__init__()

        # ``cnn`` returns the conv module plus the size fed to the model.
        conv_net, feature_dim = cnn((history_length, 16, 32))
        self.conv = conv_net

        plain_sizes = [feature_dim, *fc_layers]
        noisy_sizes = [*noisy_layers, action_dim]
        self.model = noisy_mlp(plain_sizes, noisy_sizes)
Exemplo n.º 3
0
    def __init__(
            self,
            action_dim,
            history_length=4,
            val_type="Qs",
            fc_layers=(256, ),
    ):
        """Build the conv trunk and the value model selected by ``val_type``.

        Args:
            action_dim: Stored on the instance and forwarded to
                ``_get_val_model``.
            history_length: First entry of the shape tuple handed to
                ``cnn`` (presumably the number of stacked frames --
                confirm against ``cnn``).
            val_type: Selector string forwarded to ``_get_val_model``.
            fc_layers: Hidden layer widths forwarded to ``_get_val_model``.
        """
        super(CNNValue, self).__init__()

        self.action_dim = action_dim

        # ``cnn`` returns the conv module plus the size fed to the head.
        conv_net, feature_dim = cnn((history_length, 16, 32))
        self.conv = conv_net

        self.fc = _get_val_model(
            mlp,
            val_type,
            feature_dim,
            fc_layers,
            action_dim,
        )
Exemplo n.º 4
0
    def __init__(
            self,
            framestack: int,
            action_dim: spaces.Space,
            fc_layers: Tuple = (256, ),
            val_type: str = "V",
            discrete: bool = True,
            *args,
            **kwargs
    ):
        """Build a shared conv feature extractor with actor and critic heads.

        Args:
            framestack: First entry of the shape tuple handed to ``cnn``.
            action_dim: Forwarded to both ``MlpPolicy`` and ``MlpValue``.
            fc_layers: Hidden layer widths forwarded to both heads.
            val_type: Selector string forwarded to ``MlpValue``.
            discrete: Flag forwarded to ``MlpPolicy``.
            *args: Accepted but not used by this initializer.
            **kwargs: Forwarded to ``MlpPolicy`` only.
        """
        super(CNNActorCritic, self).__init__()

        # ``cnn`` returns the feature module plus the size both heads take
        # as their first argument.
        feature_net, feature_dim = cnn((framestack, 16, 32))
        self.feature = feature_net

        self.actor = MlpPolicy(
            feature_dim,
            action_dim,
            fc_layers,
            discrete,
            **kwargs
        )
        self.critic = MlpValue(feature_dim, action_dim, val_type, fc_layers)
Exemplo n.º 5
0
    def __init__(
            self,
            framestack: int,
            action_dim: int,
            hidden: Tuple = (32, 32),
            discrete: bool = True,
            *args,
            **kwargs
    ):
        """Initialise the parent policy, then add a conv trunk and MLP head.

        Args:
            framestack: Passed to the parent initializer and used as the
                first entry of the shape tuple handed to ``cnn``.
            action_dim: Passed to the parent initializer and used as the
                final size of the MLP head.
            hidden: Hidden layer widths of the MLP head; also passed to
                the parent initializer.
            discrete: Flag passed to the parent initializer.
            *args: Accepted but not used by this initializer.
            **kwargs: Forwarded to the parent initializer.
        """
        super(CNNPolicy, self).__init__(
            framestack,
            action_dim,
            hidden,
            discrete,
            **kwargs
        )
        self.action_dim = action_dim

        # ``cnn`` returns the conv module plus the size fed to the head.
        conv_net, feature_dim = cnn((framestack, 16, 32))
        self.conv = conv_net

        layer_sizes = [feature_dim, *hidden, action_dim]
        # ``self.sac`` is not assigned in this method; presumably the
        # parent initializer sets it -- confirm against the base class.
        self.fc = mlp(layer_sizes, sac=self.sac)
Exemplo n.º 6
0
    def __init__(self,
                 action_dim: int,
                 num_atoms: int,
                 history_length: int = 4,
                 fc_layers: Tuple = (128, 128),
                 noisy_layers: Tuple = (128, 512)):
        """Build the conv trunk and the ``noisy_mlp`` model on top of it.

        The last size handed to ``noisy_mlp`` is ``action_dim * num_atoms``.

        Args:
            action_dim: Stored on the instance; one factor of the final
                layer size.
            num_atoms: Stored on the instance; the other factor of the
                final layer size.
            history_length: First entry of the shape tuple handed to
                ``cnn`` (presumably the number of stacked frames --
                confirm against ``cnn``).
            fc_layers: Sizes that follow the conv output size in the first
                list passed to ``noisy_mlp``.
            noisy_layers: Sizes that precede the final layer size in the
                second list passed to ``noisy_mlp``.
        """
        super(CategoricalDQNValueCNN, self).__init__()

        self.action_dim = action_dim
        self.num_atoms = num_atoms

        # ``cnn`` returns the conv module plus the size fed to the model.
        conv_net, feature_dim = cnn((history_length, 16, 32))
        self.conv = conv_net

        final_width = action_dim * num_atoms
        plain_sizes = [feature_dim, *fc_layers]
        noisy_sizes = [*noisy_layers, final_width]
        self.model = noisy_mlp(plain_sizes, noisy_sizes)
Exemplo n.º 7
0
    def __init__(self,
                 framestack: int,
                 action_dim: int,
                 val_type: str = "Qs",
                 fc_layers: Tuple = (256, ),
                 **kwargs):
        """Build the conv trunk and the value model selected by ``val_type``.

        Args:
            framestack: First entry of the shape tuple handed to ``cnn``.
            action_dim: Stored on the instance and forwarded to
                ``_get_val_model``.
            val_type: Selector string forwarded to ``_get_val_model``.
            fc_layers: Hidden layer widths forwarded to ``_get_val_model``.
            **kwargs: Only the ``"activation"`` key is read here; it is
                passed to ``cnn`` and falls back to ``"relu"`` when absent.
        """
        super(CNNValue, self).__init__()

        self.action_dim = action_dim

        conv_activation = kwargs.get("activation", "relu")

        # ``cnn`` returns the conv module plus the size fed to the head.
        conv_net, feature_dim = cnn(
            (framestack, 16, 32),
            activation=conv_activation,
        )
        self.conv = conv_net

        self.fc = _get_val_model(
            mlp,
            val_type,
            feature_dim,
            fc_layers,
            action_dim,
        )
Exemplo n.º 8
0
 def __init__(self, input_dim, action_dim, hidden_dims=(128, 64)):
     """Create the conv trunk and the three linear layers.

     Args:
         input_dim: Not read by this initializer; kept in the signature
             so existing callers keep working.
         action_dim: Added to ``hidden_dims[0]`` to form the input width
             of ``fc2``.
         hidden_dims: Sequence whose first two entries give the output
             widths of ``fc1`` and ``fc2``. The default is an immutable
             tuple so it cannot be shared and mutated across instances.
     """
     super(Discriminator, self).__init__()
     # ``cnn`` is called with its defaults and returns the conv module
     # plus the size used as the input width of ``fc1``.
     self.conv, self.conv_output_size = cnn()
     self.fc1 = nn.Linear(self.conv_output_size, hidden_dims[0])
     # ``fc2`` takes ``hidden_dims[0] + action_dim`` inputs; presumably
     # the forward pass concatenates the action onto the ``fc1`` output
     # -- confirm against ``forward``.
     self.fc2 = nn.Linear(hidden_dims[0] + action_dim, hidden_dims[1])
     self.fc3 = nn.Linear(hidden_dims[1], 1)