Exemplo n.º 1
0
    def __init__(self, state_size):
        """Create the layers of this value network.

        Builds three ``rn.NoisyLinear`` layers with sizes
        ``state_size -> 64``, ``64 -> 64`` and ``64 -> 1``, plus one
        ``nn.LayerNorm(64)`` for each of the two 64-wide layers. How the
        layers are chained is decided by the forward pass, which is not
        part of this method.

        Args:
            state_size: Number of input features accepted by ``fc1``;
                also stored on the instance as ``self.state_size``.
        """
        super(Value, self).__init__()
        self.state_size = state_size

        # Single place for the width shared by both hidden layers.
        hidden = 64

        # Keep this creation order: it fixes the order in which the
        # submodules are assigned (and, presumably, initialised).
        # NOTE(review): ``fc_norm`` matches ``fc1`` by size; the name has no
        # "1" suffix, unlike ``fc2_norm`` — kept as-is, other code may use it.
        self.fc1 = rn.NoisyLinear(state_size, hidden)
        self.fc_norm = nn.LayerNorm(hidden)

        self.fc2 = rn.NoisyLinear(hidden, hidden)
        self.fc2_norm = nn.LayerNorm(hidden)

        # Single-output final layer.
        self.fc3 = rn.NoisyLinear(hidden, 1)
Exemplo n.º 2
0
    def __init__(self, state_size, action_size):
        """Create the layers of this policy network.

        Builds three ``rn.NoisyLinear`` layers with sizes
        ``state_size -> 64``, ``64 -> 64`` and ``64 -> action_size``, plus
        one ``nn.LayerNorm(64)`` for each of the two 64-wide layers. How
        the layers are chained (and whether any activation or softmax is
        applied) is decided by the forward pass, not shown here.

        Args:
            state_size: Number of input features accepted by ``fc1``.
            action_size: Number of outputs produced by ``fc3``.
        """
        super(Policy, self).__init__()
        self.state_size, self.action_size = state_size, action_size

        # Single place for the width shared by both hidden layers.
        hidden = 64

        # Keep this creation order: it fixes the order in which the
        # submodules are assigned (and, presumably, initialised).
        # NOTE(review): ``fc_norm`` matches ``fc1`` by size; the name has no
        # "1" suffix, unlike ``fc2_norm`` — kept as-is, other code may use it.
        self.fc1 = rn.NoisyLinear(state_size, hidden)
        self.fc_norm = nn.LayerNorm(hidden)

        self.fc2 = rn.NoisyLinear(hidden, hidden)
        self.fc2_norm = nn.LayerNorm(hidden)

        # Final layer: one output per action.
        self.fc3 = rn.NoisyLinear(hidden, action_size)
Exemplo n.º 3
0
    def __init__(self, state_size, action_size):
        """Create a shared input layer plus separate value and advantage heads.

        Layers built here:

        * ``fc1`` / ``fc_norm``: ``state_size -> 255`` with a LayerNorm.
        * value head: ``value_fc`` (255 -> 255), ``value_fc_norm`` and
          ``value`` (255 -> 1).
        * advantage head: ``advantage_fc`` (255 -> 255),
          ``advantage_fc_norm`` and ``advantage`` (255 -> action_size).

        All linear layers are ``rn.NoisyLinear``. The value/advantage split
        resembles a dueling layout, but how the two heads are combined is
        defined by the forward pass, which is not part of this method.

        Args:
            state_size: Number of input features accepted by ``fc1``.
            action_size: Number of outputs of the ``advantage`` layer.
        """
        super(Value, self).__init__()
        self.state_size, self.action_size = state_size, action_size

        # NOTE(review): width is 255 everywhere (not 256) — kept exactly.
        width = 255

        # Keep this creation order: it fixes the order in which the
        # submodules are assigned (and, presumably, initialised).
        self.fc1 = rn.NoisyLinear(state_size, width)
        self.fc_norm = nn.LayerNorm(width)

        # Value head: hidden layer, its norm, then a single output.
        self.value_fc = rn.NoisyLinear(width, width)
        self.value_fc_norm = nn.LayerNorm(width)
        self.value = rn.NoisyLinear(width, 1)

        # Advantage head: hidden layer, its norm, then one output per action.
        self.advantage_fc = rn.NoisyLinear(width, width)
        self.advantage_fc_norm = nn.LayerNorm(width)
        self.advantage = rn.NoisyLinear(width, action_size)
Exemplo n.º 4
0
    def __init__(self, state_size, action_size):
        """Create a convolutional trunk plus value and advantage heads.

        Layers built here:

        * ``conv1``..``conv3``: three ``nn.Conv2d`` layers
          (4 -> 32 -> 64 -> 64 channels).
        * ``fc1`` / ``fc1_norm``: ``nn.Linear(3136, 512)`` with a LayerNorm.
        * value head: ``value_fc`` (NoisyLinear 512 -> 512),
          ``value_fc_norm`` and ``value`` (plain Linear 512 -> 1).
        * advantage head: ``advantage_fc`` (NoisyLinear 512 -> 512),
          ``advantage_fc_norm`` and ``advantage``
          (plain Linear 512 -> action_size).

        How the layers are chained and how the two heads are combined is
        defined by the forward pass, which is not part of this method.

        Args:
            state_size: Stored as ``self.state_size`` only; no layer built
                here is sized from it.
            action_size: Number of outputs of the ``advantage`` layer.
        """
        super(Value, self).__init__()
        self.state_size, self.action_size = state_size, action_size

        # 3136 == 64 * 7 * 7, which is what the three convolutions below
        # produce for an 84x84 input — assumes input is (N, 4, 84, 84);
        # TODO confirm against the forward pass.
        conv_out_features = 3136
        width = 512

        # Keep this creation order: it fixes the order in which the
        # submodules are assigned (and, presumably, initialised).
        self.conv1 = nn.Conv2d(
            in_channels=4, out_channels=32, kernel_size=(8, 8), stride=(4, 4)
        )
        self.conv2 = nn.Conv2d(
            in_channels=32, out_channels=64, kernel_size=(4, 4), stride=(2, 2)
        )
        self.conv3 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1)
        )

        self.fc1 = nn.Linear(conv_out_features, width)
        self.fc1_norm = nn.LayerNorm(width)

        # Value head: noisy hidden layer, its norm, plain single-output layer.
        self.value_fc = rn.NoisyLinear(width, width)
        self.value_fc_norm = nn.LayerNorm(width)
        self.value = nn.Linear(width, 1)

        # Advantage head: noisy hidden layer, its norm, plain per-action layer.
        self.advantage_fc = rn.NoisyLinear(width, width)
        self.advantage_fc_norm = nn.LayerNorm(width)
        self.advantage = nn.Linear(width, action_size)