def __init__(self, state_size):
    """Create the layers of the ``Value`` network.

    Builds three ``rn.NoisyLinear`` layers mapping
    ``state_size -> 64 -> 64 -> 1`` and one ``nn.LayerNorm(64)`` for each
    of the two 64-wide layers.

    Args:
        state_size: Input feature count of the first layer; also stored
            on the instance as ``self.state_size``.
    """
    super(Value, self).__init__()
    self.state_size = state_size

    hidden = 64  # width shared by both hidden layers and their norms

    # First hidden layer and its normalisation.
    self.fc1 = rn.NoisyLinear(state_size, hidden)
    self.fc_norm = nn.LayerNorm(hidden)

    # Second hidden layer and its normalisation.
    self.fc2 = rn.NoisyLinear(hidden, hidden)
    self.fc2_norm = nn.LayerNorm(hidden)

    # Output layer: a single unit.
    self.fc3 = rn.NoisyLinear(hidden, 1)
def __init__(self, state_size, action_size):
    """Create the layers of the ``Policy`` network.

    Builds three ``rn.NoisyLinear`` layers mapping
    ``state_size -> 64 -> 64 -> action_size`` and one ``nn.LayerNorm(64)``
    for each of the two 64-wide layers.

    Args:
        state_size: Input feature count of the first layer; also stored
            on the instance as ``self.state_size``.
        action_size: Output feature count of the last layer; also stored
            on the instance as ``self.action_size``.
    """
    super(Policy, self).__init__()
    self.state_size = state_size
    self.action_size = action_size

    hidden = 64  # width shared by both hidden layers and their norms

    # First hidden layer and its normalisation.
    self.fc1 = rn.NoisyLinear(state_size, hidden)
    self.fc_norm = nn.LayerNorm(hidden)

    # Second hidden layer and its normalisation.
    self.fc2 = rn.NoisyLinear(hidden, hidden)
    self.fc2_norm = nn.LayerNorm(hidden)

    # Output layer: one unit per action.
    self.fc3 = rn.NoisyLinear(hidden, action_size)
def __init__(self, state_size, action_size):
    """Create the layers of the two-headed ``Value`` network.

    Builds a shared first layer (``state_size -> 255``) with a
    ``LayerNorm``, followed by two sets of layers:

    * value: ``255 -> 255`` (+ ``LayerNorm``) ``-> 1``
    * advantage: ``255 -> 255`` (+ ``LayerNorm``) ``-> action_size``

    Every linear layer here is an ``rn.NoisyLinear``.

    Args:
        state_size: Input feature count of the first layer; also stored
            on the instance as ``self.state_size``.
        action_size: Output feature count of the advantage head; also
            stored on the instance as ``self.action_size``.
    """
    super(Value, self).__init__()
    self.state_size = state_size
    self.action_size = action_size

    # NOTE(review): 255 (not 256) is what the original code uses; it is
    # kept as-is because changing it would alter parameter shapes.
    # Confirm whether the odd width is intentional.
    width = 255

    # Shared first layer.
    self.fc1 = rn.NoisyLinear(state_size, width)
    self.fc_norm = nn.LayerNorm(width)

    # Value head: hidden layer, norm, single output unit.
    self.value_fc = rn.NoisyLinear(width, width)
    self.value_fc_norm = nn.LayerNorm(width)
    self.value = rn.NoisyLinear(width, 1)

    # Advantage head: hidden layer, norm, one output unit per action.
    self.advantage_fc = rn.NoisyLinear(width, width)
    self.advantage_fc_norm = nn.LayerNorm(width)
    self.advantage = rn.NoisyLinear(width, action_size)
def __init__(self, state_size, action_size):
    """Create the convolutional trunk and the two heads of ``Value``.

    Builds three ``nn.Conv2d`` layers (4 -> 32 -> 64 -> 64 channels), a
    ``3136 -> 512`` linear layer with ``LayerNorm``, and two sets of
    layers on top of it:

    * value: ``NoisyLinear(512, 512)`` (+ ``LayerNorm``) then
      ``Linear(512, 1)``
    * advantage: ``NoisyLinear(512, 512)`` (+ ``LayerNorm``) then
      ``Linear(512, action_size)``

    Only the two 512-wide head layers are ``rn.NoisyLinear``; ``fc1`` and
    both output layers are plain ``nn.Linear``.

    Args:
        state_size: Stored on the instance as ``self.state_size``; it is
            not used to size any layer in this constructor.
        action_size: Output feature count of the advantage head; also
            stored on the instance as ``self.action_size``.
    """
    super(Value, self).__init__()
    self.state_size = state_size
    self.action_size = action_size

    # Convolutional trunk; the first layer expects 4 input channels.
    self.conv1 = nn.Conv2d(4, 32, kernel_size=(8, 8), stride=(4, 4))
    self.conv2 = nn.Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
    self.conv3 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))

    # 3136 == 64 * 7 * 7.  NOTE(review): this matches the flattened conv3
    # output for 84x84 inputs -- assumption, confirm against forward().
    flat_features = 3136
    width = 512

    self.fc1 = nn.Linear(flat_features, width)
    self.fc1_norm = nn.LayerNorm(width)

    # Value head: noisy hidden layer, norm, single output unit.
    self.value_fc = rn.NoisyLinear(width, width)
    self.value_fc_norm = nn.LayerNorm(width)
    self.value = nn.Linear(width, 1)

    # Advantage head: noisy hidden layer, norm, one output per action.
    self.advantage_fc = rn.NoisyLinear(width, width)
    self.advantage_fc_norm = nn.LayerNorm(width)
    self.advantage = nn.Linear(width, action_size)