def __init__(self, state_size, action_size, H=64): super().__init__() self.feature = nn.Linear(state_size, H) self.advantage_n1 = NoisyLinear(H, H) self.advantage_n2 = NoisyLinear(H, action_size) self.value_n1 = NoisyLinear(H, H) self.value_n2 = NoisyLinear(H, 1)
class NoisyDuelingRainbowNetwork(nn.Module): def __init__(self, state_size, action_size, H=64): super().__init__() self.feature = nn.Linear(state_size, H) self.advantage_n1 = NoisyLinear(H, H) self.advantage_n2 = NoisyLinear(H, action_size) self.value_n1 = NoisyLinear(H, H) self.value_n2 = NoisyLinear(H, 1) def forward(self, state, log=False): features = F.relu(self.feature(state)) advantage = F.relu(self.advantage_n1(features)) advantage = self.advantage_n2(advantage) value = F.relu(self.value_n1(features)) value = self.value_n2(value) q = value + (advantage - advantage.mean()) return q def reset_noise(self): self.advantage_n1.reset_noise() self.advantage_n2.reset_noise() self.value_n1.reset_noise() self.value_n2.reset_noise()
class NoisyRainbowNetwork(nn.Module): def __init__(self, state_size, action_size, H=64): super().__init__() self.linear = nn.Linear(state_size, H) self.n2 = NoisyLinear(H, H) self.n3 = NoisyLinear(H, action_size) def forward(self, state): x = F.relu(self.linear(state)) x = F.relu(self.n2(x)) return self.n3(x) def reset_noise(self): self.n2.reset_noise() self.n3.reset_noise()
def __init__(self, state_size, action_size, atoms, H=64): super(QRRainbowNetwork, self).__init__() self.action_size = action_size self.state_size = state_size self.atoms = atoms self.feature = nn.Linear(state_size, H) self.advantage_n1 = NoisyLinear(H, H) self.advantage_n2 = NoisyLinear(H, action_size*atoms) self.value_n1 = NoisyLinear(H, H) self.value_n2 = NoisyLinear(H, 1*atoms)
class RainbowNetwork(nn.Module): def __init__(self, state_size, action_size, atoms, H=64): super(RainbowNetwork, self).__init__() self.action_size = action_size self.state_size = state_size self.atoms = atoms self.feature = nn.Linear(state_size, H) self.advantage_n1 = NoisyLinear(H, H) self.advantage_n2 = NoisyLinear(H, action_size*atoms) self.value_n1 = NoisyLinear(H, H) self.value_n2 = NoisyLinear(H, 1*atoms) def forward(self, state, log=False): features = F.relu(self.feature(state)) advantage = F.relu(self.advantage_n1(features)) advantage = self.advantage_n2(advantage) value = F.relu(self.value_n1(features)) value = self.value_n2(value) advantage = advantage.view(-1, self.action_size, self.atoms) value = value.view(-1, 1, self.atoms) q = value + (advantage - advantage.mean(1, keepdim=True)) if log: q = F.log_softmax(q, dim=2) else: q = F.softmax(q, dim=2) return q def reset_noise(self): self.advantage_n1.reset_noise() self.advantage_n2.reset_noise() self.value_n1.reset_noise() self.value_n2.reset_noise()
def __init__(self, state_size, action_size, H=64): super().__init__() self.linear = nn.Linear(state_size, H) self.n2 = NoisyLinear(H, H) self.n3 = NoisyLinear(H, action_size)