Exemplo n.º 1
0
 def forward(self, input):
     """Return Q-values assembled from the value and advantage heads.

     The input is first passed through ``scale_grad`` together with
     ``self._grad_scale``. The advantage output is then centered by
     subtracting its mean over the last dimension (kept for broadcasting)
     before being added to the value output.
     """
     scaled = scale_grad(input, self._grad_scale)
     adv = self.advantage(scaled)
     val = self.value(scaled)
     # Center the advantage over its last dimension before combining.
     centered_adv = adv - adv.mean(dim=-1, keepdim=True)
     return val + centered_adv
Exemplo n.º 2
0
 def forward(self, input):
     """Combine the value and advantage heads into a dueling output.

     The input is first passed through ``scale_grad`` together with
     ``self._grad_scale``. The value head output is reshaped to
     ``(-1, 1, self._n_atoms)``, and the advantage output is centered by
     subtracting its mean over dim 1 before the two are summed.

     NOTE(review): dim 1 is presumably the action axis and the singleton
     dim on the value presumably exists to broadcast over it -- confirm
     against the shape produced by ``self.advantage``.
     """
     scaled = scale_grad(input, self._grad_scale)
     adv = self.advantage(scaled)
     val = self.value(scaled)
     val = val.view(-1, 1, self._n_atoms)
     # Center the advantage over dim 1 before combining with the value.
     centered_adv = adv - adv.mean(dim=1, keepdim=True)
     return val + centered_adv