def calc_error_loss(self, curr_errs1, curr_errs2, target_errs1, target_errs2):
    """Return the summed mean-squared error of the two error estimates.

    Each loss term is the mean of the squared element-wise difference
    between the current errors and their targets. As a side effect,
    ``self._tau1`` and ``self._tau2`` are each passed to ``soft_update``
    together with the detached mean of the matching current errors and
    ``self._target_update_coef``.

    Args:
        curr_errs1: Current errors of the first estimate.
        curr_errs2: Current errors of the second estimate.
        target_errs1: Targets for ``curr_errs1``.
        target_errs2: Targets for ``curr_errs2``.

    Returns:
        The sum of the two mean-squared-error terms.
    """
    squared_gap1 = (curr_errs1 - target_errs1) ** 2
    loss_first = torch.mean(squared_gap1)
    squared_gap2 = (curr_errs2 - target_errs2) ** 2
    loss_second = torch.mean(squared_gap2)

    # The means are detached so the tau bookkeeping never contributes
    # gradients to the loss returned below.
    # NOTE(review): soft_update is defined outside this block; presumably it
    # moves each tau toward the batch mean in place -- confirm against it.
    for tau, errs in ((self._tau1, curr_errs1), (self._tau2, curr_errs2)):
        soft_update(tau, errs.detach().mean(), self._target_update_coef)

    return loss_first + loss_second
def update_target_networks(self):
    """Soft-update the target Q-network from the online Q-network.

    Passes ``self._target_q_net``, ``self._online_q_net`` and
    ``self._target_update_coef`` to ``soft_update``.
    """
    target_net = self._target_q_net
    online_net = self._online_q_net
    # NOTE(review): soft_update is defined outside this block; presumably it
    # modifies the target network in place -- confirm against it.
    soft_update(target_net, online_net, self._target_update_coef)
def update_target_networks(self):
    """Update the parent's target networks, then the error network if enabled.

    Always delegates to the parent implementation first. When
    ``self.discor`` is truthy, additionally passes
    ``self._target_error_net``, ``self._online_error_net`` and
    ``self._target_update_coef`` to ``soft_update``.
    """
    super().update_target_networks()

    # Skip the error-network update unless the discor flag is set.
    if not self.discor:
        return

    soft_update(
        self._target_error_net,
        self._online_error_net,
        self._target_update_coef,
    )