Пример #1
0
    def calc_error_loss(self, curr_errs1, curr_errs2, target_errs1,
                        target_errs2):
        """Return the summed squared-error loss of the two error estimates.

        For each estimate the loss is the mean of the element-wise squared
        difference between the current and the target errors; the two means
        are added together and returned.

        Side effect: ``self._tau1`` and ``self._tau2`` are each handed to
        ``soft_update`` together with the detached mean of the matching
        current errors and ``self._target_update_coef``.
        NOTE(review): presumably ``soft_update`` blends that mean into the
        tau value in place -- confirm against its definition.
        """
        residual1 = curr_errs1 - target_errs1
        residual2 = curr_errs2 - target_errs2
        total_loss = residual1.pow(2).mean() + residual2.pow(2).mean()

        # Detach first so the statistics passed on carry no autograd graph.
        mean_err1 = curr_errs1.detach().mean()
        soft_update(self._tau1, mean_err1, self._target_update_coef)
        mean_err2 = curr_errs2.detach().mean()
        soft_update(self._tau2, mean_err2, self._target_update_coef)

        return total_loss
Пример #2
0
 def update_target_networks(self):
     """Call ``soft_update`` on the target Q-network with the online one.

     Passes ``self._target_q_net``, ``self._online_q_net`` and
     ``self._target_update_coef`` to ``soft_update``.
     NOTE(review): presumably this moves the target weights toward the
     online weights by the given coefficient -- confirm in ``soft_update``.
     """
     target_net = self._target_q_net
     online_net = self._online_q_net
     soft_update(target_net, online_net, self._target_update_coef)
Пример #3
0
 def update_target_networks(self):
     """Run the parent's target update, then update the error network.

     The parent implementation is always invoked first. Only when
     ``self.discor`` is truthy is ``soft_update`` additionally called with
     ``self._target_error_net``, ``self._online_error_net`` and
     ``self._target_update_coef``.
     """
     super().update_target_networks()

     # Nothing more to do unless the error networks are in use.
     if not self.discor:
         return

     target_net = self._target_error_net
     online_net = self._online_error_net
     soft_update(target_net, online_net, self._target_update_coef)