def _compute_loss(self, estimate, target, name):
    """Compute the head loss and the distributional (z) loss.

    Args:
        estimate: Mapping read at the keys ``"q_values"`` and ``"logits"``.
        target: Mapping read at the keys ``"target_q"`` and ``"target_p"``.
        name: Forwarded unchanged to ``DQN_IDS._compute_loss``
            (presumably a logging tag -- confirm against that method).

    Returns:
        A dict with the keys ``"head_loss"`` and ``"z_loss"``.
    """
    # Look the entries up in a fixed order: estimates first, then targets.
    q_values = estimate["q_values"]
    logits = estimate["logits"]
    q_target = target["target_q"]
    p_target = target["target_p"]

    losses = {}
    losses["head_loss"] = DQN_IDS._compute_loss(self, q_values, q_target, name)
    # The z loss always uses the fixed name "train/z_loss" rather than `name`.
    losses["z_loss"] = C51._compute_loss(self, logits, p_target, "train/z_loss")
    return losses
def _compute_loss(self, estimate, target, name):
    """Compute the head loss and the distributional (z) loss.

    Args:
        estimate: Mapping read at the keys ``"q_values"`` and ``"quantiles"``.
        target: Mapping read at the keys ``"target_q"`` and ``"target_z"``.
        name: Forwarded unchanged to ``DQN_IDS._compute_loss``
            (presumably a logging tag -- confirm against that method).

    Returns:
        A dict with the keys ``"head_loss"`` and ``"z_loss"``.
    """
    # Look the entries up in a fixed order: estimates first, then targets.
    q_values = estimate["q_values"]
    quantiles = estimate["quantiles"]
    q_target = target["target_q"]
    z_target = target["target_z"]

    losses = {}
    losses["head_loss"] = DQN_IDS._compute_loss(self, q_values, q_target, name)
    # The z loss always uses the fixed name "train/z_loss" rather than `name`.
    losses["z_loss"] = QRDQN._compute_loss(self, quantiles, z_target, "train/z_loss")
    return losses