Exemplo n.º 1
0
    def _compute_loss(self, estimate, target, name):
        """Build the head loss and the distributional ("z") loss.

        Args:
            estimate: mapping read at keys ``"q_values"`` and ``"logits"``.
            target: mapping read at keys ``"target_q"`` and ``"target_p"``.
            name: forwarded unchanged to ``DQN_IDS._compute_loss``
                (presumably a summary/logging tag -- confirm against
                that method).

        Returns:
            A dict with two entries: ``head_loss`` (result of
            ``DQN_IDS._compute_loss``) and ``z_loss`` (result of
            ``C51._compute_loss``).
        """
        # Pull the predictions first, then the targets.
        q_pred = estimate["q_values"]
        dist_logits = estimate["logits"]
        q_goal = target["target_q"]
        dist_goal = target["target_p"]

        # Each term is delegated explicitly to a named class's
        # implementation; the z-loss always uses the fixed name
        # "train/z_loss" rather than the caller-supplied one.
        q_term = DQN_IDS._compute_loss(self, q_pred, q_goal, name)
        z_term = C51._compute_loss(self, dist_logits, dist_goal, "train/z_loss")

        return {"head_loss": q_term, "z_loss": z_term}
Exemplo n.º 2
0
  def _compute_loss(self, estimate, target, name):
    """Build the head loss and the quantile ("z") loss.

    Args:
      estimate: mapping read at keys ``"q_values"`` and ``"quantiles"``.
      target: mapping read at keys ``"target_q"`` and ``"target_z"``.
      name: forwarded unchanged to ``DQN_IDS._compute_loss``
        (presumably a summary/logging tag -- confirm against that method).

    Returns:
      A dict with two entries: ``head_loss`` (result of
      ``DQN_IDS._compute_loss``) and ``z_loss`` (result of
      ``QRDQN._compute_loss``).
    """
    # Pull the predictions first, then the targets.
    q_pred = estimate["q_values"]
    quantile_pred = estimate["quantiles"]
    q_goal = target["target_q"]
    quantile_goal = target["target_z"]

    # Each term is delegated explicitly to a named class's implementation;
    # the z-loss always uses the fixed name "train/z_loss" rather than the
    # caller-supplied one.
    q_term = DQN_IDS._compute_loss(self, q_pred, q_goal, name)
    z_term = QRDQN._compute_loss(self, quantile_pred, quantile_goal, "train/z_loss")

    return {"head_loss": q_term, "z_loss": z_term}