Exemplo n.º 1
0
 def extra_compute_action_fetches(self):
     """Return the base TFPolicyGraph fetches plus a ``"q_values"`` entry.

     The added entry maps to ``self.q_values`` and takes precedence over a
     same-named key returned by the base implementation.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     return dict(base_fetches, q_values=self.q_values)
Exemplo n.º 2
0
 def extra_compute_action_fetches(self):
     """Return the base TFPolicyGraph fetches merged with the extras.

     Entries from ``self._extra_action_fetches`` are layered on top of the
     base implementation's result, so they win on key collisions.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     extras = self._extra_action_fetches
     return dict(base_fetches, **extras)
Exemplo n.º 3
0
 def extra_compute_action_fetches(self):
     """Return the base TFPolicyGraph fetches plus a ``"vf_preds"`` entry.

     ``"vf_preds"`` maps to ``self.vf``; a same-named key from the base
     implementation is overwritten.
     """
     # Copy first so the mapping returned by the base class is not mutated.
     fetches = dict(TFPolicyGraph.extra_compute_action_fetches(self))
     fetches["vf_preds"] = self.vf
     return fetches
Exemplo n.º 4
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus a ``"behaviour_logits"`` entry.

     The added entry maps to ``self.model.outputs`` and overrides a
     same-named key from ``TFPolicyGraph.extra_compute_action_fetches``.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     return dict(base_fetches, behaviour_logits=self.model.outputs)
Exemplo n.º 5
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus logits and, optionally, value preds.

     ``"behaviour_logits"`` (``self.model.outputs``) is always added.
     ``"vf_preds"`` (``self.value_function``) is added only when
     ``self.config["vtrace"]`` is falsy.
     """
     extras = dict(behaviour_logits=self.model.outputs)
     vtrace_enabled = self.config["vtrace"]
     if not vtrace_enabled:
         extras.update(vf_preds=self.value_function)
     # Copy the base result, then let the extras override on collisions.
     merged = dict(TFPolicyGraph.extra_compute_action_fetches(self))
     merged.update(extras)
     return merged
Exemplo n.º 6
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus value predictions and logits.

     Adds ``self.value_function`` under ``SampleBatch.VF_PREDS`` and
     ``self.logits`` under ``BEHAVIOUR_LOGITS``; both override same-named
     keys from the base implementation.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     extras = {
         SampleBatch.VF_PREDS: self.value_function,
         BEHAVIOUR_LOGITS: self.logits,
     }
     return dict(base_fetches, **extras)
Exemplo n.º 7
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus a ``SampleBatch.VF_PREDS`` entry.

     The added entry maps to ``self.vf`` and overrides a same-named key
     from ``TFPolicyGraph.extra_compute_action_fetches``.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     extras = {SampleBatch.VF_PREDS: self.vf}
     return dict(base_fetches, **extras)
Exemplo n.º 8
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus ``"vf_preds"`` and ``"logits"``.

     ``"vf_preds"`` maps to ``self.value_function`` and ``"logits"`` to
     ``self.logits``; both override same-named base keys.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     return dict(
         base_fetches,
         vf_preds=self.value_function,
         logits=self.logits,
     )
Exemplo n.º 9
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus logits and, optionally, value preds.

     Always includes ``"behaviour_logits"`` (``self.model.outputs``);
     includes ``"vf_preds"`` (``self.value_function``) only when
     ``self.config["vtrace"]`` is falsy.
     """
     additions = {"behaviour_logits": self.model.outputs}
     use_vtrace = self.config["vtrace"]
     if not use_vtrace:
         additions["vf_preds"] = self.value_function
     # Start from a copy of the base result; additions win on collisions.
     fetches = dict(TFPolicyGraph.extra_compute_action_fetches(self))
     fetches.update(additions)
     return fetches
Exemplo n.º 10
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus a ``"behavior_logp"`` entry.

     ``"behavior_logp"`` maps to ``self.sampled_logp`` and overrides a
     same-named key from the base implementation.
     """
     # Copy first so the mapping returned by the base class is not mutated.
     fetches = dict(TFPolicyGraph.extra_compute_action_fetches(self))
     fetches["behavior_logp"] = self.sampled_logp
     return fetches
Exemplo n.º 11
0
 def extra_compute_action_fetches(self):
     """Return the base TFPolicyGraph fetches extended with ``"q_values"``.

     ``"q_values"`` maps to ``self.q_values``; it replaces a same-named key
     coming from ``TFPolicyGraph.extra_compute_action_fetches``.
     """
     fetches = dict(TFPolicyGraph.extra_compute_action_fetches(self))
     fetches.update(q_values=self.q_values)
     return fetches
Exemplo n.º 12
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus ``"vf_preds"`` and ``"logits"``.

     The two added entries map to ``self.value_function`` and
     ``self.logits`` respectively and override same-named base keys.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     extras = dict(vf_preds=self.value_function, logits=self.logits)
     return dict(base_fetches, **extras)
Exemplo n.º 13
0
 def extra_compute_action_fetches(self):
     """Return the base TFPolicyGraph fetches plus a ``"vf_preds"`` entry.

     The added entry maps to ``self.vf`` and takes precedence over a
     same-named key returned by the base implementation.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     return dict(base_fetches, vf_preds=self.vf)
Exemplo n.º 14
0
 def extra_compute_action_fetches(self):
     """Return the base fetches plus a ``BEHAVIOUR_LOGITS`` entry.

     The added entry maps to ``self.model.outputs`` and overrides a
     same-named key from ``TFPolicyGraph.extra_compute_action_fetches``.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     extras = {BEHAVIOUR_LOGITS: self.model.outputs}
     return dict(base_fetches, **extras)
Exemplo n.º 15
0
 def extra_compute_action_fetches(self):
     """Return the base fetches extended with ``"behaviour_logits"``.

     ``"behaviour_logits"`` maps to ``self.model.outputs``; it replaces a
     same-named key coming from the base implementation.
     """
     base_fetches = TFPolicyGraph.extra_compute_action_fetches(self)
     model_outputs = self.model.outputs
     return dict(base_fetches, behaviour_logits=model_outputs)