def stats(policy, batch_tensors):
    """Assemble the learner statistics dict reported after each update.

    Args:
        policy: The TF policy being trained; its ``loss`` object and config
            are read for the individual metrics.
        batch_tensors: The input batch (not read directly here; kept to
            satisfy the stats-fn signature expected by the caller).

    Returns:
        Dict mapping stat names to scalar tensors.
    """
    # Value predictions reshaped to time-major to line up with the targets.
    # When vtrace is on, the trailing timestep is dropped to match the loss.
    values_batched = _make_time_major(
        policy,
        policy.value_function,
        drop_last=policy.config["vtrace"],
    )
    metrics = {
        "cur_lr": tf.cast(policy.cur_lr, tf.float64),
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.entropy,
        "var_gnorm": tf.global_norm(policy.var_list),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": explained_variance(
            tf.reshape(policy.loss.value_targets, [-1]),
            tf.reshape(values_batched, [-1]),
        ),
    }
    if policy.config["vtrace"]:
        # Mean/variance of the importance-sampling ratio over the
        # (time, batch) axes.
        is_mean, is_var = tf.nn.moments(policy.loss.is_ratio, [0, 1])
        metrics["mean_IS"] = is_mean
        metrics["var_IS"] = is_var
    if policy.config["use_kl_loss"]:
        metrics["kl"] = policy.loss.mean_kl
        metrics["KL_Coeff"] = policy.kl_coeff
    return metrics
def make_time_major(*args, **kw):
    """Convenience wrapper that forwards to ``_make_time_major``.

    NOTE(review): ``policy`` is a free variable here — this def only works
    as a closure inside a function that has ``policy`` in scope; confirm
    against the enclosing definition.
    """
    return _make_time_major(policy, *args, **kw)
def make_time_major(*args, **kw):
    """Convenience wrapper forwarding to ``_make_time_major`` with the
    enclosing ``policy`` and the batch's sequence lengths pre-bound.

    NOTE(review): ``policy`` and ``train_batch`` are free variables — this
    def only works as a closure inside a function that has both in scope;
    confirm against the enclosing definition.
    """
    return _make_time_major(policy, train_batch.get("seq_lens"), *args, **kw)