def impala_loss(inputs, lables):
    """Compute the combined policy + value loss for IMPALA.

    Args:
        inputs: Pair ``(policy, value)`` of network outputs.
        lables: Triple ``(target_p, target_v, adv)`` of training targets.
            NOTE(review): the name is a typo for "labels"; kept unchanged so
            keyword callers are not broken.

    Returns:
        Scalar tensor: policy loss plus value loss.
    """
    net_policy, net_value = inputs
    tgt_policy, tgt_value, advantage = lables

    # Epsilon guards against log(0) on zero-probability entries.
    log_p = tf.log(net_policy + 1e-10)
    entropy_term = -net_policy * log_p
    xentropy_term = -tgt_policy * log_p

    policy_loss = tf.reduce_mean(advantage * xentropy_term - ENTROPY_LOSS * entropy_term)
    value_loss = 0.5 * tf.reduce_mean(tf.square(net_value - tgt_value))
    return policy_loss + value_loss
def actor_loss_with_entropy(adv, old_logits, behavior_action, out_logits):
    """Compute the PPO-style clipped surrogate actor loss with an entropy bonus.

    Args:
        adv: Advantage estimates.
        old_logits: Logits of the behavior (old) policy.
        behavior_action: Actions sampled from the behavior policy.
        out_logits: Logits of the current policy.

    Returns:
        Scalar loss: negative clipped surrogate minus ``ENTROPY_LOSS``-weighted
        mean entropy.
    """
    neglogp_old = neglog_prob(behavior_action, old_logits)
    neglogp_new = neglog_prob(behavior_action, out_logits)
    # NOTE(review): if neglog_prob returns NEGATIVE log-probabilities,
    # exp(new - old) here is old_p/new_p rather than new_p/old_p — confirm
    # the sign convention of neglog_prob.
    prob_ratio = tf.exp(neglogp_new - neglogp_old)

    unclipped = prob_ratio * adv
    clipped = adv * tf.clip_by_value(prob_ratio, 1.0 - LOSS_CLIPPING, 1.0 + LOSS_CLIPPING)
    surrogate = tf.reduce_mean(tf.minimum(unclipped, clipped))

    mean_entropy = tf.reduce_mean(entropy(out_logits))
    return -surrogate - ENTROPY_LOSS * mean_entropy
def actor_loss_with_entropy(dist, adv, old_log_p, behavior_action, clip_ratio, ent_coef):
    """Compute the PPO clipped surrogate actor loss from a distribution object.

    NOTE(review): this shadows an earlier module-level function of the same
    name (the logits-based variant) — confirm only one is meant to survive.

    Args:
        dist: Distribution exposing ``log_prob`` and ``entropy``.
        adv: Advantage estimates.
        old_log_p: Log-probabilities under the behavior policy.
        behavior_action: Actions sampled from the behavior policy.
        clip_ratio: Clipping half-width for the probability ratio.
        ent_coef: Weight of the entropy bonus.

    Returns:
        Scalar loss: negative clipped surrogate minus ``ent_coef``-weighted
        mean entropy.
    """
    new_log_p = dist.log_prob(behavior_action)
    # Importance ratio between the current and behavior policies.
    prob_ratio = tf.exp(new_log_p - old_log_p)

    unclipped = prob_ratio * adv
    clipped = adv * tf.clip_by_value(prob_ratio, 1.0 - clip_ratio, 1.0 + clip_ratio)
    surrogate = tf.reduce_mean(tf.minimum(unclipped, clipped))

    mean_entropy = tf.reduce_mean(dist.entropy())
    return -surrogate - ent_coef * mean_entropy
def value_loss(target_v, out_v, old_v):
    """Compute the PPO clipped value loss.

    Args:
        target_v: Value targets (e.g. returns).
        out_v: Current value predictions.
        old_v: Value predictions from the previous policy iteration.

    Returns:
        Scalar: half the mean of the element-wise max of the clipped and
        unclipped squared errors.
    """
    # Limit how far the new prediction may move from the old one.
    clipped_pred = old_v + tf.clip_by_value(out_v - old_v, -VF_CLIP, VF_CLIP)
    err_unclipped = tf.square(out_v - target_v)
    err_clipped = tf.square(clipped_pred - target_v)
    # Pessimistic (max) combination, as in the PPO reference implementation.
    return 0.5 * tf.reduce_mean(tf.maximum(err_unclipped, err_clipped))
def critic_loss(target_v, out_v, old_v, val_clip):
    """Compute the clipped critic (value) loss.

    Args:
        target_v: Value targets.
        out_v: Current value predictions.
        old_v: Value predictions from the previous iteration.
        val_clip: Clipping half-width for the value update.

    Returns:
        Scalar: half the mean of the element-wise max of the clipped and
        unclipped squared errors.
    """
    err_unclipped = tf.square(out_v - target_v)
    # Keep the new prediction within `val_clip` of the old one.
    clipped_pred = old_v + tf.clip_by_value(out_v - old_v, -val_clip, val_clip)
    err_clipped = tf.square(clipped_pred - target_v)
    return 0.5 * tf.reduce_mean(tf.maximum(err_unclipped, err_clipped))
def layer_normalize(x):
    """Center ``x`` by subtracting its mean along axis 1.

    Args:
        x: Tensor of rank >= 2; the mean is taken over axis 1 per row.

    Returns:
        Tensor of the same shape as ``x`` with zero mean along axis 1.
    """
    # Fix: `keep_dims` is the deprecated TF1 spelling (removed in newer
    # releases); use `keepdims`, consistent with the rest of this module.
    return tf.subtract(x, tf.reduce_mean(x, axis=1, keepdims=True))
def mse_loss(logits, labels):
    """Return the scalar mean of the MSE between ``logits`` and ``labels``."""
    per_element = MSE(logits, labels)
    return tf.reduce_mean(per_element)
def cross_entropy(pred_p, target_p, loss_weights):
    """Compute the weighted cross-entropy between two distributions.

    Args:
        pred_p: Predicted probabilities.
        target_p: Target probabilities.
        loss_weights: Per-sample weights applied to the cross-entropy term
            (must broadcast against the reduced cross-entropy tensor).

    Returns:
        Scalar mean of the weighted cross-entropy.
    """
    # Epsilon avoids log(0) on zero-probability entries.
    _cross_entropy = tf.reduce_mean(-target_p * tf.log(pred_p + 1e-10),
                                    axis=-1, keepdims=True)
    # Fix: the original multiplied by the literal 1.0, silently ignoring the
    # `loss_weights` argument. Apply the weights as the signature promises;
    # callers passing weights of 1.0 see identical behavior.
    return tf.reduce_mean(_cross_entropy * loss_weights)