Exemplo n.º 1
0
def impala_loss(inputs, lables):
    """Return the combined policy + value loss for IMPALA.

    Args:
        inputs: pair of network outputs ``(policy, value)``.
        lables: triple ``(target_policy, target_value, advantage)``.

    Returns:
        Scalar tensor: advantage-weighted cross-entropy policy loss with an
        entropy bonus, plus a half-MSE value loss.
    """
    policy_out, value_out = inputs
    target_policy, target_value, advantage = lables

    # Small epsilon keeps the log finite when a probability is exactly zero.
    log_p = tf.log(policy_out + 1e-10)
    entropy_term = -policy_out * log_p
    xent = -target_policy * log_p

    policy_loss = tf.reduce_mean(advantage * xent - ENTROPY_LOSS * entropy_term)
    value_loss = 0.5 * tf.reduce_mean(tf.square(value_out - target_value))
    return policy_loss + value_loss
Exemplo n.º 2
0
def actor_loss_with_entropy(adv, old_logits, behavior_action, out_logits):
    """Return the PPO clipped-surrogate actor loss minus an entropy bonus.

    Args:
        adv: advantage estimates.
        old_logits: behavior-policy logits.
        behavior_action: actions taken under the behavior policy.
        out_logits: current-policy logits.

    Returns:
        Scalar tensor: negated clipped surrogate minus ENTROPY_LOSS * entropy.
    """
    # NOTE(review): both terms come from neglog_prob, so this ratio is
    # exp(neglog_new - neglog_old); confirm that sign convention matches
    # the intended new/old probability ratio.
    behavior_neglog = neglog_prob(behavior_action, old_logits)
    current_neglog = neglog_prob(behavior_action, out_logits)
    prob_ratio = tf.exp(current_neglog - behavior_neglog)

    unclipped_obj = prob_ratio * adv
    clipped_obj = adv * tf.clip_by_value(
        prob_ratio, 1.0 - LOSS_CLIPPING, 1.0 + LOSS_CLIPPING)
    surrogate = tf.reduce_mean(tf.minimum(unclipped_obj, clipped_obj))

    mean_entropy = tf.reduce_mean(entropy(out_logits))
    return -surrogate - ENTROPY_LOSS * mean_entropy
Exemplo n.º 3
0
def actor_loss_with_entropy(dist, adv, old_log_p, behavior_action, clip_ratio,
                            ent_coef):
    """Return the PPO clipped-surrogate actor loss with an entropy bonus.

    Args:
        dist: action distribution exposing ``log_prob`` and ``entropy``.
        adv: advantage estimates.
        old_log_p: log-probabilities under the behavior policy.
        behavior_action: actions taken under the behavior policy.
        clip_ratio: PPO clipping epsilon for the probability ratio.
        ent_coef: weight of the entropy bonus.

    Returns:
        Scalar tensor: negated clipped surrogate minus ent_coef * entropy.
    """
    new_log_p = dist.log_prob(behavior_action)
    # Probability ratio pi_new / pi_old, computed in log space.
    prob_ratio = tf.exp(new_log_p - old_log_p)

    unclipped_obj = prob_ratio * adv
    clipped_obj = adv * tf.clip_by_value(
        prob_ratio, 1.0 - clip_ratio, 1.0 + clip_ratio)
    surrogate = tf.reduce_mean(tf.minimum(unclipped_obj, clipped_obj))

    mean_ent = tf.reduce_mean(dist.entropy())
    return -surrogate - ent_coef * mean_ent
Exemplo n.º 4
0
def value_loss(target_v, out_v, old_v):
    """Return the PPO clipped value-function loss.

    Args:
        target_v: value targets (e.g. returns).
        out_v: current value predictions.
        old_v: value predictions from the previous policy iteration.

    Returns:
        Scalar tensor: 0.5 * mean of the elementwise max of the clipped and
        unclipped squared errors.
    """
    # Keep the new prediction within VF_CLIP of the old one.
    clipped_pred = old_v + tf.clip_by_value(out_v - old_v, -VF_CLIP, VF_CLIP)
    unclipped_err = tf.square(out_v - target_v)
    clipped_err = tf.square(clipped_pred - target_v)
    # Pessimistic choice: penalize whichever error is larger.
    return 0.5 * tf.reduce_mean(tf.maximum(unclipped_err, clipped_err))
Exemplo n.º 5
0
def critic_loss(target_v, out_v, old_v, val_clip):
    """Return the clipped critic (value) loss, PPO-style.

    Args:
        target_v: value targets.
        out_v: current value predictions.
        old_v: previous-iteration value predictions.
        val_clip: clipping range for the prediction delta.

    Returns:
        Scalar tensor: 0.5 * mean of the elementwise max of clipped and
        unclipped squared errors.
    """
    raw_err = tf.square(out_v - target_v)
    # Restrict the update so the prediction moves at most val_clip from old_v.
    bounded_pred = old_v + tf.clip_by_value(out_v - old_v, -val_clip, val_clip)
    bounded_err = tf.square(bounded_pred - target_v)
    return 0.5 * tf.reduce_mean(tf.maximum(raw_err, bounded_err))
Exemplo n.º 6
0
def layer_normalize(x):
    """Center *x* by subtracting its mean along axis 1.

    Args:
        x: tensor with at least two dimensions.

    Returns:
        Tensor of the same shape as ``x`` with zero mean along axis 1.
    """
    # `keepdims` replaces the deprecated `keep_dims` spelling; the current
    # spelling is already used elsewhere in this file (cross_entropy), so
    # the installed TF version supports it.
    return tf.subtract(x, tf.reduce_mean(x, axis=1, keepdims=True))
Exemplo n.º 7
0
def mse_loss(logits, labels):
    """Return the mean of the elementwise MSE between predictions and targets."""
    per_element = MSE(logits, labels)
    return tf.reduce_mean(per_element)
Exemplo n.º 8
0
def cross_entropy(pred_p, target_p, loss_weights):
    """Return the mean cross-entropy between predicted and target distributions.

    Args:
        pred_p: predicted probabilities.
        target_p: target probabilities.
        loss_weights: unused; kept for interface compatibility with callers.

    Returns:
        Scalar tensor: mean over samples of the per-sample cross-entropy.
    """
    # Epsilon keeps the log finite when a predicted probability is zero.
    per_sample = tf.reduce_mean(
        -target_p * tf.log(pred_p + 1e-10), axis=-1, keepdims=True)
    # NOTE(review): loss_weights is never applied — confirm callers expect an
    # unweighted loss. The original trailing `* 1.0` was a no-op and is gone.
    return tf.reduce_mean(per_sample)