def _kl_divergence(self, time_steps, action_distribution_parameters,
                       current_policy_distribution):
        """Return the KL divergence between two action distributions.

        Thin wrapper around ``losses.kullback_leibler_divergence``.

        Args:
          time_steps: Accepted for interface compatibility; not used here.
          action_distribution_parameters: Passed as the first argument of
            ``losses.kullback_leibler_divergence``.
          current_policy_distribution: Passed as the second argument of
            ``losses.kullback_leibler_divergence``.

        Returns:
          Whatever ``losses.kullback_leibler_divergence`` returns for the
          two inputs.
        """
        return losses.kullback_leibler_divergence(
            action_distribution_parameters, current_policy_distribution)
    def __build_train_fn(self):
        """Build the backend training function and store it as ``self.train_fn``.

        It replaces ``model.fit(X, y)`` because the model's own outputs are
        used to build the loss.

        For each entry of ``self.output_dim`` (one per model output head) an
        "old mu" placeholder and an int32 action placeholder are created. The
        per-head loss is ``-mean((p - p_old) * advantage - kl)`` where ``p``
        and ``p_old`` are the values picked by the one-hot encoded action out
        of the model output and the old-mu placeholder respectively, and
        ``kl`` is ``losses.kullback_leibler_divergence(old_mu, model_output)``.

        ``self.train_fn`` takes, in order: the model inputs, every old-mu
        placeholder, every action placeholder, and the advantage placeholder.
        It returns ``[sum of per-head losses, sum of mean(-log(p_old))]`` and
        applies the Adam updates built with learning rate ``self.pi_lr``.
        """
        action_prob_placeholder = self.model.model.outputs
        advantage_placeholder = K.placeholder(shape=(None, ), name="advantage")

        action_placeholder = []
        old_mu_placeholder = []
        action_prob_old = []
        loss = []
        for i, n_actions in enumerate(self.output_dim):
            # NOTE(review): this placeholder is declared 1-D, yet it is
            # multiplied by a one-hot tensor and reduced over axis 1 below.
            # Presumably it should be (None, n_actions) -- confirm against
            # what the caller feeds.
            o_mu_pl = K.placeholder(shape=(None, ),
                                    name="old_mu_placeholder" + str(i))
            old_mu_placeholder.append(o_mu_pl)

            act_pl = K.placeholder(shape=(None, ),
                                   name="action_placeholder" + str(i),
                                   dtype='int32')
            action_placeholder.append(act_pl)

            # One mask per head, shared by the new- and old-policy selections.
            action_mask = K.one_hot(act_pl, n_actions)
            act_prob = K.sum(action_mask * action_prob_placeholder[i], axis=1)
            act_prob_old = K.sum(action_mask * o_mu_pl, axis=1)
            action_prob_old.append(K.mean(-K.log(act_prob_old)))

            kl = losses.kullback_leibler_divergence(o_mu_pl,
                                                    action_prob_placeholder[i])

            # NOTE(review): the objective uses the raw probability difference,
            # not a log-probability difference or ratio -- confirm intended.
            surrogate = (act_prob - act_prob_old) * advantage_placeholder - kl
            loss.append(-K.mean(surrogate))

        # Reported as "entropy": sum over heads of mean(-log(p_old)).
        entropy = K.sum(action_prob_old)
        loss = K.stack(loss)
        loss_p = K.sum(loss)

        adam = optimizers.Adam(lr=self.pi_lr)
        updates = adam.get_updates(loss=loss,
                                   params=self.model.trainable_weights)

        self.train_fn = K.function(
            inputs=[
                *self.model.model.inputs,
                *old_mu_placeholder,
                *action_placeholder,
                advantage_placeholder,
            ],
            outputs=[loss_p, entropy],
            updates=updates)
示例#3
0
    def value_estimation_loss(self, time_steps, returns, weights):
        """Compute the weighted value-estimation loss for actor-critic training.

        Value predictions come from ``self.double_batch_pred`` applied to
        ``self._mod_net`` and the observations with ``is_training=True``. The
        error between ``returns`` and those predictions is measured with
        ``losses.kullback_leibler_divergence``, multiplied by ``weights`` and
        averaged.

        Args:
          time_steps: A batch of timesteps; only ``.observation`` is read.
          returns: Per-timestep returns for the value function to predict.
          weights: Scalar or element-wise (per-batch-entry) importance
            weights multiplied into the error. According to the original
            documentation these include a mask for invalid timesteps.

        Returns:
          The mean of the weighted error, as computed by ``tf.reduce_mean``.
        """
        predicted_values = self.double_batch_pred(
            self._mod_net, time_steps.observation, is_training=True)

        per_entry_error = losses.kullback_leibler_divergence(
            returns, predicted_values)
        weighted_error = per_entry_error * weights
        return tf.reduce_mean(input_tensor=weighted_error)
示例#4
0
 def kullback_leibler_divergence(y_true, y_pred):
     """Cast both inputs to float32 and return their KL divergence.

     The divergence itself is computed by
     ``losses.kullback_leibler_divergence(y_true, y_pred)``.
     """
     true_f32, pred_f32 = (tf.cast(t, 'float32') for t in (y_true, y_pred))
     return losses.kullback_leibler_divergence(true_f32, pred_f32)
示例#5
0
def custom_loss(y_true, y_pred, mae_weight=0.1):
    """Return KL divergence plus a weighted mean-absolute-error term.

    Args:
      y_true: Target values.
      y_pred: Predicted values.
      mae_weight: Multiplier applied to the ``losses.mae`` term.

    Returns:
      ``kld(y_true, y_pred) + mae_weight * mae(y_true, y_pred)``.
    """
    kld_term = losses.kullback_leibler_divergence(y_true, y_pred)
    mae_term = losses.mae(y_true, y_pred)
    return kld_term + mae_weight * mae_term
 def loss(y_true, y_pred):
     """Return the KL divergence of the inputs scaled by ``loss_weight``.

     ``loss_weight`` and ``kullback_leibler_divergence`` come from the
     enclosing scope.
     """
     divergence = kullback_leibler_divergence(y_true, y_pred)
     return loss_weight * divergence
示例#7
0
def custom_loss(y_true, y_pred):
    """Return MAE on the raw inputs plus KL divergence on their sigmoids.

    The mean absolute error is taken before any squashing; the KL term is
    computed on ``sigmoid(y_true)`` and ``sigmoid(y_pred)``.
    """
    mae_term = losses.mean_absolute_error(y_true, y_pred)

    squashed_true = tf.math.sigmoid(y_true)
    squashed_pred = tf.math.sigmoid(y_pred)
    # The original noted js_divergence(y_true, y_pred) as an alternative here.
    kld_term = losses.kullback_leibler_divergence(squashed_true,
                                                  squashed_pred)
    return kld_term + mae_term
示例#8
0
def js_divergence(target, pred):
    """Return a Jensen-Shannon-style divergence between ``target`` and ``pred``.

    The midpoint ``m = 0.5 * (pred + target)`` is formed, and the result is
    the average of ``losses.kullback_leibler_divergence(pred, m)`` and
    ``losses.kullback_leibler_divergence(target, m)``.

    Args:
      target: Reference distribution values.
      pred: Predicted distribution values.

    Returns:
      ``0.5 * kld(pred, m) + 0.5 * kld(target, m)``.
    """
    m = 0.5 * (pred + target)
    return (0.5 * losses.kullback_leibler_divergence(pred, m) +
            0.5 * losses.kullback_leibler_divergence(target, m))
def get_recon_error(x, x_recon):
    """Return the KL divergence between softmaxed ``x`` and ``x_recon``.

    Each input has its last axis squeezed away with ``tf.squeeze(..., -1)``
    and is then passed through a softmax over the new last axis before the
    divergence is computed.
    """

    def _as_distribution(tensor):
        # Drop the trailing axis, then normalise over the remaining last axis.
        return tf.math.softmax(tf.squeeze(tensor, -1), axis=-1)

    return losses.kullback_leibler_divergence(_as_distribution(x),
                                              _as_distribution(x_recon))
示例#10
0
 def func(y_true, y_pred1, y_pred2):
     """Return the base loss on the averaged prediction plus a KL penalty.

     The base loss is ``loss_func(y_true, (y_pred1 + y_pred2) / 2)``. The
     penalty is the mean KL divergence between the two predictions, scaled
     by ``alpha``. ``loss_func``, ``alpha`` and
     ``kullback_leibler_divergence`` come from the enclosing scope.
     """
     averaged_pred = (y_pred1 + y_pred2) / 2
     base_loss = loss_func(y_true, averaged_pred)
     consistency = K.mean(kullback_leibler_divergence(y_pred1, y_pred2))
     return base_loss + consistency * alpha