def _kl_divergence(self, time_steps, action_distribution_parameters,
                   current_policy_distribution):
    kl_divergence = losses.kullback_leibler_divergence(
        action_distribution_parameters, current_policy_distribution)
    return kl_divergence
def __build_train_fn(self):
    """Create a train function.

    It replaces `model.fit(X, y)` because we use the output of the model
    and use it for training.
    """
    action_prob_placeholder = self.model.model.outputs
    advantage_placeholder = K.placeholder(shape=(None,), name="advantage")
    action_placeholder = []
    old_mu_placeholder = []
    action_prob_old = []
    loss = []
    for i in range(len(self.output_dim)):
        # Old policy's action distribution for output head i
        # (shaped to match the model output so the KL term is well defined).
        o_mu_pl = K.placeholder(shape=(None, self.output_dim[i]),
                                name="old_mu_placeholder" + str(i))
        old_mu_placeholder.append(o_mu_pl)
        act_pl = K.placeholder(shape=(None,),
                               name="action_placeholder" + str(i),
                               dtype='int32')
        action_placeholder.append(act_pl)
        # Probability of the taken action under the current and old policies.
        act_prob = K.sum(K.one_hot(act_pl, self.output_dim[i])
                         * action_prob_placeholder[i], axis=1)
        act_prob_old = K.sum(K.one_hot(act_pl, self.output_dim[i]) * o_mu_pl,
                             axis=1)
        action_prob_old.append(K.mean(-K.log(act_prob_old)))
        logp = K.log(act_prob)          # currently unused
        old_logp = K.log(act_prob_old)  # currently unused
        # Penalize divergence of the current policy from the old one.
        kl = losses.kullback_leibler_divergence(old_mu_placeholder[i],
                                                action_prob_placeholder[i])
        l = (act_prob - act_prob_old) * advantage_placeholder - kl
        loss.append(-K.mean(l))
    entropy = K.sum(action_prob_old)
    loss = K.stack(loss)
    loss_p = K.sum(loss)
    adam = optimizers.Adam(lr=self.pi_lr)
    updates = adam.get_updates(loss=loss,
                               params=self.model.trainable_weights)
    self.train_fn = K.function(
        inputs=[*self.model.model.inputs, *old_mu_placeholder,
                *action_placeholder, advantage_placeholder],
        outputs=[loss_p, entropy],
        updates=updates)
def value_estimation_loss(self, time_steps, returns, weights):
    """Computes the value estimation loss for actor-critic training.

    All tensors should have a single batch dimension.

    Args:
      time_steps: A batch of timesteps.
      returns: Per-timestep returns for the value function to predict.
        (Should come from the TD-lambda computation.)
      weights: Optional scalar or element-wise (per-batch-entry) importance
        weights. Includes a mask for invalid timesteps.

    Returns:
      value_estimation_loss: A scalar value estimation loss.
    """
    observation = time_steps.observation
    value_preds = self.double_batch_pred(self._mod_net, observation,
                                         is_training=True)
    value_estimation_error = losses.kullback_leibler_divergence(
        returns, value_preds)
    value_estimation_error *= weights
    value_estimation_loss = tf.reduce_mean(
        input_tensor=value_estimation_error)
    return value_estimation_loss
def kullback_leibler_divergence(y_true, y_pred):
    y_true = tf.cast(y_true, 'float32')
    y_pred = tf.cast(y_pred, 'float32')
    return losses.kullback_leibler_divergence(y_true, y_pred)
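# Usage sketch (not from the original snippet): compiling a small tf.keras model
# with the casting KL wrapper above as its loss. Assumes a TF 2.x environment
# where `tf.keras.losses.kullback_leibler_divergence` is available; the model
# architecture and data below are hypothetical placeholders.
import numpy as np
import tensorflow as tf
from tensorflow.keras import losses

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(8,)),
    tf.keras.layers.Dense(4, activation='softmax'),  # outputs a distribution
])
model.compile(optimizer='adam', loss=kullback_leibler_divergence)

# KL divergence expects targets that behave like probability distributions.
x = np.random.rand(32, 8).astype('float32')
y = np.random.rand(32, 4)
y = y / y.sum(axis=1, keepdims=True)  # normalize each row to sum to 1
model.fit(x, y, epochs=1, verbose=0)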
def custom_loss(y_true, y_pred, mae_weight=0.1):
    return (losses.kullback_leibler_divergence(y_true, y_pred)
            + mae_weight * losses.mae(y_true, y_pred))
def loss(y_true, y_pred):
    # `loss_weight` is expected to be supplied by the enclosing scope.
    return loss_weight * kullback_leibler_divergence(y_true, y_pred)
def custom_loss(y_true, y_pred):
    mae_loss = losses.mean_absolute_error(y_true, y_pred)
    # Squash both tensors into (0, 1) before taking the KL divergence.
    y_true, y_pred = tf.math.sigmoid(y_true), tf.math.sigmoid(y_pred)
    return losses.kullback_leibler_divergence(
        y_true, y_pred) + mae_loss  # js_divergence(y_true, y_pred)
def js_divergence(target, pred):
    # Jensen-Shannon divergence: symmetrized KL against the midpoint m.
    m = 0.5 * (pred + target)
    loss = (0.5 * losses.kullback_leibler_divergence(pred, m)
            + 0.5 * losses.kullback_leibler_divergence(target, m))
    return loss
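# Sanity-check sketch (not part of the original snippet): the Jensen-Shannon
# divergence above is symmetric and ~0 for identical distributions. Assumes
# `losses` is tf.keras.losses and TF 2.x eager execution.
import tensorflow as tf
from tensorflow.keras import losses

p = tf.constant([[0.1, 0.4, 0.5]])
q = tf.constant([[0.3, 0.3, 0.4]])
print(js_divergence(p, q).numpy())  # one value per batch row
print(js_divergence(q, p).numpy())  # same value: JS is symmetric
print(js_divergence(p, p).numpy())  # ~0.0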
def get_recon_error(x, x_recon):
    x = tf.math.softmax(tf.squeeze(x, -1), axis=-1)
    x_recon = tf.math.softmax(tf.squeeze(x_recon, -1), axis=-1)
    loss = losses.kullback_leibler_divergence(x, x_recon)
    return loss
def func(y_true, y_pred1, y_pred2):
    # `loss_func` and `alpha` are expected to come from the enclosing scope.
    y_pred = (y_pred1 + y_pred2) / 2
    origin_loss = loss_func(y_true, y_pred)
    kld_loss = kullback_leibler_divergence(y_pred1, y_pred2)
    loss = origin_loss + K.mean(kld_loss) * alpha
    return loss
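# One possible wiring for the snippet above (an assumption, not from the
# original source): `loss_func` and `alpha` are typically captured through a
# factory closure. `make_consistency_loss` is a hypothetical helper; assumes
# tf.keras losses and backend from TF 2.x.
from tensorflow.keras import backend as K
from tensorflow.keras import losses

def make_consistency_loss(loss_func=losses.categorical_crossentropy, alpha=1.0):
    def func(y_true, y_pred1, y_pred2):
        y_pred = (y_pred1 + y_pred2) / 2
        origin_loss = loss_func(y_true, y_pred)  # task loss on the mean prediction
        kld_loss = losses.kullback_leibler_divergence(y_pred1, y_pred2)
        return origin_loss + K.mean(kld_loss) * alpha  # agreement penalty between the two heads
    return func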