def wasserstein_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Computes the Wasserstein discriminator (critic) loss for a GAN.

    The loss is the weighted, reduced discriminator output on generated data
    minus the weighted, reduced discriminator output on real data. See
    `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      real_weights: Optional `Tensor` whose rank is either 0, or the same rank
        as `discriminator_real_outputs`, and which must be broadcastable to
        `discriminator_real_outputs` (every dimension is either `1` or equal
        to the corresponding dimension).
      generated_weights: Same as `real_weights`, but for
        `discriminator_gen_outputs`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.compat.v1.losses.Reduction` to apply to the loss.
      add_summaries: Whether to emit scalar summaries for the two partial
        losses and the combined loss.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    scope_inputs = (discriminator_real_outputs, discriminator_gen_outputs,
                    real_weights, generated_weights)
    with ops.name_scope(scope, 'discriminator_wasserstein_loss',
                        scope_inputs) as scope:
        real_scores = _to_float(discriminator_real_outputs)
        gen_scores = _to_float(discriminator_gen_outputs)
        real_scores.shape.assert_is_compatible_with(gen_scores.shape)

        def _reduced(scores, weights):
            # The partial losses are not registered on their own; only the
            # combined loss below goes into `loss_collection`.
            return losses.compute_weighted_loss(
                scores, weights, scope,
                loss_collection=None, reduction=reduction)

        loss_on_generated = _reduced(gen_scores, generated_weights)
        loss_on_real = _reduced(real_scores, real_weights)

        loss = loss_on_generated - loss_on_real
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_wass_loss', loss_on_generated)
            summary.scalar('discriminator_real_wass_loss', loss_on_real)
            summary.scalar('discriminator_wass_loss', loss)

    return loss
def compute_loss(losses, weights, loss_collection, reduction):
    """Weights `losses`, applies `reduction`, and registers the result.

    Args:
      losses: Value convertible to a `Tensor` holding the raw losses.
      weights: Value multiplied element-wise into `losses` after both are
        cast to float32.
      loss_collection: Collection passed to `util.add_loss` for the result.
      reduction: One of the `Reduction` constants selecting how the weighted
        losses are reduced.

    Returns:
      The weighted loss cast back to the dtype `losses` had on input. With
      `Reduction.NONE` the weighted losses are returned without reduction.
    """
    loss_values = ops.convert_to_tensor(losses)
    original_dtype = loss_values.dtype
    loss_values = math_ops.cast(loss_values, dtype=dtypes.float32)
    weight_values = math_ops.cast(weights, dtype=dtypes.float32)
    result = math_ops.multiply(loss_values, weight_values)

    if reduction != Reduction.NONE:
        result = math_ops.reduce_sum(result)
        if reduction == Reduction.MEAN:
            # Normalize by the sum of the weights broadcast to `losses`.
            weight_total = math_ops.reduce_sum(
                array_ops.ones_like(loss_values) * weight_values)
            result = _safe_mean(result, weight_total)
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
            present = _num_present(loss_values, weight_values)
            result = _safe_mean(result, present)
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
            element_count = _num_elements(loss_values)
            result = _safe_mean(result, element_count)

    # Convert the result back to the input type.
    result = math_ops.cast(result, original_dtype)
    util.add_loss(result, loss_collection)
    return result
def wasserstein_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Wasserstein discriminator loss for GANs.

    The loss is the weighted, reduced discriminator output on generated data
    minus the weighted, reduced discriminator output on real data. See
    `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for more details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Its static shape must be compatible with that of
        `discriminator_real_outputs`.
      real_weights: Weights applied to the real-data term by
        `losses.compute_weighted_loss`.
      generated_weights: Weights applied to the generated-data term by
        `losses.compute_weighted_loss`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `losses.Reduction` value to apply to each term.
      add_summaries: Whether to add scalar summaries for the two partial
        losses and the combined loss. Defaults to `False`.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    with ops.name_scope(scope, 'discriminator_wasserstein_loss', (
            discriminator_real_outputs, discriminator_gen_outputs,
            real_weights, generated_weights)) as scope:
        discriminator_real_outputs = math_ops.to_float(
            discriminator_real_outputs)
        discriminator_gen_outputs = math_ops.to_float(
            discriminator_gen_outputs)
        discriminator_real_outputs.shape.assert_is_compatible_with(
            discriminator_gen_outputs.shape)

        # The partial losses are deliberately kept out of any collection;
        # only their difference is registered below.
        loss_on_generated = losses.compute_weighted_loss(
            discriminator_gen_outputs, generated_weights, scope,
            loss_collection=None, reduction=reduction)
        loss_on_real = losses.compute_weighted_loss(
            discriminator_real_outputs, real_weights, scope,
            loss_collection=None, reduction=reduction)
        loss = loss_on_generated - loss_on_real
        util.add_loss(loss, loss_collection)

        # Honor `add_summaries` instead of silently ignoring it; the default
        # (False) keeps the previous behavior of emitting no summaries.
        if add_summaries:
            summary.scalar('discriminator_gen_wass_loss', loss_on_generated)
            summary.scalar('discriminator_real_wass_loss', loss_on_real)
            summary.scalar('discriminator_wass_loss', loss)

    return loss
def compute_weighted_loss(
        losses, weights=1.0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES):
    """Returns the weighted mean of `losses` and registers it as a loss.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.

    Returns:
      A scalar `Tensor` holding the weighted loss, cast to the dtype `losses`
      had on input.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.
    """
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            loss_values = ops.convert_to_tensor(losses)
            original_dtype = loss_values.dtype
            loss_values = math_ops.to_float(loss_values)
            weight_values = math_ops.to_float(weights)

            scaled_total = _scale_losses(loss_values, weight_values)
            present = _num_present(loss_values, weight_values)
            averaged = _safe_mean(scaled_total, present)

            # Convert the result back to the input type.
            averaged = math_ops.cast(averaged, original_dtype)
            util.add_loss(averaged, loss_collection)
            return averaged
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Weights `losses`, reduces them per `reduction`, and registers the result.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.
      reduction: Type of reduction to apply to the loss.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.

    Note:
      When calculating the gradient of a weighted loss, contributions from
      both `losses` and `weights` are considered. If your `weights` depend on
      some model parameters but you do not want this to affect the loss
      gradient, apply `tf.stop_gradient` to `weights` before passing them to
      `compute_weighted_loss`.
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            loss_values = ops.convert_to_tensor(losses)
            original_dtype = loss_values.dtype
            loss_values = math_ops.to_float(loss_values)
            weight_values = math_ops.to_float(weights)
            result = math_ops.multiply(loss_values, weight_values)

            if reduction != Reduction.NONE:
                result = math_ops.reduce_sum(result)
                if reduction == Reduction.MEAN:
                    # Normalize by the sum of the broadcast weights.
                    weight_total = math_ops.reduce_sum(
                        array_ops.ones_like(loss_values) * weight_values)
                    result = _safe_mean(result, weight_total)
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
                      reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    present = _num_present(loss_values, weight_values)
                    result = _safe_mean(result, present)
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    element_count = _num_elements(loss_values)
                    result = _safe_mean(result, element_count)

            # Convert the result back to the input type.
            result = math_ops.cast(result, original_dtype)
            util.add_loss(result, loss_collection)
            return result
def compute_loss(labels, predictions, weights, loss_collection):
    """Builds a weighted loss from squared prediction/label differences.

    For each entry along axis 0 the code forms
    `2 * sum(d**2) / (n - 1) - 2 * sum(d)**2 / (n * (n - 1))`, where `d` is
    `predictions - labels` reduced over all remaining axes and `n` is the
    value returned by `_num_present(..., per_batch=True)`. The per-entry
    values are multiplied by `weights` and summed.

    Args:
      labels: Target values; must expose `get_shape()` (presumably already a
        `Tensor` -- TODO confirm against the caller).
      predictions: Predicted values; cast to float32 here.
      weights: Multiplied into the per-entry terms before the final sum.
      loss_collection: Collection passed to `util.add_loss` for the result.

    Returns:
      The summed weighted loss, or zeros of the same shape when the total of
      `num_present_per_batch` is not greater than 0.
    """
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    diffs = math_ops.subtract(predictions, labels)

    # Reduce over every axis except axis 0; keepdims=True keeps the reduced
    # results aligned with `num_present_per_batch` for the divisions below.
    axis = math_ops.range(1, array_ops.rank(diffs))

    sum_squares_diff_per_batch = math_ops.reduce_sum(
        math_ops.square(diffs), axis=axis, keepdims=True)
    num_present_per_batch = _num_present(diffs, weights, per_batch=True)

    # The denominator is clamped at 0; `div_no_nan` is presumably relied on to
    # avoid NaN/Inf when it is 0 -- confirm against the math_ops docs.
    term1 = 2.0 * math_ops.div_no_nan(
        sum_squares_diff_per_batch,
        math_ops.maximum(num_present_per_batch - 1, 0),
        name="value")

    sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
    term2 = 2.0 * math_ops.div_no_nan(
        math_ops.square(sum_diff),
        math_ops.maximum(
            math_ops.multiply(num_present_per_batch,
                              num_present_per_batch - 1), 0),
        name="value")

    weighted_losses = math_ops.multiply(term1 - term2, weights)
    loss = math_ops.reduce_sum(weighted_losses)

    # Fall back to zero when nothing is counted as present at all.
    mean_loss = array_ops.where(
        math_ops.reduce_sum(num_present_per_batch) > 0,
        loss,
        array_ops.zeros_like(loss),
        name="value")
    util.add_loss(mean_loss, loss_collection)
    return mean_loss
def wasserstein_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Builds the Wasserstein GAN loss for the discriminator.

    Computes the reduced, weighted discriminator output on generated samples
    and subtracts the reduced, weighted output on real samples. See
    `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      real_weights: Optional `Tensor` whose rank is either 0, or the same rank
        as `discriminator_real_outputs`, and which must be broadcastable to
        `discriminator_real_outputs` (every dimension is either `1` or equal
        to the corresponding dimension).
      generated_weights: Same as `real_weights`, but for
        `discriminator_gen_outputs`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.compat.v1.losses.Reduction` to apply to the loss.
      add_summaries: Whether to add scalar summaries for the loss terms.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    with ops.name_scope(
            scope, 'discriminator_wasserstein_loss',
            (discriminator_real_outputs, discriminator_gen_outputs,
             real_weights, generated_weights)) as scope:
        real_out = _to_float(discriminator_real_outputs)
        gen_out = _to_float(discriminator_gen_outputs)
        real_out.shape.assert_is_compatible_with(gen_out.shape)

        # Shared settings: the partial losses stay out of every collection.
        shared_kwargs = {'loss_collection': None, 'reduction': reduction}
        loss_on_generated = losses.compute_weighted_loss(
            gen_out, generated_weights, scope, **shared_kwargs)
        loss_on_real = losses.compute_weighted_loss(
            real_out, real_weights, scope, **shared_kwargs)

        loss = loss_on_generated - loss_on_real
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_wass_loss', loss_on_generated)
            summary.scalar('discriminator_real_wass_loss', loss_on_real)
            summary.scalar('discriminator_wass_loss', loss)

    return loss
def wasserstein_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Computes the Wasserstein discriminator loss for GANs.

    The returned value is the reduced, weighted discriminator output on
    generated data minus the reduced, weighted output on real data. See
    `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for more details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      real_weights: A scalar or a `Tensor` of size [batch_size, K] used to
        rescale the real loss.
      generated_weights: A scalar or a `Tensor` of size [batch_size, K] used
        to rescale the generated loss.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.losses.Reduction` to apply to the loss.
      add_summaries: Whether to add scalar summaries for the loss terms.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    scope_inputs = (discriminator_real_outputs, discriminator_gen_outputs,
                    real_weights, generated_weights)
    with ops.name_scope(scope, 'discriminator_wasserstein_loss',
                        scope_inputs) as scope:
        real_scores = math_ops.to_float(discriminator_real_outputs)
        gen_scores = math_ops.to_float(discriminator_gen_outputs)
        real_scores.shape.assert_is_compatible_with(gen_scores.shape)

        def _reduced(scores, weights):
            # Only the combined loss is registered, hence no collection here.
            return losses.compute_weighted_loss(
                scores, weights, scope,
                loss_collection=None, reduction=reduction)

        loss_on_generated = _reduced(gen_scores, generated_weights)
        loss_on_real = _reduced(real_scores, real_weights)

        loss = loss_on_generated - loss_on_real
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_wass_loss', loss_on_generated)
            summary.scalar('discriminator_real_wass_loss', loss_on_real)
            summary.scalar('discriminator_wass_loss', loss)

    return loss
def compute_weighted_loss(
        losses, weights=1.0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Applies `weights` to `losses` and reduces the result per `reduction`.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.
      reduction: Type of reduction to apply to the loss.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.

    Note:
      Gradients of the weighted loss flow through both `losses` and
      `weights`. If `weights` depend on model parameters that should not
      influence the loss gradient, wrap them in `tf.stop_gradient` before
      calling `compute_weighted_loss`.
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        shape_assertion = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((shape_assertion,)):
            raw = ops.convert_to_tensor(losses)
            input_dtype = raw.dtype
            float_losses = math_ops.to_float(raw)
            float_weights = math_ops.to_float(weights)
            weighted = math_ops.multiply(float_losses, float_weights)

            if reduction == Reduction.NONE:
                reduced = weighted
            else:
                total = math_ops.reduce_sum(weighted)
                if reduction == Reduction.MEAN:
                    denominator = math_ops.reduce_sum(
                        array_ops.ones_like(float_losses) * float_weights)
                    reduced = _safe_mean(total, denominator)
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
                      reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    reduced = _safe_mean(
                        total, _num_present(float_losses, float_weights))
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    reduced = _safe_mean(total, _num_elements(float_losses))
                else:
                    # Any other validated reduction keeps the plain sum.
                    reduced = total

            # Convert the result back to the input type.
            reduced = math_ops.cast(reduced, input_dtype)
            util.add_loss(reduced, loss_collection)
            return reduced
def kl_with_logits(q_logits, p_logits, scope=None,
                   loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES):
    """Computes the KL divergence KL(q || p) from two sets of logits.

    Both inputs go through softmax / log-softmax; the divergence is summed
    over axis 1 and then averaged over the remaining entries.

    Args:
      q_logits: Logits of the distribution `q`.
      p_logits: Logits of the distribution `p`.
      scope: Optional name scope; defaults to "kl_divergence".
      loss_collection: Collection passed to `add_loss` for the result.

    Returns:
      The mean KL divergence tensor, named after the enclosing scope.
    """
    with tf.name_scope(scope, "kl_divergence") as name:
        q_probs = tf.nn.softmax(q_logits)
        q_log_probs = tf.nn.log_softmax(q_logits)
        p_log_probs = tf.nn.log_softmax(p_logits)

        log_ratio = q_log_probs - p_log_probs
        per_example = tf.reduce_sum(q_probs * log_ratio, axis=1)
        loss = tf.reduce_mean(per_example, name=name)

        add_loss(loss, loss_collection)
        return loss
def wasserstein_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Builds the discriminator-side Wasserstein loss for a GAN.

    The generated-data term and the real-data term are each weighted and
    reduced independently; the result is generated minus real. See
    `Wasserstein GAN` (https://arxiv.org/abs/1701.07875) for more details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      real_weights: A scalar or a `Tensor` of size [batch_size, K] used to
        rescale the real loss.
      generated_weights: A scalar or a `Tensor` of size [batch_size, K] used
        to rescale the generated loss.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.losses.Reduction` to apply to the loss.
      add_summaries: Whether to add scalar summaries for the loss terms.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    with ops.name_scope(
            scope, 'discriminator_wasserstein_loss',
            (discriminator_real_outputs, discriminator_gen_outputs,
             real_weights, generated_weights)) as scope:
        real_out = math_ops.to_float(discriminator_real_outputs)
        gen_out = math_ops.to_float(discriminator_gen_outputs)
        real_out.shape.assert_is_compatible_with(gen_out.shape)

        # The partial losses are kept out of every collection; only the
        # combined loss is registered.
        shared_kwargs = {'loss_collection': None, 'reduction': reduction}
        loss_on_generated = losses.compute_weighted_loss(
            gen_out, generated_weights, scope, **shared_kwargs)
        loss_on_real = losses.compute_weighted_loss(
            real_out, real_weights, scope, **shared_kwargs)

        loss = loss_on_generated - loss_on_real
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_wass_loss', loss_on_generated)
            summary.scalar('discriminator_real_wass_loss', loss_on_real)
            summary.scalar('discriminator_wass_loss', loss)

    return loss
def _laplace_nll(self, labels, mus, sigmas, alphas, scope=None,
                 loss_collection=ops.GraphKeys.LOSSES):
    """Negative log-likelihood of `labels` under a weighted mixture.

    NOTE(review): despite the name and the "log_Laplace_like" scope, the
    component distribution built here is `Normal` -- confirm this is intended.

    Args:
      labels: Values whose likelihood is evaluated.
      mus: Component locations passed as `loc`.
      sigmas: Component scales passed as `scale`.
      alphas: Per-component weights multiplied into the component densities
        before summing over axis 1.
      scope: Optional name scope; defaults to "log_Laplace_like".
      loss_collection: Collection passed to `util.add_loss` for the result.

    Returns:
      `-log(sum(alphas * density, axis=1) + 1e-6)`.
    """
    with ops.name_scope(scope, "log_Laplace_like",
                        (mus, sigmas, alphas)) as scope:
        # Small constant that keeps the log argument away from zero.
        eps = 1e-6
        components = Normal(loc=mus, scale=sigmas)
        component_density = components.prob(labels)
        mixture_density = math_ops.reduce_sum(
            alphas * component_density, axis=1)
        loss = -math_ops.log(mixture_density + eps)
        util.add_loss(loss, loss_collection)
        return loss
def compute_weighted_loss(losses, weights=1.0, scope=None,
                          loss_collection=ops.GraphKeys.LOSSES):
    """Multiplies `losses` by `weights` without reducing and registers it.

    Args:
      losses: Value convertible to a `Tensor` holding the raw losses.
      weights: Weights checked to be broadcastable to `losses`.
      scope: Optional name scope; defaults to "weighted_loss".
      loss_collection: Collection passed to `util.add_loss` for the result.

    Returns:
      The element-wise weighted losses, cast back to the dtype `losses` had
      on input. No reduction is applied.
    """
    with tf.name_scope(scope, "weighted_loss", (losses, weights)):
        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with tf.control_dependencies((broadcast_check,)):
            loss_values = tf.convert_to_tensor(losses)
            original_dtype = loss_values.dtype
            float_losses = tf.to_float(loss_values)
            float_weights = tf.to_float(weights)
            weighted = tf.cast(
                tf.multiply(float_losses, float_weights), original_dtype)
            util.add_loss(weighted, loss_collection)
            return weighted
def compute_weighted_loss(losses, weights=1.0, scope=None,
                          loss_collection=ops.GraphKeys.LOSSES):
    """Returns the weighted mean of `losses` and registers it as a loss.

    Args:
      losses: A tensor of size [batch_size, d1, ... dN].
      weights: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.

    Returns:
      A scalar `Tensor` holding the weighted loss, cast to the dtype `losses`
      had on input.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.
    """
    with ops.name_scope(scope, "weighted_loss", [losses, weights]):
        loss_values = ops.convert_to_tensor(losses)
        original_dtype = loss_values.dtype
        loss_values = math_ops.to_float(loss_values)
        weight_values = math_ops.to_float(ops.convert_to_tensor(weights))

        if loss_values.get_shape().ndims is None:
            raise ValueError("losses.get_shape().ndims cannot be None")
        weights_shape = weight_values.get_shape()
        if weights_shape.ndims is None:
            raise ValueError("weight.get_shape().ndims cannot be None")

        # Drop a trailing dimension that is compatible with size 1.
        if weights_shape.ndims > 1:
            if weights_shape.dims[-1].is_compatible_with(1):
                weight_values = array_ops.squeeze(weight_values, [-1])

        scaled_total = _scale_losses(loss_values, weight_values)
        present = _num_present(loss_values, weight_values)
        averaged = _safe_mean(scaled_total, present)

        # Convert the result back to the input type.
        averaged = math_ops.cast(averaged, original_dtype)
        util.add_loss(averaged, loss_collection)
        return averaged
def compute_weighted_loss(
        losses, weights=1.0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
    """Sums the weighted `losses`, optionally averaging over present entries.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.
      reduction: Type of reduction to apply to the loss. With
        `WEIGHTED_SUM_BY_NONZERO_WEIGHTS` the sum is passed through
        `_safe_mean` with `_num_present(losses, weights)`; any other
        validated value leaves the plain sum.

    Returns:
      A scalar `Tensor` holding the weighted loss, cast to the dtype `losses`
      had on input.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            loss_values = ops.convert_to_tensor(losses)
            original_dtype = loss_values.dtype
            loss_values = math_ops.to_float(loss_values)
            weight_values = math_ops.to_float(weights)

            scaled = math_ops.multiply(loss_values, weight_values)
            result = math_ops.reduce_sum(scaled)
            if reduction == Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS:
                present = _num_present(loss_values, weight_values)
                result = _safe_mean(result, present)

            # Convert the result back to the input type.
            result = math_ops.cast(result, original_dtype)
            util.add_loss(result, loss_collection)
            return result
def compute_weighted_loss(
        losses, weights=1.0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES):
    """Computes the weighted mean loss and adds it to `loss_collection`.

    Args:
      losses: A tensor of size [batch_size, d1, ... dN].
      weights: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.

    Returns:
      A scalar `Tensor` holding the weighted loss, cast to the dtype `losses`
      had on input.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.
    """
    with ops.name_scope(scope, "weighted_loss", [losses, weights]):
        raw = ops.convert_to_tensor(losses)
        input_dtype = raw.dtype
        float_losses = math_ops.to_float(raw)
        float_weights = math_ops.to_float(ops.convert_to_tensor(weights))

        if float_losses.get_shape().ndims is None:
            raise ValueError("losses.get_shape().ndims cannot be None")
        weights_shape = float_weights.get_shape()
        if weights_shape.ndims is None:
            raise ValueError("weight.get_shape().ndims cannot be None")

        # A trailing dimension compatible with size 1 is squeezed away.
        has_trailing_unit_dim = (
            weights_shape.ndims > 1 and
            weights_shape.dims[-1].is_compatible_with(1))
        if has_trailing_unit_dim:
            float_weights = array_ops.squeeze(float_weights, [-1])

        total = _scale_losses(float_losses, float_weights)
        count = _num_present(float_losses, float_weights)
        mean_loss = math_ops.cast(_safe_mean(total, count), input_dtype)
        util.add_loss(mean_loss, loss_collection)
        return mean_loss
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Weights `losses`, reduces them per `reduction`, and registers the result.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.
      reduction: Type of reduction to apply to the loss.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.

    Note:
      When calculating the gradient of a weighted loss, contributions from
      both `losses` and `weights` are considered. If your `weights` depend on
      some model parameters but you do not want this to affect the loss
      gradient, apply `tf.stop_gradient` to `weights` before passing them to
      `compute_weighted_loss`.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a
    `tf.keras.Model`.
    @end_compatibility
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        # Save the `reduction` argument for loss normalization when
        # distributing to multiple replicas.
        # TODO(josh11b): Associate it with the returned op for more precision.
        graph = ops.get_default_graph()
        graph._last_loss_reduction = reduction  # pylint: disable=protected-access

        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            loss_values = ops.convert_to_tensor(losses)
            original_dtype = loss_values.dtype
            loss_values = math_ops.cast(loss_values, dtype=dtypes.float32)
            weight_values = math_ops.cast(weights, dtype=dtypes.float32)
            result = math_ops.multiply(loss_values, weight_values)

            if reduction != Reduction.NONE:
                result = math_ops.reduce_sum(result)
                if reduction == Reduction.MEAN:
                    # Normalize by the sum of the broadcast weights.
                    weight_total = math_ops.reduce_sum(
                        array_ops.ones_like(loss_values) * weight_values)
                    result = _safe_mean(result, weight_total)
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
                      reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    present = _num_present(loss_values, weight_values)
                    result = _safe_mean(result, present)
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    element_count = _num_elements(loss_values)
                    result = _safe_mean(result, element_count)

            # Convert the result back to the input type.
            result = math_ops.cast(result, original_dtype)
            util.add_loss(result, loss_collection)
            return result
def minimax_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        label_smoothing=0.25,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Original minimax discriminator loss for GANs, with label smoothing.

    Note that the authors don't recommend using this loss. A more practically
    useful loss is `modified_discriminator_loss`.

      L = - real_weights * log(sigmoid(D(x)))
          - generated_weights * log(1 - sigmoid(D(G(z))))

    See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for
    more details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      label_smoothing: The amount of smoothing for positive labels. This
        technique is taken from `Improved Techniques for Training GANs`
        (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing. It is
        only passed to the real-data term.
      real_weights: Optional `Tensor` whose rank is either 0, or the same rank
        as `discriminator_real_outputs`, and which must be broadcastable to
        it (every dimension is either `1` or equal to the corresponding
        dimension).
      generated_weights: Same as `real_weights`, but for
        `discriminator_gen_outputs`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.losses.Reduction` to apply to the loss.
      add_summaries: Whether to add scalar summaries for the loss terms.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    scope_inputs = (discriminator_real_outputs, discriminator_gen_outputs,
                    real_weights, generated_weights, label_smoothing)
    with ops.name_scope(scope, 'discriminator_minimax_loss',
                        scope_inputs) as scope:
        # Real samples are scored against all-ones targets, with smoothing.
        real_targets = array_ops.ones_like(discriminator_real_outputs)
        loss_on_real = losses.sigmoid_cross_entropy(
            real_targets, discriminator_real_outputs, real_weights,
            label_smoothing, scope,
            loss_collection=None, reduction=reduction)

        # Generated samples are scored against all-zeros targets.
        fake_targets = array_ops.zeros_like(discriminator_gen_outputs)
        loss_on_generated = losses.sigmoid_cross_entropy(
            fake_targets, discriminator_gen_outputs, generated_weights,
            scope=scope, loss_collection=None, reduction=reduction)

        loss = loss_on_real + loss_on_generated
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_minimax_loss',
                           loss_on_generated)
            summary.scalar('discriminator_real_minimax_loss', loss_on_real)
            summary.scalar('discriminator_minimax_loss', loss)

    return loss
def least_squares_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_label=1,
        fake_label=0,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Least squares discriminator loss.

    This loss comes from `Least Squares Generative Adversarial Networks`
    (https://arxiv.org/abs/1611.04076).

      L = 1/2 * (D(x) - `real_label`) ** 2
          + 1/2 * (D(G(z)) - `fake_label`) ** 2

    where D(y) are discriminator logits.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data.
        Expected to be in the range of (-inf, inf).
      real_label: The value that the discriminator tries to output for real
        data.
      fake_label: The value that the discriminator tries to output for fake
        data.
      real_weights: Optional `Tensor` whose rank is either 0, or the same rank
        as `discriminator_real_outputs`, and which must be broadcastable to
        `discriminator_real_outputs` (every dimension is either `1` or equal
        to the corresponding dimension).
      generated_weights: Same as `real_weights`, but for
        `discriminator_gen_outputs`.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the combined loss is added.
      reduction: A `tf.losses.Reduction` to apply to the loss.
      add_summaries: Whether to add scalar summaries for the loss terms.

    Returns:
      A loss Tensor. The shape depends on `reduction`.
    """
    with ops.name_scope(scope, 'lsq_discriminator_loss',
                        (discriminator_gen_outputs, real_label)) as scope:
        real_out = math_ops.to_float(discriminator_real_outputs)
        gen_out = math_ops.to_float(discriminator_gen_outputs)
        real_out.shape.assert_is_compatible_with(gen_out.shape)

        # Halved squared distance of each output from its target label.
        real_losses = math_ops.squared_difference(real_out, real_label) / 2.0
        fake_losses = math_ops.squared_difference(gen_out, fake_label) / 2.0

        def _reduced(values, weights):
            # Only the combined loss is registered, hence no collection here.
            return losses.compute_weighted_loss(
                values, weights, scope,
                loss_collection=None, reduction=reduction)

        loss_on_real = _reduced(real_losses, real_weights)
        loss_on_generated = _reduced(fake_losses, generated_weights)

        loss = loss_on_real + loss_on_generated
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_lsq_loss', loss_on_generated)
            summary.scalar('discriminator_real_lsq_loss', loss_on_real)
            summary.scalar('discriminator_lsq_loss', loss)

    return loss
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES):
  """Scales `losses` by `weights` and reduces them to a single mean value.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented
  for `weights` below.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: The loss will be added to these collections.

  Returns:
    A scalar `Tensor` holding the weighted loss, cast back to the dtype of
    `losses`.

  Raises:
    ValueError: If the rank of `losses` or `weights` is statically unknown,
      if `weights` has more dimensions than `losses`, or if a `losses`
      dimension of size 1 is paired with a `weights` dimension of another
      known size.
  """
  with ops.name_scope(scope, "weighted_loss", [losses, weights]):
    losses = ops.convert_to_tensor(losses)
    original_dtype = losses.dtype
    losses = math_ops.to_float(losses)
    weights = math_ops.to_float(ops.convert_to_tensor(weights))

    loss_shape = losses.get_shape()
    if loss_shape.ndims is None:
      raise ValueError("losses.get_shape().ndims cannot be None")
    weight_shape = weights.get_shape()
    if weight_shape.ndims is None:
      raise ValueError("weight.get_shape().ndims cannot be None")

    # TODO(b/33556118): Remove `ndims > 1` check so shapes [] and [1] behave the
    # same.
    if weight_shape.ndims > 1 and weight_shape.dims[-1].is_compatible_with(1):
      weights = array_ops.squeeze(weights, [-1])

    # TODO(b/33556118): Remove this when we require weights shape be either
    # scalar or the same as losses.
    # Note that the checks below look at the shape captured before the squeeze.
    weight_dims = weight_shape.as_list()
    loss_dims = loss_shape.as_list()
    if len(weight_dims) > len(loss_dims):
      raise ValueError(
          "Invalid weights shape %s can not be broadcast to losses %s." % (
              weight_shape, loss_shape))

    # `weight_dims` is never longer than `loss_dims` here, so zipping walks
    # exactly the leading dimensions that both shapes share.
    paired_dims = list(zip(loss_dims, weight_dims))

    # First pass: reject pairs that cannot broadcast.
    for loss_dim, weight_dim in paired_dims:
      if loss_dim == 1 and weight_dim is not None and weight_dim != 1:
        raise ValueError(
            "Invalid weights shape %s can not be broadcast to losses %s." % (
                weight_shape, loss_shape))

    # Second pass: warn about unit weight dimensions stretched over a larger
    # loss dimension.
    for axis, (loss_dim, weight_dim) in enumerate(paired_dims):
      if loss_dim is not None and loss_dim != 1 and weight_dim == 1:
        logging.warn(
            "WARNING: Weights %s with dimension 1 will result in a sum"
            ", not average, across dimension %d.", weight_shape, axis)

    scaled_total = _scale_losses(losses, weights)
    present_count = _num_present(losses, weights)
    result = _safe_mean(scaled_total, present_count)
    # Convert the result back to the input type.
    result = math_ops.cast(result, original_dtype)
    util.add_loss(result, loss_collection)
    return result
def acgan_discriminator_loss(
    discriminator_real_classification_logits,
    discriminator_gen_classification_logits,
    one_hot_labels,
    label_smoothing=0.0,
    real_weights=1.0,
    generated_weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Computes the ACGAN classification loss for the discriminator.

  The ACGAN loss adds a classification loss to the conditional discriminator.
  The discriminator is therefore expected to produce both a real/fake
  prediction and classification logits (usually the last conv layer,
  flattened); only the classification logits are consumed here.

  For more details:
    ACGAN: https://arxiv.org/abs/1610.09585

  Args:
    discriminator_real_classification_logits: Classification logits for real
      data.
    discriminator_gen_classification_logits: Classification logits for
      generated data.
    one_hot_labels: A Tensor holding one-hot labels for the batch.
    label_smoothing: A float in [0, 1]. If greater than 0, smooth the labels
      for "discriminator on real data" as suggested in
      https://arxiv.org/pdf/1701.00160
    real_weights: Optional weights for the loss on real data, forwarded as
      `weights` to `losses.softmax_cross_entropy`.
    generated_weights: Same as `real_weights`, but for the loss on
      `discriminator_gen_classification_logits`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the combined loss is added.
    reduction: A `tf.losses.Reduction` applied to each of the two loss terms.
    add_summaries: Whether to emit scalar summaries for the loss terms.

  Returns:
    A loss Tensor. Shape depends on `reduction`.
  """
  scope_inputs = (discriminator_real_classification_logits,
                  discriminator_gen_classification_logits,
                  one_hot_labels)
  with ops.name_scope(scope, 'acgan_discriminator_loss',
                      scope_inputs) as scope:
    # The partial terms are kept out of any collection; only the total is
    # registered below.
    shared_kwargs = dict(
        scope=scope, loss_collection=None, reduction=reduction)

    generated_term = losses.softmax_cross_entropy(
        one_hot_labels,
        discriminator_gen_classification_logits,
        weights=generated_weights,
        **shared_kwargs)
    # Label smoothing is applied to the real-data term only.
    real_term = losses.softmax_cross_entropy(
        one_hot_labels,
        discriminator_real_classification_logits,
        weights=real_weights,
        label_smoothing=label_smoothing,
        **shared_kwargs)

    total = generated_term + real_term
    util.add_loss(total, loss_collection)

  if add_summaries:
    summary.scalar('discriminator_gen_ac_loss', generated_term)
    summary.scalar('discriminator_real_ac_loss', real_term)
    summary.scalar('discriminator_ac_loss', total)

  return total
def minimax_discriminator_loss(
    discriminator_real_outputs,
    discriminator_gen_outputs,
    label_smoothing=0.25,
    real_weights=1.0,
    generated_weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Computes the original minimax GAN discriminator loss, with smoothing.

  Note that the authors don't recommend using this loss. A more practically
  useful loss is `modified_discriminator_loss`.

    L = - real_weights * log(sigmoid(D(x)))
        - generated_weights * log(1 - sigmoid(D(G(z))))

  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
  details.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data.
      Expected to be in the range of (-inf, inf).
    label_smoothing: The amount of smoothing for positive labels. This
      technique is taken from `Improved Techniques for Training GANs`
      (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing.
    real_weights: Optional weights for the loss on real data, forwarded as
      `weights` to `losses.sigmoid_cross_entropy`.
    generated_weights: Same as `real_weights`, but for the loss on generated
      data.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the combined loss is added.
    reduction: A `tf.compat.v1.losses.Reduction` applied to each of the two
      loss terms.
    add_summaries: Whether to emit scalar summaries for the loss terms.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
  scope_inputs = (discriminator_real_outputs, discriminator_gen_outputs,
                  real_weights, generated_weights, label_smoothing)
  with ops.name_scope(scope, 'discriminator_minimax_loss',
                      scope_inputs) as scope:
    # Real data: cross-entropy of D(x) against all-ones labels, with
    # `label_smoothing` forwarded to soften those positive targets.
    positive_labels = array_ops.ones_like(discriminator_real_outputs)
    real_term = losses.sigmoid_cross_entropy(
        positive_labels,
        discriminator_real_outputs,
        weights=real_weights,
        label_smoothing=label_smoothing,
        scope=scope,
        loss_collection=None,
        reduction=reduction)

    # Generated data: cross-entropy of D(G(z)) against all-zeros labels,
    # i.e. -log(1 - sigmoid(D(G(z)))). No smoothing is applied here.
    negative_labels = array_ops.zeros_like(discriminator_gen_outputs)
    generated_term = losses.sigmoid_cross_entropy(
        negative_labels,
        discriminator_gen_outputs,
        weights=generated_weights,
        scope=scope,
        loss_collection=None,
        reduction=reduction)

    total = real_term + generated_term
    util.add_loss(total, loss_collection)

  if add_summaries:
    summary.scalar('discriminator_gen_minimax_loss', generated_term)
    summary.scalar('discriminator_real_minimax_loss', real_term)
    summary.scalar('discriminator_minimax_loss', total)

  return total
def least_squares_discriminator_loss(
    discriminator_real_outputs,
    discriminator_gen_outputs,
    real_label=1,
    fake_label=0,
    real_weights=1.0,
    generated_weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Computes the least-squares GAN loss for the discriminator.

  Follows `Least Squares Generative Adversarial Networks`
  (https://arxiv.org/abs/1611.04076):

    L = 1/2 * (D(x) - `real_label`) ** 2 +
        1/2 * (D(G(z)) - `fake_label`) ** 2

  where D(y) are discriminator logits.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data.
      Expected to be in the range of (-inf, inf).
    real_label: Target value for the discriminator output on real data.
    fake_label: Target value for the discriminator output on generated data.
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `discriminator_real_outputs`, and which must be broadcastable to
      `discriminator_real_outputs` (i.e., all dimensions must be either `1`,
      or the same as the corresponding dimension).
    generated_weights: Same as `real_weights`, but for
      `discriminator_gen_outputs`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the combined loss is added.
    reduction: A `tf.compat.v1.losses.Reduction` applied to each of the two
      loss terms.
    add_summaries: Whether to emit scalar summaries for the loss terms.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
  with ops.name_scope(scope, 'lsq_discriminator_loss',
                      (discriminator_gen_outputs, real_label)) as scope:
    real_outputs = _to_float(discriminator_real_outputs)
    gen_outputs = _to_float(discriminator_gen_outputs)
    real_outputs.shape.assert_is_compatible_with(gen_outputs.shape)

    # Half the squared distance of each output from its target label.
    half_sq_real = math_ops.squared_difference(real_outputs, real_label) / 2.0
    half_sq_fake = math_ops.squared_difference(gen_outputs, fake_label) / 2.0

    # The partial terms are kept out of any collection; only the total is
    # registered below.
    reduce_kwargs = dict(
        scope=scope, loss_collection=None, reduction=reduction)
    real_term = losses.compute_weighted_loss(
        half_sq_real, weights=real_weights, **reduce_kwargs)
    generated_term = losses.compute_weighted_loss(
        half_sq_fake, weights=generated_weights, **reduce_kwargs)

    total = real_term + generated_term
    util.add_loss(total, loss_collection)

  if add_summaries:
    summary.scalar('discriminator_gen_lsq_loss', generated_term)
    summary.scalar('discriminator_real_lsq_loss', real_term)
    summary.scalar('discriminator_lsq_loss', total)

  return total
def acgan_discriminator_loss(
    discriminator_real_classification_logits,
    discriminator_gen_classification_logits,
    one_hot_labels,
    label_smoothing=0.0,
    real_weights=1.0,
    generated_weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Computes the ACGAN classification loss for the discriminator.

  The ACGAN loss adds a classification loss to the conditional discriminator.
  The discriminator is therefore expected to produce both a real/fake
  prediction and classification logits (usually the last conv layer,
  flattened); only the classification logits are consumed here.

  For more details:
    ACGAN: https://arxiv.org/abs/1610.09585

  Args:
    discriminator_real_classification_logits: Classification logits for real
      data.
    discriminator_gen_classification_logits: Classification logits for
      generated data.
    one_hot_labels: A Tensor holding one-hot labels for the batch.
    label_smoothing: A float in [0, 1]. If greater than 0, smooth the labels
      for "discriminator on real data" as suggested in
      https://arxiv.org/pdf/1701.00160
    real_weights: Optional weights for the loss on real data, forwarded as
      `weights` to `losses.softmax_cross_entropy`.
    generated_weights: Same as `real_weights`, but for the loss on
      `discriminator_gen_classification_logits`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the combined loss is added.
    reduction: A `tf.compat.v1.losses.Reduction` applied to each of the two
      loss terms.
    add_summaries: Whether to emit scalar summaries for the loss terms.

  Returns:
    A loss Tensor. Shape depends on `reduction`.
  """
  scope_inputs = (discriminator_real_classification_logits,
                  discriminator_gen_classification_logits,
                  one_hot_labels)
  with ops.name_scope(scope, 'acgan_discriminator_loss',
                      scope_inputs) as scope:
    # The partial terms are kept out of any collection; only the total is
    # registered below.
    shared_kwargs = dict(
        scope=scope, loss_collection=None, reduction=reduction)

    generated_term = losses.softmax_cross_entropy(
        one_hot_labels,
        discriminator_gen_classification_logits,
        weights=generated_weights,
        **shared_kwargs)
    # Label smoothing is applied to the real-data term only.
    real_term = losses.softmax_cross_entropy(
        one_hot_labels,
        discriminator_real_classification_logits,
        weights=real_weights,
        label_smoothing=label_smoothing,
        **shared_kwargs)

    total = generated_term + real_term
    util.add_loss(total, loss_collection)

  if add_summaries:
    summary.scalar('discriminator_gen_ac_loss', generated_term)
    summary.scalar('discriminator_real_ac_loss', real_term)
    summary.scalar('discriminator_ac_loss', total)

  return total
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which measures the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` measures the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Since the inputs are of shape `[batch_size, d0, ... dN]`, the pairs are
  formed within each batch sample but never across samples. For example, if
  `predictions` represents a batch of 16 grayscale images of dimension
  [batch_size, 100, 200], the pairs are drawn from each image, but not across
  images.

  `weights` acts as a coefficient for the loss. A scalar simply scales the
  loss. A tensor of size [batch_size] rescales the total loss of each sample
  by the corresponding element of `weights`.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")

  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.to_float(weights)
    labels = math_ops.to_float(labels)
    broadcast_check = weights_broadcast_ops.assert_broadcastable(
        weights, labels)
    with ops.control_dependencies((broadcast_check,)):
      predictions = math_ops.to_float(predictions)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      errors = math_ops.subtract(predictions, labels)

      # Every axis except the leading batch axis.
      non_batch_axes = math_ops.range(1, array_ops.rank(errors))

      squared_error_sums = math_ops.reduce_sum(
          math_ops.square(errors),
          reduction_indices=non_batch_axes,
          keep_dims=True)
      present_counts = _num_present(errors, weights, per_batch=True)

      first_term = 2.0 * _safe_div(squared_error_sums, present_counts - 1)

      error_sums = math_ops.reduce_sum(
          errors, reduction_indices=non_batch_axes, keep_dims=True)
      squared_sums = math_ops.square(error_sums)
      pair_normalizer = math_ops.multiply(present_counts, present_counts - 1)
      second_term = 2.0 * _safe_div(squared_sums, pair_normalizer)

      per_sample = math_ops.multiply(first_term - second_term, weights)
      summed = math_ops.reduce_sum(per_sample)

      # Report zero when no element is present at all.
      any_present = math_ops.reduce_sum(present_counts) > 0
      result = array_ops.where(
          any_present, summed, array_ops.zeros_like(summed), name="value")
      util.add_loss(result, loss_collection)
      return result
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Applies `weights` to `losses` and reduces the result per `reduction`.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses`
      dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: The loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    broadcast_check = weights_broadcast_ops.assert_broadcastable(
        weights, losses)
    with ops.control_dependencies((broadcast_check,)):
      losses = ops.convert_to_tensor(losses)
      original_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      scaled = math_ops.multiply(losses, weights)

      result = scaled
      if reduction != Reduction.NONE:
        result = math_ops.reduce_sum(scaled)
        nonzero_weight_modes = (Reduction.SUM_BY_NONZERO_WEIGHTS,
                                Reduction.SUM_OVER_NONZERO_WEIGHTS)
        if reduction == Reduction.MEAN:
          # Normalize by the total weight spread over every element.
          total_weight = math_ops.reduce_sum(
              array_ops.ones_like(losses) * weights)
          result = _safe_mean(result, total_weight)
        elif reduction in nonzero_weight_modes:
          result = _safe_mean(result, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          result = _safe_mean(result, _num_elements(losses))
        # Any other reduction keeps the plain sum computed above.

      # Convert the result back to the input type.
      result = math_ops.cast(result, original_dtype)
      util.add_loss(result, loss_collection)
      return result
def hinge_discriminator_loss(discriminator_real_outputs,
                             discriminator_gen_outputs,
                             real_weights=1.0,
                             generated_weights=1.0,
                             scope=None,
                             loss_collection=ops.GraphKeys.LOSSES,
                             reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                             add_summaries=False):
  """Hinge discriminator loss.

    L = relu(1 - D(x)) + relu(1 + D(G(z)))

  where D(y) are discriminator outputs. Each term is weighted and reduced by
  `losses.compute_weighted_loss` before the two are added.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data.
      Expected to be in the range of (-inf, inf).
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `discriminator_real_outputs`, and must be broadcastable to
      `discriminator_real_outputs` (i.e., all dimensions must be either `1`,
      or the same as the corresponding dimension).
    generated_weights: Same as `real_weights`, but for
      `discriminator_gen_outputs`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the combined loss is added.
    reduction: A `tf.losses.Reduction` applied to each of the two loss terms.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
  with ops.name_scope(scope, 'hinge_discriminator_loss',
                      (discriminator_real_outputs, discriminator_gen_outputs,
                       real_weights, generated_weights)) as scope:
    discriminator_real_outputs = math_ops.to_float(
        discriminator_real_outputs)
    discriminator_gen_outputs = math_ops.to_float(
        discriminator_gen_outputs)
    discriminator_real_outputs.shape.assert_is_compatible_with(
        discriminator_gen_outputs.shape)

    # Keep the hinge terms element-wise. Collapsing them to a scalar here
    # would make weights of the documented (non-scalar) rank fail the
    # broadcast check in `compute_weighted_loss` and would leave `reduction`
    # with nothing to reduce. With scalar weights and the default reduction
    # the result is still the mean hinge loss.
    real_losses = tf.nn.relu(1 - discriminator_real_outputs)
    fake_losses = tf.nn.relu(1 + discriminator_gen_outputs)

    # The partial terms are kept out of any collection; only the total is
    # registered below.
    loss_on_real = losses.compute_weighted_loss(
        real_losses, real_weights, scope,
        loss_collection=None, reduction=reduction)
    loss_on_generated = losses.compute_weighted_loss(
        fake_losses, generated_weights, scope,
        loss_collection=None, reduction=reduction)

    loss = loss_on_real + loss_on_generated
    util.add_loss(loss, loss_collection)

  if add_summaries:
    tf.summary.scalar('discriminator_gen_hinge_loss', loss_on_generated)
    tf.summary.scalar('discriminator_real_hinge_loss', loss_on_real)
    tf.summary.scalar('discriminator_hinge_loss', loss)

  return loss
def mean_pairwise_squared_error(labels,
                                predictions,
                                weights=1.0,
                                scope=None,
                                loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which measures the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` measures the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Since the inputs are of shape `[batch_size, d0, ... dN]`, the pairs are
  formed within each batch sample but never across samples. For example, if
  `predictions` represents a batch of 16 grayscale images of dimension
  [batch_size, 100, 200], the pairs are drawn from each image, but not across
  images.

  `weights` acts as a coefficient for the loss. A scalar simply scales the
  loss. A tensor of size [batch_size] rescales the total loss of each sample
  by the corresponding element of `weights`.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`,
      if the shape of `weights` is invalid, or if the rank of the differences
      or of `weights` is statically unknown.
  """
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    weights = math_ops.to_float(ops.convert_to_tensor(weights))

    errors = math_ops.subtract(predictions, labels)

    # Need to verify here since the function doesn't use compute_weighted_loss
    error_rank = errors.get_shape().ndims
    if error_rank is None:
      raise ValueError("diffs.get_shape().ndims cannot be None")
    if weights.get_shape().ndims is None:
      raise ValueError("weights.get_shape().ndims cannot be None")

    # Every axis except the leading batch axis.
    non_batch_axes = list(range(1, error_rank))

    squared_error_sums = math_ops.reduce_sum(
        math_ops.square(errors), reduction_indices=non_batch_axes)
    present_counts = _num_present(errors, weights, per_batch=True)

    first_term = 2.0 * _safe_div(squared_error_sums, present_counts)

    error_sums = math_ops.reduce_sum(errors, reduction_indices=non_batch_axes)
    squared_sums = math_ops.square(error_sums)
    squared_counts = math_ops.square(present_counts)
    second_term = 2.0 * _safe_div(squared_sums, squared_counts)

    scaled = _scale_losses(first_term - second_term, weights)

    # Report zero when no element is present at all.
    any_present = math_ops.reduce_sum(present_counts) > 0
    result = array_ops.where(
        any_present, scaled, array_ops.zeros_like(scaled), name="value")
    util.add_loss(result, loss_collection)
    return result
def mean_pairwise_squared_error(labels,
                                predictions,
                                weights=1.0,
                                scope=None,
                                loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which measures the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` measures the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Since the inputs are of shape `[batch_size, d0, ... dN]`, the pairs are
  formed within each batch sample but never across samples. For example, if
  `predictions` represents a batch of 16 grayscale images of dimension
  [batch_size, 100, 200], the pairs are drawn from each image, but not across
  images.

  `weights` acts as a coefficient for the loss. A scalar simply scales the
  loss. A tensor of size `[batch_size]` rescales the total loss of each sample
  by the corresponding element of `weights`.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: Collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")

  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.to_float(weights)
    labels = math_ops.to_float(labels)
    broadcast_check = weights_broadcast_ops.assert_broadcastable(
        weights, labels)
    with ops.control_dependencies((broadcast_check,)):
      predictions = math_ops.to_float(predictions)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      errors = math_ops.subtract(predictions, labels)

      # Every axis except the leading batch axis.
      non_batch_axes = math_ops.range(1, array_ops.rank(errors))

      squared_error_sums = math_ops.reduce_sum(
          math_ops.square(errors), axis=non_batch_axes, keepdims=True)
      present_counts = _num_present(errors, weights, per_batch=True)

      # Denominators are clamped at zero; `div_no_nan` then yields 0 for them.
      first_denominator = math_ops.maximum(present_counts - 1, 0)
      first_term = 2.0 * math_ops.div_no_nan(
          squared_error_sums, first_denominator, name="value")

      error_sums = math_ops.reduce_sum(
          errors, axis=non_batch_axes, keepdims=True)
      squared_sums = math_ops.square(error_sums)
      second_denominator = math_ops.maximum(
          math_ops.multiply(present_counts, present_counts - 1), 0)
      second_term = 2.0 * math_ops.div_no_nan(
          squared_sums, second_denominator, name="value")

      per_sample = math_ops.multiply(first_term - second_term, weights)
      summed = math_ops.reduce_sum(per_sample)

      # Report zero when no element is present at all.
      any_present = math_ops.reduce_sum(present_counts) > 0
      result = array_ops.where(
          any_present, summed, array_ops.zeros_like(summed), name="value")
      util.add_loss(result, loss_collection)
      return result
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES):
  """Scales `losses` by `weights` and reduces them to a single mean value.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented
  for `weights` below.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: The loss will be added to these collections.

  Returns:
    A scalar `Tensor` holding the weighted loss, cast back to the dtype of
    `losses`.

  Raises:
    ValueError: If the rank of `losses` or `weights` is statically unknown,
      or if `weights` has more dimensions than `losses`.
  """
  with ops.name_scope(scope, "weighted_loss", [losses, weights]):
    losses = ops.convert_to_tensor(losses)
    original_dtype = losses.dtype
    losses = math_ops.to_float(losses)
    weights = math_ops.to_float(ops.convert_to_tensor(weights))

    loss_shape = losses.get_shape()
    if loss_shape.ndims is None:
      raise ValueError("losses.get_shape().ndims cannot be None")
    weight_shape = weights.get_shape()
    if weight_shape.ndims is None:
      raise ValueError("weight.get_shape().ndims cannot be None")

    # TODO(b/33556118): Remove `ndims > 1` check so shapes [] and [1] behave the
    # same.
    if weight_shape.ndims > 1 and weight_shape.dims[-1].is_compatible_with(1):
      weights = array_ops.squeeze(weights, [-1])

    # TODO(b/33556118): Remove this when we require weights shape be either
    # scalar or the same as losses.
    # Note that the checks below look at the shape captured before the squeeze.
    weight_dims = weight_shape.as_list()
    loss_dims = loss_shape.as_list()
    if len(weight_dims) > len(loss_dims):
      raise ValueError(
          "Invalid weights shape %s can not be broadcast to losses %s." %
          (weight_shape, loss_shape))

    # `weight_dims` is never longer than `loss_dims` here, so zipping walks
    # exactly the leading dimensions that both shapes share. Warn about unit
    # weight dimensions stretched over a larger loss dimension.
    for axis, (loss_dim, weight_dim) in enumerate(zip(loss_dims, weight_dims)):
      if loss_dim is not None and loss_dim != 1 and weight_dim == 1:
        logging.warn(
            "WARNING: Weights %s with dimension 1 will result in a sum"
            ", not average, across dimension %d.", weight_shape, axis)

    scaled_total = _scale_losses(losses, weights)
    present_count = _num_present(losses, weights)
    result = _safe_mean(scaled_total, present_count)
    # Convert the result back to the input type.
    result = math_ops.cast(result, original_dtype)
    util.add_loss(result, loss_collection)
    return result