Example #1
 def _test_valid(self, weights, values):
   static_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights, values=values)
   weights_placeholder = array_ops.placeholder(dtypes_lib.float32)
   values_placeholder = array_ops.placeholder(dtypes_lib.float32)
   dynamic_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights_placeholder, values=values_placeholder)
   with self.cached_session():
     static_op.run()
     dynamic_op.run(feed_dict={
         weights_placeholder: weights,
         values_placeholder: values,
     })
Example #2
 def _test_valid(self, weights, values):
   static_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights, values=values)
   weights_placeholder = array_ops.placeholder(dtypes_lib.float32)
   values_placeholder = array_ops.placeholder(dtypes_lib.float32)
   dynamic_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights_placeholder, values=values_placeholder)
   with self.test_session():
     static_op.run()
     dynamic_op.run(feed_dict={
         weights_placeholder: weights,
         values_placeholder: values,
     })
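For reference, a minimal usage sketch of `assert_broadcastable` itself, outside a test harness (this assumes TensorFlow 1.x graph mode; the shapes are illustrative):

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import weights_broadcast_ops

# weights of shape (2, 1) broadcast cleanly to values of shape (2, 2).
weights = np.asarray([[1.0], [0.0]], dtype=np.float32)
values = np.asarray([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
assert_op = weights_broadcast_ops.assert_broadcastable(
    weights=weights, values=values)
with tf.Session() as sess:
  sess.run(assert_op)  # no error: the static shapes are compatible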
Example #3
 def _test_invalid(self, weights, values):
   error_msg = 'weights can not be broadcast to values'
   with self.assertRaisesRegex(ValueError, error_msg):
     weights_broadcast_ops.assert_broadcastable(weights=weights, values=values)
   weights_placeholder = array_ops.placeholder(dtypes_lib.float32)
   values_placeholder = array_ops.placeholder(dtypes_lib.float32)
   dynamic_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights_placeholder, values=values_placeholder)
   with self.cached_session():
     with self.assertRaisesRegex(errors_impl.OpError, error_msg):
       dynamic_op.run(feed_dict={
           weights_placeholder: weights,
           values_placeholder: values,
       })
Example #4
 def _test_invalid(self, weights, values):
   error_msg = 'weights can not be broadcast to values'
   with self.assertRaisesRegexp(ValueError, error_msg):
     weights_broadcast_ops.assert_broadcastable(weights=weights, values=values)
   weights_placeholder = array_ops.placeholder(dtypes_lib.float32)
   values_placeholder = array_ops.placeholder(dtypes_lib.float32)
   dynamic_op = weights_broadcast_ops.assert_broadcastable(
       weights=weights_placeholder, values=values_placeholder)
   with self.test_session():
     with self.assertRaisesRegexp(errors_impl.OpError, error_msg):
       dynamic_op.run(feed_dict={
           weights_placeholder: weights,
           values_placeholder: values,
       })
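The two assertions above probe the same check at different times; as a sketch (TF 1.x assumed), an incompatible static shape fails at graph construction with a `ValueError`, while placeholder-fed shapes defer the check to an `OpError` at run time:

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import weights_broadcast_ops

weights = np.ones((3,), dtype=np.float32)   # rank 1
values = np.ones((2, 4), dtype=np.float32)  # rank 2: not broadcastable
try:
  weights_broadcast_ops.assert_broadcastable(weights=weights, values=values)
except ValueError as e:
  print(e)  # 'weights can not be broadcast to values ...'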
Example #5
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.
  """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        with ops.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, losses), )):
            losses = ops.convert_to_tensor(losses)
            input_dtype = losses.dtype
            losses = math_ops.to_float(losses)
            weights = math_ops.to_float(weights)
            weighted_losses = math_ops.multiply(losses, weights)
            if reduction == Reduction.NONE:
                loss = weighted_losses
            else:
                loss = math_ops.reduce_sum(weighted_losses)
                if reduction == Reduction.MEAN:
                    loss = _safe_mean(
                        loss,
                        math_ops.reduce_sum(
                            array_ops.ones_like(losses) * weights))
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS
                      or reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    loss = _safe_mean(loss, _num_present(losses, weights))
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    loss = _safe_mean(loss, _num_elements(losses))

            # Convert the result back to the input type.
            loss = math_ops.cast(loss, input_dtype)
            util.add_loss(loss, loss_collection)
            return loss
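A worked sketch of the default `SUM_BY_NONZERO_WEIGHTS` reduction via the public TF 1.x wrapper `tf.losses.compute_weighted_loss` (the numbers are illustrative):

import tensorflow as tf

losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
weights = tf.constant([[1.0], [0.0]])  # masks out the second row
loss = tf.losses.compute_weighted_loss(
    losses, weights=weights,
    reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
with tf.Session() as sess:
  # weighted sum = 1 + 2 = 3; elements with nonzero weight = 2; 3 / 2 = 1.5
  print(sess.run(loss))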
Example #6
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.
  """
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, losses),)):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.to_float(losses)
      weights = math_ops.to_float(weights)
      total_loss = _scale_losses(losses, weights)
      num_present = _num_present(losses, weights)
      mean_loss = _safe_mean(total_loss, num_present)
      # Convert the result back to the input type.
      mean_loss = math_ops.cast(mean_loss, input_dtype)
      util.add_loss(mean_loss, loss_collection)
      return mean_loss
Example #7
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, losses),)):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.to_float(losses)
      weights = math_ops.to_float(weights)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss,
              math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss
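To make the branches above concrete, a sketch (TF 1.x assumed) of how the three scalar reductions differ only in their denominators: `MEAN` divides by the sum of the broadcast weights, `SUM_OVER_BATCH_SIZE` by the number of elements, and `SUM_BY_NONZERO_WEIGHTS` by the count of nonzero weights:

import tensorflow as tf

losses = tf.constant([1.0, 2.0, 3.0])
weights = tf.constant([2.0, 2.0, 0.0])  # weighted sum = 2*1 + 2*2 = 6
R = tf.losses.Reduction
with tf.Session() as sess:
  for reduction, expected in [
      (R.MEAN, 6 / 4.0),                    # denominator: sum(weights) = 4
      (R.SUM_OVER_BATCH_SIZE, 6 / 3.0),     # denominator: num elements = 3
      (R.SUM_BY_NONZERO_WEIGHTS, 6 / 2.0),  # denominator: nonzero count = 2
  ]:
    loss = tf.losses.compute_weighted_loss(losses, weights, reduction=reduction)
    print(reduction, sess.run(loss), expected)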
Example #8
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES):
    with tf.name_scope(scope, "weighted_loss", (losses, weights)):
        with tf.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, losses), )):
            losses = tf.convert_to_tensor(losses)
            input_type = losses.dtype
            losses = tf.to_float(losses)
            weights = tf.to_float(weights)
            weighted_losses = tf.multiply(losses, weights)
            loss = weighted_losses

            loss = tf.cast(loss, input_type)
            util.add_loss(loss, loss_collection)
            return loss
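This trimmed port drops every reduction branch, so it behaves like the `Reduction.NONE` path of the fuller versions: it returns the elementwise weighted losses rather than a scalar. The equivalent public call, as a sketch (TF 1.x assumed):

import tensorflow as tf

losses = tf.constant([1.0, 2.0, 3.0])
weights = tf.constant([1.0, 0.0, 1.0])
loss = tf.losses.compute_weighted_loss(
    losses, weights, reduction=tf.losses.Reduction.NONE)
with tf.Session() as sess:
  print(sess.run(loss))  # [1. 0. 3.] -- same shape as `losses`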
Example #9
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
    """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.
  """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        with ops.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, losses), )):
            losses = ops.convert_to_tensor(losses)
            input_dtype = losses.dtype
            losses = math_ops.to_float(losses)
            weights = math_ops.to_float(weights)
            weighted_losses = math_ops.multiply(losses, weights)
            loss = math_ops.reduce_sum(weighted_losses)
            if reduction == Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS:
                loss = _safe_mean(loss, _num_present(losses, weights))

            # Convert the result back to the input type.
            loss = math_ops.cast(loss, input_dtype)
            util.add_loss(loss, loss_collection)
            return loss
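Note the enum spelling: early TF 1.x releases named the default reduction `WEIGHTED_SUM_BY_NONZERO_WEIGHTS`; later releases renamed it to `SUM_BY_NONZERO_WEIGHTS`. A version-tolerant sketch (assuming both names denote the same divide-by-nonzero-weights behavior):

import tensorflow as tf

Reduction = tf.losses.Reduction
# Fall back to the newer name when the old one is absent.
reduction = getattr(Reduction, 'WEIGHTED_SUM_BY_NONZERO_WEIGHTS',
                    Reduction.SUM_BY_NONZERO_WEIGHTS)
loss = tf.losses.compute_weighted_loss(
    tf.constant([1.0, 2.0, 3.0]), tf.constant([1.0, 1.0, 0.0]),
    reduction=reduction)  # (1 + 2) / 2 = 1.5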
Example #10
def mean_pairwise_squared_error(labels,
                                predictions,
                                weights=1.0,
                                scope=None,
                                loss_collection=ops.GraphKeys.LOSSES):
    """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences that are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")
    with ops.name_scope(scope, "mean_pairwise_squared_error",
                        (predictions, labels, weights)) as scope:
        weights = math_ops.to_float(weights)
        labels = math_ops.to_float(labels)
        with ops.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, labels), )):
            predictions = math_ops.to_float(predictions)
            predictions.get_shape().assert_is_compatible_with(
                labels.get_shape())

            diffs = math_ops.subtract(predictions, labels)

            axis = math_ops.range(1, array_ops.rank(diffs))

            sum_squares_diff_per_batch = math_ops.reduce_sum(
                math_ops.square(diffs), axis=axis, keepdims=True)
            num_present_per_batch = _num_present(diffs,
                                                 weights,
                                                 per_batch=True)

            term1 = 2.0 * math_ops.div_no_nan(
                sum_squares_diff_per_batch,
                math_ops.maximum(num_present_per_batch - 1, 0),
                name="value")

            sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
            term2 = 2.0 * math_ops.div_no_nan(
                math_ops.square(sum_diff),
                math_ops.maximum(
                    math_ops.multiply(num_present_per_batch,
                                      num_present_per_batch - 1), 0),
                name="value")

            weighted_losses = math_ops.multiply(term1 - term2, weights)
            loss = math_ops.reduce_sum(weighted_losses)

            mean_loss = array_ops.where(
                math_ops.reduce_sum(num_present_per_batch) > 0,
                loss,
                array_ops.zeros_like(loss),
                name="value")
            util.add_loss(mean_loss, loss_collection)
            return mean_loss
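A worked sketch through the public wrapper `tf.losses.mean_pairwise_squared_error` (TF 1.x assumed; the numbers are chosen so the pairwise arithmetic is easy to verify by hand):

import tensorflow as tf

labels = tf.constant([[0.0, 1.0, 2.0]])
predictions = tf.constant([[0.0, 2.0, 4.0]])
loss = tf.losses.mean_pairwise_squared_error(labels, predictions)
with tf.Session() as sess:
  # elementwise diffs d = predictions - labels = [0, 1, 2];
  # pairwise loss = ((0-1)^2 + (0-2)^2 + (1-2)^2) / 3 = 2.0
  print(sess.run(loss))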
Example #11
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences that are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.to_float(weights)
    labels = math_ops.to_float(labels)
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, labels),)):
      predictions = math_ops.to_float(predictions)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      reduction_indices = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), reduction_indices=reduction_indices,
          keep_dims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
                              num_present_per_batch-1)

      sum_diff = math_ops.reduce_sum(
          diffs, reduction_indices=reduction_indices, keep_dims=True)
      term2 = 2.0 * _safe_div(
          math_ops.square(sum_diff),
          math_ops.multiply(num_present_per_batch, num_present_per_batch-1))

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss
Example #12
def compute_weighted_loss(losses,
                          weights=1.0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        # Save the `reduction` argument for loss normalization when distributing
        # to multiple replicas.
        # TODO(josh11b): Associate it with the returned op for more precision.
        ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

        with ops.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, losses), )):
            losses = ops.convert_to_tensor(losses)
            input_dtype = losses.dtype
            losses = math_ops.cast(losses, dtype=dtypes.float32)
            weights = math_ops.cast(weights, dtype=dtypes.float32)
            weighted_losses = math_ops.multiply(losses, weights)
            if reduction == Reduction.NONE:
                loss = weighted_losses
            else:
                loss = math_ops.reduce_sum(weighted_losses)
                if reduction == Reduction.MEAN:
                    loss = _safe_mean(
                        loss,
                        math_ops.reduce_sum(
                            array_ops.ones_like(losses) * weights))
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS
                      or reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    loss = _safe_mean(loss, _num_present(losses, weights))
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    loss = _safe_mean(loss, _num_elements(losses))

            # Convert the result back to the input type.
            loss = math_ops.cast(loss, input_dtype)
            util.add_loss(loss, loss_collection)
            return loss
Example #13
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, losses),)):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss
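The `tf.stop_gradient` advice from the Note can be verified directly; a minimal sketch (TF 1.x assumed):

import tensorflow as tf

w = tf.Variable([0.5, 0.5])
losses = tf.constant([1.0, 2.0])
loss = tf.losses.compute_weighted_loss(losses, weights=tf.stop_gradient(w))
# With stop_gradient, `w` receives no gradient through the loss weighting:
print(tf.gradients(loss, w))  # [None]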