Example #1
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []
  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = math_ops.reduce_prod(original_shape)
    implicit_dim = math_ops.equal(new_shape, -1)
    size_implicit_dim = (
        original_size // math_ops.maximum(1, -math_ops.reduce_prod(new_shape)))
    new_ndims = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        implicit_dim, array_ops.fill(new_ndims, size_implicit_dim), new_shape)
    validations = [] if not validate else [
        check_ops.assert_rank(
            original_shape, 1, message="Original shape must be a vector."),
        check_ops.assert_rank(
            new_shape, 1, message="New shape must be a vector."),
        check_ops.assert_less_equal(
            math_ops.count_nonzero(implicit_dim, dtype=dtypes.int32),
            1,
            message="At most one dimension can be unknown."),
        check_ops.assert_positive(
            expanded_new_shape, message="Shape elements must be >=-1."),
        check_ops.assert_equal(
            math_ops.reduce_prod(expanded_new_shape),
            original_size,
            message="Shape sizes do not match."),
    ]
    return expanded_new_shape, batch_shape_static, validations
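
A minimal usage sketch (not part of the original file) of how the helper above
resolves a single `-1`: reshaping 24 elements from `[6, 4]` to `[2, -1, 4]`
should yield `[2, 3, 4]`. It assumes TF 2.x eager execution and that the
snippet's module-level imports (`math_ops`, `array_ops`, `check_ops`,
`tensor_util`, ...) are in scope.

import tensorflow as tf

original_shape = tf.constant([6, 4], dtype=tf.int32)   # 6 * 4 = 24 elements
new_shape = tf.constant([2, -1, 4], dtype=tf.int32)    # the -1 must resolve to 3

# validate=True also builds the assertion ops listed above.
expanded, static_shape, checks = calculate_reshape(
    original_shape, new_shape, validate=True)
print(expanded.numpy())  # expected: [2 3 4]
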
Example #2
  def testDegenerate(self):
    for use_gpu in False, True:
      with self.test_session(use_gpu=use_gpu):
        for dtype in (dtypes.bool,):
          # A large number is needed to get Eigen to die
          x = array_ops.zeros((0, 9938), dtype=dtype)
          y = math_ops.count_nonzero(x, [0])
          self.assertAllEqual(y.eval(), np.zeros(9938))

  def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
               feed_dict=None):
    np_ans = (x != zero).astype(np.int32)
    if reduction_axes is None:
      np_ans = np.sum(np_ans, keepdims=keepdims)
    else:
      reduction_axes = np.array(reduction_axes).astype(np.int32)
      for ra in reduction_axes.ravel()[::-1]:
        np_ans = np.sum(np_ans, axis=ra, keepdims=keepdims)
    with self.test_session(use_gpu=use_gpu) as sess:
      tf_ans = math_ops.count_nonzero(x, reduction_axes, keepdims)
      out = sess.run(tf_ans, feed_dict)
    self.assertAllClose(np_ans, out)
    self.assertShapeEqual(np_ans, tf_ans)

  def testStringReduce(self):
    # Test case for GitHub issue 18712
    with self.cached_session() as sess:
      v = math_ops.count_nonzero(constant_op.constant(["test"]))
      self.assertAllClose(sess.run(v), 1)
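
The behavior these tests exercise is also reachable through the public
`tf.math.count_nonzero` API; a small eager-mode sketch with made-up values:

import tensorflow as tf

x = tf.constant([[0, 1, 2], [0, 0, 3]])
print(tf.math.count_nonzero(x).numpy())          # 3: nonzeros in the whole tensor
print(tf.math.count_nonzero(x, axis=0).numpy())  # [0 1 2]: per-column counts
print(tf.math.count_nonzero(x, axis=1, keepdims=True).numpy())  # [[2] [1]]

# Strings count as nonzero unless they are empty (the GitHub issue 18712 case).
print(tf.math.count_nonzero(tf.constant(["test", ""])).numpy())  # 1
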
Example #5
def count_nonzero(a, axis=None):
    return np_arrays.tensor_to_ndarray(
        math_ops.count_nonzero(np_array_ops.array(a).data, axis))
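
For reference, the public counterpart of this wrapper is
`tf.experimental.numpy.count_nonzero`; a minimal sketch (TF 2.x assumed):

import tensorflow.experimental.numpy as tnp

print(tnp.count_nonzero([[0, 1], [2, 0]]))          # 2 nonzero elements overall
print(tnp.count_nonzero([[0, 1], [2, 0]], axis=0))  # per-column counts: [1 1]
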
Example #6
def boolean_mask(data, mask, keepdims=False, name=None):
  """Applies a boolean mask to `data`.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  If `keepdims` is true then outer dimensions (corresponding to the `mask`
  dimensions) are preserved, and:

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  If `keepdims` is false, then the outer dimensions are collapsed (similar to
  the behavior of `tf.boolean_mask`), and:

  * `output[i, b1...bB] = data[a1...aA, b1...bB]`

     Where `(a1...aA)` is the `i`th `True` entry of `mask`
     (in row-major order).

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    keepdims: Whether to preserve the outer dimensions (`keepdims=True`) or
      flatten them (`keepdims=False`).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    If `keepdims` is false:

    * `rank(output) = rank(data) - rank(mask) + 1`.
    * `output.ragged_rank = max(data.ragged_rank - rank(mask) + 1, 0)`.

    If `keepdims` is true:

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:
    ```python
    >>> # Aliases for True & False so data and mask line up.
    >>> T, F = (True, False)

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Flatten outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=False).tolist()
    [1, 3, 7]

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Preserve outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=True).tolist()
    [[1, 3], [], [7]]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Flatten outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=False).tolist()
    [3, 5, 6]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Preserve outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=True).tolist()
    [[3], [], [5, 6]]

    >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([True, False, True]),
    ...     keepdims=True).tolist()
    [[1, 2, 3], [5, 6]]
    ```
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data, ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged.  Keep the splits
        # that we strip off in `splits`, so we can add them back on after
        # we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask, keepdims)

        # Add the ragged `splits` back to the result.
        if keepdims:
          masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
              masked_values, splits, validate=False)

        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask, keepdims=False)

      return ragged_tensor.RaggedTensor.from_row_splits(masked_values,
                                                        masked_splits,
                                                        validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask, ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask, keepdims)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2 and keepdims:
        # Add the innermost ragged dimension.  For each innermost cell, get the
        # number of values it contains.  Then flatten that to get a list of
        # cell lengths, and convert it to splits.  Finally, combine the splits
        # and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(mask, axis=-1,
                                                dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2 and keepdims:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
Example #7
def testStringReduce(self):
    # Test case for GitHub issue 18712
    with self.test_session() as sess:
        v = math_ops.count_nonzero(constant_op.constant(["test"]))
        self.assertAllClose(sess.run(v), 1)

def _is_all_zeros(grad):
    all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
    return all_zeros
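
A hedged sketch of how a predicate like `_is_all_zeros` can be used, for
example to skip work when a gradient is entirely zero (the surrounding wiring
is illustrative only, not from the original source):

import tensorflow as tf

def _is_all_zeros(grad):
    return tf.equal(tf.math.count_nonzero(grad), 0)

grad = tf.zeros([3, 4])
# Conditionally skip the (stand-in) update when the gradient has no nonzeros.
result = tf.cond(_is_all_zeros(grad),
                 lambda: tf.constant(0.0),
                 lambda: tf.reduce_sum(tf.abs(grad)))
print(result.numpy())  # 0.0
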
Example #9
def boolean_mask(data, mask, name=None):
    """Applies a boolean mask to `data` without flattening the mask dimensions.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  Note that `output` preserves the mask dimensions `a1...aA`; this differs
  from `tf.boolean_mask`, which flattens those dimensions.

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:

  >>> # Aliases for True & False so data and mask line up.
  >>> T, F = (True, False)

  >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.
  ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  ...     mask=[[T, F, T], [F, F, F], [T, F, F]]).to_list()
  [[1, 3], [], [7]]

  >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([[F, F, T], [F], [T, T]])).to_list()
  [[3], [], [5, 6]]

  >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([True, False, True])).to_list()
  [[1, 2, 3], [5, 6]]
  """
    with ops.name_scope(name, 'RaggedMask', [data, mask]):
        # Convert inputs to tensors.
        data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data,
                                                                name='data')
        mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(mask,
                                                                dtypes.bool,
                                                                name='mask')
        row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
            data, mask, return_dtype=True)

        # Get static rank of mask.
        if mask.shape.ndims is None:
            raise ValueError('mask.shape.ndims must be known statically.')
        elif mask.shape.ndims == 0:
            raise ValueError('mask cannot be scalar.')

        # If mask is ragged, then recurse with a non-ragged mask.
        if ragged_tensor.is_ragged(mask):
            if not ragged_tensor.is_ragged(data):
                data = ragged_tensor.RaggedTensor.from_tensor(
                    data,
                    ragged_rank=mask.ragged_rank,
                    row_splits_dtype=mask.row_splits.dtype)
            # Check that mask.nested_row_splits is a prefix of
            # data.nested_row_splits.
            splits_list = [
                mask.nested_row_splits,
                data.nested_row_splits[:mask.ragged_rank]
            ]
            with ops.control_dependencies(
                    ragged_util.assert_splits_match(splits_list)):
                # Strip off ragged `splits` until `mask` is non-ragged.  Keep the splits
                # that we strip off in `splits`, so we can add them back on after
                # we recursively mask the non-ragged data.
                splits = []
                while ragged_tensor.is_ragged(mask):
                    if mask.shape.ndims > 2:
                        splits.append(mask.row_splits)
                    else:
                        # Count the number of True mask values in each row to find the
                        # lengths of the filtered rows; then convert to splits.
                        int_mask = ragged_functional_ops.map_flat_values(
                            math_ops.cast, mask, dtype=row_splits_dtype)
                        masked_row_lengths = ragged_math_ops.reduce_sum(
                            int_mask, axis=1)
                        splits.append(
                            ragged_util.lengths_to_splits(masked_row_lengths))
                    mask = mask.values
                    data = data.values

                # Recursively apply the nested non-ragged mask to the nested data.
                masked_values = boolean_mask(data, mask)

                # Add the ragged `splits` back to the result.
                masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
                    masked_values, splits, validate=False)

                return masked_values

        # If mask is non-ragged and has rank 1, and data is ragged, then build a
        # ragged tensor with the indicated rows.
        elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
            # Get the masked splits: first get the length of each row, then filter
            # out the rows that we are deleting, and convert that filtered set of
            # masks back to a splits tensor.
            lengths = data.row_lengths()
            masked_lengths = array_ops.boolean_mask(lengths, mask)
            masked_splits = ragged_util.lengths_to_splits(masked_lengths)

            # Get the masked values: first get row ids corresponding to each
            # value, then use tf.gather to build a boolean mask that's false for
            # values that come from rows that we are deleting, and use that mask to
            # construct the masked values tensor.
            segment_ids = segment_id_ops.row_splits_to_segment_ids(
                data.row_splits)
            segment_mask = array_ops.gather(mask, segment_ids)
            masked_values = boolean_mask(data.values, segment_mask)

            return ragged_tensor.RaggedTensor.from_row_splits(masked_values,
                                                              masked_splits,
                                                              validate=False)

        # If mask is non-ragged and has rank>1, then convert it to be ragged,
        # with a ragged rank matching data.
        if ragged_tensor.is_ragged(data):
            mask = ragged_tensor.RaggedTensor.from_tensor(
                mask,
                ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
                row_splits_dtype=data.row_splits.dtype)
            return boolean_mask(data, mask)

        # Otherwise, data and mask are both `Tensor`s.
        else:
            # Apply `boolean_mask` to get the masked values.
            masked_values = array_ops.boolean_mask(data, mask)

            if mask.shape.ndims >= 2:
                # Add the innermost ragged dimension.  For each innermost cell, get the
                # number of values it contains.  Then flatten that to get a list of
                # cell lengths, and convert it to splits.  Finally, combine the splits
                # and values to get the innermost ragged tensor.
                masked_lengths = math_ops.count_nonzero(mask,
                                                        axis=-1,
                                                        dtype=row_splits_dtype)
                flattened_masked_lengths = array_ops.reshape(
                    masked_lengths, [-1])
                masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
                    masked_values, flattened_masked_lengths, validate=False)

                # Wrap remaining ragged dimensions.
                if mask.shape.ndims > 2:
                    mask_shape = array_ops.shape(mask,
                                                 out_type=row_splits_dtype)
                    split_size = math_ops.cumprod(mask_shape) + 1
                    for dim in range(mask.shape.ndims - 3, -1, -1):
                        elt_size = mask_shape[dim + 1]
                        masked_splits = math_ops.range(
                            split_size[dim]) * elt_size
                        masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                            masked_values, masked_splits, validate=False)

            return masked_values
Example #10
def count_nonzero(a, axis=None):
    return math_ops.count_nonzero(np_array_ops.array(a), axis)
Example #11
def sequence_loss(logits,
                  targets,
                  weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  sum_over_timesteps=False,
                  sum_over_batch=False,
                  softmax_loss_function=None,
                  name=None):
  """Weighted cross-entropy loss for a sequence of logits.

  Depending on the values of `average_across_timesteps` / `sum_over_timesteps`
  and `average_across_batch` / `sum_over_batch`, the returned Tensor will have
  rank 0, 1, or 2 as these arguments reduce the cross-entropy at each target,
  which has shape `[batch_size, sequence_length]`, over their respective
  dimensions. For example, if `average_across_timesteps` is `True` and
  `average_across_batch` is `False`, then the returned Tensor will have shape
  `[batch_size]`.

  Note that `average_across_timesteps` and `sum_over_timesteps` cannot both be
  True at the same time; the same applies to `average_across_batch` and
  `sum_over_batch`.

  The recommended loss reduction in TF 2.0 is summation (`sum_over_*`) rather
  than a weighted average. Users are encouraged to use `sum_over_timesteps`
  and `sum_over_batch` for reduction.

  Args:
    logits: A Tensor of shape
      `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
      The logits correspond to the prediction across all classes at each
      timestep.
    targets: A Tensor of shape `[batch_size, sequence_length]` and dtype
      int. The target represents the true class at each timestep.
    weights: A Tensor of shape `[batch_size, sequence_length]` and dtype
      float. `weights` constitutes the weighting of each prediction in the
      sequence. When using `weights` as masking, set all valid timesteps to 1
      and all padded timesteps to 0, e.g. a mask returned by `tf.sequence_mask`.
    average_across_timesteps: If set, sum the cost across the sequence
      dimension and divide the cost by the total label weight across timesteps.
    average_across_batch: If set, sum the cost across the batch dimension and
      divide the returned cost by the batch size.
    sum_over_timesteps: If set, sum the cost across the sequence dimension and
      divide by the size of the sequence. Note that any element with a weight
      of 0 will be excluded from the size calculation.
    sum_over_batch: If set, sum the cost across the batch dimension and divide
      the total cost by the batch size. Note that any element with a weight of
      0 will be excluded from the size calculation.
    softmax_loss_function: Function (labels, logits) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
      **Note that to avoid confusion, it is required for the function to accept
      named arguments.**
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A float Tensor of rank 0, 1, or 2 depending on the
    `average_across_timesteps` and `average_across_batch` arguments. By default,
    it has rank 0 (scalar) and is the weighted average cross-entropy
    (log-perplexity) per symbol.

  Raises:
    ValueError: if `logits` does not have 3 dimensions, `targets` does not have
      2 dimensions, or `weights` does not have 2 dimensions.
  """
  if len(logits.get_shape()) != 3:
    raise ValueError("Logits must be a "
                     "[batch_size x sequence_length x logits] tensor")
  if len(targets.get_shape()) != 2:
    raise ValueError("Targets must be a [batch_size x sequence_length] tensor")
  if len(weights.get_shape()) != 2:
    raise ValueError("Weights must be a [batch_size x sequence_length] tensor")
  if average_across_timesteps and sum_over_timesteps:
    raise ValueError("average_across_timesteps and sum_over_timesteps cannot "
                     "be set to True at same time.")
  if average_across_batch and sum_over_batch:
    raise ValueError("average_across_batch and sum_over_batch cannot be set "
                     "to True at same time.")
  with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
    num_classes = array_ops.shape(logits)[2]
    logits_flat = array_ops.reshape(logits, [-1, num_classes])
    targets = array_ops.reshape(targets, [-1])
    if softmax_loss_function is None:
      crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=targets, logits=logits_flat)
    else:
      crossent = softmax_loss_function(labels=targets, logits=logits_flat)
    crossent *= array_ops.reshape(weights, [-1])
    if average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_size = math_ops.reduce_sum(weights)
      crossent = math_ops.div_no_nan(crossent, total_size)
    elif sum_over_timesteps and sum_over_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_count = math_ops.cast(math_ops.count_nonzero(weights),
                                  crossent.dtype)
      crossent = math_ops.div_no_nan(crossent, total_count)
    else:
      crossent = array_ops.reshape(crossent, array_ops.shape(logits)[0:2])
      if average_across_timesteps or average_across_batch:
        reduce_axis = [0] if average_across_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_size = math_ops.reduce_sum(weights, axis=reduce_axis)
        crossent = math_ops.div_no_nan(crossent, total_size)
      elif sum_over_timesteps or sum_over_batch:
        reduce_axis = [0] if sum_over_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_count = math_ops.cast(
            math_ops.count_nonzero(weights, axis=reduce_axis),
            dtype=crossent.dtype)
        crossent = math_ops.div_no_nan(crossent, total_count)
    return crossent
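
A small illustrative call (shapes and values invented for this sketch;
`sequence_loss` is the function defined above, with its module-level
TensorFlow imports assumed to be in scope under TF 2.x eager execution):

import tensorflow as tf

logits = tf.random.uniform([2, 3, 5])                 # [batch, time, num_classes]
targets = tf.constant([[1, 2, 3], [0, 4, 0]])         # [batch, time]
weights = tf.constant([[1., 1., 1.], [1., 1., 0.]])   # 0 marks a padded step

# Default reduction: scalar weighted-average cross-entropy per symbol.
avg = sequence_loss(logits, targets, weights)

# Per-example loss of shape [batch_size], averaging only over timesteps.
per_example = sequence_loss(logits, targets, weights,
                            average_across_timesteps=True,
                            average_across_batch=False)
print(avg.numpy(), per_example.shape)  # scalar loss, (2,)
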
Example #12
def sequence_loss(logits,
                  targets,
                  weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  sum_over_timesteps=False,
                  sum_over_batch=False,
                  softmax_loss_function=None,
                  name=None):
    """Weighted cross-entropy loss for a sequence of logits.

    Depending on the values of `average_across_timesteps` /
    `sum_over_timesteps` and `average_across_batch` / `sum_over_batch`, the
    returned Tensor will have rank 0, 1, or 2 as these arguments reduce the
    cross-entropy at each target, which has shape
    `[batch_size, sequence_length]`, over their respective dimensions. For
    example, if `average_across_timesteps` is `True` and `average_across_batch`
    is `False`, then the returned Tensor will have shape `[batch_size]`.

    Note that `average_across_timesteps` and `sum_over_timesteps` cannot both
    be True at the same time; the same applies to `average_across_batch` and
    `sum_over_batch`.

    The recommended loss reduction in TF 2.0 is summation (`sum_over_*`) rather
    than a weighted average. Users are encouraged to use `sum_over_timesteps`
    and `sum_over_batch` for reduction.

    Args:
      logits: A Tensor of shape
        `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
        The logits correspond to the prediction across all classes at each
        timestep.
      targets: A Tensor of shape `[batch_size, sequence_length]` and dtype
        int. The target represents the true class at each timestep.
      weights: A Tensor of shape `[batch_size, sequence_length]` and dtype
        float. `weights` constitutes the weighting of each prediction in the
        sequence. When using `weights` as masking, set all valid timesteps to 1
        and all padded timesteps to 0, e.g. a mask returned by
        `tf.sequence_mask`.
      average_across_timesteps: If set, sum the cost across the sequence
        dimension and divide the cost by the total label weight across
        timesteps.
      average_across_batch: If set, sum the cost across the batch dimension and
        divide the returned cost by the batch size.
      sum_over_timesteps: If set, sum the cost across the sequence dimension
        and divide by the size of the sequence. Note that any element with a
        weight of 0 will be excluded from the size calculation.
      sum_over_batch: If set, sum the cost across the batch dimension and
        divide the total cost by the batch size. Note that any element with a
        weight of 0 will be excluded from the size calculation.
      softmax_loss_function: Function (labels, logits) -> loss-batch
        to be used instead of the standard softmax (the default if this is
        None). **Note that to avoid confusion, it is required for the function
        to accept named arguments.**
      name: Optional name for this operation, defaults to "sequence_loss".

    Returns:
      A float Tensor of rank 0, 1, or 2 depending on the
      `average_across_timesteps` and `average_across_batch` arguments. By
      default, it has rank 0 (scalar) and is the weighted average cross-entropy
      (log-perplexity) per symbol.

    Raises:
      ValueError: if `logits` does not have 3 dimensions, `targets` does not
        have 2 dimensions, or `weights` does not have 2 dimensions.
    """
    if len(logits.get_shape()) != 3:
        raise ValueError("Logits must be a "
                         "[batch_size x sequence_length x logits] tensor")
    if len(targets.get_shape()) != 2:
        raise ValueError(
            "Targets must be a [batch_size x sequence_length] tensor")
    if len(weights.get_shape()) != 2:
        raise ValueError(
            "Weights must be a [batch_size x sequence_length] tensor")
    if average_across_timesteps and sum_over_timesteps:
        raise ValueError(
            "average_across_timesteps and sum_over_timesteps cannot "
            "be set to True at same time.")
    if average_across_batch and sum_over_batch:
        raise ValueError(
            "average_across_batch and sum_over_batch cannot be set "
            "to True at same time.")
    with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
        num_classes = array_ops.shape(logits)[2]
        logits_flat = array_ops.reshape(logits, [-1, num_classes])
        targets = array_ops.reshape(targets, [-1])
        if softmax_loss_function is None:
            crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                labels=targets, logits=logits_flat)
        else:
            crossent = softmax_loss_function(labels=targets,
                                             logits=logits_flat)
        crossent *= array_ops.reshape(weights, [-1])
        if average_across_timesteps and average_across_batch:
            crossent = math_ops.reduce_sum(crossent)
            total_size = math_ops.reduce_sum(weights)
            crossent = math_ops.div_no_nan(crossent, total_size)
        elif sum_over_timesteps and sum_over_batch:
            crossent = math_ops.reduce_sum(crossent)
            total_count = math_ops.cast(math_ops.count_nonzero(weights),
                                        crossent.dtype)
            crossent = math_ops.div_no_nan(crossent, total_count)
        else:
            crossent = array_ops.reshape(crossent,
                                         array_ops.shape(logits)[0:2])
            if average_across_timesteps or average_across_batch:
                reduce_axis = [0] if average_across_batch else [1]
                crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
                total_size = math_ops.reduce_sum(weights, axis=reduce_axis)
                crossent = math_ops.div_no_nan(crossent, total_size)
            elif sum_over_timesteps or sum_over_batch:
                reduce_axis = [0] if sum_over_batch else [1]
                crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
                total_count = math_ops.cast(math_ops.count_nonzero(
                    weights, axis=reduce_axis),
                                            dtype=crossent.dtype)
                crossent = math_ops.div_no_nan(crossent, total_count)
        return crossent
def _is_all_zeros(grad):
  all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
  return all_zeros