Example #1
def _lower_triangular_mask(shape):
  """Creates a lower-triangular boolean mask over the last 2 dimensions."""
  row_index = math_ops.cumsum(
      array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-2)
  col_index = math_ops.cumsum(
      array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-1)
  return math_ops.greater_equal(row_index, col_index)
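The mask is built by turning two cumulative sums over a tensor of ones into 1-based row and column indices, then comparing them. A minimal NumPy sketch of the same idea (a hypothetical standalone helper, not the TensorFlow code above):

import numpy as np

def lower_triangular_mask(shape):
    # cumsum over ones yields 1-based row / column indices over the last 2 dims
    ones = np.ones(shape, dtype=np.int32)
    row_index = np.cumsum(ones, axis=-2)
    col_index = np.cumsum(ones, axis=-1)
    return row_index >= col_index

print(lower_triangular_mask([3, 3]).astype(int))
# [[1 0 0]
#  [1 1 0]
#  [1 1 1]]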
Example #2
 def testAxisType(self):
   for dtype in self.valid_dtypes:
     x = np.arange(1, 6).reshape([5]).astype(dtype)
     for axis_dtype in self.axis_dtypes():
       with self.cached_session(), self.test_scope():
         p = array_ops.placeholder(x.dtype)
         axis = constant_op.constant(0, axis_dtype)
         math_ops.cumsum(p, axis).eval(feed_dict={p: x})
Example #3
 def testAxisType(self):
   for dtype in self.valid_dtypes:
     x = np.arange(1, 6).reshape([5]).astype(dtype)
     for axis_dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True):
         axis = constant_op.constant(0, axis_dtype)
         tf_out = math_ops.cumsum(x, axis).eval()
Example #4
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above.
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)
  identity_like_power_matrix = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity_like_power_matrix.set_shape(
      ops.convert_to_tensor(power_matrix).get_shape())
  transition_powers = functional_ops.scan(
      lambda previous_power, _: math_ops.matmul(previous_power, power_matrix),
      math_ops.range(array_size - 1),
      initializer=identity_like_power_matrix)
  summed = math_ops.cumsum(
      array_ops.concat([
          array_ops.expand_dims(multiplier, 0), math_ops.matmul(
              batch_times_matrix(transition_powers, multiplier),
              transition_powers,
              adjoint_b=True)
      ], 0))
  return array_ops.concat(
      [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
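As a sanity check on the recurrence in the docstring, a small NumPy sketch (with made-up values for A and B) builds the first few partial sums directly and confirms that the second non-trivial sum equals A B A^T + B:

import numpy as np

A = np.array([[0.9, 0.1], [0.0, 0.8]])
B = np.array([[1.0, 0.2], [0.2, 2.0]])

# terms[i] = A^i B (A^i)^T; a cumulative sum over i gives S[1], S[2], ...
terms = np.stack([np.linalg.matrix_power(A, i) @ B @ np.linalg.matrix_power(A, i).T
                  for i in range(3)])
partial_sums = np.cumsum(terms, axis=0)   # partial_sums[k] corresponds to S[k + 1]
print(np.allclose(partial_sums[1], A @ B @ A.T + B))   # True, matches S[2] above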
Example #5
 def _compareGradient(self, shape, axis, exclusive, reverse):
   x = np.arange(0, 50).reshape(shape).astype(np.float64)
   with self.cached_session(use_gpu=True):
     t = ops.convert_to_tensor(x)
     result = math_ops.cumsum(t, axis, exclusive, reverse)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, shape, result, shape, x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
Example #6
  def _compare(self, x, axis, exclusive, reverse):
    np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
    with self.cached_session(), self.test_scope():
      p = array_ops.placeholder(x.dtype)
      tf_out = math_ops.cumsum(p, axis, exclusive, reverse).eval(
          feed_dict={p: x})

    self.assertAllClose(np_out, tf_out)
Example #7
def _CumsumGrad(op, grad):
  axis = op.inputs[1]
  exclusive = op.get_attr("exclusive")
  reverse = op.get_attr("reverse")
  return [
      math_ops.cumsum(grad, axis, exclusive=exclusive, reverse=not reverse),
      None
  ]
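This gradient registration says that backpropagating through cumsum is itself a cumsum of the upstream gradient with the `reverse` flag flipped. A quick check with TensorFlow 2.x eager mode (a sketch, using made-up upstream weights):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0, 4.0])
upstream = tf.constant([1.0, 0.5, 0.25, 0.125])
with tf.GradientTape() as tape:
    tape.watch(x)
    loss = tf.reduce_sum(tf.cumsum(x) * upstream)
grad = tape.gradient(loss, x)
# Closed form: reversed cumsum of the upstream gradient
print(grad.numpy())                                # [1.875 0.875 0.375 0.125]
print(tf.cumsum(upstream, reverse=True).numpy())   # same values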
Example #8
def segment_ids_to_row_splits(segment_ids, num_segments=None,
                              out_type=None, name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`,
      or `tf.int64` if `segment_ids` does not have a dtype.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
  if out_type is None:
    if isinstance(segment_ids, ops.Tensor):
      out_type = segment_ids.dtype
    elif isinstance(num_segments, ops.Tensor):
      out_type = num_segments.dtype
    else:
      out_type = dtypes.int64
  else:
    out_type = dtypes.as_dtype(out_type)
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    # Note: we cast int64 tensors to int32, since bincount currently only
    # supports int32 inputs.
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids",
                                                    dtype=dtypes.int32)
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(num_segments,
                                                       "num_segments",
                                                       dtype=dtypes.int32)
      num_segments.shape.assert_has_rank(0)

    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=out_type)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
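The core of the computation is `bincount` followed by `cumsum` with a leading zero prepended. A NumPy sketch of the same steps for the docstring example (plain NumPy, not the TensorFlow API):

import numpy as np

segment_ids = np.array([0, 0, 0, 2, 2, 3, 4, 4, 4])
row_lengths = np.bincount(segment_ids)                  # [3 0 2 1 3]
splits = np.concatenate([[0], np.cumsum(row_lengths)])
print(splits)                                           # [0 3 3 5 6 9]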
Example #9
def _CumprodGrad(op, grad):
  x = op.inputs[0]
  axis = op.inputs[1]
  reverse = op.get_attr("reverse")

  # TODO This fails when x contains 0 and should be fixed
  prod = math_ops.cumprod(x, axis=axis, reverse=reverse)
  out = math_ops.cumsum(prod * grad, axis=axis, reverse=(not reverse))
  return [out / x, None]
Example #10
def _effective_sample_size_single_state(states, filter_beyond_lag,
                                        filter_threshold):
  """ESS computation for one single Tensor argument."""

  with ops.name_scope(
      "effective_sample_size_single_state",
      values=[states, filter_beyond_lag, filter_threshold]):

    states = ops.convert_to_tensor(states, name="states")
    dt = states.dtype

    # filter_beyond_lag == None ==> auto_corr is the full sequence.
    auto_corr = sample_stats.auto_correlation(
        states, axis=0, max_lags=filter_beyond_lag)
    if filter_threshold is not None:
      filter_threshold = ops.convert_to_tensor(
          filter_threshold, dtype=dt, name="filter_threshold")
      # Get a binary mask to zero out values of auto_corr below the threshold.
      #   mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i,
      #   mask[i, ...] = 0, otherwise.
      # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...]
      # Building step by step,
      #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2.
      # Step 1:  mask = [False, False, True, False]
      mask = auto_corr < filter_threshold
      # Step 2:  mask = [0, 0, 1, 0]
      mask = math_ops.cast(mask, dtype=dt)
      # Step 3:  mask = [0, 0, 1, 1]
      mask = math_ops.cumsum(mask, axis=0)
      # Step 4:  mask = [1, 1, 0, 0]
      mask = math_ops.maximum(1. - mask, 0.)
      auto_corr *= mask

    # With R[k] := auto_corr[k, ...],
    # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]}
    #     = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1)
    #     approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]}
    # where M is the filter_beyond_lag truncation point chosen above.

    # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total
    # ndims the same as auto_corr
    n = _axis_size(states, axis=0)
    k = math_ops.range(0., _axis_size(auto_corr, axis=0))
    nk_factor = (n - k) / n
    if auto_corr.shape.ndims is not None:
      new_shape = [-1] + [1] * (auto_corr.shape.ndims - 1)
    else:
      new_shape = array_ops.concat(
          ([-1],
           array_ops.ones([array_ops.rank(auto_corr) - 1], dtype=dtypes.int32)),
          axis=0)
    nk_factor = array_ops.reshape(nk_factor, new_shape)

    return n / (-1 + 2 * math_ops.reduce_sum(nk_factor * auto_corr, axis=0))
Example #11
 def make_tril_ids(n):
   """Internal helper to create vector of linear indices into y."""
   cols = array_ops.reshape(array_ops.tile(math_ops.range(n), [n]), [n, n])
   rows = array_ops.tile(
       array_ops.expand_dims(math_ops.range(n), -1), [1, n])
   pred = math_ops.greater(cols, rows)
   tril_ids = array_ops.tile(array_ops.reshape(
       math_ops.cumsum(math_ops.range(n)), [n, 1]), [1, n]) + cols
   tril_ids = math_ops.select(pred,
                              array_ops.zeros([n, n], dtype=dtypes.int32),
                              tril_ids + 1)
   tril_ids = array_ops.reshape(tril_ids, [-1])
   return tril_ids
Example #12
  def test_searchsorted(self):
    sorted_inputs = math_ops.cumsum(random_ops.random_uniform([3, 2, 4]),
                                    axis=-1)
    values = random_ops.random_uniform([2, 3], minval=-1, maxval=4.5)

    def loop_fn(i):
      inputs_i = array_ops.gather(sorted_inputs, i)
      return [array_ops.searchsorted(inputs_i, values, out_type=dtypes.int32,
                                     side="left"),  # creates LowerBound op.
              array_ops.searchsorted(inputs_i, values, out_type=dtypes.int64,
                                     side="right")]  # creates UpperBound op.

    self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.int32, dtypes.int64])
Example #13
  def testReadmeExample(self):
    data = random_ops.random_uniform((128, 128), 0, 10, dtype=dtypes.int32)
    histogram = math_ops.bincount(data, minlength=10, maxlength=10)
    cdf = math_ops.cumsum(histogram, exclusive=False)
    cdf = array_ops.pad(cdf, [[1, 0]])
    cdf = array_ops.reshape(cdf, [1, 1, -1])

    data = math_ops.cast(data, dtypes.int16)
    encoded = coder_ops.range_encode(data, cdf, precision=14)
    decoded = coder_ops.range_decode(
        encoded, array_ops.shape(data), cdf, precision=14)

    with self.test_session() as sess:
      self.assertAllEqual(*sess.run((data, decoded)))
Example #14
  def _update_mask(self, weights, threshold):
    """Updates the mask for a given weight tensor.

    This function first computes the cdf of the weight tensor, and estimates
    the threshold value such that 'desired_sparsity' fraction of weights
    have magnitude less than the threshold.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold.

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if sparsity is not defined
    """
    if self._sparsity is None:
      raise ValueError('Sparsity variable undefined')

    with ops.name_scope(weights.op.name + '_pruning_ops'):
      abs_weights = math_ops.abs(weights)
      max_value = math_ops.reduce_max(abs_weights)
      histogram = _histogram(
          abs_weights, [0.0, max_value],
          nbins=self._spec.nbins,
          dtype=np.float32)

      cdf = math_ops.cumsum(histogram)
      norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram))
      current_threshold = math_ops.multiply(
          math_ops.div(
              math_ops.reduce_sum(
                  math_ops.cast(
                      math_ops.less(norm_cdf, self._sparsity), np.float32)),
              float(self._spec.nbins)), max_value)

      smoothed_threshold = math_ops.add_n([
          math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay),
          math_ops.multiply(threshold, self._spec.threshold_decay)
      ])
      new_mask = math_ops.cast(
          math_ops.greater(abs_weights, smoothed_threshold), np.float32)
    return smoothed_threshold, new_mask
Example #15
def _randomize(coeffs, radixes, seed=None):
  """Applies the Owen randomization to the coefficients."""
  given_dtype = coeffs.dtype
  coeffs = math_ops.to_int32(coeffs)
  num_coeffs = array_ops.shape(coeffs)[-1]
  radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1])
  perms = _get_permutations(num_coeffs, radixes, seed=seed)
  perms = array_ops.reshape(perms, [-1])
  radix_sum = math_ops.reduce_sum(radixes)
  radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True),
                                    [-1, 1])
  offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum
  permuted_coeffs = array_ops.gather(perms, coeffs + offsets)
  return math_ops.cast(permuted_coeffs, dtype=given_dtype)
Example #16
def sparsemax(logits, name=None):
  """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax", [logits]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    obs = array_ops.shape(logits)[0]
    dims = array_ops.shape(logits)[1]

    z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]

    # sort z
    z_sorted, _ = nn.top_k(z, k=dims)

    # calculate k(z)
    z_cumsum = math_ops.cumsum(z_sorted, axis=1)
    k = math_ops.range(
        1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype
    )
    z_check = 1 + k * z_sorted > z_cumsum
    # because the z_check vector is always [1,1,...1,0,0,...0] finding the
    # (index + 1) of the last `1` is the same as just summing the number of 1.
    k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

    # calculate tau(z)
    indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
    tau_sum = array_ops.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

    # calculate p
    return math_ops.maximum(
        math_ops.cast(0, logits.dtype),
        z - tau_z[:, array_ops.newaxis]
    )
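To make the k(z) / tau(z) steps concrete, here is a NumPy walk-through of sparsemax for a single row of logits (a sketch of the same algorithm with made-up numbers, not the TensorFlow implementation above):

import numpy as np

z = np.array([1.0, 0.8, 0.1])   # mean-shift omitted: sparsemax is shift-invariant
z_sorted = np.sort(z)[::-1]
z_cumsum = np.cumsum(z_sorted)
k = np.arange(1, len(z) + 1)
k_z = np.sum(1 + k * z_sorted > z_cumsum)   # support size k(z) = 2
tau_z = (z_cumsum[k_z - 1] - 1) / k_z       # threshold tau(z) = 0.4
p = np.maximum(z - tau_z, 0.0)
print(p, p.sum())                           # [0.6 0.4 0. ] 1.0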
Example #17
    def test_searchsorted(self):
        sorted_inputs = math_ops.cumsum(random_ops.random_uniform([3, 2, 4]),
                                        axis=-1)
        values = random_ops.random_uniform([2, 3], minval=-1, maxval=4.5)

        def loop_fn(i):
            inputs_i = array_ops.gather(sorted_inputs, i)
            return [
                array_ops.searchsorted(inputs_i,
                                       values,
                                       out_type=dtypes.int32,
                                       side="left"),  # creates LowerBound op.
                array_ops.searchsorted(inputs_i,
                                       values,
                                       out_type=dtypes.int64,
                                       side="right")
            ]  # creates UpperBound op.

        self._test_loop_fn(loop_fn, 3)
Example #18
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Generates the RaggedTensor `splits` vector corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(num_segments,
                                                       "num_segments")
      num_segments.shape.assert_has_rank(0)

    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=dtypes.int64)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
Example #19
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(num_segments,
                                                       "num_segments")
      num_segments.shape.assert_has_rank(0)

    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=dtypes.int64)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
Example #20
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values.  As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))).  This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.
  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
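A short NumPy check of the log-space identity the docstring relies on, exp(cumsum(log(x))) == cumprod(x) for positive x (a sketch; the clip to `tiny` is omitted here):

import numpy as np

x = np.array([0.9, 1e-8, 0.5, 0.3])
print(np.allclose(np.exp(np.cumsum(np.log(x))), np.cumprod(x)))   # True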
Example #22
def from_row_lengths(values, row_lengths, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values.pop(0) for i in range(length)]
            for length in row_lengths]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative.
      `sum(row_lengths)` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_lengths(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_lengths=[4, 0, 3, 1, 0])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]):
    values = convert_to_tensor_or_ragged_tensor(values, name='values')
    row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64,
                                        'row_lengths')
    row_lengths.shape.assert_has_rank(1)
    row_limits = math_ops.cumsum(row_lengths)
    row_splits = array_ops.concat([[0], row_limits], axis=0)
    return ragged_tensor.RaggedTensor(
        values=values,
        row_splits=row_splits,
        cached_row_lengths=row_lengths,
        internal=True)
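The `row_splits` vector is just a cumulative sum of `row_lengths` with a leading zero. For the docstring example, a NumPy sketch:

import numpy as np

row_lengths = np.array([4, 0, 3, 1, 0])
row_splits = np.concatenate([[0], np.cumsum(row_lengths)])
print(row_splits)   # [0 4 4 7 8 8]; row i of the result is values[splits[i]:splits[i+1]]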
Example #23
def _sparsemax(logits, name=None):
    """Computes sparsemax activations [1].

    For each batch `i` and class `j` we have
    sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)

    [1]: https://arxiv.org/abs/1602.02068

    :param logits: A `Tensor` of logits.

    Returns:
    A `Tensor`. Has the same type as `logits`.
    """

    with ops.name_scope(name, "sparsemax", [logits]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        obs = logits.shape[0]
        dims = logits.shape[1]

        z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]

        # sort z
        z_sorted, _ = nn.top_k(z, k=dims)

        # calculate k(z)
        z_cumsum = math_ops.cumsum(z_sorted, axis=1)
        k = math_ops.range(
            1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # because the z_check vector is always [1,1,...1,0,0,...0] finding the
        # (index + 1) of the last `1` is the same as just summing the number of 1.
        k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

        # calculate tau(z)
        indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
        tau_sum = array_ops.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

        # calculate p
        return math_ops.maximum(
            math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
Example #24
def compute_cdf_from_histogram(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Computes the histogram and uses tf.cumsum to arrive at cdf

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
    **kwargs: keyword arguments: nbins, name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.

  """
  nbins = kwargs.get('nbins', _NBINS)
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    histogram = _histogram(
        values, value_range, dtype=dtypes.float32, nbins=nbins)
    cdf = math_ops.cumsum(histogram)
    return math_ops.div(cdf, math_ops.reduce_max(cdf))
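The computation is histogram, then running sum, then division by the total, so the last CDF entry is 1.0. A small NumPy sketch under those assumptions (made-up values and bin count):

import numpy as np

values = np.array([0.1, 0.2, 0.2, 0.7, 0.9])
histogram, _ = np.histogram(values, bins=4, range=(0.0, 1.0))   # [3 0 1 1]
cdf = np.cumsum(histogram).astype(float)
print(cdf / cdf[-1])                                            # [0.6 0.6 0.8 1. ]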
Example #26
 def testInvalidAxis(self):
   x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
   input_tensor = ops.convert_to_tensor(x)
   with self.session(use_gpu=True):
     with self.assertRaisesWithPredicateMatch(
         errors_impl.InvalidArgumentError,
         lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
       math_ops.cumsum(input_tensor, -3).eval()
     with self.assertRaisesWithPredicateMatch(
         errors_impl.InvalidArgumentError,
         lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
       math_ops.cumsum(input_tensor, 2).eval()
     with self.assertRaisesWithPredicateMatch(
         errors_impl.InvalidArgumentError,
         lambda e: "axis must be a scalar" in str(e)):
       math_ops.cumsum(input_tensor, [0]).eval()
Example #27
def from_row_lengths(values, row_lengths, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values.pop(0) for i in range(length)]
            for length in row_lengths]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative.
      `sum(row_lengths)` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_lengths(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_lengths=[4, 0, 3, 1, 0])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64,
                                            'row_lengths')
        row_lengths.shape.assert_has_rank(1)
        row_limits = math_ops.cumsum(row_lengths)
        row_splits = array_ops.concat([[0], row_limits], axis=0)
        return ragged_tensor.RaggedTensor(values=values,
                                          row_splits=row_splits,
                                          cached_row_lengths=row_lengths,
                                          internal=True)
Example #28
 def testInvalidAxis(self):
     x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
     input_tensor = ops.convert_to_tensor(x)
     with self.session():
         with self.assertRaisesWithPredicateMatch(
                 errors_impl.InvalidArgumentError, lambda e:
                 "Expected scan axis in the range [-2, 2)" in str(e)):
             math_ops.cumsum(input_tensor, -3).eval()
         with self.assertRaisesWithPredicateMatch(
                 errors_impl.InvalidArgumentError, lambda e:
                 "Expected scan axis in the range [-2, 2)" in str(e)):
             math_ops.cumsum(input_tensor, 2).eval()
         with self.assertRaisesWithPredicateMatch(
                 errors_impl.InvalidArgumentError,
                 lambda e: "axis must be a scalar" in str(e)):
             math_ops.cumsum(input_tensor, [0]).eval()
Example #29
def kaiser_bessel_derived_window(window_length, beta=12.,
                                 dtype=dtypes.float32, name=None):
  """Generate a [Kaiser Bessel derived window][kbd].

  Args:
    window_length: A scalar `Tensor` indicating the window length to generate.
    beta: Beta parameter for Kaiser window.
    dtype: The data type to produce. Must be a floating point type.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of shape `[window_length]` of type `dtype`.

  [kbd]:
    https://en.wikipedia.org/wiki/Kaiser_window#Kaiser%E2%80%93Bessel-derived_(KBD)_window
  """
  with ops.name_scope(name, 'kaiser_bessel_derived_window'):
    window_length = _check_params(window_length, dtype)
    halflen = window_length // 2
    kaiserw = kaiser_window(halflen + 1, beta, dtype=dtype)
    kaiserw_csum = math_ops.cumsum(kaiserw)
    halfw = math_ops.sqrt(kaiserw_csum[:-1] / kaiserw_csum[-1])
    window = array_ops.concat((halfw, halfw[::-1]), axis=0)
  return window
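Half of a KBD window is the square root of the normalized running sum of a Kaiser window, mirrored to form the full window. A NumPy sketch of the same construction (using `np.kaiser` in place of the TF `kaiser_window` helper):

import numpy as np

window_length, beta = 8, 12.0
halflen = window_length // 2
kaiserw = np.kaiser(halflen + 1, beta)
kaiserw_csum = np.cumsum(kaiserw)
halfw = np.sqrt(kaiserw_csum[:-1] / kaiserw_csum[-1])
window = np.concatenate([halfw, halfw[::-1]])
print(window.shape)   # (8,)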
Example #30
    def sample(self, time, outputs, state, name=None):
        """sample for SampleEmbeddingHelper."""
        del time, state  # unused by sample_fn
        # Outputs are logits, we sample instead of argmax (greedy).
        if not isinstance(outputs, ops.Tensor):
            raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                            type(outputs))

        probs = nn_ops.softmax(outputs, -1)
        sorted_args = sort_ops.argsort(probs, -1, direction='DESCENDING')
        sorted_nucleus_probs = math_ops.cumsum(
            sort_ops.sort(probs, -1, direction='DESCENDING'),
            -1) < self._nucleus
        nucleus_probs = array_ops.gather(sorted_nucleus_probs,
                                         sort_ops.argsort(
                                             sorted_args,
                                             -1,
                                             direction='ASCENDING'),
                                         batch_dims=1)
        argmax_probs = array_ops.one_hot(math_ops.argmax(outputs, -1),
                                         depth=array_ops.shape(outputs)[-1],
                                         on_value=True,
                                         off_value=False,
                                         dtype=dtypes.bool)
        outputs = array_ops.where(
            (nucleus_probs | argmax_probs), outputs,
            -np.inf * array_ops.ones_like(outputs, dtype=dtypes.float32))

        if self._softmax_temperature is None:
            logits = outputs
        else:
            logits = outputs / self._softmax_temperature

        sample_ids = categorical_sample(logits=logits, seed=self._seed)

        return sample_ids
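The cumsum here implements nucleus (top-p) filtering: sort the probabilities in descending order, keep the prefix whose cumulative mass stays below the nucleus threshold, and always keep the argmax token. A NumPy sketch of the masking step for a single distribution (hypothetical values):

import numpy as np

probs = np.array([0.05, 0.5, 0.1, 0.35])
nucleus = 0.9
order = np.argsort(-probs)                        # descending sort order
sorted_keep = np.cumsum(probs[order]) < nucleus   # [ True  True False False]
keep = np.empty_like(sorted_keep)
keep[order] = sorted_keep                         # scatter back to original order
keep[np.argmax(probs)] = True                     # never mask the argmax token
print(keep)                                       # [False  True False  True]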
Example #31
        def prop_raw(x):
            obs = array_ops.shape(x)[0]
            dim = array_ops.shape(x)[1]

            z = x - math_ops.reduce_mean(x, axis=1)[:, array_ops.newaxis]

            z_sorted, _ = nn.top_k(z, k=dim)

            z_cumsum = math_ops.cumsum(z_sorted, axis=1)
            k = math_ops.range(1,
                               math_ops.cast(dim, x.dtype) + 1,
                               dtype=x.dtype)
            z_check = 1 + k * z_sorted > z_cumsum

            k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32),
                                      axis=1)

            indices = array_ops.stack([math_ops.range(0, obs), k_z - 1],
                                      axis=1)
            tau_sum = array_ops.gather_nd(z_cumsum, indices)
            tau_z = (tau_sum - 1) / math_ops.cast(k_z, x.dtype)

            return math_ops.maximum(math_ops.cast(0, x.dtype),
                                    z - tau_z[:, array_ops.newaxis])
Example #32
def monotonic_attention(p_choose_i, previous_attention, mode):
  """Compute monotonic attention distribution from choosing probabilities.

  Monotonic attention implies that the input sequence is processed in an
  explicitly left-to-right manner when generating the output sequence.  In
  addition, once an input sequence element is attended to at a given output
  timestep, elements occurring before it cannot be attended to at subsequent
  output timesteps.  This function generates attention distributions according
  to these assumptions.  For more information, see ``Online and Linear-Time
  Attention by Enforcing Monotonic Alignments''.

  Args:
    p_choose_i: Probability of choosing input sequence/memory element i.  Should
      be of shape (batch_size, input_sequence_length), and should all be in the
      range [0, 1].
    previous_attention: The attention distribution from the previous output
      timestep.  Should be of shape (batch_size, input_sequence_length).  For
      the first output timestep, previous_attention[n] should be [1, 0, 0, ...,
      0] for all n in [0, ... batch_size - 1].
    mode: How to compute the attention distribution.  Must be one of
      'recursive', 'parallel', or 'hard'.
        * 'recursive' uses tf.scan to recursively compute the distribution.
          This is slowest but is exact, general, and does not suffer from
          numerical instabilities.
        * 'parallel' uses parallelized cumulative-sum and cumulative-product
          operations to compute a closed-form solution to the recurrence
          relation defining the attention distribution.  This makes it more
          efficient than 'recursive', but it requires numerical checks which
          make the distribution non-exact.  This can be a problem in particular
          when input_sequence_length is long and/or p_choose_i has entries very
          close to 0 or 1.
        * 'hard' requires that the probabilities in p_choose_i are all either 0
          or 1, and subsequently uses a more efficient and exact solution.

  Returns:
    A tensor of shape (batch_size, input_sequence_length) representing the
    attention distributions for each sequence in the batch.

  Raises:
    ValueError: mode is not one of 'recursive', 'parallel', 'hard'.
  """
  # Force things to be tensors
  p_choose_i = ops.convert_to_tensor(p_choose_i, name="p_choose_i")
  previous_attention = ops.convert_to_tensor(
      previous_attention, name="previous_attention")
  if mode == "recursive":
    # Use .shape[0].value when it's not None, or fall back on symbolic shape
    batch_size = p_choose_i.shape[0].value or array_ops.shape(p_choose_i)[0]
    # Compute [1, 1 - p_choose_i[0], 1 - p_choose_i[1], ..., 1 - p_choose_i[-2]]
    shifted_1mp_choose_i = array_ops.concat(
        [array_ops.ones((batch_size, 1)), 1 - p_choose_i[:, :-1]], 1)
    # Compute attention distribution recursively as
    # q[i] = (1 - p_choose_i[i])*q[i - 1] + previous_attention[i]
    # attention[i] = p_choose_i[i]*q[i]
    attention = p_choose_i*array_ops.transpose(functional_ops.scan(
        # Need to use reshape to remind TF of the shape between loop iterations
        lambda x, yz: array_ops.reshape(yz[0]*x + yz[1], (batch_size,)),
        # Loop variables yz[0] and yz[1]
        [array_ops.transpose(shifted_1mp_choose_i),
         array_ops.transpose(previous_attention)],
        # Initial value of x is just zeros
        array_ops.zeros((batch_size,))))
  elif mode == "parallel":
    # safe_cumprod computes cumprod in logspace with numeric checks
    cumprod_1mp_choose_i = safe_cumprod(1 - p_choose_i, axis=1, exclusive=True)
    # Compute recurrence relation solution
    attention = p_choose_i*cumprod_1mp_choose_i*math_ops.cumsum(
        previous_attention /
        # Clip cumprod_1mp to avoid divide-by-zero
        clip_ops.clip_by_value(cumprod_1mp_choose_i, 1e-10, 1.), axis=1)
  elif mode == "hard":
    # Remove any probabilities before the index chosen last time step
    p_choose_i *= math_ops.cumsum(previous_attention, axis=1)
    # Now, use exclusive cumprod to remove probabilities after the first
    # chosen index, like so:
    # p_choose_i = [0, 0, 0, 1, 1, 0, 1, 1]
    # cumprod(1 - p_choose_i, exclusive=True) = [1, 1, 1, 1, 0, 0, 0, 0]
    # Product of above: [0, 0, 0, 1, 0, 0, 0, 0]
    attention = p_choose_i*math_ops.cumprod(
        1 - p_choose_i, axis=1, exclusive=True)
  else:
    raise ValueError("mode must be 'recursive', 'parallel', or 'hard'.")
  return attention
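The 'parallel' branch uses the closed-form solution of the recurrence q[i] = (1 - p[i-1]) * q[i-1] + previous_attention[i], namely q = cumprod(1 - p, exclusive) * cumsum(previous_attention / cumprod(1 - p, exclusive)). A NumPy sketch comparing the two forms on one made-up sequence (no clipping, so it assumes p stays away from 1):

import numpy as np

p = np.array([0.2, 0.7, 0.4, 0.9])        # p_choose_i for one batch element
prev = np.array([1.0, 0.0, 0.0, 0.0])     # previous_attention at the first step

# Recursive form
q = np.zeros_like(p)
q[0] = prev[0]
for i in range(1, len(p)):
    q[i] = (1 - p[i - 1]) * q[i - 1] + prev[i]
recursive = p * q

# Parallel closed form, with an exclusive cumprod of (1 - p)
cumprod_1mp = np.concatenate([[1.0], np.cumprod(1 - p)[:-1]])
parallel = p * cumprod_1mp * np.cumsum(prev / cumprod_1mp)
print(np.allclose(recursive, parallel))   # True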
Example #33
def lengths_to_splits(lengths):
    """Returns splits corresponding to the given lengths."""
    return array_ops.concat([[0], math_ops.cumsum(lengths)], axis=-1)
Example #34
def split(value: ragged_tensor.Ragged,
          num_or_size_splits,
          axis=0,
          num=None,
          name=None):
  """Splits a RaggedTensor `value` into a list of sub RaggedTensors.

  If `num_or_size_splits` is an `int`,  then it splits `value` along the
  dimension `axis` into `num_or_size_splits` smaller RaggedTensors. This
  requires that `value.shape[axis]` is divisible by `num_or_size_splits`.

  If `num_or_size_splits` is a 1-D Tensor (or list), then `value` is split into
  `len(num_or_size_splits)` elements. The shape of the `i`-th element has the
  same size as the `value` except along dimension `axis` where the size is
  `num_or_size_splits[i]`.

  Splitting along a ragged dimension is not allowed.

  For example:

  >>> rt = tf.RaggedTensor.from_row_lengths(
  ...      np.arange(6 * 3).reshape(6, 3), row_lengths=[1, 2, 2, 1])
  >>> rt.shape
  TensorShape([4, None, 3])
  >>>
  >>> rt1, rt2 = tf.split(rt, 2)  # uniform splits
  >>> rt1.shape
  TensorShape([2, None, 3])
  >>> rt2.shape
  TensorShape([2, None, 3])
  >>>
  >>> rt3, rt4, rt5 = tf.split(rt, [1, 2, 1])  # ragged splits
  >>> rt3.shape
  TensorShape([1, None, 3])
  >>> rt4.shape
  TensorShape([2, None, 3])
  >>> rt5.shape
  TensorShape([1, None, 3])
  >>>
  >>> rt6, rt7 = tf.split(rt, [1, 2], axis=2)  # splits along axis 2
  >>> rt6.shape
  TensorShape([4, None, 1])
  >>> rt7.shape
  TensorShape([4, None, 2])

  Args:
    value: The `RaggedTensor` to split.
    num_or_size_splits: Either an `int` indicating the number of splits
      along `axis` or a 1-D integer `Tensor` or Python list containing the sizes
      of each output tensor along `axis`. If a Python int, then it must evenly
      divide `value.shape[axis]`; otherwise the sum of sizes along the split
      axis must match that of the `value`.
    axis: An `int` or scalar `int32` `Tensor`. The dimension along which
      to split. Must be in the range `[-rank(value), rank(value))`. Defaults to
      0.
    num: An `int` used to specify the number of outputs when
      `num_or_size_splits` is a 1-D list or `Tensor` and its length is
      statically unknown, e.g., specifying `tf.TensorSpec(None)` with
      the `input_signature` argument of `tf.function` (optional).
    name: A name for the operation (optional).

  Returns:
    if `num_or_size_splits` is an `int` returns a list of `num_or_size_splits`
    `RaggedTensor` objects; if `num_or_size_splits` is a 1-D Tensor returns
    `num_or_size_splits.get_shape[0]` `RaggedTensor` objects resulting from
    splitting `value`.

  Raises:
    ValueError: If the dimension `axis` of `value` is a ragged dimension.
    ValueError: If `num` is unspecified and cannot be inferred.
    ValueError: If `num` is specified but doesn't match the length of
      `num_or_size_splits`.
    ValueError: If `num_or_size_splits` is an `int` and less than 1.
    TypeError: If `num_or_size_splits` is not an `int` or 1-D
      list or 1-D `Tensor`.
    InvalidArgumentError: If the `axis` dimension of `value` cannot be exactly
      split by `num_or_size_splits`.
    InvalidArgumentError: If `num_or_size_splits` contains negative integers.
    InvalidArgumentError: If `num_or_size_splits`'s static shape is unknown and
      its dynamic shape is inconsistent with `num`.
    InvalidArgumentError: If `num_or_size_splits`'s static rank is unknown and
      `axis` is a negative integer.
  """
  with ops.name_scope(name, 'RaggedSplit'):
    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        value, name='value')
    if isinstance(num_or_size_splits, int) and num_or_size_splits == 1:
      return [value]

    # static assert
    check_ops.assert_integer_v2(
        num_or_size_splits,
        message=('`num_or_size_splits` must be an `int` or 1-D list or '
                 '`Tensor` of integers.'))
    value_shape = ragged_shape.RaggedShape.from_tensor(value)
    axis = array_ops.get_positive_axis(axis, value_shape.rank)
    try:
      dim_size = value_shape[axis]
    except ValueError:
      raise ValueError('Cannot split a ragged dimension. Got `value` with '
                       f'shape {value_shape} and `axis` {axis}.')
    if isinstance(num_or_size_splits, int):
      # Uniform split
      num_splits = num_or_size_splits
      if num_splits < 1:
        raise ValueError('`num_or_size_splits` must be >=1 if it is an `int`. '
                         f'Received {num_or_size_splits}.')
      split_length = math_ops.floordiv(dim_size, num_splits)
      split_lengths = array_ops.repeat(split_length, num_splits)
    else:
      # Ragged split
      num_splits = None
      split_lengths = ops.convert_to_tensor(num_or_size_splits)
      if split_lengths.shape.ndims is not None:
        if split_lengths.shape.ndims != 1:
          raise TypeError('`num_or_size_splits` must be an `int` or 1-D list '
                          f'or `Tensor`. Received {num_or_size_splits}.')
        num_splits = tensor_shape.dimension_value(split_lengths.shape[0])

      if num_splits is None:
        if num is None:
          raise ValueError('`num` must be specified as an `int` when the '
                           'size of `num_or_size_split` is statically '
                           f'unknown. Received `num`: {num} and '
                           f'`num_or_size_split`: {num_or_size_splits}.')
        num_splits = num
      else:
        if num is not None and num != num_splits:
          raise ValueError('`num` does not match the size of '
                           f'`num_or_size_split`. Received `num`: {num} and '
                           f'size of `num_or_size_split`: {num_splits}.')

    splits = array_ops.concat([[0], math_ops.cumsum(split_lengths)], axis=0)
    checks = []
    checks.append(
        check_ops.assert_non_negative_v2(
            num_or_size_splits,
            message='`num_or_size_splits` must be non-negative.'))
    checks.append(
        check_ops.assert_equal_v2(
            num_splits,
            array_ops.shape(split_lengths)[0],
            message='`num` is inconsistent with `num_or_size_split.shape[0]`.'))
    checks.append(
        check_ops.assert_equal_v2(
            math_ops.cast(dim_size, splits.dtype),
            splits[-1],
            message=('Cannot exactly split the `axis` dimension of `value` '
                     'with the given `num_or_size_split`.')))
    splits = control_flow_ops.with_dependencies(checks, splits)
    splited_rts = []
    slices = [slice(None)] * (axis + 1)
    for i in range(num_splits):
      slices[-1] = slice(splits[i], splits[i + 1])
      splited_rts.append(value[tuple(slices)])
    return splited_rts
Example #35
def _update_confusion_matrix_variables_optimized(
    variables_to_update,
    y_true,
    y_pred,
    thresholds,
    multi_label=False,
    sample_weights=None,
    label_weights=None,
    thresholds_with_epsilon=False):
  """Update confusion matrix variables with memory efficient alternative.

  Note that the thresholds need to be evenly distributed within the list, i.e.,
  the difference between consecutive elements is the same.

  To compute TP/FP/TN/FN, we are measuring a binary classifier
    C(t) = (predictions >= t)
  at each threshold 't'. So we have
    TP(t) = sum( C(t) * true_labels )
    FP(t) = sum( C(t) * false_labels )

  But, computing C(t) requires computation for each t. To make it fast,
  observe that C(t) is a cumulative integral, and so if we have
    thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
  where n = num_thresholds, and if we can compute the bucket function
    B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
  then we get
    C(t_i) = sum( B(j), j >= i )
  which is the reversed cumulative sum in tf.cumsum().

  We can compute B(i) efficiently by taking advantage of the fact that
  our thresholds are evenly distributed, in that
    width = 1.0 / (num_thresholds - 1)
    thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
  Given a prediction value p, we can map it to its bucket by
    bucket_index(p) = floor( p * (num_thresholds - 1) )
  so we can use tf.math.unsorted_segment_sum() to update the buckets in one
  pass.

  Consider the following example:
  y_true = [0, 0, 1, 1]
  y_pred = [0.1, 0.5, 0.3, 0.9]
  thresholds = [0.0, 0.5, 1.0]
  num_buckets = 2   # [0.0, 1.0], (1.0, 2.0]
  bucket_index(y_pred) = tf.math.floor(y_pred * num_buckets)
                       = tf.math.floor([0.2, 1.0, 0.6, 1.8])
                       = [0, 0, 0, 1]
  # The meaning of this bucketing is that if any of the labels is true,
  # then 1 will be added to the bucket with the corresponding index.
  # Eg, if the label for 0.2 is true, then 1 will be added to bucket 0. If the
  # label for 1.8 is true, then 1 will be added to bucket 1.
  #
  # Note the second item "1.0" is floored to 0, since the value needs to be
  # strictly larger than the bucket lower bound.
  # In the implementation, we use tf.math.ceil() - 1 to achieve this.
  tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices,
                                                 num_segments=num_thresholds)
                  = [1, 1, 0]
  # For [1, 1, 0] here, it means there is 1 true value contributed by bucket 0,
  # and 1 value contributed by bucket 1. When we aggregate them together,
  # the result becomes [a + b + c, b + c, c], since larger thresholds always
  # contribute to the value for smaller thresholds.
  true_positive = tf.math.cumsum(tp_bucket_value, reverse=True)
                = [2, 1, 0]

  This implementation exhibits a run time and space complexity of O(T + N),
  where T is the number of thresholds and N is the size of predictions.
  Metrics that rely on standard implementation instead exhibit a complexity of
  O(T * N).

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast
      to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A sorted floating point `Tensor` with values in `[0, 1]`.
      It needs to be evenly distributed (the difference between consecutive
      elements needs to be the same).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    sample_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `y_true` dimension).
    label_weights: Optional tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).
    thresholds_with_epsilon: Optional boolean indicating whether the leading and
      trailing thresholds have any epsilon added for floating point imprecision.
      It changes how the leading and trailing buckets are handled.

  Returns:
    Update op.
  """
  num_thresholds = thresholds.shape.as_list()[0]

  if sample_weights is None:
    sample_weights = 1.0
  else:
    sample_weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weights, dtype=y_pred.dtype), y_pred)
    if not multi_label:
      sample_weights = array_ops.reshape(sample_weights, [-1])
  if label_weights is None:
    label_weights = 1.0
  else:
    label_weights = array_ops.expand_dims(label_weights, 0)
    label_weights = weights_broadcast_ops.broadcast_weights(label_weights,
                                                            y_pred)
    if not multi_label:
      label_weights = array_ops.reshape(label_weights, [-1])
  weights = math_ops.multiply(sample_weights, label_weights)

  # We shouldn't need this, but guard against prediction values that are out of
  # the range [0.0, 1.0].
  y_pred = clip_ops.clip_by_value(y_pred,
                                  clip_value_min=0.0, clip_value_max=1.0)

  y_true = math_ops.cast(math_ops.cast(y_true, dtypes.bool), y_true.dtype)
  if not multi_label:
    y_true = array_ops.reshape(y_true, [-1])
    y_pred = array_ops.reshape(y_pred, [-1])

  true_labels = math_ops.multiply(y_true, weights)
  false_labels = math_ops.multiply((1.0 - y_true), weights)

  # Compute the bucket indices for each prediction value.
  # Since the predict value has to be strictly greater than the thresholds,
  # eg, buckets like [0, 0.5], (0.5, 1], and 0.5 belongs to first bucket.
  # We have to use math.ceil(val) - 1 for the bucket.
  bucket_indices = math_ops.ceil(y_pred * (num_thresholds - 1)) - 1

  if thresholds_with_epsilon:
    # In this case, the first bucket should actually be taken into account,
    # since any prediction in [0.0, 1.0] should be larger than the first
    # threshold. We change the bucket value from -1 to 0.
    bucket_indices = nn_ops.relu(bucket_indices)

  bucket_indices = math_ops.cast(bucket_indices, dtypes.int32)

  if multi_label:
    # We need to run the bucket segment sum for each label class. In the
    # multi_label case, the rank of the labels is 2. We first transpose them so
    # that the label dim becomes the first one and we can run through them in
    # parallel.
    true_labels = array_ops.transpose_v2(true_labels)
    false_labels = array_ops.transpose_v2(false_labels)
    bucket_indices = array_ops.transpose_v2(bucket_indices)

    def gather_bucket(label_and_bucket_index):
      label, bucket_index = label_and_bucket_index[0], label_and_bucket_index[1]
      return math_ops.unsorted_segment_sum(
          data=label, segment_ids=bucket_index, num_segments=num_thresholds)
    tp_bucket_v = vectorized_map(
        gather_bucket, (true_labels, bucket_indices))
    fp_bucket_v = vectorized_map(
        gather_bucket, (false_labels, bucket_indices))
    tp = array_ops.transpose_v2(
        math_ops.cumsum(tp_bucket_v, reverse=True, axis=1))
    fp = array_ops.transpose_v2(
        math_ops.cumsum(fp_bucket_v, reverse=True, axis=1))
  else:
    tp_bucket_v = math_ops.unsorted_segment_sum(
        data=true_labels, segment_ids=bucket_indices,
        num_segments=num_thresholds)
    fp_bucket_v = math_ops.unsorted_segment_sum(
        data=false_labels, segment_ids=bucket_indices,
        num_segments=num_thresholds)
    tp = math_ops.cumsum(tp_bucket_v, reverse=True)
    fp = math_ops.cumsum(fp_bucket_v, reverse=True)

  # fn = sum(true_labels) - tp
  # tn = sum(false_labels) - fp
  if (ConfusionMatrix.TRUE_NEGATIVES in variables_to_update or
      ConfusionMatrix.FALSE_NEGATIVES in variables_to_update):
    if multi_label:
      total_true_labels = math_ops.reduce_sum(true_labels, axis=1)
      total_false_labels = math_ops.reduce_sum(false_labels, axis=1)
    else:
      total_true_labels = math_ops.reduce_sum(true_labels)
      total_false_labels = math_ops.reduce_sum(false_labels)

  update_ops = []
  if ConfusionMatrix.TRUE_POSITIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES]
    update_ops.append(variable.assign_add(tp))
  if ConfusionMatrix.FALSE_POSITIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES]
    update_ops.append(variable.assign_add(fp))
  if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES]
    tn = total_false_labels - fp
    update_ops.append(variable.assign_add(tn))
  if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES]
    fn = total_true_labels - tp
    update_ops.append(variable.assign_add(fn))
  return control_flow_ops.group(update_ops)
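A NumPy sketch of the bucket-then-reversed-cumsum idea from the docstring's worked example (single-label path only; `np.bincount` with weights stands in for `unsorted_segment_sum`):

import numpy as np

y_true = np.array([0., 0., 1., 1.])
y_pred = np.array([0.1, 0.5, 0.3, 0.9])
num_thresholds = 3                        # thresholds [0.0, 0.5, 1.0]

# ceil(p * (T - 1)) - 1 puts 0.5 in the lower bucket, as the docstring explains
bucket_indices = (np.ceil(y_pred * (num_thresholds - 1)) - 1).astype(int)           # [0 0 0 1]
tp_bucket = np.bincount(bucket_indices, weights=y_true, minlength=num_thresholds)   # [1. 1. 0.]
tp = np.cumsum(tp_bucket[::-1])[::-1]     # reversed cumsum
print(tp)                                 # [2. 1. 0.] = TP at thresholds 0.0, 0.5, 1.0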
Example #36
def segment_ids_to_row_splits(segment_ids,
                              num_segments=None,
                              out_type=None,
                              name=None):
    """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  >>> print(tf.ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]))
  tf.Tensor([0 3 3 5 6 9], shape=(6,), dtype=int64)

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`,
      or `tf.int64` if `segment_ids` does not have a dtype.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
    # Local import bincount_ops to avoid import-cycle.
    from tensorflow.python.ops import bincount_ops  # pylint: disable=g-import-not-at-top
    if out_type is None:
        if isinstance(segment_ids, ops.Tensor):
            out_type = segment_ids.dtype
        elif isinstance(num_segments, ops.Tensor):
            out_type = num_segments.dtype
        else:
            out_type = dtypes.int64
    else:
        out_type = dtypes.as_dtype(out_type)
    with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                        [segment_ids]) as name:
        # Note: we cast int64 tensors to int32, since bincount currently only
        # supports int32 inputs.
        segment_ids = ragged_util.convert_to_int_tensor(segment_ids,
                                                        "segment_ids",
                                                        dtype=dtypes.int32)
        segment_ids.shape.assert_has_rank(1)
        if num_segments is not None:
            num_segments = ragged_util.convert_to_int_tensor(
                num_segments, "num_segments", dtype=dtypes.int32)
            num_segments.shape.assert_has_rank(0)

        row_lengths = bincount_ops.bincount(segment_ids,
                                            minlength=num_segments,
                                            maxlength=num_segments,
                                            dtype=out_type)
        splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

        # Update shape information, if possible.
        if num_segments is not None:
            const_num_segments = tensor_util.constant_value(num_segments)
            if const_num_segments is not None:
                splits.set_shape(
                    tensor_shape.TensorShape([const_num_segments + 1]))

        return splits
Example #37
def _strict_1d_cumsum(tensor, len_tensor):
    """Cumsum of a 1D tensor with defined shape."""
    # Assumes tensor shape is fully defined.
    return math_ops.cumsum(tensor)[:len_tensor]
Example #38
def _CumsumGrad(op, grad):
  axis = op.inputs[1]
  exclusive = op.get_attr("exclusive")
  reverse = op.get_attr("reverse")
  return [math_ops.cumsum(grad, axis, exclusive=exclusive,
                          reverse=not reverse), None]
Example #39
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
    """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
      `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.
  Raises:
    ValueError: If both `lengths` and `padding` are specified.
  """
    if lengths is not None and padding is not None:
        raise ValueError('Specify lengths or padding, but not both')
    if not isinstance(ragged_rank, int):
        raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
    if ragged_rank <= 0:
        raise ValueError('ragged_rank must be greater than 0; got %s' %
                         ragged_rank)

    with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
        tensor = ops.convert_to_tensor(tensor, name='tensor')
        tensor.shape.with_rank_at_least(ragged_rank + 1)
        input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
        ncols = input_shape[1]

        # Handle ragged_rank>1 via recursion:
        # If the output should have multiple ragged dimensions, then first
        # flatten the tensor to eliminate all but the last ragged dimension,
        # and recursively convert that flattened tensor.  Then add on the splits
        # for the dimensions that we flattened out.
        if ragged_rank > 1:
            # Flatten `tensor` to eliminate all but the last ragged dimension.
            new_shape = array_ops.concat([
                constant_op.constant([-1], dtypes.int64),
                input_shape[ragged_rank:]
            ],
                                         axis=0)
            flattened = array_ops.reshape(tensor, new_shape)
            # Recursively convert the flattened tensor.
            values = from_tensor(flattened, lengths, padding)
            # The total number of elements in each  dimension.  E.g., if
            # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
            dim_size = math_ops.cumprod(input_shape)
            # Construct splits tensors for the dimensions that were flattened.
            new_splits = [
                math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
                for dim in range(1, ragged_rank)
            ]
            return ragged_factory_ops.from_nested_row_splits(
                values, new_splits)

        # If padding was specified, then use it to find row lengths.
        if padding is not None:
            padding = ops.convert_to_tensor(padding,
                                            name='padding',
                                            dtype=tensor.dtype)
            padding.shape.assert_is_compatible_with(tensor.shape[2:])

            # Find places where the padding is equal to the tensor.  (This will
            # broadcast `padding` across the outermost 2 dimensions of `tensor`,
            # so `has_default_value.shape = tensor.shape`.)
            has_default_value = math_ops.equal(padding, tensor)

            # If the padding isn't a scalar, then require that all values in the
            # padding match each item in the tensor.  After this block of code,
            # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
            # use reduce_all for both cases, because when you pass an empty `axis`
            # list to reduce_all, it reduces all axes; but we want it to reduce no
            # axes -- i.e., to be a no-op.)
            tensor_rank = array_ops.rank(tensor)
            reduce_axis = math_ops.range(2, tensor_rank)
            has_default = control_flow_ops.cond(
                tensor_rank > 2, lambda: math_ops.reduce_all(has_default_value,
                                                             axis=reduce_axis),
                lambda: has_default_value)
            has_default.set_shape(tensor_shape.TensorShape([None, None]))
            has_default.set_shape(tensor.shape[:2])

            # Use has_default to find the length of each row: for each non-default
            # item in a row, calculate the length that the row needs to have to
            # include that item; and then take the max of those values (across each
            # row).
            has_nondefault = math_ops.logical_not(has_default)
            has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
            length_for_nondefault_value = (
                has_nondefault *
                array_ops.expand_dims(math_ops.range(1, ncols + 1), 0))
            lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)

        # If we have lengths (either directly supplied, or computed from paddings),
        # then use those to construct splits; and then use masking to get the
        # corresponding values.
        if lengths is not None:
            lengths = ragged_util.convert_to_int_tensor(
                lengths, 'lengths', dtypes.int64)
            lengths.shape.assert_has_rank(1)
            lengths = math_ops.minimum(lengths, ncols)
            lengths = math_ops.maximum(lengths, 0)
            limits = math_ops.cumsum(lengths)
            splits = array_ops.concat(
                [array_ops.zeros([1], dtypes.int64), limits], axis=0)
            mask = array_ops.sequence_mask(lengths, maxlen=ncols)
            values = array_ops.boolean_mask(tensor, mask)
            return ragged_factory_ops.from_row_splits(values, splits)

        # If neither padding nor lengths were specified, then create a splits
        # vector that contains no default values, and reshape the input tensor
        # to form the values for the RaggedTensor.
        nrows = input_shape[0]
        nvals = nrows * ncols
        splits = math_ops.range(nrows + 1) * ncols
        values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
        values = array_ops.reshape(tensor, values_shape)
        return ragged_factory_ops.from_row_splits(values, splits)
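A minimal NumPy sketch (assuming a 2-D input and a scalar padding value; not the RaggedTensor code above) of how a padding value is turned into per-row lengths: each non-padding element forces its row to be at least long enough to include it.

import numpy as np

dt = np.array([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
padding = 0
ncols = dt.shape[1]

# For every non-padding element, the row length must be at least (index + 1);
# the row length is the max of those values (0 for all-padding rows).
has_nondefault = (dt != padding).astype(np.int64)
length_for_nondefault = has_nondefault * np.arange(1, ncols + 1)
lengths = length_for_nondefault.max(axis=1)
print(lengths)  # [2 2 1] -> rows [[5, 7], [0, 3], [6]], as in the docstring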
Exemplo n.º 40
0
def from_value_rowids(values, value_rowids, nrows=None, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
            for row in range(nrows)]
  ```

  Warning: currently, this needs to cast `value_rowids` to `int32` before
  computing row lengths, since `tf.bincount` only supports `int32`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
      one-to-one with `values`, and specifies each value's row index.  Must be
      nonnegative, and must be sorted in ascending order.
    nrows: An int64 scalar specifying the number of rows.  This should be
      specified if the `RaggedTensor` may contain empty trailing rows.  Must
      be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
      Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty).
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `nrows` is incompatible with `value_rowids`.

  #### Example:
    ```python
    >>> rt = ragged.from_value_rowids(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=5)
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromValueRowIds',
                      [values, value_rowids, nrows]):
    values = convert_to_tensor_or_ragged_tensor(values, name='values')
    value_rowids = ops.convert_to_tensor(
        value_rowids, dtypes.int64, name='value_rowids')
    if nrows is None:
      const_rowids = tensor_util.constant_value(value_rowids)
      if const_rowids is None:
        nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
        const_nrows = None
      else:
        const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
        nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name='nrows')
    else:
      nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
      const_nrows = tensor_util.constant_value(nrows)
      if const_nrows is not None:
        if const_nrows < 0:
          raise ValueError('Expected nrows >= 0; got %d' % const_nrows)
        const_rowids = tensor_util.constant_value(value_rowids)
        if const_rowids is not None and const_rowids.size > 0:
          if not const_nrows >= const_rowids[-1] + 1:
            raise ValueError(
                'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
                'value_rowids[-1]=%d' % (const_nrows, const_rowids[-1]))

    value_rowids.shape.assert_has_rank(1)
    nrows.shape.assert_has_rank(0)
    values.shape[:1].assert_is_compatible_with(value_rowids.shape)

    # Convert value_rowids & nrows to row_splits.
    # Note: we don't use segment_ids_to_row_splits() here because we want
    # to save the intermediate value `row_lengths`, so we can cache it.
    # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast
    # (Remove the warning in the docstring when we do.)
    value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
    nrows_int32 = math_ops.cast(nrows, dtypes.int32)
    row_lengths = math_ops.bincount(
        value_rowids_int32,
        minlength=nrows_int32,
        maxlength=nrows_int32,
        dtype=dtypes.int64)
    row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
    if const_nrows is not None:
      row_lengths.set_shape([const_nrows])
      row_splits.set_shape([const_nrows + 1])

    return ragged_tensor.RaggedTensor(
        values,
        row_splits,
        cached_row_lengths=row_lengths,
        cached_value_rowids=value_rowids,
        cached_nrows=nrows,
        internal=True)
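The row_splits construction above reduces to a bincount followed by a cumsum; a small NumPy sketch (toy data, not the TF implementation) reproduces the docstring example, including the empty trailing row implied by `nrows=5`.

import numpy as np

value_rowids = np.array([0, 0, 0, 0, 2, 2, 2, 3])
nrows = 5

# Count values per row, then turn counts into splits; row 4 stays empty.
row_lengths = np.bincount(value_rowids, minlength=nrows)[:nrows]
row_splits = np.concatenate([[0], np.cumsum(row_lengths)])
print(row_lengths)  # [4 0 3 1 0]
print(row_splits)   # [0 4 4 7 8 8]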
Exemplo n.º 41
0
def kernel_classifier_distance_and_std_from_activations(
        real_activations,
        generated_activations,
        max_block_size=10,
        dtype=None):
    """Kernel "classifier" distance for evaluating a generative model.

    This methods computes the kernel classifier distance from activations of
    real images and generated images. This can be used independently of the
    kernel_classifier_distance() method, especially in the case of using large
    batches during evaluation where we would like to precompute all of the
    activations before computing the classifier distance, or if we want to
    compute multiple metrics based on the same images. It also returns a rough
    estimate of the standard error of the estimator.

    This technique is described in detail in https://arxiv.org/abs/1801.01401.
    Given two distributions P and Q of activations, this function calculates

        E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
          - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

    where k is the polynomial kernel

        k(x, y) = ( x^T y / dimension + 1 )^3.

    This captures how different the distributions of real and generated images'
    visual features are. Like the Frechet distance (and unlike the Inception
    score), this is a true distance and incorporates information about the
    target images. Unlike the Frechet score, this function computes an
    *unbiased* and asymptotically normal estimator, which makes comparing
    estimates across models much more intuitive.

    The estimator used takes time quadratic in max_block_size. Larger values of
    max_block_size will decrease the variance of the estimator but increase the
    computational cost. This differs slightly from the estimator used by the
    original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
    The estimate of the standard error will also be more reliable when there are
    more blocks, i.e. when max_block_size is smaller.

    NOTE: the blocking code assumes that real_activations and
    generated_activations are both in random order. If either is sorted in a
    meaningful order, the estimator will behave poorly.

    Args:
      real_activations: 2D Tensor containing activations of real data. Shape is
        [batch_size, activation_size].
      generated_activations: 2D Tensor containing activations of generated data.
        Shape is [batch_size, activation_size].
      max_block_size: integer, default 10. The distance estimator splits samples
        into blocks for computational efficiency. Larger values are more
        computationally expensive but decrease the variance of the distance
        estimate. Having a smaller block size also gives a better estimate of the
        standard error.
      dtype: if not None, coerce activations to this dtype before computations.

    Returns:
     The Kernel Inception Distance. A floating-point scalar of the same type
       as the output of the activations.
     An estimate of the standard error of the distance estimator (a scalar of
       the same type).
    """

    real_activations.shape.assert_has_rank(2)
    generated_activations.shape.assert_has_rank(2)
    real_activations.shape[1].assert_is_compatible_with(
        generated_activations.shape[1])

    if dtype is None:
        dtype = real_activations.dtype
        assert generated_activations.dtype == dtype
    else:
        real_activations = math_ops.cast(real_activations, dtype)
        generated_activations = math_ops.cast(generated_activations, dtype)

    # Figure out how to split the activations into blocks of approximately
    # equal size, with none larger than max_block_size.
    n_r = array_ops.shape(real_activations)[0]
    n_g = array_ops.shape(generated_activations)[0]

    n_bigger = math_ops.maximum(n_r, n_g)
    n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

    v_r = n_r // n_blocks
    v_g = n_g // n_blocks

    n_plusone_r = n_r - v_r * n_blocks
    n_plusone_g = n_g - v_g * n_blocks

    sizes_r = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_r], v_r),
        array_ops.fill([n_plusone_r], v_r + 1),
    ], 0)
    sizes_g = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_g], v_g),
        array_ops.fill([n_plusone_g], v_g + 1),
    ], 0)

    zero = array_ops.zeros([1], dtype=dtypes.int32)
    inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
    inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

    dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype)

    def compute_kid_block(i):
        """Computes the ith block of the KID estimate."""
        r_s = inds_r[i]
        r_e = inds_r[i + 1]
        r = real_activations[r_s:r_e]
        m = math_ops.cast(r_e - r_s, dtype)

        g_s = inds_g[i]
        g_e = inds_g[i + 1]
        g = generated_activations[g_s:g_e]
        n = math_ops.cast(g_e - g_s, dtype)

        k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
        k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
        k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
        return (-2 * math_ops.reduce_mean(k_rg) +
                (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) /
                (m * (m - 1)) +
                (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n *
                                                                      (n - 1)))

    ests = functional_ops.map_fn(compute_kid_block,
                                 math_ops.range(n_blocks),
                                 dtype=dtype,
                                 back_prop=False)

    mn = math_ops.reduce_mean(ests)

    # nn_impl.moments doesn't use the Bessel correction, which we want here
    n_blocks_ = math_ops.cast(n_blocks, dtype)
    var = control_flow_ops.cond(
        math_ops.less_equal(n_blocks, 1),
        lambda: array_ops.constant(float('nan'), dtype=dtype),
        lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) /
        (n_blocks_ - 1))

    return mn, math_ops.sqrt(var / n_blocks_)
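The block-splitting arithmetic above divides n samples into n_blocks blocks of size v or v + 1 whose sizes sum back to n; a standalone NumPy sketch (the helper name is illustrative, not from the original module):

import numpy as np

def block_sizes(n, max_block_size):
    # Split n samples into n_blocks blocks of size v or v + 1.
    n_blocks = int(np.ceil(n / max_block_size))
    v = n // n_blocks
    n_plusone = n - v * n_blocks
    sizes = np.concatenate([np.full(n_blocks - n_plusone, v),
                            np.full(n_plusone, v + 1)])
    inds = np.concatenate([[0], np.cumsum(sizes)])
    return sizes, inds

sizes, inds = block_sizes(25, max_block_size=10)
print(sizes)  # [8 8 9]
print(inds)   # [ 0  8 16 25]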
Exemplo n.º 42
0
def sparsemax(logits, name=None):
  """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax", [logits]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    obs = array_ops.shape(logits)[0]
    dims = array_ops.shape(logits)[1]

    # In the paper, they call the logits z.
    # The mean(logits) can be subtracted from logits to make the algorithm
    # more numerically stable. The instability in this algorithm comes mostly
    # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close
    # to zero. However, in practice the numerical instability issues are very
    # minor and subtracting the mean causes extra issues with inf and nan
    # input.
    z = logits

    # sort z
    z_sorted, _ = nn.top_k(z, k=dims)

    # calculate k(z)
    z_cumsum = math_ops.cumsum(z_sorted, axis=1)
    k = math_ops.range(
        1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    # because the z_check vector is always [1,1,...1,0,0,...0] finding the
    # (index + 1) of the last `1` is the same as just summing the number of 1.
    k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

    # calculate tau(z)
    # If there are inf values or all values are -inf, the k_z will be zero,
    # this is mathematically invalid and will also cause the gather_nd to fail.
    # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
    # fixed later (see p_safe) by returning p = nan. This results in the same
    # behavior as softmax.
    k_z_safe = math_ops.maximum(k_z, 1)
    indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1)
    tau_sum = array_ops.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

    # calculate p
    p = math_ops.maximum(
        math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
    # If k_z = 0 or if z = nan, then the input is invalid
    p_safe = array_ops.where(
        math_ops.logical_or(
            math_ops.equal(k_z, 0), math_ops.is_nan(z_cumsum[:, -1])),
        array_ops.fill([obs, dims], math_ops.cast(float("nan"), logits.dtype)),
        p)

    return p_safe
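A compact NumPy sketch of the sparsemax projection for a single 1-D logit vector (ignoring the inf/nan safeguards handled by `p_safe` above); the helper name is illustrative only.

import numpy as np

def sparsemax_1d(z):
    # Sort descending, find the support size k(z), compute tau, then clip.
    z_sorted = np.sort(z)[::-1]
    z_cumsum = np.cumsum(z_sorted)
    k = np.arange(1, len(z) + 1)
    support = 1 + k * z_sorted > z_cumsum
    k_z = support.sum()
    tau = (z_cumsum[k_z - 1] - 1) / k_z
    return np.maximum(z - tau, 0.0)

p = sparsemax_1d(np.array([1.0, 2.0, 0.1]))
print(p, p.sum())  # [0. 1. 0.] 1.0 -- a sparse probability vector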
Exemplo n.º 43
0
 def _inverse(self, y):
     x0 = y[..., 0, array_ops.newaxis]
     xk = math_ops.exp(y[..., 1:])
     x = array_ops.concat([x0, xk], axis=-1)
     return math_ops.cumsum(x, axis=-1)
Exemplo n.º 44
0
    def _power_sum_array(self, max_remaining_steps):
        r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..max_remaining_steps.

    A is the transition matrix and B is the noise covariance.

    This is more efficient in practice than math_utils.power_sums_tensor, since
    each A^i B (A^i)^T term has a closed-form expression not depending on i - 1.
    Thus vectorization can replace explicit looping.

    Uses a cumulative sum on the following expression:

      (transition^p * transition_covariance * (transition^p)^T)_{i, j}
        = (-1)^(i + j) * sin^2(pi * p) / num_latent_values^2
          * (1/sin(pi / num_latent_values * (p - i))
             + 1/sin(pi / num_latent_values * (p - i - 1)))
          * (1/sin(pi / num_latent_values * (p - j))
             + 1/sin(pi / num_latent_values * (p - j - 1)))

    The expression being derived from the eigenvectors and eigenvalues given in
    the class docstring (and as with CycleStateSpaceModel taking advantage of
    the sparsity of the transition covariance).

    Args:
      max_remaining_steps: A scalar integer Tensor indicating the number of
        non-trivial values to compute.
    Returns:
      A [max_remaining_steps + 1, self._num_latent_values - 1,
      self._num_latent_values - 1] floating point Tensor S with cumulative power
      sums.

      S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
        S[0] is the zero matrix
        S[1] is B
        S[2] is A B A^T + B

    """
        num_latent_values_float = math_ops.cast(self._num_latent_values,
                                                self.dtype)
        latent_values_per_period = (
            num_latent_values_float /
            math_ops.cast(self._true_periodicity, dtype=self.dtype))
        original_matrix_powers = (
            math_ops.cast(math_ops.range(max_remaining_steps), self.dtype) *
            latent_values_per_period)
        matrix_dimension_range = math_ops.range(self._num_latent_values -
                                                1)[None, ...]
        matrix_dimension_range_float = math_ops.cast(matrix_dimension_range,
                                                     self.dtype)

        def _cosecant_with_freq(coefficient):
            return 1. / math_ops.sin(
                numpy.pi / num_latent_values_float * coefficient)

        power_minus_index = (original_matrix_powers[..., None] -
                             matrix_dimension_range_float)
        mesh_values = (_cosecant_with_freq(power_minus_index) +
                       _cosecant_with_freq(power_minus_index - 1.))
        meshed = mesh_values[..., None, :] * mesh_values[..., None]
        full_matrix_alternating = math_ops.cast(
            1 - 2 * ((matrix_dimension_range[..., None, :] +
                      matrix_dimension_range[..., None]) % 2), self.dtype)

        def _sine_discontinuity(value):
            """A special case for dealing with discontinuities.

      Decides whether `value`  is close to an integer, and if so computes:

        lim x->n |sin(x * pi)| / sin(x * pi) = sign(sin(n * pi))
                                             = cos(n * pi)

      Args:
        value: The floating point Tensor value which may lead to a
            discontinuity.
      Returns:
        A tuple of (is_discontinuous, sign):
          is_discontinuous: A boolean Tensor of the same shape as `value`,
              indicating whether it is near an integer.
          sign: A floating point Tensor indicating the sign of the discontinuity
            (being near 1 or -1 when `is_discontinuous` is True), of the same
            shape and type as `value`.
      """
            normalized = value / num_latent_values_float
            is_discontinuous = self._close_to_integer(normalized)
            sign = math_ops.cos(normalized * numpy.pi)
            return is_discontinuous, sign

        index_discontinuous, index_sign = _sine_discontinuity(
            original_matrix_powers[..., None] - matrix_dimension_range_float)
        index_minus_discontinuous, index_minus_sign = _sine_discontinuity(
            original_matrix_powers[..., None] - matrix_dimension_range_float -
            1)
        ones_mask_vector = math_ops.logical_or(index_discontinuous,
                                               index_minus_discontinuous)
        ones_sign_vector = array_ops.where(index_discontinuous, index_sign,
                                           index_minus_sign)
        ones_mask = math_ops.logical_and(ones_mask_vector[..., None],
                                         ones_mask_vector[..., None, :])
        zeros_mask = self._close_to_integer(original_matrix_powers)
        zeroed = array_ops.where(zeros_mask, array_ops.zeros_like(meshed),
                                 meshed)
        global_coefficient = (math_ops.sin(numpy.pi * original_matrix_powers) /
                              num_latent_values_float)
        masked_meshed = array_ops.where(
            ones_mask,
            ones_sign_vector[..., None] * ones_sign_vector[..., None, :],
            zeroed * global_coefficient[..., None, None]**2)
        powers_above_zero = full_matrix_alternating * masked_meshed
        return array_ops.pad(math_ops.cumsum(powers_above_zero),
                             [(1, 0), (0, 0), (0, 0)])
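The cumulative sums S[N] described in the docstring obey the recurrence S[N+1] = A S[N] A^T + B; a quick NumPy check with small random matrices (an assumption for illustration, unrelated to the state space model itself):

import numpy as np

rng = np.random.default_rng(0)
A = 0.5 * rng.standard_normal((3, 3))   # stand-in transition matrix
B = rng.standard_normal((3, 3))
B = B @ B.T                             # stand-in noise covariance

S = np.zeros((3, 3))
for n in range(1, 6):
    direct = sum(np.linalg.matrix_power(A, i) @ B @ np.linalg.matrix_power(A, i).T
                 for i in range(n))
    S = A @ S @ A.T + B                 # S[n] = A S[n-1] A^T + B
    assert np.allclose(direct, S)
print("recurrence matches the direct power sums")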
Exemplo n.º 45
0
    def _compare(self, x, axis, exclusive, reverse):
        np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
        with self.cached_session():
            tf_out = math_ops.cumsum(x, axis, exclusive, reverse).eval()

        self.assertAllClose(np_out, tf_out)
Exemplo n.º 46
0
  def _compare(self, x, axis, exclusive, reverse):
    np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
    with self.cached_session(use_gpu=True):
      tf_out = math_ops.cumsum(x, axis, exclusive, reverse).eval()

    self.assertAllClose(np_out, tf_out)
Exemplo n.º 47
0
def _ragged_segment_aggregate(unsorted_segment_op,
                              data,
                              segment_ids,
                              num_segments,
                              separator=None,
                              name=None):
  """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    separator: An optional string. Defaults to None. The separator to use when
      joining. Only used for string types.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
  if not (ragged_tensor.is_ragged(data) or
          ragged_tensor.is_ragged(segment_ids)):
    if separator is not None:
      # It uses unsorted_segment_join.
      return unsorted_segment_op(data, segment_ids, num_segments, separator,
                                 name)
    else:
      return unsorted_segment_op(data, segment_ids, num_segments, name)

  with ops.name_scope(name, 'RaggedSegment',
                      [data, segment_ids, num_segments]) as name:
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    segment_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        segment_ids, name='segment_ids')
    data, segment_ids = ragged_tensor.match_row_splits_dtypes(data, segment_ids)
    if segment_ids.dtype not in (dtypes.int32, dtypes.int64):
      raise ValueError('segment_ids must have dtype int32 or int64.')

    if ragged_tensor.is_ragged(segment_ids):
      if not ragged_tensor.is_ragged(data):
        raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                         'but segment_ids is ragged and data is not.')
      check_splits = check_ops.assert_equal(
          segment_ids.row_splits,
          data.row_splits,
          message='segment_ids.shape must be a prefix of data.shape')
      with ops.control_dependencies([check_splits]):
        return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                         segment_ids.values, num_segments,
                                         separator)

    # Find the length of each row in data.  (shape=[data_nrows])
    data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

    # Find the length that each output row will have.  The length of the row
    # corresponding to segment `id` is `max(data_row_lengths[i])` where
    # `segment_ids[i]=id`.  (shape=[output_nrows])
    output_row_lengths = math_ops.maximum(
        math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                      num_segments), 0)

    # Build the splits tensor for the output RaggedTensor.
    output_splits = array_ops.concat([
        array_ops.zeros([1], output_row_lengths.dtype),
        math_ops.cumsum(output_row_lengths)
    ],
                                     axis=0)

    # For each row in `data`, find the start & limit position where that row's
    # values will be aggregated in output.values.
    data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids)
    data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

    # For each value in `data.values`, find the position where it will be
    # aggregated in `output.values`.
    # Get the target output values index for each data values index.
    data_val_to_out_val_index = range(data_row_to_out_row_start,
                                      data_row_to_out_row_limit).values

    # Recursively aggregate the values.
    output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                              data_val_to_out_val_index,
                                              output_splits[-1], separator)
    return ragged_tensor.RaggedTensor.from_row_splits(
        output_values, output_splits, validate=False)
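A small NumPy walk-through (toy row lengths and segment ids, not a real RaggedTensor) of the splits construction above: each output row is as long as the longest input row mapped to that segment, and segments with no input rows stay empty.

import numpy as np

data_row_lengths = np.array([2, 3, 1, 2])   # lengths of data rows 0..3
segment_ids = np.array([1, 0, 1, 3])        # data row -> output segment
num_segments = 4

# Per-segment max of the input row lengths (0 where a segment gets no rows).
output_row_lengths = np.zeros(num_segments, dtype=np.int64)
np.maximum.at(output_row_lengths, segment_ids, data_row_lengths)
output_splits = np.concatenate([[0], np.cumsum(output_row_lengths)])
print(output_row_lengths)  # [3 2 0 2]
print(output_splits)       # [0 3 5 5 7]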
Exemplo n.º 48
0
def kernel_classifier_distance_and_std_from_activations(real_activations,
                                                        generated_activations,
                                                        max_block_size=1024,
                                                        dtype=None):
  """Kernel "classifier" distance for evaluating a generative model.

  This methods computes the kernel classifier distance from activations of
  real images and generated images. This can be used independently of the
  kernel_classifier_distance() method, especially in the case of using large
  batches during evaluation where we would like to precompute all of the
  activations before computing the classifier distance, or if we want to
  compute multiple metrics based on the same images. It also returns a rough
  estimate of the standard error of the estimator.

  This technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
  The estimate of the standard error will also be more reliable when there are
  more blocks, i.e. when max_block_size is smaller.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: If not None, coerce activations to this dtype before computations.

  Returns:
   The Kernel Inception Distance. A floating-point scalar of the same type
     as the output of the activations.
   An estimate of the standard error of the distance estimator (a scalar of
     the same type).
  """

  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)
  real_activations.shape[1].assert_is_compatible_with(
      generated_activations.shape[1])

  if dtype is None:
    dtype = real_activations.dtype
    assert generated_activations.dtype == dtype
  else:
    real_activations = math_ops.cast(real_activations, dtype)
    generated_activations = math_ops.cast(generated_activations, dtype)

  # Figure out how to split the activations into blocks of approximately
  # equal size, with none larger than max_block_size.
  n_r = array_ops.shape(real_activations)[0]
  n_g = array_ops.shape(generated_activations)[0]

  n_bigger = math_ops.maximum(n_r, n_g)
  n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

  v_r = n_r // n_blocks
  v_g = n_g // n_blocks

  n_plusone_r = n_r - v_r * n_blocks
  n_plusone_g = n_g - v_g * n_blocks

  sizes_r = array_ops.concat([
      array_ops.fill([n_blocks - n_plusone_r], v_r),
      array_ops.fill([n_plusone_r], v_r + 1),
  ], 0)
  sizes_g = array_ops.concat([
      array_ops.fill([n_blocks - n_plusone_g], v_g),
      array_ops.fill([n_plusone_g], v_g + 1),
  ], 0)

  zero = array_ops.zeros([1], dtype=dtypes.int32)
  inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
  inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

  dim = math_ops.cast(real_activations.shape[1], dtype)

  def compute_kid_block(i):
    """Computes the ith block of the KID estimate."""
    r_s = inds_r[i]
    r_e = inds_r[i + 1]
    r = real_activations[r_s:r_e]
    m = math_ops.cast(r_e - r_s, dtype)

    g_s = inds_g[i]
    g_e = inds_g[i + 1]
    g = generated_activations[g_s:g_e]
    n = math_ops.cast(g_e - g_s, dtype)

    k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
    k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
    k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
    return (-2 * math_ops.reduce_mean(k_rg) +
            (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) +
            (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1)))

  ests = map_fn.map_fn(
      compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False)

  mn = math_ops.reduce_mean(ests)

  # nn_impl.moments doesn't use the Bessel correction, which we want here
  n_blocks_ = math_ops.cast(n_blocks, dtype)
  var = control_flow_ops.cond(
      math_ops.less_equal(n_blocks, 1),
      lambda: array_ops.constant(float('nan'), dtype=dtype),
      lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1))

  return mn, math_ops.sqrt(var / n_blocks_)
Exemplo n.º 49
0
def _ragged_segment_aggregate(unsorted_segment_op, data, segment_ids,
                              num_segments, name=None):
  """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
  if not (ragged_tensor.is_ragged(data) or
          ragged_tensor.is_ragged(segment_ids)):
    return unsorted_segment_op(data, segment_ids, num_segments, name)

  with ops.name_scope(name, 'RaggedSegment',
                      [data, segment_ids, num_segments]) as name:
    data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        data, name='data')
    segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        segment_ids, name='segment_ids')

    if ragged_tensor.is_ragged(segment_ids):
      if not ragged_tensor.is_ragged(data):
        raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                         'but segment_ids is ragged and data is not.')
      check_splits = check_ops.assert_equal(
          segment_ids.row_splits,
          data.row_splits,
          message='segment_ids.shape must be a prefix of data.shape')
      with ops.control_dependencies([check_splits]):
        return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                         segment_ids.values, num_segments, name)

    segment_ids = math_ops.cast(segment_ids, dtypes.int64)

    # Find the length of each row in data.  (dtype=int64, shape=[data_nrows])
    data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

    # Find the length that each output row will have.  The length of the row
    # corresponding to segment `id` is `max(data_row_lengths[i])` where
    # `segment_ids[i]=id`.  (dtype=int64, shape=[output_nrows])
    output_row_lengths = math_ops.maximum(
        math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                      num_segments), 0)
    assert output_row_lengths.dtype == dtypes.int64

    # Build the splits tensor for the output RaggedTensor.
    output_splits = array_ops.concat(
        [
            array_ops.zeros([1], dtypes.int64),
            math_ops.cumsum(output_row_lengths)
        ],
        axis=0)

    # For each row in `data`, find the start & limit position where that row's
    # values will be aggregated in output.values.
    data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids)
    data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

    # For each value in `data.values`, find the position where it will be
    # aggregated in `output.values`.
    # Get the target output values index for each data values index.
    data_val_to_out_val_index = range(data_row_to_out_row_start,
                                      data_row_to_out_row_limit).values

    # Recursively aggregate the values.
    output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                              data_val_to_out_val_index,
                                              output_splits[-1])
    return ragged_factory_ops.from_row_splits(output_values, output_splits)
Exemplo n.º 50
0
 def _computeLogSumExp(self, x, **kwargs):
   result_naive = math_ops.cumsum(math_ops.exp(x), **kwargs)
   result_fused = math_ops.exp(math_ops.cumulative_logsumexp(x, **kwargs))
   return result_naive, result_fused
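For reference, the fused op being compared against above computes log(cumsum(exp(x))) in a numerically stable way; a NumPy sketch using logaddexp.accumulate shows the difference on large inputs (1-D case assumed).

import numpy as np

x = np.array([1000.0, 1000.5, 1001.0])
stable = np.logaddexp.accumulate(x)       # log(cumsum(exp(x))), computed stably
with np.errstate(over="ignore"):
    naive = np.log(np.cumsum(np.exp(x)))  # exp overflows, giving inf
print(stable)  # finite values
print(naive)   # [inf inf inf]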
Exemplo n.º 51
0
  def from_value_rowids(cls,
                        value_rowids,
                        nrows=None,
                        validate=True,
                        preferred_dtype=None):
    """Creates a `RowPartition` with rows partitioned by `value_rowids`.

    This `RowPartition` divides a sequence `values` into rows by specifying
    which row each value should be added to:

    ```python
    rows = [[] for _ in range(nrows)]
    for (value, rowid) in zip(values, value_rowids):
      rows[rowid].append(value)
    ```

    Args:
      value_rowids: A 1-D integer tensor with shape `[nvals]`, which corresponds
        one-to-one with `values`, and specifies each value's row index.  Must be
        nonnegative, and must be sorted in ascending order.
      nrows: An integer scalar specifying the number of rows.  This should be
        specified if the `RowPartition` may contain empty trailing rows. Must
        be greater than `value_rowids[-1]` (or greater than or equal to zero if
        `value_rowids` is empty). Defaults to `value_rowids[-1] + 1` (or zero if
        `value_rowids` is empty).
      validate: If true, then use assertions to check that the arguments form a
        valid `RowPartition`.
      preferred_dtype: The dtype to encode value_rowids if it doesn't already
        have one. The default is tf.int64.

    Returns:
      A `RowPartition`.

    Raises:
      ValueError: If `nrows` is incompatible with `value_rowids`.

    #### Example:

    >>> print(RowPartition.from_value_rowids(
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=4))
    tf.RowPartition(row_splits=tf.Tensor([0 4 4 7 8], shape=(5,), dtype=int64))
    """
    if not isinstance(validate, bool):
      raise TypeError("validate must have type bool")
    with ops.name_scope(None, "RowPartitionFromValueRowIds",
                        [value_rowids, nrows]):
      value_rowids = cls._convert_row_partition(value_rowids, "value_rowids",
                                                preferred_dtype)
      if nrows is None:
        const_rowids = tensor_util.constant_value(value_rowids)
        if const_rowids is None:
          nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
          const_nrows = None
        else:
          const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
          nrows = ops.convert_to_tensor(
              const_nrows, value_rowids.dtype, name="nrows")
      else:
        nrows = ops.convert_to_tensor(nrows, value_rowids.dtype, "nrows")
        const_nrows = tensor_util.constant_value(nrows)
        if const_nrows is not None:
          if const_nrows < 0:
            raise ValueError("Expected nrows >= 0; got %d" % const_nrows)
          const_rowids = tensor_util.constant_value(value_rowids)
          if const_rowids is not None and const_rowids.size > 0:
            if not const_nrows >= const_rowids[-1] + 1:
              raise ValueError(
                  "Expected nrows >= value_rowids[-1] + 1; got nrows=%d, "
                  "value_rowids[-1]=%d" % (const_nrows, const_rowids[-1]))

      value_rowids.shape.assert_has_rank(1)
      nrows.shape.assert_has_rank(0)

      if validate:
        msg = ("Arguments to from_value_rowids do not form a valid "
               "RowPartition")
        checks = [
            check_ops.assert_rank(value_rowids, 1, message=msg),
            check_ops.assert_rank(nrows, 0, message=msg),
            check_ops.assert_non_negative(value_rowids[:1], message=msg),
            _assert_monotonic_increasing(value_rowids, message=msg),
            check_ops.assert_less(value_rowids[-1:], nrows, message=msg),
        ]
        value_rowids = control_flow_ops.with_dependencies(checks, value_rowids)

      # Convert value_rowids & nrows to row_splits.
      # Note: we don't use segment_ids_to_row_splits() here because we want
      # to save the intermediate value `row_lengths`, so we can cache it.
      # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the
      # cast.
      value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
      nrows_int32 = math_ops.cast(nrows, dtypes.int32)
      row_lengths = math_ops.bincount(
          value_rowids_int32,
          minlength=nrows_int32,
          maxlength=nrows_int32,
          dtype=value_rowids.dtype)
      row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
      if const_nrows is not None:
        row_lengths.set_shape([const_nrows])
        row_splits.set_shape([const_nrows + 1])

      return cls(
          row_splits=row_splits,
          row_lengths=row_lengths,
          value_rowids=value_rowids,
          nrows=nrows,
          internal=_row_partition_factory_key)
Exemplo n.º 52
0
def _list_mle_loss(labels,
                   logits,
                   weights=None,
                   lambda_weight=None,
                   reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                   name=None,
                   seed=None):
    """Computes the ListMLE loss [Xia et al.

  2008] for a list.

  Given the labels of graded relevance l_i and the logits s_i, we calculate
  the ListMLE loss for the given list.

  The `lambda_weight` re-weights examples based on l_i and r_i.
  The recommended weighting scheme is the formulation presented in the
  "Position-Aware ListMLE" paper (Lan et. al) and available using
  create_p_list_mle_lambda_weight() factory function above.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights.
    lambda_weight: A `DCGLambdaWeight` instance.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    seed: A randomization seed used when shuffling ground truth permutations.

  Returns:
    An op for the ListMLE loss.
  """
    with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)):
        is_label_valid = utils.is_label_valid(labels)
        # Reset the invalid labels to 0 and reset the invalid logits to a logit with
        # ~= 0 contribution.
        labels = array_ops.where(is_label_valid, labels,
                                 array_ops.zeros_like(labels))
        logits = array_ops.where(
            is_label_valid, logits,
            math_ops.log(_EPSILON) * array_ops.ones_like(logits))
        weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
        weights = array_ops.squeeze(weights)

        # Shuffle labels and logits to add randomness to sort.
        shuffled_indices = utils.shuffle_valid_indices(is_label_valid, seed)
        shuffled_labels = array_ops.gather_nd(labels, shuffled_indices)
        shuffled_logits = array_ops.gather_nd(logits, shuffled_indices)

        sorted_labels, sorted_logits = utils.sort_by_scores(
            shuffled_labels, [shuffled_labels, shuffled_logits])

        raw_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True)
        sorted_logits = sorted_logits - raw_max
        sums = math_ops.cumsum(math_ops.exp(sorted_logits),
                               axis=1,
                               reverse=True)
        sums = math_ops.log(sums) - sorted_logits

        if lambda_weight is not None and isinstance(lambda_weight,
                                                    ListMLELambdaWeight):
            sums *= lambda_weight.individual_weights(sorted_labels)

        negative_log_likelihood = math_ops.reduce_sum(sums, 1)

        return core_losses.compute_weighted_loss(negative_log_likelihood,
                                                 weights=weights,
                                                 reduction=reduction)
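A compact NumPy sketch of the ListMLE negative log-likelihood computed above, assuming a single fully-labeled list with no weights, no lambda_weight, and no tie-breaking shuffle:

import numpy as np

labels = np.array([2.0, 0.0, 1.0])
logits = np.array([1.5, 0.2, 0.7])

order = np.argsort(-labels)               # sort items by decreasing relevance
s = logits[order] - logits[order].max()   # subtract the max for stability
sums = np.cumsum(np.exp(s[::-1]))[::-1]   # sum_{j >= i} exp(s_j)
nll = np.sum(np.log(sums) - s)            # ListMLE negative log-likelihood
print(nll)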
Exemplo n.º 53
0
def _add_batched_ragged_partition(rt,
                                  partition,
                                  tensor_dict,
                                  feature_key,
                                  validate,
                                  outer_splits=None):
    """Adds a batched ragged partition tensor to a batched ragged tensor.

  Args:
    rt: A RaggedTensor with shape [batch_size, ...].
    partition: The partition configuration object.  Specifies the key that
      should be used to look up the partition tensor (unless partition is a
      RaggedFeature.UniformRowLength, in which case there is no partition
      tensor).  The specified tensor must have shape [batch_size, ...].
    tensor_dict: The dictionary mapping keys to tensors.
    feature_key: The name of the feature being parsed (for error messages).
    validate: Whether to validate that the values form a valid RaggedTensor.
    outer_splits: If not None, then we have two batch dimensions, and this
      is the row-splits for the collapsed batch dimension.  Every partition
      tensor must have an outer row_splits that matches this value.

  Returns:
    A new RaggedTensor where each batch item `rt[i]` has been partitioned
    using the `partition_t[i]`.
  """
    if isinstance(partition, RaggedFeature.UniformRowLength):
        if rt.ragged_rank > 1:
            length = ops.convert_to_tensor(partition.length,
                                           rt.row_splits.dtype)
            return ragged_tensor.RaggedTensor.from_row_splits(
                ragged_tensor.RaggedTensor.from_uniform_row_length(
                    rt.values, length, validate=validate),
                rt.row_splits // length,
                validate=validate)
        else:
            reshaped_vals = array_ops.reshape(
                rt.values,
                array_ops.concat([[-1, partition.length],
                                  array_ops.shape(rt.values)[1:]],
                                 axis=0))
            return ragged_tensor.RaggedTensor.from_row_splits(
                reshaped_vals,
                rt.row_splits // partition.length,
                validate=validate)

    partition_t = tensor_dict[partition.key]
    if partition_t.values.dtype != rt.row_splits.dtype:
        partition_t = math_ops.cast(partition_t, rt.row_splits.dtype)

    checks = []
    if outer_splits is not None:
        if validate:
            checks.append(
                check_ops.assert_equal(
                    outer_splits,
                    partition_t.row_splits,
                    message="Feature %s: values and partitions are not aligned"
                    % feature_key))
        partition_t = partition_t.values

    with ops.control_dependencies(checks):
        if isinstance(partition,
                      (RaggedFeature.RowSplits, RaggedFeature.RowLimits)):
            if isinstance(partition, RaggedFeature.RowSplits):
                partition_t = partition_t[:, 1:]
            adjusted_limits = partition_t.values + array_ops.repeat(
                rt.row_starts(), partition_t.row_lengths())
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_limits(rt.values,
                                                           adjusted_limits,
                                                           validate=validate))
        elif isinstance(partition, RaggedFeature.RowStarts):
            adjusted_starts = partition_t.values + array_ops.repeat(
                rt.row_starts(), partition_t.row_lengths())
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_starts(rt.values,
                                                           adjusted_starts,
                                                           validate=validate))
        elif isinstance(partition, RaggedFeature.RowLengths):
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_lengths(rt.values,
                                                            partition_t.values,
                                                            validate=validate))
        elif isinstance(partition, RaggedFeature.ValueRowIds):
            nrows = math_ops.maximum(  # number of rows in each batch item
                ragged_math_ops.reduce_max(partition_t + 1, axis=1), 0)
            adjusted_rowids = partition_t.values + array_ops.repeat(
                math_ops.cumsum(nrows, exclusive=True),
                partition_t.row_lengths())
            return ragged_tensor.RaggedTensor.from_row_lengths(
                ragged_tensor.RaggedTensor.from_value_rowids(
                    rt.values, adjusted_rowids, validate=validate),
                nrows,
                validate=validate)

        raise ValueError(f"Unhandled partition type {partition!r}")
Exemplo n.º 54
0
def indicators_to_sparse_ids(indicators,
                             ignore_value=None,
                             dtype=dtypes.int64):
    """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  ```python
  indicators = [
    [
      [0, 0, 1, 0],
      [0, 0, 0, 0]
    ], [
      [1, 0, 1, 1],
      [0, 0, 1, 0]
    ]
  ]
  sparse_ids = indicator_to_sparse_ids(indicators)
  ```

  `sparse_ids` in "jagged" format:
  [
    [
      [2],
      []
    ], [
      [0, 2, 3],
      [2]
    ]
  ]

  `sparse_ids` in `SparseTensor` format:
  ```python
  {
    indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]],
    values: [2, 0, 2, 3, 2],
    dense_shape: [2, 2, 3]
  }
  ```

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`.
      `ignore_value` values are ignored. For other values (typically, ones), the
      index along the last dimension is returned.
    ignore_value: Entries in `indicators` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the result
      shape is (2, 2, 3)). The values of this `SparseTensor` are in the range
      `[0, num_classes)` and correspond to the index of non-ignore values along
      the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
    if not dtype.is_integer:
        raise ValueError("Invalid dtype {} not integer.".format(dtype))
    with ops.name_scope(None, "indicators_to_sparse_ids",
                        (indicators, ignore_value)):
        # Convert indicators to binary ones and zeros. We use int64 since
        # SparseTensor requires int64 indices.
        indicators = ops.convert_to_tensor(indicators, name="indicators")
        missing_indicators = math_ops.equal(indicators,
                                            _ignore_value_tensor(
                                                indicators.dtype,
                                                ignore_value),
                                            name="missing")
        zeros_like_indicators = array_ops.zeros_like(indicators,
                                                     dtype=dtypes.int64,
                                                     name="zeros")
        binary_indicators = array_ops.where(missing_indicators,
                                            zeros_like_indicators,
                                            array_ops.ones_like(
                                                indicators,
                                                dtype=dtypes.int64,
                                                name="ones"),
                                            name="binary_indicators")

        # Use cumsum along the last dimension to generate per-row indexes.
        # Note that these are 1-based (since 0 indicates missing values), so they're
        # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
        # off-by-one, the max value is the size of the last dimension (i.e.,
        # last_index + 1).
        row_index_indicators = array_ops.where(
            missing_indicators, zeros_like_indicators,
            math_ops.cumsum(binary_indicators, axis=-1),
            "row_index_indicators")
        result_last_dim = array_ops.reshape(
            math_ops.reduce_max(row_index_indicators),
            shape=(1, ),
            name="result_last_dim")

        # Convert to a SparseTensor. The values of this SparseTensor are the last
        # indices of our result, and the last indices of this SparseTensor (i.e.,
        # the class IDs indicated by `indicators`) are the values of our result, so
        # we use tensor slicing and concat to swap them.
        sparse_row_index_indicators = dense_to_sparse_tensor(
            row_index_indicators, ignore_value=0)
        return sparse_tensor.SparseTensor(
            indices=array_ops.concat(
                (sparse_row_index_indicators.indices[:, :-1],
                 array_ops.reshape(sparse_row_index_indicators.values - 1,
                                   (-1, 1))),
                axis=1,
                name="indices"),
            values=math_ops.cast(sparse_row_index_indicators.indices[:, -1],
                                 dtype=dtype,
                                 name="values"),
            dense_shape=array_ops.concat(
                (sparse_row_index_indicators.dense_shape[0:-1],
                 result_last_dim),
                axis=0,
                name="dense_shape"))
Exemplo n.º 55
0
 def f(x):
     return math_ops.cumsum(x)
Exemplo n.º 56
0
def kernel_classifier_distance_and_std_from_activations(
        real_activations,
        generated_activations,
        max_block_size=10,
        dtype=None):
    """Kernel distance (KID) between the two sets of activations, plus the
    standard error of the block-wise estimate (degree-3 polynomial kernel)."""

    real_activations.shape.assert_has_rank(2)
    generated_activations.shape.assert_has_rank(2)
    real_activations.shape[1].assert_is_compatible_with(
        generated_activations.shape[1])

    if dtype is None:
        dtype = real_activations.dtype
        assert generated_activations.dtype == dtype
    else:
        real_activations = math_ops.cast(real_activations, dtype)
        generated_activations = math_ops.cast(generated_activations, dtype)

    n_r = array_ops.shape(real_activations)[0]
    n_g = array_ops.shape(generated_activations)[0]

    n_bigger = math_ops.maximum(n_r, n_g)
    n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

    v_r = n_r // n_blocks
    v_g = n_g // n_blocks

    n_plusone_r = n_r - v_r * n_blocks
    n_plusone_g = n_g - v_g * n_blocks

    sizes_r = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_r], v_r),
        array_ops.fill([n_plusone_r], v_r + 1),
    ], 0)
    sizes_g = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_g], v_g),
        array_ops.fill([n_plusone_g], v_g + 1),
    ], 0)

    zero = array_ops.zeros([1], dtype=dtypes.int32)
    inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
    inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

    dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype)

    def compute_kid_block(i):
        r_s = inds_r[i]
        r_e = inds_r[i + 1]
        r = real_activations[r_s:r_e]
        m = math_ops.cast(r_e - r_s, dtype)

        g_s = inds_g[i]
        g_e = inds_g[i + 1]
        g = generated_activations[g_s:g_e]
        n = math_ops.cast(g_e - g_s, dtype)

        k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
        k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
        k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
        return (-2 * math_ops.reduce_mean(k_rg) +
                (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) /
                (m * (m - 1)) +
                (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n *
                                                                      (n - 1)))

    ests = functional_ops.map_fn(compute_kid_block,
                                 math_ops.range(n_blocks),
                                 dtype=dtype,
                                 back_prop=False)

    mn = math_ops.reduce_mean(ests)

    n_blocks_ = math_ops.cast(n_blocks, dtype)
    var = control_flow_ops.cond(
        math_ops.less_equal(n_blocks, 1),
        lambda: array_ops.constant(float('nan'), dtype=dtype),
        lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) /
        (n_blocks_ - 1))

    return mn, math_ops.sqrt(var / n_blocks_)
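The `inds_r`/`inds_g` tensors above follow a common cumsum idiom: prepending a zero to the cumulative block sizes yields boundary indices, so block `i` is the slice `[inds[i], inds[i + 1])`. A small standalone sketch with made-up sizes:

```python
import tensorflow as tf

sizes = tf.constant([4, 4, 3])                     # hypothetical block sizes
inds = tf.concat([[0], tf.cumsum(sizes)], axis=0)  # [0, 4, 8, 11]

data = tf.range(11)
blocks = [data[inds[i]:inds[i + 1]] for i in range(sizes.shape[0])]
print([b.numpy().tolist() for b in blocks])
# [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10]]
```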
Exemplo n.º 57
0
 def loop_fn(i):
   a = array_ops.gather(x, i)
   return math_ops.cumsum(
       a, axis=axis, exclusive=exclusive, reverse=reverse)
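The `loop_fn` above is just a per-row wrapper around `cumsum` with its `axis`, `exclusive`, and `reverse` options. A quick standalone reminder of what those flags do, using the public API:

```python
import tensorflow as tf

x = tf.constant([1, 2, 3, 4])
print(tf.cumsum(x).numpy())                                 # [1 3 6 10]
print(tf.cumsum(x, exclusive=True).numpy())                 # [0 1 3 6]
print(tf.cumsum(x, reverse=True).numpy())                   # [10 9 7 4]
print(tf.cumsum(x, exclusive=True, reverse=True).numpy())   # [9 7 4 0]
```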
Exemplo n.º 58
0
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64):
  """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  indicators = [
    [[0, 0, 1, 0], [0, 0, 0, 0]],
    [[1, 0, 1, 1], [0, 0, 1, 0]],
  ]
  indicators_to_sparse_ids(indicators) => [
    [[2], []],
    [[0, 2, 3], [2]],
  ]

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`. This must
      have a statically defined rank. `ignore_value` values are ignored. For
      other values (typically, ones), the index along the last dimension is
      returned.
    ignore_value: Entries in `indicators` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the result
      shape is (2, 2, 3)). The values of this `SparseTensor` are in the range
      `[0, num_classes)` and correspond to the index of non-empty values along
      the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
  if not dtype.is_integer:
    raise ValueError("Invalid dtype {} not integer.".format(dtype))
  with ops.name_scope(
      None, "indicators_to_sparse_ids", (indicators, ignore_value)):
    # Convert indicators to binary ones and zeros. We use int64 since
    # SparseTensor requires int64 indices.
    indicators = ops.convert_to_tensor(indicators, name="indicators")
    if ignore_value is None:
      ignore_value = _ignore_value(indicators.dtype)
    missing_indicators = math_ops.equal(
        indicators, ignore_value, name="missing")
    zeros_like_indicators = array_ops.zeros_like(
        indicators, dtype=dtypes.int64, name="zeros")
    binary_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"),
        name="binary_indicators")

    # Use cumsum along the last dimension to generate per-row indexes.
    # Note that these are 1-based (since 0 indicates missing values), so they're
    # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
    # off-by-one, the max value is the size of the last dimension (i.e.,
    # last_index + 1).
    row_index_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators")
    result_last_dim = array_ops.reshape(
        math_ops.reduce_max(row_index_indicators), shape=(1,),
        name="result_last_dim")

    # Convert to a SparseTensor. The values of this SparseTensor are the last
    # indices of our result, and the last indices of this SparseTensor (i.e.,
    # the class IDs indicated by `indicators`) are the values of our result, so
    # we use unstack/stack to swap them.
    sparse_row_index_indicators = dense_to_sparse_tensor(
        row_index_indicators, ignore_value=0)
    index_columns = array_ops.unstack(
        sparse_row_index_indicators.indices, axis=1)
    return sparse_tensor.SparseTensor(
        indices=array_ops.stack(
            index_columns[0:-1] + [sparse_row_index_indicators.values - 1],
            axis=1, name="indices"),
        values=math_ops.cast(index_columns[-1], dtype=dtype, name="values"),
        dense_shape=array_ops.concat(
            (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim),
            axis=0, name="dense_shape"))
Exemplo n.º 59
0
 def loop_fn(i):
     a = array_ops.gather(x, i)
     return math_ops.cumsum(a,
                            axis=axis,
                            exclusive=exclusive,
                            reverse=reverse)
Exemplo n.º 60
0
def lengths_to_splits(lengths):
  """Returns splits corresponding to the given lengths."""
  return array_ops.concat([[0], math_ops.cumsum(lengths)], axis=-1)
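As a quick usage sketch (not part of the original snippet): row lengths `[2, 0, 3]` become splits `[0, 2, 2, 5]`, so row `i` of the corresponding ragged tensor is `values[splits[i]:splits[i + 1]]`.

```python
import tensorflow as tf

lengths = tf.constant([2, 0, 3])
splits = tf.concat([[0], tf.cumsum(lengths)], axis=-1)
print(splits.numpy())  # [0 2 2 5]

# The same splits feed straight into RaggedTensor construction.
values = tf.constant([10, 11, 20, 21, 22])
print(tf.RaggedTensor.from_row_splits(values, splits))
# <tf.RaggedTensor [[10, 11], [], [20, 21, 22]]>
```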