Example #1
 def _SparseTensor_2x6(self):
   ind = np.array([[0, 0], [1, 0], [1, 3], [1, 4]])
   val = np.array([0, 10, 13, 14])
   shape = np.array([2, 6])
   return ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(val, dtypes.int32),
       constant_op.constant(shape, dtypes.int64))
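
A minimal numpy sketch (not part of the original test) showing how the indices/values/shape triple above maps to a dense array:

import numpy as np

ind = np.array([[0, 0], [1, 0], [1, 3], [1, 4]])
val = np.array([0, 10, 13, 14])
shape = np.array([2, 6])

# Densify: place each value at its (row, col) coordinate.
dense = np.zeros(shape, dtype=val.dtype)
dense[tuple(ind.T)] = val
print(dense)
# [[ 0  0  0  0  0  0]
#  [10  0  0 13 14  0]]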
Example #2
 def _SparseTensor_2x3x4(self, dtype):
     ind = np.array([[0, 0, 1], [0, 1, 0], [0, 1, 2], [1, 0, 3], [1, 1, 1],
                     [1, 1, 3], [1, 2, 2]])
     val = np.array([1, 10, 12, 103, 111, 113, 122])
     shape = np.array([2, 3, 4])
      return ops.SparseTensor(constant_op.constant(ind, dtypes.int64),
                              constant_op.constant(val, dtype),
                              constant_op.constant(shape, dtypes.int64))
Example #3
 def testSparseTensor(self):
     g0 = ops.Graph()
     a = g0.create_op("a", [], [dtypes.float32])
     b = g0.create_op("b", [], [dtypes.float32])
     sparse = ops.SparseTensor(_apply_op(g0, "const", [], [dtypes.int64]),
                               _apply_op(g0, "const", [], [dtypes.float32]),
                               _apply_op(g0, "const", [], [dtypes.int64]))
     self._testGraphElements([a, sparse, b])
Example #4
def _SparseTensorDenseMatMulGrad(op, grad):
    """Gradients for the dense tensor in the SparseTensorDenseMatMul op.

  If either input is complex, no gradient is provided.

  Args:
    op: the SparseTensorDenseMatMul op
    grad: the incoming gradient

  Returns:
    Gradient for each of the 4 input tensors:
      (sparse_indices, sparse_values, sparse_shape, dense_tensor)
    The gradients for indices and shape are None.

  Raises:
    TypeError: When the two operands don't have the same type.
  """
    sp_t = ops.SparseTensor(*op.inputs[:3])
    adj_a = op.get_attr("adjoint_a")
    adj_b = op.get_attr("adjoint_b")

    a_type = sp_t.values.dtype.base_dtype
    b_type = op.inputs[3].dtype.base_dtype
    if a_type != b_type:
        raise TypeError(
            "SparseTensorDenseMatMul op received operands with "
            "different types: ", a_type, " and ", b_type)
    if a_type in (ops.dtypes.complex64, ops.dtypes.complex128):
        raise NotImplementedError(
            "SparseTensorDenseMatMul op does not support "
            "complex gradients.")

    # gradient w.r.t. dense
    b_grad = sparse_ops.sparse_tensor_dense_matmul(sp_t,
                                                   grad,
                                                   adjoint_a=not adj_a)
    if adj_b:
        b_grad = array_ops.transpose(b_grad)

    # gradient w.r.t. sparse values
    a_indices = op.inputs[0]
    b = op.inputs[3]

    rows = a_indices[:, 0]
    cols = a_indices[:, 1]

    # TODO(zongheng, ebrevdo): add conjugates in the right places when complex
    # values are allowed.
    # TODO(zongheng): these gather calls could potentially duplicate rows/cols in
    # memory.  If there is a need, we should look into implementing this more
    # intelligently to avoid duplicating data.
    parts_a = array_ops.gather(grad, rows if not adj_a else cols)
    parts_b = array_ops.gather(b if not adj_b else array_ops.transpose(b),
                               cols if not adj_a else rows)
    a_values_grad = math_ops.reduce_sum(parts_a * parts_b, reduction_indices=1)

    # gradients w.r.t. (a_indices, a_values, a_shape, b)
    return (None, a_values_grad, None, b_grad)
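
To see why a_values_grad takes this form, here is a numpy check for the non-adjoint case (adj_a = adj_b = False): for C = A @ B with A sparse, the gradient of the value stored at (r, c) is the dot product of row r of the incoming gradient with row c of B. This is a sketch of the math, not the op itself:

import numpy as np

rows = np.array([0, 1, 1])         # sparse_indices[:, 0]
cols = np.array([0, 2, 3])         # sparse_indices[:, 1]
b = np.random.randn(4, 5)          # dense operand B
grad = np.random.randn(2, 5)       # incoming gradient dL/dC, C is 2x5

parts_a = grad[rows]               # gather rows of grad
parts_b = b[cols]                  # gather rows of B
a_values_grad = (parts_a * parts_b).sum(axis=1)

# Reference: dL/dA[r, c] = sum_k grad[r, k] * B[c, k]
ref = np.array([grad[r] @ b[c] for r, c in zip(rows, cols)])
assert np.allclose(a_values_grad, ref)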
Example #5
 def _SparseTensor_3x50(self, indices_dtype, values_dtype):
   ind = np.array([
       [0, 0],
       [1, 0], [1, 1], [1, 2],
       [2, 0], [2, 1]])
   # NB: these are not sorted
   indices = np.array([0, 13, 10, 14, 32, 33])
   values = np.array([-3, 4, 1, 1, 5, 9])
   shape = np.array([3, 3])
   indices = ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(indices, indices_dtype),
       constant_op.constant(shape, dtypes.int64))
   values = ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(values, values_dtype),
       constant_op.constant(shape, dtypes.int64))
   return indices, values
Example #6
def confusion_matrix(predictions, labels, num_classes=None, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as `[1, 2, 3]`
  and a label array such as `[2, 2, 3]`,

  the confusion matrix returned would be the following:

      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both the predictions and labels arrays.
    name: Scope name.

  Returns:
    An l x l matrix representing the confusion matrix, where l is the number
    of possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors of the same
                size.
  """
  with ops.op_scope([predictions, labels, num_classes], name,
                    'confusion_matrix') as name:
    predictions, labels = metric_ops.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype=dtypes.int32)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtypes.int32)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
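
The sparse-add trick above is equivalent to scatter-adding ones at (prediction, label) coordinates. A numpy sketch of the same semantics, using the docstring's example:

import numpy as np

predictions = np.array([1, 2, 3])
labels = np.array([2, 2, 3])
num_classes = max(predictions.max(), labels.max()) + 1   # 4

# Rows are predictions, columns are real labels; duplicates accumulate.
cm = np.zeros((num_classes, num_classes), dtype=np.int32)
np.add.at(cm, (predictions, labels), 1)
print(cm)
# [[0 0 0 0]
#  [0 0 1 0]
#  [0 0 1 0]
#  [0 0 0 1]]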
Example #7
 def _SparseTensor_2x3x4(self, dtype):
     # Includes two entries with the form [1, 1, x] : 150.
     ind = np.array([[0, 0, 1], [0, 1, 0], [0, 1, 2], [1, 0, 3], [1, 1, 0],
                     [1, 1, 1], [1, 1, 2], [1, 2, 2]])
     val = np.array([1, 10, 12, 103, 150, 149, 150, 122])
     shape = np.array([2, 3, 4])
     return ops.SparseTensor(constant_op.constant(ind, dtypes.int64),
                             constant_op.constant(val, dtype),
                             constant_op.constant(shape, dtypes.int64))
Example #8
 def insert_transformed_feature(self, columns_to_tensors):
   """Handles sparse column to id conversion."""
   sparse_id_values = string_ops.string_to_hash_bucket(
       columns_to_tensors[self.name].values,
       self.bucket_size,
       name=self.name + "_lookup")
   columns_to_tensors[self] = ops.SparseTensor(
       columns_to_tensors[self.name].indices, sparse_id_values,
       columns_to_tensors[self.name].shape)
Example #9
  def testBasic(self):
    with self.test_session(use_gpu=False):
      # 1-D, values at index 0.
      sp_zero = ops.SparseTensor([[0]], [0], [7])
      sp_one = ops.SparseTensor([[0]], [1], [7])
      max_tf = tf.sparse_maximum(sp_zero, sp_one).eval()
      min_tf = tf.sparse_minimum(sp_zero, sp_one).eval()
      self._assertSparseTensorValueEqual(sp_one.eval(), max_tf)
      self._assertSparseTensorValueEqual(sp_zero.eval(), min_tf)

      # Values at different indices.
      sp_zero = ops.SparseTensor([[0]], [0], [7])
      sp_zero_2 = ops.SparseTensor([[1]], [0], [7])
      expected = ops.SparseTensor([[0], [1]], [0, 0], [7])
      max_tf = tf.sparse_maximum(sp_zero, sp_zero_2).eval()
      min_tf = tf.sparse_minimum(sp_zero, sp_zero_2).eval()
      self._assertSparseTensorValueEqual(expected.eval(), max_tf)
      self._assertSparseTensorValueEqual(expected.eval(), min_tf)
Example #10
 def _apply_transform(self, input_tensors, **kwargs):
   input_tensor = input_tensors[0]
   if isinstance(input_tensor, ops.SparseTensor):
     result = ops.SparseTensor(input_tensor.indices,
                               operation(input_tensor.values),
                               input_tensor.shape)
   else:
     result = operation(input_tensor)
   # pylint: disable=not-callable
   return self.return_type(result)
Example #11
def _sparsify(x, thresh=0.5, index_dtype=np.int64):
  x[x < thresh] = 0

  non_zero = np.where(x)
  x_indices = np.vstack(non_zero).astype(index_dtype).T
  x_values = x[non_zero]
  x_shape = x.shape

  return ops.SparseTensor(
      indices=x_indices, values=x_values, shape=x_shape), len(x_values)
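
The same thresholding idea in plain numpy, round-tripping back to dense to verify (a sketch, assuming a float input in [0, 1]):

import numpy as np

x = np.random.rand(3, 4)
x[x < 0.5] = 0                      # zero out sub-threshold entries

non_zero = np.where(x)
indices = np.vstack(non_zero).astype(np.int64).T
values = x[non_zero]

# Round trip: scattering the values back reproduces the thresholded array.
dense = np.zeros_like(x)
dense[tuple(indices.T)] = values
assert np.array_equal(dense, x)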
Example #12
def sparse_add(sp_a, sp_b, thresh=0):
    """Adds two `SparseTensor` objects to produce another `SparseTensor`.

  The input `SparseTensor` objects' indices are assumed ordered in standard
  lexicographic order.  If this is not the case, before this step run
  `SparseReorder` to restore index ordering.

  By default, if two values sum to zero at some index, the output `SparseTensor`
  would still include that particular location in its index, storing a zero in
  the corresponding value slot.  To override this, callers can specify `thresh`,
  indicating that if the sum has a magnitude strictly smaller than `thresh`, its
  corresponding value and index would then not be included.  In particular,
  `thresh == 0.0` (the default) means every entry is kept; actual thresholding
  happens only for a positive `thresh`.

  For example, suppose the logical sum is (densified):

      [       2]
      [.1      ]
      [ 6   -.2]

  Then,

      - thresh == 0 (the default): all 4 index/value pairs will be returned.
      - thresh == 0.11: only .1 will vanish, and the remaining three index/value
                        pairs will be returned.
      - thresh == 0.21: both .1 and -.2 will vanish.

  Args:
    sp_a: The first input `SparseTensor`.
    sp_b: The second input `SparseTensor`.
    thresh: A 0-D `Tensor`.  The magnitude threshold that determines if an
      output value/index pair takes space.  Its dtype should match that of
      the values if they are real; if the latter are complex64/complex128,
      then the dtype should be float32/float64, correspondingly.

  Returns:
    A `SparseTensor` with the same shape, representing the sum.

  Raises:
    TypeError: If either `sp_a` or `sp_b` is not a `SparseTensor`.
  """
    if not all(
            isinstance(sp_input, ops.SparseTensor)
            for sp_input in [sp_a, sp_b]):
        raise TypeError("All inputs must be SparseTensors")

    thresh = ops.convert_to_tensor(thresh,
                                   dtype=sp_a.values.dtype.real_dtype,
                                   name="thresh")
    output_ind, output_val, output_shape = (gen_sparse_ops._sparse_add(
        sp_a.indices, sp_a.values, sp_a.shape, sp_b.indices, sp_b.values,
        sp_b.shape, thresh))

    return ops.SparseTensor(output_ind, output_val, output_shape)
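
The thresh semantics can be checked against the docstring's own example. A numpy sketch (the real op works on the sparse representation directly; this dense version cannot see explicitly stored zeros, so thresh == 0 is approximated by keeping all nonzeros):

import numpy as np

summed = np.array([[0.0, 2.0],
                   [0.1, 0.0],
                   [6.0, -0.2]])

def surviving_pairs(dense, thresh):
    # Keep entries whose magnitude is >= thresh.
    mask = np.abs(dense) >= thresh if thresh > 0 else dense != 0
    return list(zip(np.argwhere(mask).tolist(), dense[mask].tolist()))

print(len(surviving_pairs(summed, 0)))     # 4 pairs
print(len(surviving_pairs(summed, 0.11)))  # 3 pairs: .1 vanishes
print(len(surviving_pairs(summed, 0.21)))  # 2 pairs: .1 and -.2 vanish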
Example #13
 def _SparseTensor_String5x6(self):
   ind = np.array([
       [0, 0],
       [1, 0], [1, 3], [1, 4],
       [3, 2], [3, 3]])
   val = np.array(["a", "b", "c", "d", "e", "f"])
   shape = np.array([5, 6])
   return ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(val, dtypes.string),
       constant_op.constant(shape, dtypes.int64))
Example #14
def _sparse_to_sparse_shape(op):
    """Shapes for `SparseTensor` result given 2 sparse inputs.

  Args:
    op: Operation with 2 `SparseTensor` inputs.

  Returns:
    Tuple of three shapes corresponding to the indices, values, and shape
    `Tensor` components of the result `SparseTensor`.
  """
    # The following should stay in sync with `ComputeSparseToSparse` shape
    # assertions in kernels/set_kernels.cc.
    # Assert valid dimensions for the 3 `Tensor` components of `SparseTensor`.
    ops.SparseTensor(op.inputs[0], op.inputs[1], op.inputs[2])
    ops.SparseTensor(op.inputs[3], op.inputs[4], op.inputs[5])

    indices_shape = tensor_shape.unknown_shape(2)
    values_shape = tensor_shape.unknown_shape(1)
    shape_shape = tensor_shape.unknown_shape(1)
    return (indices_shape, values_shape, shape_shape)
Example #15
 def _SparseTensor_5x6(self, dtype):
   ind = np.array([
       [0, 0],
       [1, 0], [1, 3], [1, 4],
       [3, 2], [3, 3]])
   val = np.array([0, 10, 13, 14, 32, 33])
   shape = np.array([5, 6])
   return ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(val, dtype),
       constant_op.constant(shape, dtypes.int64))
Example #16
def ctc_beam_search_decoder(inputs,
                            sequence_length,
                            beam_width=100,
                            top_paths=1,
                            merge_repeated=True):
    """Performs beam search decoding on the logits given in input.

  **Note** The `ctc_greedy_decoder` is a special case of the
  `ctc_beam_search_decoder` with `top_paths=1` (but that decoder is faster
  for this special case).

  If `merge_repeated` is `True`, merge repeated classes in the output beams.
  This means that if consecutive entries in a beam are the same,
  only the first of these is emitted.  That is, when the top path
  is `A B B B B`, the return value is:

    * `A B` if `merge_repeated = True`.
    * `A B B B B` if `merge_repeated = False`.

  Args:
    inputs: 3-D `float` `Tensor`, size
      `[max_time x batch_size x num_classes]`.  The logits.
    sequence_length: 1-D `int32` vector containing sequence lengths,
      having size `[batch_size]`.
    beam_width: An int scalar >= 0 (beam search beam width).
    top_paths: An int scalar >= 0, <= beam_width (controls output size).
    merge_repeated: Boolean.  Default: True.

  Returns:
    A tuple `(decoded, log_probabilities)` where
    decoded: A list of length top_paths, where `decoded[j]`
      is a `SparseTensor` containing the decoded outputs:
      `decoded[j].indices`: Indices matrix `(total_decoded_outputs[j] x 2)`
        The rows store: [batch, time].
      `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`.
        The vector stores the decoded classes for beam j.
      `decoded[j].shape`: Shape vector, size `(2)`.
        The shape values are: `[batch_size, max_decoded_length[j]]`.
    log_probabilities: A `float` matrix `(batch_size x top_paths)` containing
        sequence log-probabilities.
  """

    decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
        gen_ctc_ops._ctc_beam_search_decoder(inputs,
                                             sequence_length,
                                             beam_width=beam_width,
                                             top_paths=top_paths,
                                             merge_repeated=merge_repeated))

    return ([
        ops.SparseTensor(ix, val, shape)
        for (ix, val, shape) in zip(decoded_ixs, decoded_vals, decoded_shapes)
    ], log_probabilities)
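
The merge_repeated collapse described in the docstring is a run-length merge on each decoded path. A plain-Python sketch:

def merge_repeats(path):
    # Keep only the first element of each run of identical entries.
    out = []
    for label in path:
        if not out or out[-1] != label:
            out.append(label)
    return out

path = ["A", "B", "B", "B", "B"]
print(merge_repeats(path))  # ['A', 'B']                (merge_repeated=True)
print(path)                 # ['A', 'B', 'B', 'B', 'B'] (merge_repeated=False)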
Example #17
 def _SparseTensor_3x50(self, indices_dtype, values_dtype):
   # NOTE: This input is intentionally not sorted to validate the
   # already_sorted flag below.
   ind = np.array([
       [0, 0],
       [1, 0], [1, 2],
       [2, 0], [2, 1],
       [1, 1]])
   # NB: these are not sorted
   indices = np.array([0, 13, 10, 33, 32, 14])
   values = np.array([-3, 4, 1, 9, 5, 1])
   shape = np.array([3, 3])
   indices = ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(indices, indices_dtype),
       constant_op.constant(shape, dtypes.int64))
   values = ops.SparseTensor(
       constant_op.constant(ind, dtypes.int64),
       constant_op.constant(values, values_dtype),
       constant_op.constant(shape, dtypes.int64))
   return indices, values
Example #18
 def _dense_to_sparse_tensor(dense_tensor):
     """Returns a SparseTensor for the input dense_tensor."""
     ignore_value = 0.0
     sparse_indices = array_ops.where(
         math_ops.not_equal(
             dense_tensor,
             math_ops.cast(ignore_value, dense_tensor.dtype)))
     sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
     # SparseTensor needs the shape to be converted to int64.
     int64_shape = math_ops.to_int64(array_ops.shape(dense_tensor))
     return ops.SparseTensor(sparse_indices,
                             sparse_values,
                             shape=int64_shape)
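
The same conversion in numpy terms, with the hard-coded ignore_value = 0.0 (a standalone sketch mirroring array_ops.where plus array_ops.gather_nd):

import numpy as np

dense = np.array([[0.0, 1.5, 0.0],
                  [2.0, 0.0, 0.0]])

indices = np.argwhere(dense != 0.0)           # like array_ops.where
values = dense[tuple(indices.T)]              # like array_ops.gather_nd
shape = np.asarray(dense.shape, np.int64)     # SparseTensor wants int64

print(indices)  # [[0 1] [1 0]]
print(values)   # [1.5 2. ]
print(shape)    # [2 3]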
Example #19
 def testFetchSparseTensor(self):
   with session.Session() as s:
     indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
     values = np.array([1.0, 2.0]).astype(np.float32)
     shape = np.array([7, 9, 2]).astype(np.int64)
     sp = ops.SparseTensor(
         constant_op.constant(indices),
         constant_op.constant(values),
         constant_op.constant(shape))
     # Single fetch, use as tuple
     sp_out = s.run(sp)
     indices_out, values_out, shape_out = sp_out
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # Single fetch, use as SparseTensorValue
     sp_out = s.run(sp)
     self.assertAllEqual(sp_out.indices, indices)
     self.assertAllEqual(sp_out.values, values)
     self.assertAllEqual(sp_out.shape, shape)
     # Tuple fetch, use as tuple
     indices_out, values_out, shape_out = s.run(sp)
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # List fetch, use as tuple
     (indices_out, values_out, shape_out), = s.run([sp])
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # List fetch, use as SparseTensorValue
     sp_out, = s.run([sp])
     self.assertAllEqual(sp_out.indices, indices)
     self.assertAllEqual(sp_out.values, values)
     self.assertAllEqual(sp_out.shape, shape)
     # Dict fetch (single value), use as tuple
     indices_out, values_out, shape_out = s.run({'sp': sp})['sp']
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # Dict fetch (list value), use as tuple
     (indices_out, values_out, shape_out), = s.run({'sp': [sp]})['sp']
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # Dict fetch, use as SparseTensorValue
     sp_out = s.run({'sp': sp})['sp']
     self.assertAllEqual(sp_out.indices, indices)
     self.assertAllEqual(sp_out.values, values)
     self.assertAllEqual(sp_out.shape, shape)
Example #20
def sparse_split(split_dim, num_split, sp_input, name=None):
    """Split a `SparseTensor` into `num_split` tensors along `split_dim`.

  If `sp_input.shape[split_dim]` is not an integer multiple of `num_split`,
  each of the first `shape[split_dim] % num_split` slices gets one extra
  element along the split dimension (a small sketch of this rule follows the
  code below). For example, if `split_dim = 1` and `num_split = 2` and the
  input is:

      input_tensor = shape = [2, 7]
      [    a   d e  ]
      [b c          ]

  Graphically the output tensors are:

      output_tensor[0] =
      [    a ]
      [b c   ]

      output_tensor[1] =
      [ d e  ]
      [      ]

  Args:
    split_dim: A 0-D `int32` `Tensor`. The dimension along which to split.
    num_split: A Python integer. The number of ways to split.
    sp_input: The `SparseTensor` to split.
    name: A name for the operation (optional).

  Returns:
    `num_split` `SparseTensor` objects resulting from splitting `sp_input`.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
    if not isinstance(sp_input, ops.SparseTensor):
        raise TypeError("Input must be a SparseTensor")

    output_inds, output_vals, output_shapes = (gen_sparse_ops._sparse_split(
        split_dim,
        sp_input.indices,
        sp_input.values,
        sp_input.shape,
        num_split,
        name=name))
    sparse_tensors = []
    for i in range(0, num_split):
        sparse_tensors.append(
            ops.SparseTensor(output_inds[i], output_vals[i], output_shapes[i]))
    return sparse_tensors
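
The slice-size rule from the docstring (the first shape[split_dim] % num_split slices get one extra element) in a small Python sketch:

def split_sizes(dim_size, num_split):
    # The first (dim_size % num_split) slices get one extra element.
    base, extra = divmod(dim_size, num_split)
    return [base + 1 if i < extra else base for i in range(num_split)]

print(split_sizes(7, 2))  # [4, 3] -> matches the shape = [2, 7] example above
print(split_sizes(6, 3))  # [2, 2, 2]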
Example #21
def _set_operation(a, b, set_operation, validate_indices=True):
    """Compute set operation of elements in last dimension of `a` and `b`.

  All but the last dimension of `a` and `b` must match.

  Args:
    a: `Tensor` or `SparseTensor` of the same type as `b`. If sparse, indices
        must be sorted in row-major order.
    b: `Tensor` or `SparseTensor` of the same type as `a`. Must be
        `SparseTensor` if `a` is `SparseTensor`. If sparse, indices must be
        sorted in row-major order.
    set_operation: String indicating the set operation. See
        SetOperationOp::SetOperationFromContext for valid values.
    validate_indices: Whether to validate the order and range of sparse indices
       in `a` and `b`.

  Returns:
    A `SparseTensor` with the same rank as `a` and `b`, and all but the last
    dimension the same. Elements along the last dimension contain the results
    of the set operation.

  Raises:
    TypeError: If inputs are invalid types.
    ValueError: If `a` is sparse and `b` is dense.
  """
    a = framework.convert_to_tensor_or_sparse_tensor(a, name="a")
    if a.dtype.base_dtype not in _VALID_DTYPES:
        raise TypeError("'a' invalid dtype %s." % a.dtype)
    b = framework.convert_to_tensor_or_sparse_tensor(b, name="b")
    if b.dtype.base_dtype != a.dtype.base_dtype:
        raise TypeError("Types don't match, %s vs %s." % (a.dtype, b.dtype))
    # pylint: disable=protected-access
    if isinstance(a, ops.SparseTensor):
        if isinstance(b, ops.SparseTensor):
            indices, values, shape = _set_ops.sparse_to_sparse_set_operation(
                a.indices, a.values, a.shape, b.indices, b.values, b.shape,
                set_operation, validate_indices)
        else:
            raise ValueError(
                "Sparse,Dense is not supported, but Dense,Sparse is. "
                "Please flip the order of your inputs.")
    elif isinstance(b, ops.SparseTensor):
        indices, values, shape = _set_ops.dense_to_sparse_set_operation(
            a, b.indices, b.values, b.shape, set_operation, validate_indices)
    else:
        indices, values, shape = _set_ops.dense_to_dense_set_operation(
            a, b, set_operation, validate_indices)
    # pylint: enable=protected-access
    return ops.SparseTensor(indices, values, shape)
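
Semantically, the op applies the set operation along the last dimension, and the result rows can be ragged, which is why a SparseTensor comes back. A plain-Python sketch of intersection on a rank-2 input (the kernel does this without materializing Python sets):

a = [[1, 2, 3, 4], [5, 6, 7, 8]]
b = [[2, 4, 9, 9], [5, 5, 5, 5]]

# Intersection of each row; rows of different lengths force a sparse result.
result = [sorted(set(row_a) & set(row_b)) for row_a, row_b in zip(a, b)]
print(result)  # [[2, 4], [5]]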
Example #22
def string_split(source, delimiter=" "):  # pylint: disable=invalid-name
    """Split elements of `source` based on `delimiter` into a `SparseTensor`.

  Let N be the size of `source` (typically N will be the batch size). Split
  each element of `source` based on `delimiter` and return a `SparseTensor`
  containing the split tokens. Empty tokens are ignored.

  If `delimiter` is an empty string, each element of `source` is split into
  individual 1-character strings.

  For example, if N = 2, source[0] is 'hello world' and source[1] is 'a b c',
  then the output will be

  st.indices = [0, 0;
                0, 1;
                1, 0;
                1, 1;
                1, 2]
  st.shape = [2, 3]
  st.values = ['hello', 'world', 'a', 'b', 'c']

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    delimiter: `0-D` string `Tensor`, the delimiter character. The string
      should have length 0 or 1.

  Returns:
    A `SparseTensor` of rank `2`, the strings split according to the delimiter.
    The first column of the indices corresponds to the row in `source` and the
    second column corresponds to the index of the split component in this row.

  Raises:
    ValueError: If delimiter is not a character.
  """
    if isinstance(delimiter, six.string_types) and len(delimiter) > 1:
        raise ValueError("delimiter must be a character, got %s" % delimiter)
    delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
    source = ops.convert_to_tensor(source, dtype=dtypes.string)

    # pylint: disable=protected-access
    indices, values, shape = gen_string_ops._string_split(source,
                                                          delimiter=delimiter)
    # pylint: enable=protected-access
    indices.set_shape([None, 2])
    values.set_shape([None])
    shape.set_shape([2])
    return ops.SparseTensor(indices, values, shape)
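
A plain-Python sketch of the indices/values/shape the op produces for the docstring's example (assuming a single-character delimiter and skipping empty tokens):

source = ["hello world", "a b c"]

indices, values = [], []
for row, s in enumerate(source):
    for col, token in enumerate(t for t in s.split(" ") if t):
        indices.append([row, col])
        values.append(token)
shape = [len(source), max(idx[1] for idx in indices) + 1]

print(indices)  # [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
print(values)   # ['hello', 'world', 'a', 'b', 'c']
print(shape)    # [2, 3]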
Example #23
def sparse_reorder(sp_input, name=None):
  """Reorders a `SparseTensor` into the canonical, row-major ordering.

  Note that by convention, all sparse ops preserve the canonical ordering
  along increasing dimension number. The only time ordering can be violated
  is during manual manipulation of the indices and values to add entries.

  Reordering does not affect the shape of the `SparseTensor`.

  For example, if sp_input has shape `[4, 5]` and `indices` / `values`:

      [0, 3]: b
      [0, 1]: a
      [3, 1]: d
      [2, 0]: c

  then the output will be a `SparseTensor` of shape `[4, 5]` and
  `indices` / `values`:

      [0, 1]: a
      [0, 3]: b
      [2, 0]: c
      [3, 1]: d

  Args:
    sp_input: The input `SparseTensor`.
    name: A name prefix for the returned tensors (optional)

  Returns:
    A `SparseTensor` with the same shape and non-empty values, but in
    canonical ordering.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
  if not isinstance(sp_input, ops.SparseTensor):
    raise TypeError("Input must be a SparseTensor")

  reordered_ind, reordered_val = (
      gen_sparse_ops._sparse_reorder(
          sp_input.indices,
          sp_input.values,
          sp_input.shape,
          name=name))

  return ops.SparseTensor(
      reordered_ind, reordered_val, array_ops.identity(sp_input.shape))
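
Canonical row-major ordering is a lexicographic sort of the index rows. A numpy sketch using the docstring's example:

import numpy as np

indices = np.array([[0, 3], [0, 1], [3, 1], [2, 0]])
values = np.array(["b", "a", "d", "c"])

# lexsort treats its last key as primary, so feed the columns right-to-left.
order = np.lexsort(indices.T[::-1])
print(indices[order])  # [[0 1] [0 3] [2 0] [3 1]]
print(values[order])   # ['a' 'b' 'c' 'd']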
Example #24
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments,
                                   num_outputs,
                                   trainable,
                                   weight_collections):
  """Creates an embedding lookup for all columns sharing a single weight."""
  for arg in embedding_lookup_arguments:
    assert arg.weight_tensor is None, (
        'Joint sums for weighted sparse columns are not supported. '
        'Please use weighted_sum_from_feature_columns instead.')
    assert arg.combiner == 'sum', (
        'Combiners other than sum are not supported for joint sums. '
        'Please use weighted_sum_from_feature_columns instead.')
  assert len(embedding_lookup_arguments) >= 1, (
      'At least one column must be in the model.')
  prev_size = 0
  sparse_tensors = []
  for a in embedding_lookup_arguments:
    t = a.input_tensor
    values = t.values + prev_size
    prev_size += a.vocab_size
    sparse_tensors.append(
        ops.SparseTensor(t.indices,
                         values,
                         t.shape))
  sparse_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
  with variable_scope.variable_scope(
      None, default_name='linear_weights', values=columns_to_tensors.values()):
    variable = contrib_variables.model_variable(
        name='weights',
        shape=[prev_size, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer,
        trainable=trainable,
        collections=weight_collections)
    if isinstance(variable, variables.Variable):
      variable = [variable]
    else:
      variable = variable._get_variable_list()  # pylint: disable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        sparse_tensor,
        sparse_weights=None,
        default_id=0,
        combiner='sum',
        name='_weights')
    return variable, predictions
Example #25
def dense_to_sparse_tensor(dense_tensor, ignore_value=None):
    """Converts a dense Tensor to a SparseTensor, dropping ignore_value cells.

  Args:
    dense_tensor: A `Tensor`.
    ignore_value: Entries in `dense_tensor` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, the default value
      of `dense_tensor`'s dtype will be used (e.g. '' for `str`, 0 for `int`).

  Returns:
    A `SparseTensor` with the same shape as `dense_tensor`.

  Raises:
    ValueError: when `dense_tensor`'s rank is `None`.
  """
    with ops.name_scope("DenseToSparseTensor"):
        dense_t = ops.convert_to_tensor(dense_tensor)
        if dense_t.get_shape().ndims is None:
            # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
            raise ValueError(
                "dense_tensor.get_shape() should be defined, got None.")
        if ignore_value is None:
            if dense_t.dtype == dtypes.string:
                # Strings are a special case: TF converts them to numpy
                # objects by default, so use '' explicitly here.
                ignore_value = ""
            else:
                ignore_value = dense_t.dtype.as_numpy_dtype()
        dense_shape = math_ops.cast(array_ops.shape(dense_t), dtypes.int64)
        indices = array_ops.where(
            math_ops.not_equal(dense_t,
                               math_ops.cast(ignore_value, dense_t.dtype)))
        index_dims = len(dense_t.get_shape())
        # Flattens the tensor and indices for use with gather.
        flat_tensor = array_ops.reshape(dense_t, [-1])
        flat_indices = indices[:, index_dims - 1]
        # Computes the correct flattened indices for 2d (or higher) tensors.
        if index_dims > 1:
            higher_dims = indices[:, :index_dims - 1]
            shape_multipliers = array_ops.pack(
                _multiplier_helper(array_ops.unpack(dense_shape)[1:]))
            offsets = math_ops.reduce_sum(math_ops.mul(higher_dims,
                                                       shape_multipliers),
                                          reduction_indices=[1])
            flat_indices = math_ops.add(flat_indices, offsets)
        values = array_ops.gather(flat_tensor, flat_indices)
        return ops.SparseTensor(indices, values, dense_shape)
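
The offsets arithmetic above computes row-major flat indices from multi-dimensional ones; numpy's ravel_multi_index does the same thing and makes a handy cross-check (a sketch, not the function above):

import numpy as np

dense_shape = (2, 3, 4)
indices = np.array([[0, 1, 2], [1, 2, 3]])   # two nonzero coordinates

# Manual version, mirroring shape_multipliers/offsets: multiply each index
# by the product of the dimensions that follow it.
multipliers = np.array([12, 4, 1])
flat_manual = (indices * multipliers).sum(axis=1)

flat_np = np.ravel_multi_index(indices.T, dense_shape)
assert np.array_equal(flat_manual, flat_np)
print(flat_manual)  # [ 6 23]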
Example #26
def sparse_retain(sp_input, to_retain):
    """Retains specified non-empty values within a `SparseTensor`.

  For example, if `sp_input` has shape `[4, 5]` and 4 non-empty string values:

      [0, 1]: a
      [0, 3]: b
      [2, 0]: c
      [3, 1]: d

  and `to_retain = [True, False, False, True]`, then the output will
  be a `SparseTensor` of shape `[4, 5]` with 2 non-empty values:

      [0, 1]: a
      [3, 1]: d

  Args:
    sp_input: The input `SparseTensor` with `N` non-empty elements.
    to_retain: A bool vector of length `N` with `M` true values.

  Returns:
    A `SparseTensor` with the same shape as the input and `M` non-empty
    elements corresponding to the true positions in `to_retain`.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
    if not isinstance(sp_input, ops.SparseTensor):
        raise TypeError("Input must be a SparseTensor")

    to_retain = ops.convert_to_tensor(to_retain)

    # Shape checking, if shape is known at graph construction time
    retain_shape = to_retain.get_shape()
    retain_shape.assert_has_rank(1)
    sp_input.values.get_shape()[0].merge_with(retain_shape[0])

    where_true = array_ops.reshape(array_ops.where(to_retain), [-1])
    new_indices = array_ops.gather(sp_input.indices, where_true)
    new_values = array_ops.gather(sp_input.values, where_true)
    return ops.SparseTensor(new_indices, new_values,
                            array_ops.identity(sp_input.shape))
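
In numpy terms, sparse_retain is a boolean mask over the coordinate and value lists, with the dense shape passing through unchanged. A minimal sketch using the docstring's example:

import numpy as np

indices = np.array([[0, 1], [0, 3], [2, 0], [3, 1]])
values = np.array(["a", "b", "c", "d"])
to_retain = np.array([True, False, False, True])

print(indices[to_retain])  # [[0 1] [3 1]]
print(values[to_retain])   # ['a' 'd']
# The dense shape [4, 5] is unchanged.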
Example #27
    def to_weighted_sum(self,
                        input_tensor,
                        num_outputs=1,
                        weight_collections=None,
                        trainable=True):
        """Returns a Tensor as linear predictions and a list of created Variable."""
        dimension = self.source_column.dimension
        batch_size = array_ops.shape(input_tensor)[0]

        if dimension > 1:
            i1 = array_ops.reshape(
                array_ops.tile(
                    array_ops.expand_dims(math_ops.range(0, batch_size), 1),
                    [1, dimension]), [-1])
            i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
            # Flatten the bucket indices and unique them across dimensions
            # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
            # TODO(chapelle): move that logic to insert_transformed_feature to ensure
            #   unique buckets across dimensions after crossing.
            bucket_indices = array_ops.reshape(input_tensor,
                                               [-1]) + self.length * i2
        else:
            # Simpler indices when dimension=1
            i1 = math_ops.range(0, batch_size)
            i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
            bucket_indices = array_ops.reshape(input_tensor, [-1])

        indices = math_ops.to_int64(
            array_ops.transpose(array_ops.pack((i1, i2))))
        shape = math_ops.to_int64(array_ops.pack([batch_size, 1]))
        sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
        vocab_size = self.length * self.source_column.dimension

        return _create_embedding_lookup(
            input_tensor=sparse_id_values,
            vocab_size=vocab_size,
            dimension=num_outputs,
            weight_collections=_add_variable_collection(weight_collections),
            initializer=init_ops.zeros_initializer,
            combiner="sum",
            trainable=trainable,
            name=self.name + "_weights")
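
The index construction above pairs every batch row with every dimension slot. A numpy sketch for batch_size = 2 and dimension = 3 (illustrative values only):

import numpy as np

batch_size, dimension, length = 2, 3, 10

i1 = np.tile(np.arange(batch_size)[:, None], [1, dimension]).reshape(-1)
i2 = np.tile(np.arange(dimension), [batch_size])
print(i1)  # [0 0 0 1 1 1]
print(i2)  # [0 1 2 0 1 2]

# Bucket k in dimension d is remapped to d * length + k, which keeps bucket
# ids unique across dimensions, as in bucket_indices above.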
Example #28
    def testAssertSameGraph(self):
        g0 = ops.Graph()
        a = g0.create_op("a", [], [dtypes.float32])
        b = g0.create_op("b", [], [dtypes.float32])
        ops.assert_same_graph([a, b])
        ops.assert_same_graph([a, b], g0)
        g1 = ops.Graph()
        c = g1.create_op("c", [], [dtypes.float32])
        self.assertRaises(ValueError, ops.assert_same_graph, [a, b, c])
        self.assertRaises(ValueError, ops.assert_same_graph, [c], g0)
        self.assertRaises(ValueError, ops.assert_same_graph, [a], g1)

        sparse = ops.SparseTensor(_apply_op(g0, "const", [], [dtypes.int64]),
                                  _apply_op(g0, "const", [], [dtypes.float32]),
                                  _apply_op(g0, "const", [], [dtypes.int64]))
        ops.assert_same_graph([sparse, a, b])
        ops.assert_same_graph([sparse, a, b], g0)
        self.assertRaises(ValueError, ops.assert_same_graph, [sparse, a, c])
        self.assertRaises(ValueError, ops.assert_same_graph, [sparse, a, c],
                          g1)
Example #29
def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
    """Performs greedy decoding on the logits given in input (best path).

  Note: Regardless of the value of merge_repeated, if the maximum index of a
  given time and batch corresponds to the blank index `(num_classes - 1)`, no
  new element is emitted.

  If merge_repeated is `True`, merge repeated classes in output.
  This means that if consecutive logits' maximum indices are the same,
  only the first of these is emitted.  Labeling the blank '*', the sequence
  "A B B * B B" becomes "A B" if `merge_repeated = True` and "A B B B B"
  if `merge_repeated = False`.


  Args:
    inputs: 3-D `float` `Tensor` sized
      `[max_time x batch_size x num_classes]`.  The logits.
    sequence_length: 1-D `int32` vector containing sequence lengths,
      having size `[batch_size]`.
    merge_repeated: Boolean.  Default: True.


  Returns:
    A tuple `(decoded, log_probabilities)` where
    decoded: A single-element list. `decoded[0]`
      is a `SparseTensor` containing the decoded outputs s.t.:
      `decoded.indices`: Indices matrix `(total_decoded_outputs x 2)`.
        The rows store: `[batch, time]`.
      `decoded.values`: Values vector, size `(total_decoded_outputs)`.
        The vector stores the decoded classes.
      `decoded.shape`: Shape vector, size `(2)`.
        The shape values are: `[batch_size, max_decoded_length]`
    log_probability: A `float` matrix `(batch_size x 1)` containing sequence
        log-probabilities.
  """
    outputs = gen_ctc_ops._ctc_greedy_decoder(inputs,
                                              sequence_length,
                                              merge_repeated=merge_repeated)
    (decoded_ix, decoded_val, decoded_shape, log_probabilities) = outputs
    return ([ops.SparseTensor(decoded_ix, decoded_val,
                              decoded_shape)], log_probabilities)
Example #30
    def lookup(self, keys, name=None):
        """Looks up `keys` in a table, outputs the corresponding values.

    The `default_value` is used for keys not present in the table.

    Args:
      keys: Keys to look up. May be either a `SparseTensor` or dense `Tensor`.
      name: A name for the operation (optional).

    Returns:
      A `SparseTensor` if keys are sparse, otherwise a dense `Tensor`.

    Raises:
      TypeError: when `keys` or `default_value` doesn't match the table data
        types.
    """
        if name is None:
            name = "%s_lookup_table_find" % self._name

        key_tensor = keys
        if isinstance(keys, ops.SparseTensor):
            key_tensor = keys.values

        if keys.dtype != self._key_dtype:
            raise TypeError(
                "Signature mismatch. Keys must be dtype %s, got %s." %
                (self._key_dtype, keys.dtype))

        # pylint: disable=protected-access
        values = gen_data_flow_ops._lookup_table_find(self._table_ref,
                                                      key_tensor,
                                                      self._default_value,
                                                      name=name)
        # pylint: enable=protected-access

        values.set_shape(key_tensor.get_shape())
        if isinstance(keys, ops.SparseTensor):
            return ops.SparseTensor(keys.indices, values, keys.shape)
        else:
            return values
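
A dict-based sketch of the sparse branch of lookup: only the values component is mapped through the table; indices and shape pass through untouched (hypothetical table contents):

table = {"a": 1, "b": 2}
default_value = -1

sparse_values = ["a", "x", "b"]
looked_up = [table.get(k, default_value) for k in sparse_values]
print(looked_up)  # [1, -1, 2]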