Example #1
 def _SparseTensor_2x5x6(self):
   return sparse_tensor.SparseTensor(
       constant_op.constant(self._IND_2_5_6, dtypes.int64),
       constant_op.constant(self._VAL_2_5_6, dtypes.int32),
       constant_op.constant(self._SHP_2_5_6, dtypes.int64))
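The `_IND_2_5_6`, `_VAL_2_5_6` and `_SHP_2_5_6` fixtures are defined elsewhere in the test class. A minimal self-contained sketch of what the helper builds follows; the concrete coordinates and values below are illustrative assumptions (TF 2.x eager API), not the test's actual data:

import numpy as np
import tensorflow as tf

# Hypothetical stand-ins for the class fixtures referenced above.
IND_2_5_6 = np.array([[0, 0, 0], [0, 1, 1], [1, 2, 2]], dtype=np.int64)  # [N, 3] coordinates
VAL_2_5_6 = np.array([1, 2, 3], dtype=np.int32)                          # N stored values
SHP_2_5_6 = np.array([2, 5, 6], dtype=np.int64)                          # dense shape

sp = tf.sparse.SparseTensor(IND_2_5_6, VAL_2_5_6, SHP_2_5_6)
print(sp.dense_shape.numpy())  # [2 5 6]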
Example #2
 def sparse_lookup():
     sp_ids = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1],
                                                  [1, 0], [2, 2]],
                                         values=[0, 3, 4, 1],
                                         dense_shape=[3, 3])
     return embedding_ops.embedding_lookup_sparse_v2(sv, sp_ids, None)
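`sv` here comes from the enclosing test scope (a sharded/partitioned variable). A minimal self-contained variant using the public TF 2.x API, with an ordinary `tf.Variable` standing in for `sv` (an assumption for illustration):

import tensorflow as tf

params = tf.Variable(tf.random.normal([5, 4]))  # vocabulary of 5 ids, dim-4 embeddings
sp_ids = tf.sparse.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [2, 2]],
                                values=[0, 3, 4, 1],
                                dense_shape=[3, 3])
# sp_weights=None means every id gets weight 1.0; rows are combined with "mean".
result = tf.nn.embedding_lookup_sparse(params, sp_ids, sp_weights=None)
print(result.shape)  # (3, 4): one combined embedding per row of sp_ids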
Example #3
 def make_sparse(_):
     return sparse_tensor.SparseTensor(
         indices=array_ops.constant([[0, 0], [1, 0], [1, 1]],
                                    dtype=dtypes.int64),
         values=array_ops.constant([0, 0, 1], dtype=dtypes.int32),
         dense_shape=array_ops.constant([2, 2], dtype=dtypes.int64))
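For reference, densifying the tensor this factory builds (a sketch, assuming TF 2.x eager mode):

import tensorflow as tf

sp = tf.sparse.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                            values=[0, 0, 1], dense_shape=[2, 2])
print(tf.sparse.to_dense(sp).numpy())
# [[0 0]
#  [0 1]]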
Example #4
 def call(self, inputs):
     indices = array_ops.where_v2(math_ops.not_equal(inputs, 0))
     values = array_ops.gather_nd(inputs, indices)
     shape = array_ops.shape(inputs, out_type=dtypes.int64)
     return sparse_tensor.SparseTensor(indices, values, dense_shape=shape)
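This where/gather_nd/shape pattern is the standard dense-to-sparse conversion; in TF 2.x the same result is available directly through the public API (a sketch, not part of the original layer):

import tensorflow as tf

dense = tf.constant([[0, 7], [3, 0]])
sp = tf.sparse.from_dense(dense)  # keeps only the non-zero entries
print(sp.indices.numpy())  # [[0 1]
                           #  [1 0]]
print(sp.values.numpy())   # [7 3]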
Example #5
  def _test_set_intersection_3d(self, dtype, invalid_indices=False):
    if invalid_indices:
      indices = constant_op.constant(
          [
              [0, 1, 0],
              [0, 1, 1],  # 0,1
              [1, 0, 0],  # 1,0
              [1, 1, 0],
              [1, 1, 1],
              [1, 1, 2],  # 1,1
              [0, 0, 0],
              [0, 0, 2],  # 0,0
              # 2,0
              [2, 1, 1]  # 2,1
              # 3,*
          ],
          dtypes.int64)
    else:
      indices = constant_op.constant(
          [
              [0, 0, 0],
              [0, 0, 2],  # 0,0
              [0, 1, 0],
              [0, 1, 1],  # 0,1
              [1, 0, 0],  # 1,0
              [1, 1, 0],
              [1, 1, 1],
              [1, 1, 2],  # 1,1
              # 2,0
              [2, 1, 1]  # 2,1
              # 3,*
          ],
          dtypes.int64)
    sp_a = sparse_tensor_lib.SparseTensor(
        indices,
        _constant(
            [
                1,
                9,  # 0,0
                3,
                3,  # 0,1
                1,  # 1,0
                9,
                7,
                8,  # 1,1
                # 2,0
                5  # 2,1
                # 3,*
            ],
            dtype),
        constant_op.constant([4, 2, 3], dtypes.int64))
    sp_b = sparse_tensor_lib.SparseTensor(
        constant_op.constant(
            [
                [0, 0, 0],
                [0, 0, 3],  # 0,0
                # 0,1
                [1, 0, 0],  # 1,0
                [1, 1, 0],
                [1, 1, 1],  # 1,1
                [2, 0, 1],  # 2,0
                [2, 1, 1],  # 2,1
                [3, 0, 0],  # 3,0
                [3, 1, 0]  # 3,1
            ],
            dtypes.int64),
        _constant(
            [
                1,
                3,  # 0,0
                # 0,1
                3,  # 1,0
                7,
                8,  # 1,1
                2,  # 2,0
                5,  # 2,1
                4,  # 3,0
                4  # 3,1
            ],
            dtype),
        constant_op.constant([4, 2, 4], dtypes.int64))

    if invalid_indices:
      with self.assertRaisesRegexp(errors_impl.OpError, "out of order"):
        self._set_intersection(sp_a, sp_b)
    else:
      expected_indices = [
          [0, 0, 0],  # 0,0
          # 0,1
          # 1,0
          [1, 1, 0],
          [1, 1, 1],  # 1,1
          # 2,0
          [2, 1, 0],  # 2,1
          # 3,*
      ]
      expected_values = _values(
          [
              1,  # 0,0
              # 0,1
              # 1,0
              7,
              8,  # 1,1
              # 2,0
              5,  # 2,1
              # 3,*
          ],
          dtype)
      expected_shape = [4, 2, 2]
      expected_counts = [
          [
              1,  # 0,0
              0  # 0,1
          ],
          [
              0,  # 1,0
              2  # 1,1
          ],
          [
              0,  # 2,0
              1  # 2,1
          ],
          [
              0,  # 3,0
              0  # 3,1
          ]
      ]

      # Sparse to sparse.
      intersection = self._set_intersection(sp_a, sp_b)
      self._assert_set_operation(
          expected_indices,
          expected_values,
          expected_shape,
          intersection,
          dtype=dtype)
      self.assertAllEqual(expected_counts,
                          self._set_intersection_count(sp_a, sp_b))

      # NOTE: sparse_to_dense doesn't support uint8 and uint16.
      if dtype not in [dtypes.uint8, dtypes.uint16]:
        # Dense to sparse.
        a = math_ops.cast(
            sparse_ops.sparse_to_dense(
                sp_a.indices,
                sp_a.dense_shape,
                sp_a.values,
                default_value="-1" if dtype == dtypes.string else -1),
            dtype=dtype)
        intersection = self._set_intersection(a, sp_b)
        self._assert_set_operation(
            expected_indices,
            expected_values,
            expected_shape,
            intersection,
            dtype=dtype)
        self.assertAllEqual(expected_counts,
                            self._set_intersection_count(a, sp_b))

        # Dense to dense.
        b = math_ops.cast(
            sparse_ops.sparse_to_dense(
                sp_b.indices,
                sp_b.dense_shape,
                sp_b.values,
                default_value="-2" if dtype == dtypes.string else -2),
            dtype=dtype)
        intersection = self._set_intersection(a, b)
        self._assert_set_operation(
            expected_indices,
            expected_values,
            expected_shape,
            intersection,
            dtype=dtype)
        self.assertAllEqual(expected_counts, self._set_intersection_count(a, b))
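The `_set_intersection` helper under test presumably wraps the set-ops kernels; the public API shows the same row-wise set semantics (a sketch, assuming TF 2.x eager mode):

import tensorflow as tf

a = tf.constant([[1, 9, 3], [5, 7, 8]])
b = tf.constant([[9, 9, 2], [8, 5, 4]])
inter = tf.sets.intersection(a, b)  # SparseTensor holding each row's intersection
print(tf.sparse.to_dense(inter).numpy())
# [[9 0]
#  [5 8]]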
Example #6
  def testIncompatibleStructure(self):
    # Define three mutually incompatible values/structures, and assert that:
    # 1. Using one structure to flatten a value with an incompatible structure
    #    fails.
    # 2. Using one structure to restructure a flattened value with an
    #    incompatible structure fails.
    value_tensor = constant_op.constant(42.0)
    s_tensor = structure.Structure.from_value(value_tensor)
    flat_tensor = s_tensor._to_tensor_list(value_tensor)

    value_sparse_tensor = sparse_tensor.SparseTensor(
        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
    s_sparse_tensor = structure.Structure.from_value(value_sparse_tensor)
    flat_sparse_tensor = s_sparse_tensor._to_tensor_list(value_sparse_tensor)

    value_nest = {
        "a": constant_op.constant(37.0),
        "b": constant_op.constant([1, 2, 3])
    }
    s_nest = structure.Structure.from_value(value_nest)
    flat_nest = s_nest._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(
        ValueError, r"SparseTensor.* is not convertible to a tensor with "
        r"dtype.*float32.* and shape \(\)"):
      s_tensor._to_tensor_list(value_sparse_tensor)
    with self.assertRaisesRegexp(
        ValueError, r"Value \{.*\} is not convertible to a tensor with "
        r"dtype.*float32.* and shape \(\)"):
      s_tensor._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(TypeError, "Input must be a SparseTensor"):
      s_sparse_tensor._to_tensor_list(value_tensor)

    with self.assertRaisesRegexp(TypeError, "Input must be a SparseTensor"):
      s_sparse_tensor._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(
        ValueError, "Tensor.* not compatible with the nested structure "
        ".*TensorStructure.*TensorStructure"):
      s_nest._to_tensor_list(value_tensor)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.* not compatible with the nested structure "
        ".*TensorStructure.*TensorStructure"):
      s_nest._to_tensor_list(value_sparse_tensor)

    with self.assertRaisesRegexp(
        ValueError, r"Cannot convert.*with dtype.*float32.* and shape \(\)"):
      s_tensor._from_tensor_list(flat_sparse_tensor)

    with self.assertRaisesRegexp(
        ValueError, "TensorStructure corresponds to a single tf.Tensor."):
      s_tensor._from_tensor_list(flat_nest)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_sparse_tensor._from_tensor_list(flat_tensor)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_sparse_tensor._from_tensor_list(flat_nest)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 1."):
      s_nest._from_tensor_list(flat_tensor)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 1."):
      s_nest._from_tensor_list(flat_sparse_tensor)
Example #7
class StructureTest(test.TestCase, parameterized.TestCase):

  # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
  # will be executed before the (eager- or graph-mode) test environment has been
  # set up.
  # pylint: disable=g-long-lambda,protected-access
  @parameterized.parameters(
      (lambda: constant_op.constant(37.0), structure.TensorStructure,
       [dtypes.float32], [[]]),
      (lambda: sparse_tensor.SparseTensor(
          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
       structure.SparseTensorStructure, [dtypes.variant], [[3]]),
      (lambda: (constant_op.constant(37.0), constant_op.constant([1, 2, 3])),
       structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
      (lambda: {
          "a": constant_op.constant(37.0),
          "b": constant_op.constant([1, 2, 3])
      }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
      (lambda: {
          "a": constant_op.constant(37.0),
          "b": (sparse_tensor.SparseTensor(
              indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
                sparse_tensor.SparseTensor(
                    indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
      }, structure.NestedStructure,
       [dtypes.float32, dtypes.variant, dtypes.variant], [[], [3], [3]]))
  def testFlatStructure(self, value_fn, expected_structure, expected_types,
                        expected_shapes):
    value = value_fn()
    s = structure.Structure.from_value(value)
    self.assertIsInstance(s, expected_structure)
    self.assertEqual(expected_types, s._flat_types)
    self.assertEqual(expected_shapes, s._flat_shapes)

  @parameterized.parameters(
      (lambda: constant_op.constant(37.0), lambda: [
          constant_op.constant(38.0),
          array_ops.placeholder(dtypes.float32),
          variables.Variable(100.0), 42.0,
          np.array(42.0, dtype=np.float32)
      ], lambda: [constant_op.constant([1.0, 2.0]), constant_op.constant(37)]),
      (lambda: sparse_tensor.SparseTensor(
          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
       lambda: [
           sparse_tensor.SparseTensor(
               indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
           sparse_tensor.SparseTensorValue(
               indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
           array_ops.sparse_placeholder(dtype=dtypes.int32),
           array_ops.sparse_placeholder(dtype=dtypes.int32, shape=[None, None])
       ], lambda: [
           constant_op.constant(37, shape=[4, 5]),
           sparse_tensor.SparseTensor(
               indices=[[3, 4]], values=[-1], dense_shape=[5, 6]),
           array_ops.sparse_placeholder(
               dtype=dtypes.int32, shape=[None, None, None]),
           sparse_tensor.SparseTensor(
               indices=[[3, 4]], values=[-1.0], dense_shape=[4, 5])
       ]),
      (lambda: {
          "a": constant_op.constant(37.0),
          "b": constant_op.constant([1, 2, 3])
      }, lambda: [{
          "a": constant_op.constant(15.0),
          "b": constant_op.constant([4, 5, 6])
      }], lambda: [{
          "a": constant_op.constant(15.0),
          "b": constant_op.constant([4, 5, 6, 7])
      }, {
          "a": constant_op.constant(15),
          "b": constant_op.constant([4, 5, 6])
      }, {
          "a":
              constant_op.constant(15),
          "b":
              sparse_tensor.SparseTensor(
                  indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3])
      }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]),
  )
  def testIsCompatibleWithStructure(
      self, original_value_fn, compatible_values_fn, incompatible_values_fn):
    original_value = original_value_fn()
    compatible_values = compatible_values_fn()
    incompatible_values = incompatible_values_fn()
    s = structure.Structure.from_value(original_value)
    for compatible_value in compatible_values:
      self.assertTrue(
          s.is_compatible_with(
              structure.Structure.from_value(compatible_value)))
    for incompatible_value in incompatible_values:
      self.assertFalse(
          s.is_compatible_with(
              structure.Structure.from_value(incompatible_value)))

  @parameterized.parameters(
      (lambda: constant_op.constant(37.0),),
      (lambda: sparse_tensor.SparseTensor(
          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),),
      (lambda: {"a": constant_op.constant(37.0),
                "b": constant_op.constant([1, 2, 3])},),
      (lambda: {"a": constant_op.constant(37.0),
                "b": (sparse_tensor.SparseTensor(
                    indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
                      sparse_tensor.SparseTensor(
                          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
               },),
      )
  def testRoundTripConversion(self, value_fn):
    value = value_fn()
    s = structure.Structure.from_value(value)
    before = self.evaluate(value)
    after = self.evaluate(s._from_tensor_list(s._to_tensor_list(value)))

    flat_before = nest.flatten(before)
    flat_after = nest.flatten(after)
    for b, a in zip(flat_before, flat_after):
      if isinstance(b, sparse_tensor.SparseTensorValue):
        self.assertAllEqual(b.indices, a.indices)
        self.assertAllEqual(b.values, a.values)
        self.assertAllEqual(b.dense_shape, a.dense_shape)
      else:
        self.assertAllEqual(b, a)
  # pylint: enable=g-long-lambda

  def testIncompatibleStructure(self):
    # Define three mutually incompatible values/structures, and assert that:
    # 1. Using one structure to flatten a value with an incompatible structure
    #    fails.
    # 2. Using one structure to restructure a flattened value with an
    #    incompatible structure fails.
    value_tensor = constant_op.constant(42.0)
    s_tensor = structure.Structure.from_value(value_tensor)
    flat_tensor = s_tensor._to_tensor_list(value_tensor)

    value_sparse_tensor = sparse_tensor.SparseTensor(
        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
    s_sparse_tensor = structure.Structure.from_value(value_sparse_tensor)
    flat_sparse_tensor = s_sparse_tensor._to_tensor_list(value_sparse_tensor)

    value_nest = {
        "a": constant_op.constant(37.0),
        "b": constant_op.constant([1, 2, 3])
    }
    s_nest = structure.Structure.from_value(value_nest)
    flat_nest = s_nest._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(
        ValueError, r"SparseTensor.* is not convertible to a tensor with "
        r"dtype.*float32.* and shape \(\)"):
      s_tensor._to_tensor_list(value_sparse_tensor)
    with self.assertRaisesRegexp(
        ValueError, r"Value \{.*\} is not convertible to a tensor with "
        r"dtype.*float32.* and shape \(\)"):
      s_tensor._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(TypeError, "Input must be a SparseTensor"):
      s_sparse_tensor._to_tensor_list(value_tensor)

    with self.assertRaisesRegexp(TypeError, "Input must be a SparseTensor"):
      s_sparse_tensor._to_tensor_list(value_nest)

    with self.assertRaisesRegexp(
        ValueError, "Tensor.* not compatible with the nested structure "
        ".*TensorStructure.*TensorStructure"):
      s_nest._to_tensor_list(value_tensor)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.* not compatible with the nested structure "
        ".*TensorStructure.*TensorStructure"):
      s_nest._to_tensor_list(value_sparse_tensor)

    with self.assertRaisesRegexp(
        ValueError, r"Cannot convert.*with dtype.*float32.* and shape \(\)"):
      s_tensor._from_tensor_list(flat_sparse_tensor)

    with self.assertRaisesRegexp(
        ValueError, "TensorStructure corresponds to a single tf.Tensor."):
      s_tensor._from_tensor_list(flat_nest)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_sparse_tensor._from_tensor_list(flat_tensor)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_sparse_tensor._from_tensor_list(flat_nest)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 1."):
      s_nest._from_tensor_list(flat_tensor)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 1."):
      s_nest._from_tensor_list(flat_sparse_tensor)

  def testIncompatibleNestedStructure(self):
    # Define three mutually incompatible nested values/structures, and assert
    # that:
    # 1. Using one structure to flatten a value with an incompatible structure
    #    fails.
    # 2. Using one structure to restructure a flattened value with an
    #    incompatible structure fails.

    value_0 = {
        "a": constant_op.constant(37.0),
        "b": constant_op.constant([1, 2, 3])
    }
    s_0 = structure.Structure.from_value(value_0)
    flat_s_0 = s_0._to_tensor_list(value_0)

    # `value_1` has compatible nested structure with `value_0`, but different
    # classes.
    value_1 = {
        "a":
            constant_op.constant(37.0),
        "b":
            sparse_tensor.SparseTensor(
                indices=[[0, 0]], values=[1], dense_shape=[1, 1])
    }
    s_1 = structure.Structure.from_value(value_1)
    flat_s_1 = s_1._to_tensor_list(value_1)

    # `value_2` has incompatible nested structure with `value_0` and `value_1`.
    value_2 = {
        "a":
            constant_op.constant(37.0),
        "b": (sparse_tensor.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
              sparse_tensor.SparseTensor(
                  indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
    }
    s_2 = structure.Structure.from_value(value_2)
    flat_s_2 = s_2._to_tensor_list(value_2)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.* not compatible with the nested structure "
        ".*TensorStructure"):
      s_0._to_tensor_list(value_1)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.*SparseTensor.* not compatible with the "
        "nested structure .*TensorStructure"):
      s_0._to_tensor_list(value_2)

    with self.assertRaisesRegexp(
        ValueError, "Tensor.* not compatible with the nested structure "
        ".*SparseTensorStructure"):
      s_1._to_tensor_list(value_0)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.*SparseTensor.* not compatible with the "
        "nested structure .*TensorStructure"):
      s_0._to_tensor_list(value_2)

    # NOTE(mrry): The repr of the dictionaries is not sorted, so the regexp
    # needs to account for "a" coming before or after "b". It might be worth
    # adding a deterministic repr for these error messages (among other
    # improvements).
    with self.assertRaisesRegexp(
        ValueError, "Tensor.*Tensor.* not compatible with the nested structure "
        ".*(TensorStructure.*SparseTensorStructure.*SparseTensorStructure|"
        "SparseTensorStructure.*SparseTensorStructure.*TensorStructure)"):
      s_2._to_tensor_list(value_0)

    with self.assertRaisesRegexp(
        ValueError, "(Tensor.*SparseTensor|SparseTensor.*Tensor).* "
        "not compatible with the nested structure .*"
        "(TensorStructure.*SparseTensorStructure.*SparseTensorStructure|"
        "SparseTensorStructure.*SparseTensorStructure.*TensorStructure)"):
      s_2._to_tensor_list(value_1)

    with self.assertRaisesRegexp(
        ValueError, r"Cannot convert.*with dtype.*int32.* and shape \(3,\)"):
      s_0._from_tensor_list(flat_s_1)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 3."):
      s_0._from_tensor_list(flat_s_2)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_1._from_tensor_list(flat_s_0)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 3."):
      s_1._from_tensor_list(flat_s_2)

    with self.assertRaisesRegexp(
        ValueError, "Expected 3 flat values in NestedStructure but got 2."):
      s_2._from_tensor_list(flat_s_0)

    with self.assertRaisesRegexp(
        ValueError, "Expected 3 flat values in NestedStructure but got 2."):
      s_2._from_tensor_list(flat_s_1)

  @parameterized.named_parameters(
      ("Tensor", dtypes.float32, tensor_shape.scalar(), ops.Tensor,
       structure.TensorStructure(dtypes.float32, [])),
      ("SparseTensor", dtypes.int32, tensor_shape.matrix(2, 2),
       sparse_tensor.SparseTensor,
       structure.SparseTensorStructure(dtypes.int32, [2, 2])),
      ("Nest",
       {"a": dtypes.float32, "b": (dtypes.int32, dtypes.string)},
       {"a": tensor_shape.scalar(),
        "b": (tensor_shape.matrix(2, 2), tensor_shape.scalar())},
       {"a": ops.Tensor, "b": (sparse_tensor.SparseTensor, ops.Tensor)},
       structure.NestedStructure({
           "a": structure.TensorStructure(dtypes.float32, []),
           "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
                 structure.TensorStructure(dtypes.string, []))})),
  )
  def testFromLegacyStructure(self, output_types, output_shapes, output_classes,
                              expected_structure):
    actual_structure = structure.Structure._from_legacy_structure(
        output_types, output_shapes, output_classes)
    self.assertTrue(expected_structure.is_compatible_with(actual_structure))
    self.assertTrue(actual_structure.is_compatible_with(expected_structure))
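In later TF releases these internal `Structure` classes were folded into the public `tf.TypeSpec` machinery (Example #15 below exercises the migrated API). The closest public analogue of `Structure.from_value` is sketched here, assuming a recent TF 2.x:

import tensorflow as tf

sp = tf.sparse.SparseTensor(indices=[[3, 4]], values=[-1], dense_shape=[4, 5])
spec = tf.type_spec_from_value(sp)
print(spec)  # SparseTensorSpec(TensorShape([4, 5]), tf.int32)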
Example #8
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner='mean',
                                 default_id=None,
                                 name=None,
                                 partition_strategy='div',
                                 max_norm=None):
    """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float `Tensor`s or values representing
        partitioned embedding `Tensor`s.  Alternatively, a `PartitionedVariable`
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm before
        combining.

  Returns:
    Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if embedding_weights is None:
        raise ValueError('Missing embedding_weights %s.' % embedding_weights)
    if isinstance(embedding_weights, variables.PartitionedVariable):
        embedding_weights = list(
            embedding_weights)  # get underlying Variables.
    if not isinstance(embedding_weights, list):
        embedding_weights = [embedding_weights]
    if len(embedding_weights) < 1:
        raise ValueError('Missing embedding_weights %s.' % embedding_weights)

    dtype = sparse_weights.dtype if sparse_weights is not None else None
    embedding_weights = [
        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
    ]

    with ops.name_scope(name, 'embedding_lookup', embedding_weights +
                        [sparse_ids, sparse_weights]) as scope:
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.dense_shape
        original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim.value is None else
                         original_rank_dim.value)
        sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
            math_ops.reduce_prod(
                array_ops.slice(original_shape, [0], [original_rank - 1])),
            array_ops.gather(original_shape, original_rank - 1)
        ])
        if sparse_weights is not None:
            sparse_weights = sparse_tensor.SparseTensor(
                sparse_ids.indices, sparse_weights.values,
                sparse_ids.dense_shape)

        # Prune invalid ids and weights.
        sparse_ids, sparse_weights = _prune_invalid_ids(
            sparse_ids, sparse_weights)
        if combiner != 'sum':
            sparse_ids, sparse_weights = _prune_invalid_weights(
                sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result = embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=None if default_id is None else scope,
            max_norm=max_norm)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.stack([1, array_ops.shape(result)[1]]))

            result = array_ops.where(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name=scope)

        # Reshape back from linear ids into the higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat([
                array_ops.slice(math_ops.cast(original_shape, dtypes.int32),
                                [0], [original_rank - 1]),
                array_ops.slice(array_ops.shape(result), [1], [-1])
            ], 0))
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (original_rank_dim - 1).value).concatenate(
                    result.get_shape()[1:]))
        return final_result
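A hedged usage sketch for the function above via its public TF 2.x counterpart `tf.nn.safe_embedding_lookup_sparse` (the concrete values are illustrative): the invalid id is pruned, and the empty row falls back to the 0-vector because no `default_id` is given.

import tensorflow as tf

weights = tf.Variable(tf.random.normal([10, 4]))  # vocab of 10 ids, dim-4 embeddings
sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [2, 0]],          # row 1 has no features at all
    values=tf.constant([3, -1, 7], tf.int64),  # -1 is invalid and gets pruned
    dense_shape=[3, 2])
result = tf.nn.safe_embedding_lookup_sparse(weights, sparse_ids)
print(result.shape)  # (3, 4); row 1 is all zeros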
Example #9
class InputsTest(test.TestCase, parameterized.TestCase):
    @staticmethod
    def make_apply_fn(dataset):
        def apply_fn(dataset):
            def _apply_fn(dataset):
                return dataset.cache()

            return dataset.apply(_apply_fn)

        return apply_fn

    @staticmethod
    def make_gen():
        def gen():
            yield 42

        return gen

    @staticmethod
    def make_interleave_fn(dataset, num_parallel_calls=None):
        def interleave_fn(dataset):
            return dataset.interleave(lambda x: dataset_ops.Dataset.range(0),
                                      cycle_length=2,
                                      num_parallel_calls=num_parallel_calls)

        return interleave_fn

    @parameterized.named_parameters(
        ("FixedLengthRecord", readers.FixedLengthRecordDataset("", 42)),
        ("FromGenerator",
         dataset_ops.Dataset.from_generator(make_gen.__func__(),
                                            dtypes.int32), 1),
        ("FromSparseTensorSlices",
         dataset_ops.Dataset.from_sparse_tensor_slices(
             sparse_tensor.SparseTensor(indices=np.array([[0, 0], [1, 0],
                                                          [2, 0]]),
                                        values=np.array([0, 0, 0]),
                                        dense_shape=np.array([3, 1])))),
        ("FromTensors", dataset_ops.Dataset.from_tensors([42])),
        ("FromTensorSlices", dataset_ops.Dataset.from_tensors([42])),
        ("Range", dataset_ops.Dataset.range(10)),
        ("TextLine", readers.TextLineDataset("")),
        ("TFRecord", readers.TFRecordDataset(""), 1),
    )
    def testDatasetSourceInputs(self, dataset, num_inputs=0):
        self.assertEqual(num_inputs, len(dataset._inputs()))

    @parameterized.named_parameters(
        ("Apply", make_apply_fn.__func__(
            dataset_ops.Dataset.range(0)), dataset_ops.Dataset.range(0)),
        ("Batch", lambda x: x.batch(10), dataset_ops.Dataset.range(0)),
        ("Cache", lambda x: x.cache(), dataset_ops.Dataset.range(0)),
        ("Filter", lambda x: x.filter(lambda x: True),
         dataset_ops.Dataset.range(0)),
        ("FlatMap",
         lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
         dataset_ops.Dataset.range(0)),
        ("Interleave", make_interleave_fn.__func__(
            dataset_ops.Dataset.range(0)), dataset_ops.Dataset.range(0)),
        ("Map", lambda x: x.map(lambda x: x), dataset_ops.Dataset.range(0)),
        ("PaddedBatch", lambda x: x.padded_batch(10, []),
         dataset_ops.Dataset.range(0)),
        ("ParallelInterleave",
         make_interleave_fn.__func__(dataset_ops.Dataset.range(0),
                                     2), dataset_ops.Dataset.range(0)),
        ("ParallelMap", lambda x: x.map(lambda x: x, num_parallel_calls=2),
         dataset_ops.Dataset.range(0)),
        ("Repeat", lambda x: x.repeat(), dataset_ops.Dataset.range(0)),
        ("Shuffle", lambda x: x.shuffle(10), dataset_ops.Dataset.range(0)),
        ("Skip", lambda x: x.skip(1), dataset_ops.Dataset.range(0)),
        ("Take", lambda x: x.take(1), dataset_ops.Dataset.range(0)),
        ("Window", lambda x: x.window(10), dataset_ops.Dataset.range(0)),
    )
    def testUnaryTransformationInputs(self, dataset_fn, input_dataset):
        self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())

    @parameterized.named_parameters(
        ("Concatenate", lambda x, y: x.concatenate(y),
         dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1)))
    def testBinaryTransformationInputs(self, dataset_fn, input1, input2):
        self.assertEqual([input1, input2],
                         dataset_fn(input1, input2)._inputs())

    @parameterized.named_parameters(
        ("ZipOne", dataset_ops.Dataset.zip, (dataset_ops.Dataset.range(0))),
        ("ZipNest", dataset_ops.Dataset.zip,
         (dataset_ops.Dataset.range(0),
          (dataset_ops.Dataset.range(1), dataset_ops.Dataset.range(2)))),
        ("ZipTuple", dataset_ops.Dataset.zip,
         (dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1))))
    def testVariadicTransformationInputs(self, dataset_fn, input_datasets):
        self.assertEqual(nest.flatten(input_datasets),
                         dataset_fn(input_datasets)._inputs())

    def testCollectInputs(self):
        ds1 = dataset_ops.Dataset.range(0)
        ds2 = ds1.concatenate(ds1)
        ds3 = dataset_ops.Dataset.zip((ds2, ds1, ds2))

        inputs = []
        queue = [ds3]
        while queue:
            ds = queue[0]
            queue = queue[1:]
            queue.extend(ds._inputs())
            inputs.append(ds)

        self.assertEqual(5, inputs.count(ds1))
        self.assertEqual(2, inputs.count(ds2))
        self.assertEqual(1, inputs.count(ds3))
Example #10
  def testGenerateFeatureSplitCandidatesInactive(self):
    with self.test_session() as sess:
      # The data looks like the following:
      # Example |  Gradients    | Partition | Sparse Quantile |
      # i0      |  (0.2, 0.12)  | 0         | 1               |
      # i1      |  (-0.5, 0.07) | 0         | N/A             |
      # i2      |  (1.2, 0.2)   | 0         | 0               |
      # i3      |  (4.0, 0.13)  | 1         | 1               |
      gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
      hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
      example_partitions = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32)
      indices = array_ops.constant([[0, 0], [2, 0], [3, 0]], dtype=dtypes.int64)
      values = array_ops.constant([0.52, 0.3, 0.52])
      sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])

      gradient_shape = tensor_shape.scalar()
      hessian_shape = tensor_shape.scalar()
      class_id = -1

      split_handler = ordinal_split_handler.SparseSplitHandler(
          l1_regularization=0,
          l2_regularization=2,
          tree_complexity_regularization=0,
          min_node_weight=0,
          epsilon=0.01,
          num_quantiles=2,
          feature_column_group_id=0,
          gradient_shape=gradient_shape,
          hessian_shape=hessian_shape,
          sparse_float_column=sparse_column,
          init_stamp_token=0,
          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
      resources.initialize_resources(resources.shared_resources()).run()

      empty_gradients, empty_hessians = get_empty_tensors(
          gradient_shape, hessian_shape)
      example_weights = array_ops.ones([4, 1], dtypes.float32)

      update_1 = split_handler.update_stats_sync(
          0,
          example_partitions,
          gradients,
          hessians,
          empty_gradients,
          empty_hessians,
          example_weights,
          is_active=array_ops.constant([True, False]))
      with ops.control_dependencies([update_1]):
        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]

      with ops.control_dependencies([are_splits_ready]):
        update_2 = split_handler.update_stats_sync(
            1,
            example_partitions,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([False, True]))
      with ops.control_dependencies([update_2]):
        are_splits_ready2, partitions, gains, splits = (
            split_handler.make_splits(1, 2, class_id))
        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
            sess.run([
                are_splits_ready, are_splits_ready2, partitions, gains, splits
            ]))

    # During the first iteration, inequality split handlers are not going to
    # have any splits. Make sure that we return not_ready in that case.
    self.assertFalse(are_splits_ready)
    self.assertTrue(are_splits_ready2)
    # The handler was inactive, so it shouldn't return any splits.
    self.assertEqual(len(partitions), 0)
    self.assertEqual(len(gains), 0)
    self.assertEqual(len(splits), 0)
Example #11
def confusion_matrix(labels,
                     predictions,
                     num_classes=None,
                     weights=None,
                     dtype=dtypes.int32,
                     name=None):
  """Computes the confusion matrix from predictions and labels.

  The matrix columns represent the prediction labels and the rows represent the
  real labels. The confusion matrix is always a 2-D array of shape `[n, n]`,
  where `n` is the number of valid labels for a given classification task. Both
  predictions and labels must be 1-D arrays of the same shape in order for this
  function to work.

  If `num_classes` is `None`, then `num_classes` will be set to one plus the
  maximum value in either predictions or labels. Class labels are expected to
  start at 0. For example, if `num_classes` is 3, then the possible labels
  would be `[0, 1, 2]`.

  If `weights` is not `None`, then each prediction contributes its
  corresponding weight to the total value of the confusion matrix cell.

  For example:

  ```python
    tf.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
        [[0 0 0 0 0]
         [0 0 1 0 0]
         [0 0 1 0 0]
         [0 0 0 0 0]
         [0 0 0 0 1]]
  ```

  Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`,
  resulting in a 5x5 confusion matrix.

  Args:
    labels: 1-D `Tensor` of real labels for the classification task.
    predictions: 1-D `Tensor` of predictions for a given classification.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both the predictions and labels arrays.
    weights: An optional `Tensor` whose shape matches `predictions`.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A `Tensor` of type `dtype` with shape `[n, n]` representing the confusion
    matrix, where `n` is the number of possible labels in the classification
    task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and have
      mismatched shapes, or if `weights` is not `None` and its shape doesn't
      match `predictions`.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      (predictions, labels, num_classes, weights)) as name:
    labels, predictions = remove_squeezable_dimensions(
        ops.convert_to_tensor(labels, name='labels'),
        ops.convert_to_tensor(
            predictions, name='predictions'))
    predictions = math_ops.cast(predictions, dtypes.int64)
    labels = math_ops.cast(labels, dtypes.int64)

    # Sanity checks - underflow or overflow can cause memory corruption.
    labels = control_flow_ops.with_dependencies(
        [check_ops.assert_non_negative(
            labels, message='`labels` contains negative values')],
        labels)
    predictions = control_flow_ops.with_dependencies(
        [check_ops.assert_non_negative(
            predictions, message='`predictions` contains negative values')],
        predictions)

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1
    else:
      num_classes_int64 = math_ops.cast(num_classes, dtypes.int64)
      labels = control_flow_ops.with_dependencies(
          [check_ops.assert_less(
              labels, num_classes_int64, message='`labels` out of bound')],
          labels)
      predictions = control_flow_ops.with_dependencies(
          [check_ops.assert_less(
              predictions, num_classes_int64,
              message='`predictions` out of bound')],
          predictions)

    if weights is not None:
      weights = ops.convert_to_tensor(weights, name='weights')
      predictions.get_shape().assert_is_compatible_with(weights.get_shape())
      weights = math_ops.cast(weights, dtype)

    shape = array_ops.stack([num_classes, num_classes])
    indices = array_ops.stack([labels, predictions], axis=1)
    values = (array_ops.ones_like(predictions, dtype)
              if weights is None else weights)
    cm_sparse = sparse_tensor.SparseTensor(
        indices=indices,
        values=values,
        dense_shape=math_ops.cast(shape, dtypes.int64))
    zero_matrix = array_ops.zeros(math_ops.cast(shape, dtypes.int32), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
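Reproducing the docstring example through the public API (assumes TF 2.x eager mode):

import tensorflow as tf

cm = tf.math.confusion_matrix(labels=[1, 2, 4], predictions=[2, 2, 4])
print(cm.numpy())
# [[0 0 0 0 0]
#  [0 0 1 0 0]
#  [0 0 1 0 0]
#  [0 0 0 0 0]
#  [0 0 0 0 1]]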
Example #12
  def testGenerateFeatureSplitCandidatesMulticlassDiagonalHessian(self):
    with self.test_session() as sess:
      # Batch is 4, 2 classes
      gradients = array_ops.constant(
          [[0.2, 1.4], [-0.5, 0.1], [1.2, 3], [4.0, -3]])
      # Each hessian is a diagonal from a full hessian matrix.
      hessian_0 = [0.12, 0.11]
      hessian_1 = [0.07, 0.2]
      hessian_2 = [0.2, 0.9]
      hessian_3 = [0.13, 2.2]
      hessians = array_ops.constant(
          [hessian_0, hessian_1, hessian_2, hessian_3])

      example_partitions = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32)
      indices = array_ops.constant([[0, 0], [2, 0], [3, 0]], dtype=dtypes.int64)
      values = array_ops.constant([0.52, 0.3, 0.52])
      sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])

      gradient_shape = tensor_shape.TensorShape([2])
      hessian_shape = tensor_shape.TensorShape([2])
      class_id = -1

      split_handler = ordinal_split_handler.SparseSplitHandler(
          l1_regularization=0,
          l2_regularization=2,
          tree_complexity_regularization=0,
          min_node_weight=0,
          epsilon=0.01,
          num_quantiles=2,
          feature_column_group_id=0,
          sparse_float_column=sparse_column,
          init_stamp_token=0,
          gradient_shape=gradient_shape,
          hessian_shape=hessian_shape,
          multiclass_strategy=learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
      resources.initialize_resources(resources.shared_resources()).run()

      empty_gradients, empty_hessians = get_empty_tensors(
          gradient_shape, hessian_shape)
      example_weights = array_ops.ones([4, 1], dtypes.float32)

      update_1 = split_handler.update_stats_sync(
          0,
          example_partitions,
          gradients,
          hessians,
          empty_gradients,
          empty_hessians,
          example_weights,
          is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_1]):
        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]

      with ops.control_dependencies([are_splits_ready]):
        update_2 = split_handler.update_stats_sync(
            1,
            example_partitions,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_2]):
        are_splits_ready2, partitions, gains, splits = (
            split_handler.make_splits(1, 2, class_id))
        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
            sess.run([
                are_splits_ready, are_splits_ready2, partitions, gains, splits
            ]))

    self.assertFalse(are_splits_ready)
    self.assertTrue(are_splits_ready2)

    split_info = split_info_pb2.SplitInfo()
    split_info.ParseFromString(splits[0])

    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.sparse_float_binary_split_default_right
    # Each leaf has a 2-element vector.
    self.assertEqual(2, len(left_child.value))
    self.assertEqual(2, len(right_child.value))
    self.assertEqual(0, split_node.split.feature_column)
    self.assertAllClose(0.52, split_node.split.threshold)

    split_info.ParseFromString(splits[1])
    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.sparse_float_binary_split_default_left
    self.assertEqual(2, len(left_child.value))
    self.assertEqual(0, split_node.split.feature_column)
    self.assertAllClose(0.52, split_node.split.threshold)
Example #13
  def testGenerateFeatureSplitCandidates(self):
    with self.test_session() as sess:
      # The data looks like the following:
      # Example |  Gradients    | Partition | Sparse Quantile |
      # i0      |  (0.2, 0.12)  | 0         | 1               |
      # i1      |  (-0.5, 0.07) | 0         | N/A             |
      # i2      |  (1.2, 0.2)   | 0         | 0               |
      # i3      |  (4.0, 0.13)  | 1         | 1               |
      gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
      hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
      example_partitions = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32)
      indices = array_ops.constant([[0, 0], [2, 0], [3, 0]], dtype=dtypes.int64)
      values = array_ops.constant([0.52, 0.3, 0.52])
      sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])

      gradient_shape = tensor_shape.scalar()
      hessian_shape = tensor_shape.scalar()
      class_id = -1

      split_handler = ordinal_split_handler.SparseSplitHandler(
          l1_regularization=0,
          l2_regularization=2,
          tree_complexity_regularization=0,
          min_node_weight=0,
          epsilon=0.01,
          num_quantiles=2,
          feature_column_group_id=0,
          sparse_float_column=sparse_column,
          init_stamp_token=0,
          gradient_shape=gradient_shape,
          hessian_shape=hessian_shape,
          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
      resources.initialize_resources(resources.shared_resources()).run()

      empty_gradients, empty_hessians = get_empty_tensors(
          gradient_shape, hessian_shape)
      example_weights = array_ops.ones([4, 1], dtypes.float32)

      update_1 = split_handler.update_stats_sync(
          0,
          example_partitions,
          gradients,
          hessians,
          empty_gradients,
          empty_hessians,
          example_weights,
          is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_1]):
        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]

      with ops.control_dependencies([are_splits_ready]):
        update_2 = split_handler.update_stats_sync(
            1,
            example_partitions,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_2]):
        are_splits_ready2, partitions, gains, splits = (
            split_handler.make_splits(1, 2, class_id))
        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
            sess.run([
                are_splits_ready, are_splits_ready2, partitions, gains, splits
            ]))

    # During the first iteration, inequality split handlers are not going to
    # have any splits. Make sure that we return not_ready in that case.
    self.assertFalse(are_splits_ready)
    self.assertTrue(are_splits_ready2)

    self.assertAllEqual([0, 1], partitions)
    # Check the split on partition 0.
    # -(0.2 + 1.2) / (0.12 + 0.2 + 2)
    expected_left_weight = -0.603448275862069
    # (0.2 + 1.2) ** 2 / (0.12 + 0.2 + 2)
    expected_left_gain = 0.8448275862068965
    # 0.5 / (0.07 + 2)
    expected_right_weight = 0.24154589371980678
    # 0.5 ** 2 / (0.07 + 2)
    expected_right_gain = 0.12077294685990339
    # (0.2 + 1.2 - 0.5) ** 2 /  (0.12 + 0.2 + 0.07 + 2)
    expected_bias_gain = 0.3389121338912133

    split_info = split_info_pb2.SplitInfo()
    split_info.ParseFromString(splits[0])
    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.sparse_float_binary_split_default_right
    self.assertAllClose(
        expected_left_gain + expected_right_gain - expected_bias_gain, gains[0])

    self.assertAllClose([expected_left_weight], left_child.value)

    self.assertAllClose([expected_right_weight], right_child.value)

    self.assertEqual(0, split_node.split.feature_column)

    self.assertAllClose(0.52, split_node.split.threshold)

    # Check the split on partition 1.
    expected_left_weight = -1.8779342723004695
    expected_right_weight = 0

    # Verify candidate for partition 1, there's only one active bucket here
    # so zero gain is expected.
    split_info.ParseFromString(splits[1])
    left_child = split_info.left_child.vector
    right_child = split_info.right_child.vector
    split_node = split_info.split_node.sparse_float_binary_split_default_left

    self.assertAllClose(0.0, gains[1])

    self.assertAllClose([expected_left_weight], left_child.value)

    self.assertAllClose([expected_right_weight], right_child.value)

    self.assertEqual(0, split_node.split.feature_column)

    self.assertAllClose(0.52, split_node.split.threshold)
Example #14
  def testEmpty(self):
    with self.test_session() as sess:
      indices = array_ops.constant([], dtype=dtypes.int64, shape=[0, 2])
      # No values in this feature column in this mini-batch.
      values = array_ops.constant([], dtype=dtypes.float32)
      sparse_column = sparse_tensor.SparseTensor(indices, values, [4, 1])

      gradient_shape = tensor_shape.scalar()
      hessian_shape = tensor_shape.scalar()
      class_id = -1

      split_handler = ordinal_split_handler.SparseSplitHandler(
          l1_regularization=0,
          l2_regularization=2,
          tree_complexity_regularization=0,
          min_node_weight=0,
          epsilon=0.01,
          num_quantiles=2,
          feature_column_group_id=0,
          sparse_float_column=sparse_column,
          init_stamp_token=0,
          gradient_shape=gradient_shape,
          hessian_shape=hessian_shape,
          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
      resources.initialize_resources(resources.shared_resources()).run()
      gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
      hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
      partition_ids = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32)

      empty_gradients, empty_hessians = get_empty_tensors(
          gradient_shape, hessian_shape)
      example_weights = array_ops.ones([4, 1], dtypes.float32)

      update_1 = split_handler.update_stats_sync(
          0,
          partition_ids,
          gradients,
          hessians,
          empty_gradients,
          empty_hessians,
          example_weights,
          is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_1]):
        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]

      with ops.control_dependencies([are_splits_ready]):
        update_2 = split_handler.update_stats_sync(
            1,
            partition_ids,
            gradients,
            hessians,
            empty_gradients,
            empty_hessians,
            example_weights,
            is_active=array_ops.constant([True, True]))
      with ops.control_dependencies([update_2]):
        are_splits_ready2, partitions, gains, splits = (
            split_handler.make_splits(1, 2, class_id))
        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
            sess.run([
                are_splits_ready, are_splits_ready2, partitions, gains, splits
            ]))
    self.assertFalse(are_splits_ready)
    self.assertTrue(are_splits_ready2)
    self.assertEqual(len(partitions), 0)
    self.assertEqual(len(gains), 0)
    self.assertEqual(len(splits), 0)
Example #15
class StructureTest(test_base.DatasetTestBase, parameterized.TestCase,
                    ragged_test_util.RaggedTensorTestCase):

    # pylint: disable=g-long-lambda,protected-access
    @parameterized.named_parameters(
        ("Tensor", lambda: constant_op.constant(37.0), tensor_spec.TensorSpec,
         [dtypes.float32], [[]]),
        ("TensorArray", lambda: tensor_array_ops.TensorArray(
            dtype=dtypes.float32, element_shape=(3, ), size=0),
         tensor_array_ops.TensorArraySpec, [dtypes.variant], [[]]),
        ("SparseTensor", lambda: sparse_tensor.SparseTensor(
            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
         sparse_tensor.SparseTensorSpec, [dtypes.variant], [None]),
        ("RaggedTensor",
         lambda: ragged_factory_ops.constant([[1, 2], [], [4]]),
         ragged_tensor.RaggedTensorSpec, [dtypes.variant], [None]),
        ("Nested_0", lambda:
         (constant_op.constant(37.0), constant_op.constant([1, 2, 3])), tuple,
         [dtypes.float32, dtypes.int32], [[], [3]]),
        ("Nested_1", lambda: {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }, dict, [dtypes.float32, dtypes.int32], [[], [3]]),
        ("Nested_2", lambda: {
            "a":
            constant_op.constant(37.0),
            "b":
            (sparse_tensor.
             SparseTensor(indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
             sparse_tensor.SparseTensor(
                 indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
        }, dict, [dtypes.float32, dtypes.variant, dtypes.variant], [[], None,
                                                                    None]),
    )
    def testFlatStructure(self, value_fn, expected_structure, expected_types,
                          expected_shapes):
        value = value_fn()
        s = structure.type_spec_from_value(value)
        self.assertIsInstance(s, expected_structure)
        flat_types = structure.get_flat_tensor_types(s)
        self.assertEqual(expected_types, flat_types)
        flat_shapes = structure.get_flat_tensor_shapes(s)
        self.assertLen(flat_shapes, len(expected_shapes))
        for expected, actual in zip(expected_shapes, flat_shapes):
            if expected is None:
                self.assertEqual(actual.ndims, None)
            else:
                self.assertEqual(actual.as_list(), expected)

    @parameterized.named_parameters(
        ("Tensor", lambda: constant_op.constant(37.0), lambda: [
            constant_op.constant(38.0),
            array_ops.placeholder(dtypes.float32),
            variables.Variable(100.0), 42.0,
            np.array(42.0, dtype=np.float32)
        ],
         lambda: [constant_op.constant([1.0, 2.0]),
                  constant_op.constant(37)]),
        ("TensorArray", lambda: tensor_array_ops.TensorArray(
            dtype=dtypes.float32, element_shape=(3, ), size=0), lambda: [
                tensor_array_ops.TensorArray(
                    dtype=dtypes.float32, element_shape=(3, ), size=0),
                tensor_array_ops.TensorArray(
                    dtype=dtypes.float32, element_shape=(3, ), size=10)
            ], lambda: [
                tensor_array_ops.TensorArray(
                    dtype=dtypes.int32, element_shape=(3, ), size=0),
                tensor_array_ops.TensorArray(
                    dtype=dtypes.float32, element_shape=(), size=0)
            ]),
        ("SparseTensor", lambda: sparse_tensor.SparseTensor(
            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]), lambda: [
                sparse_tensor.SparseTensor(indices=[[1, 1], [3, 4]],
                                           values=[10, -1],
                                           dense_shape=[4, 5]),
                sparse_tensor.SparseTensorValue(indices=[[1, 1], [3, 4]],
                                                values=[10, -1],
                                                dense_shape=[4, 5]),
                array_ops.sparse_placeholder(dtype=dtypes.int32),
                array_ops.sparse_placeholder(dtype=dtypes.int32,
                                             shape=[None, None])
            ], lambda: [
                constant_op.constant(37, shape=[4, 5]),
                sparse_tensor.SparseTensor(
                    indices=[[3, 4]], values=[-1], dense_shape=[5, 6]),
                array_ops.sparse_placeholder(dtype=dtypes.int32,
                                             shape=[None, None, None]),
                sparse_tensor.SparseTensor(
                    indices=[[3, 4]], values=[-1.0], dense_shape=[4, 5])
            ]),
        ("RaggedTensor",
         lambda: ragged_factory_ops.constant([[1, 2], [], [3]]), lambda: [
             ragged_factory_ops.constant([[1, 2], [3, 4], []]),
             ragged_factory_ops.constant([[1], [2, 3, 4], [5]]),
         ], lambda: [
             ragged_factory_ops.constant(1),
             ragged_factory_ops.constant([1, 2]),
             ragged_factory_ops.constant([[1], [2]]),
             ragged_factory_ops.constant([["a", "b"]]),
         ]),
        ("Nested", lambda: {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }, lambda: [{
            "a": constant_op.constant(15.0),
            "b": constant_op.constant([4, 5, 6])
        }], lambda: [{
            "a": constant_op.constant(15.0),
            "b": constant_op.constant([4, 5, 6, 7])
        }, {
            "a": constant_op.constant(15),
            "b": constant_op.constant([4, 5, 6])
        }, {
            "a":
            constant_op.constant(15),
            "b":
            sparse_tensor.SparseTensor(
                indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3])
        }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]),
    )
    @test_util.run_deprecated_v1
    def testIsCompatibleWithStructure(self, original_value_fn,
                                      compatible_values_fn,
                                      incompatible_values_fn):
        original_value = original_value_fn()
        compatible_values = compatible_values_fn()
        incompatible_values = incompatible_values_fn()
        s = structure.type_spec_from_value(original_value)
        for compatible_value in compatible_values:
            self.assertTrue(
                structure.are_compatible(
                    s, structure.type_spec_from_value(compatible_value)))
        for incompatible_value in incompatible_values:
            self.assertFalse(
                structure.are_compatible(
                    s, structure.type_spec_from_value(incompatible_value)))

    @parameterized.named_parameters(
        ("Tensor", lambda: constant_op.constant(37.0),
         lambda: constant_op.constant(42.0),
         lambda: constant_op.constant([5])),
        ("TensorArray", lambda: tensor_array_ops.TensorArray(
            dtype=dtypes.float32, element_shape=(3, ), size=0),
         lambda: tensor_array_ops.TensorArray(
             dtype=dtypes.float32, element_shape=(3, ), size=0),
         lambda: tensor_array_ops.TensorArray(
             dtype=dtypes.int32, element_shape=(), size=0)),
        ("SparseTensor", lambda: sparse_tensor.SparseTensor(
            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[1, 2]], values=[42], dense_shape=[4, 5]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[3]], values=[-1], dense_shape=[5]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[3, 4]], values=[1.0], dense_shape=[4, 5])),
        ("RaggedTensor",
         lambda: ragged_factory_ops.constant([[[1, 2]], [[3]]]),
         lambda: ragged_factory_ops.constant([[[5]], [[8], [3, 2]]]), lambda:
         ragged_factory_ops.constant([[[1]], [[2], [3]]], ragged_rank=1),
         lambda: ragged_factory_ops.constant([[[1.0, 2.0]], [[3.0]]]),
         lambda: ragged_factory_ops.constant([[[1]], [[2]], [[3]]])),
        ("Nested", lambda: {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }, lambda: {
            "a": constant_op.constant(42.0),
            "b": constant_op.constant([4, 5, 6])
        }, lambda: {
            "a": constant_op.constant([1, 2, 3]),
            "b": constant_op.constant(37.0)
        }),
    )  # pyformat: disable
    def testStructureFromValueEquality(self, value1_fn, value2_fn,
                                       *not_equal_value_fns):
        # pylint: disable=g-generic-assert
        s1 = structure.type_spec_from_value(value1_fn())
        s2 = structure.type_spec_from_value(value2_fn())
        self.assertEqual(s1, s1)  # check __eq__ operator.
        self.assertEqual(s1, s2)  # check __eq__ operator.
        self.assertFalse(s1 != s1)  # check __ne__ operator.
        self.assertFalse(s1 != s2)  # check __ne__ operator.
        for c1, c2 in zip(nest.flatten(s1), nest.flatten(s2)):
            self.assertEqual(hash(c1), hash(c1))
            self.assertEqual(hash(c1), hash(c2))
        for value_fn in not_equal_value_fns:
            s3 = structure.type_spec_from_value(value_fn())
            self.assertNotEqual(s1, s3)  # check __ne__ operator.
            self.assertNotEqual(s2, s3)  # check __ne__ operator.
            self.assertFalse(s1 == s3)  # check __eq__ operator.
            self.assertFalse(s2 == s3)  # check __eq__ operator.

    @parameterized.named_parameters(
        ("RaggedTensor_RaggedRank",
         structure.RaggedTensorStructure(dtypes.int32, None, 1),
         structure.RaggedTensorStructure(dtypes.int32, None, 2)),
        ("RaggedTensor_Shape",
         structure.RaggedTensorStructure(dtypes.int32, [3, None], 1),
         structure.RaggedTensorStructure(dtypes.int32, [5, None], 1)),
        ("RaggedTensor_DType",
         structure.RaggedTensorStructure(dtypes.int32, None, 1),
         structure.RaggedTensorStructure(dtypes.float32, None, 1)),
    )
    def testRaggedStructureInequality(self, s1, s2):
        # pylint: disable=g-generic-assert
        self.assertNotEqual(s1, s2)  # check __ne__ operator.
        self.assertFalse(s1 == s2)  # check __eq__ operator.

    @parameterized.named_parameters(
        ("Tensor", lambda: constant_op.constant(37.0),
         lambda: constant_op.constant(42.0),
         lambda: constant_op.constant([5])),
        ("TensorArray", lambda: tensor_array_ops.TensorArray(
            dtype=dtypes.float32, element_shape=(3, ), size=0),
         lambda: tensor_array_ops.TensorArray(
             dtype=dtypes.float32, element_shape=(3, ), size=0),
         lambda: tensor_array_ops.TensorArray(
             dtype=dtypes.int32, element_shape=(), size=0)),
        ("SparseTensor", lambda: sparse_tensor.SparseTensor(
            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[1, 2]], values=[42], dense_shape=[4, 5]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[3]], values=[-1], dense_shape=[5])),
        ("Nested", lambda: {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }, lambda: {
            "a": constant_op.constant(42.0),
            "b": constant_op.constant([4, 5, 6])
        }, lambda: {
            "a": constant_op.constant([1, 2, 3]),
            "b": constant_op.constant(37.0)
        }),
    )
    def testHash(self, value1_fn, value2_fn, value3_fn):
        s1 = structure.type_spec_from_value(value1_fn())
        s2 = structure.type_spec_from_value(value2_fn())
        s3 = structure.type_spec_from_value(value3_fn())
        for c1, c2, c3 in zip(nest.flatten(s1), nest.flatten(s2),
                              nest.flatten(s3)):
            self.assertEqual(hash(c1), hash(c1))
            self.assertEqual(hash(c1), hash(c2))
            self.assertNotEqual(hash(c1), hash(c3))
            self.assertNotEqual(hash(c2), hash(c3))

    @parameterized.named_parameters(
        (
            "Tensor",
            lambda: constant_op.constant(37.0),
        ),
        (
            "SparseTensor",
            lambda: sparse_tensor.SparseTensor(
                indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
        ),
        ("TensorArray", lambda: tensor_array_ops.TensorArray(
            dtype=dtypes.float32, element_shape=(), size=1).write(0, 7)),
        (
            "RaggedTensor",
            lambda: ragged_factory_ops.constant([[1, 2], [], [3]]),
        ),
        (
            "Nested_0",
            lambda: {
                "a": constant_op.constant(37.0),
                "b": constant_op.constant([1, 2, 3])
            },
        ),
        (
            "Nested_1",
            lambda: {
                "a":
                constant_op.constant(37.0),
                "b": (sparse_tensor.SparseTensor(
                    indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
                      sparse_tensor.SparseTensor(
                          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
            },
        ),
    )
    def testRoundTripConversion(self, value_fn):
        value = value_fn()
        s = structure.type_spec_from_value(value)

        def maybe_stack_ta(v):
            if isinstance(v, tensor_array_ops.TensorArray):
                return v.stack()
            else:
                return v

        before = self.evaluate(maybe_stack_ta(value))
        after = self.evaluate(
            maybe_stack_ta(
                structure.from_tensor_list(s,
                                           structure.to_tensor_list(s,
                                                                    value))))

        flat_before = nest.flatten(before)
        flat_after = nest.flatten(after)
        for b, a in zip(flat_before, flat_after):
            if isinstance(b, sparse_tensor.SparseTensorValue):
                self.assertAllEqual(b.indices, a.indices)
                self.assertAllEqual(b.values, a.values)
                self.assertAllEqual(b.dense_shape, a.dense_shape)
            elif isinstance(b, (ragged_tensor.RaggedTensor,
                                ragged_tensor_value.RaggedTensorValue)):
                self.assertRaggedEqual(b, a)
            else:
                self.assertAllEqual(b, a)

    # pylint: enable=g-long-lambda

    def testPreserveStaticShape(self):
        rt = ragged_factory_ops.constant([[1, 2], [], [3]])
        rt_s = structure.type_spec_from_value(rt)
        rt_after = structure.from_tensor_list(
            rt_s, structure.to_tensor_list(rt_s, rt))
        self.assertEqual(rt_after.row_splits.shape.as_list(),
                         rt.row_splits.shape.as_list())
        self.assertEqual(rt_after.values.shape.as_list(), [None])

        st = sparse_tensor.SparseTensor(indices=[[3, 4]],
                                        values=[-1],
                                        dense_shape=[4, 5])
        st_s = structure.type_spec_from_value(st)
        st_after = structure.from_tensor_list(
            st_s, structure.to_tensor_list(st_s, st))
        self.assertEqual(st_after.indices.shape.as_list(), [None, 2])
        self.assertEqual(st_after.values.shape.as_list(), [None])
        self.assertEqual(st_after.dense_shape.shape.as_list(),
                         st.dense_shape.shape.as_list())

    def testIncompatibleStructure(self):
        # Define three mutually incompatible values/structures, and assert that:
        # 1. Using one structure to flatten a value with an incompatible structure
        #    fails.
        # 2. Using one structure to restructure a flattened value with an
        #    incompatible structure fails.
        value_tensor = constant_op.constant(42.0)
        s_tensor = structure.type_spec_from_value(value_tensor)
        flat_tensor = structure.to_tensor_list(s_tensor, value_tensor)

        value_sparse_tensor = sparse_tensor.SparseTensor(indices=[[0, 0]],
                                                         values=[1],
                                                         dense_shape=[1, 1])
        s_sparse_tensor = structure.type_spec_from_value(value_sparse_tensor)
        flat_sparse_tensor = structure.to_tensor_list(s_sparse_tensor,
                                                      value_sparse_tensor)

        value_nest = {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }
        s_nest = structure.type_spec_from_value(value_nest)
        flat_nest = structure.to_tensor_list(s_nest, value_nest)

        with self.assertRaisesRegexp(
                ValueError,
                r"SparseTensor.* is not convertible to a tensor with "
                r"dtype.*float32.* and shape \(\)"):
            structure.to_tensor_list(s_tensor, value_sparse_tensor)
        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_tensor, value_nest)

        with self.assertRaisesRegexp(
                TypeError, "Neither a SparseTensor nor SparseTensorValue"):
            structure.to_tensor_list(s_sparse_tensor, value_tensor)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_sparse_tensor, value_nest)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_nest, value_tensor)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_nest, value_sparse_tensor)

        with self.assertRaisesRegexp(ValueError, r"Incompatible input:"):
            structure.from_tensor_list(s_tensor, flat_sparse_tensor)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 1 tensors but got 2."):
            structure.from_tensor_list(s_tensor, flat_nest)

        with self.assertRaisesRegexp(ValueError, "Incompatible input: "):
            structure.from_tensor_list(s_sparse_tensor, flat_tensor)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 1 tensors but got 2."):
            structure.from_tensor_list(s_sparse_tensor, flat_nest)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 1."):
            structure.from_tensor_list(s_nest, flat_tensor)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 1."):
            structure.from_tensor_list(s_nest, flat_sparse_tensor)

    def testIncompatibleNestedStructure(self):
        # Define three mutually incompatible nested values/structures, and assert
        # that:
        # 1. Using one structure to flatten a value with an incompatible structure
        #    fails.
        # 2. Using one structure to restructure a flattened value with an
        #    incompatible structure fails.

        value_0 = {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }
        s_0 = structure.type_spec_from_value(value_0)
        flat_s_0 = structure.to_tensor_list(s_0, value_0)

        # `value_1` has compatible nested structure with `value_0`, but different
        # classes.
        value_1 = {
            "a":
            constant_op.constant(37.0),
            "b":
            sparse_tensor.SparseTensor(indices=[[0, 0]],
                                       values=[1],
                                       dense_shape=[1, 1])
        }
        s_1 = structure.type_spec_from_value(value_1)
        flat_s_1 = structure.to_tensor_list(s_1, value_1)

        # `value_2` has incompatible nested structure with `value_0` and `value_1`.
        value_2 = {
            "a":
            constant_op.constant(37.0),
            "b": (sparse_tensor.SparseTensor(indices=[[0, 0]],
                                             values=[1],
                                             dense_shape=[1, 1]),
                  sparse_tensor.SparseTensor(indices=[[3, 4]],
                                             values=[-1],
                                             dense_shape=[4, 5]))
        }
        s_2 = structure.type_spec_from_value(value_2)
        flat_s_2 = structure.to_tensor_list(s_2, value_2)

        with self.assertRaisesRegexp(
                ValueError,
                r"SparseTensor.* is not convertible to a tensor with "
                r"dtype.*int32.* and shape \(3,\)"):
            structure.to_tensor_list(s_0, value_1)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_0, value_2)

        with self.assertRaisesRegexp(
                TypeError, "Neither a SparseTensor nor SparseTensorValue"):
            structure.to_tensor_list(s_1, value_0)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_1, value_2)

        # NOTE(mrry): The repr of the dictionaries is not sorted, so the regexp
        # needs to account for "a" coming before or after "b". It might be worth
        # adding a deterministic repr for these error messages (among other
        # improvements).
        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_2, value_0)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_2, value_1)

        with self.assertRaisesRegexp(ValueError, r"Incompatible input:"):
            structure.from_tensor_list(s_0, flat_s_1)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 3."):
            structure.from_tensor_list(s_0, flat_s_2)

        with self.assertRaisesRegexp(ValueError, "Incompatible input: "):
            structure.from_tensor_list(s_1, flat_s_0)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 3."):
            structure.from_tensor_list(s_1, flat_s_2)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 3 tensors but got 2."):
            structure.from_tensor_list(s_2, flat_s_0)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 3 tensors but got 2."):
            structure.from_tensor_list(s_2, flat_s_1)

    @parameterized.named_parameters(
        ("Tensor", dtypes.float32, tensor_shape.scalar(), ops.Tensor,
         structure.TensorStructure(dtypes.float32, [])),
        ("SparseTensor", dtypes.int32, tensor_shape.matrix(2, 2),
         sparse_tensor.SparseTensor,
         structure.SparseTensorStructure(dtypes.int32, [2, 2])),
        ("TensorArray_0", dtypes.int32,
         tensor_shape.as_shape([None, True, 2, 2]),
         tensor_array_ops.TensorArray,
         structure.TensorArrayStructure(
             dtypes.int32, [2, 2], dynamic_size=None, infer_shape=True)),
        ("TensorArray_1", dtypes.int32,
         tensor_shape.as_shape([True, None, 2, 2]),
         tensor_array_ops.TensorArray,
         structure.TensorArrayStructure(
             dtypes.int32, [2, 2], dynamic_size=True, infer_shape=None)),
        ("TensorArray_2", dtypes.int32,
         tensor_shape.as_shape([True, False, 2, 2]),
         tensor_array_ops.TensorArray,
         structure.TensorArrayStructure(
             dtypes.int32, [2, 2], dynamic_size=True, infer_shape=False)),
        ("RaggedTensor", dtypes.int32, tensor_shape.matrix(2, None),
         structure.RaggedTensorStructure(dtypes.int32, [2, None], 1),
         structure.RaggedTensorStructure(dtypes.int32, [2, None], 1)),
        ("Nested", {
            "a": dtypes.float32,
            "b": (dtypes.int32, dtypes.string)
        }, {
            "a": tensor_shape.scalar(),
            "b": (tensor_shape.matrix(2, 2), tensor_shape.scalar())
        }, {
            "a": ops.Tensor,
            "b": (sparse_tensor.SparseTensor, ops.Tensor)
        }, {
            "a":
            structure.TensorStructure(dtypes.float32, []),
            "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
                  structure.TensorStructure(dtypes.string, []))
        }),
    )
    def testConvertLegacyStructure(self, output_types, output_shapes,
                                   output_classes, expected_structure):
        actual_structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        self.assertEqual(actual_structure, expected_structure)

    def testNestedNestedStructure(self):
        s = (structure.TensorStructure(dtypes.int64, []),
             (structure.TensorStructure(dtypes.float32, []),
              structure.TensorStructure(dtypes.string, [])))

        int64_t = constant_op.constant(37, dtype=dtypes.int64)
        float32_t = constant_op.constant(42.0)
        string_t = constant_op.constant("Foo")

        nested_tensors = (int64_t, (float32_t, string_t))

        tensor_list = structure.to_tensor_list(s, nested_tensors)
        for expected, actual in zip([int64_t, float32_t, string_t],
                                    tensor_list):
            self.assertIs(expected, actual)

        (actual_int64_t,
         (actual_float32_t,
          actual_string_t)) = structure.from_tensor_list(s, tensor_list)
        self.assertIs(int64_t, actual_int64_t)
        self.assertIs(float32_t, actual_float32_t)
        self.assertIs(string_t, actual_string_t)

        (actual_int64_t,
         (actual_float32_t,
          actual_string_t)) = (structure.from_compatible_tensor_list(
              s, tensor_list))
        self.assertIs(int64_t, actual_int64_t)
        self.assertIs(float32_t, actual_float32_t)
        self.assertIs(string_t, actual_string_t)

    @parameterized.named_parameters(
        ("Tensor", structure.TensorStructure(dtypes.float32, []), 32,
         structure.TensorStructure(dtypes.float32, [32])),
        ("TensorUnknown", structure.TensorStructure(dtypes.float32, []), None,
         structure.TensorStructure(dtypes.float32, [None])),
        ("SparseTensor", structure.SparseTensorStructure(
            dtypes.float32, [None]), 32,
         structure.SparseTensorStructure(dtypes.float32, [32, None])),
        ("SparseTensorUnknown",
         structure.SparseTensorStructure(dtypes.float32, [4]), None,
         structure.SparseTensorStructure(dtypes.float32, [None, 4])),
        ("RaggedTensor",
         structure.RaggedTensorStructure(dtypes.float32, [2, None], 1), 32,
         structure.RaggedTensorStructure(dtypes.float32, [32, 2, None], 2)),
        ("RaggedTensorUnknown",
         structure.RaggedTensorStructure(dtypes.float32, [4, None], 1), None,
         structure.RaggedTensorStructure(dtypes.float32, [None, 4, None], 2)),
        ("Nested", {
            "a":
            structure.TensorStructure(dtypes.float32, []),
            "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
                  structure.TensorStructure(dtypes.string, []))
        }, 128, {
            "a":
            structure.TensorStructure(dtypes.float32, [128]),
            "b": (structure.SparseTensorStructure(dtypes.int32, [128, 2, 2]),
                  structure.TensorStructure(dtypes.string, [128]))
        }),
    )
    def testBatch(self, element_structure, batch_size,
                  expected_batched_structure):
        batched_structure = nest.map_structure(
            lambda component_spec: component_spec._batch(batch_size),
            element_structure)
        self.assertEqual(batched_structure, expected_batched_structure)

    @parameterized.named_parameters(
        ("Tensor", structure.TensorStructure(dtypes.float32, [32]),
         structure.TensorStructure(dtypes.float32, [])),
        ("TensorUnknown", structure.TensorStructure(dtypes.float32, [None]),
         structure.TensorStructure(dtypes.float32, [])),
        ("SparseTensor",
         structure.SparseTensorStructure(dtypes.float32, [32, None]),
         structure.SparseTensorStructure(dtypes.float32, [None])),
        ("SparseTensorUnknown",
         structure.SparseTensorStructure(dtypes.float32, [None, 4]),
         structure.SparseTensorStructure(dtypes.float32, [4])),
        ("RaggedTensor",
         structure.RaggedTensorStructure(dtypes.float32, [32, None, None], 2),
         structure.RaggedTensorStructure(dtypes.float32, [None, None], 1)),
        ("RaggedTensorUnknown",
         structure.RaggedTensorStructure(dtypes.float32, [None, None, None],
                                         2),
         structure.RaggedTensorStructure(dtypes.float32, [None, None], 1)),
        ("Nested", {
            "a":
            structure.TensorStructure(dtypes.float32, [128]),
            "b": (structure.SparseTensorStructure(dtypes.int32, [128, 2, 2]),
                  structure.TensorStructure(dtypes.string, [None]))
        }, {
            "a":
            structure.TensorStructure(dtypes.float32, []),
            "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
                  structure.TensorStructure(dtypes.string, []))
        }),
    )
    def testUnbatch(self, element_structure, expected_unbatched_structure):
        unbatched_structure = nest.map_structure(
            lambda component_spec: component_spec._unbatch(),
            element_structure)
        self.assertEqual(unbatched_structure, expected_unbatched_structure)

    # pylint: disable=g-long-lambda
    @parameterized.named_parameters(
        ("Tensor", lambda: constant_op.constant([[1.0, 2.0], [3.0, 4.0]]),
         lambda: constant_op.constant([1.0, 2.0])),
        ("SparseTensor", lambda: sparse_tensor.SparseTensor(
            indices=[[0, 0], [1, 1]], values=[13, 27], dense_shape=[2, 2]),
         lambda: sparse_tensor.SparseTensor(
             indices=[[0]], values=[13], dense_shape=[2])),
        ("RaggedTensor", lambda: ragged_factory_ops.constant([[[1]], [[2]]]),
         lambda: ragged_factory_ops.constant([[1]])),
        ("Nest", lambda:
         (constant_op.constant([[1.0, 2.0], [3.0, 4.0]]),
          sparse_tensor.SparseTensor(
              indices=[[0, 0], [1, 1]], values=[13, 27], dense_shape=[2, 2])),
         lambda: (constant_op.constant([1.0, 2.0]),
                  sparse_tensor.SparseTensor(
                      indices=[[0]], values=[13], dense_shape=[2]))),
    )
    def testToBatchedTensorList(self, value_fn, element_0_fn):
        batched_value = value_fn()
        s = structure.type_spec_from_value(batched_value)
        batched_tensor_list = structure.to_batched_tensor_list(
            s, batched_value)

        # The batch dimension is 2 for all of the test cases.
        # NOTE(mrry): `tf.shape()` does not currently work for the DT_VARIANT
        # tensors in which we store sparse tensors.
        for t in batched_tensor_list:
            if t.dtype != dtypes.variant:
                self.assertEqual(2, self.evaluate(array_ops.shape(t)[0]))

        # Test that the 0th element from the unbatched tensor is equal to the
        # expected value.
        expected_element_0 = self.evaluate(element_0_fn())
        unbatched_s = nest.map_structure(
            lambda component_spec: component_spec._unbatch(), s)
        actual_element_0 = structure.from_tensor_list(
            unbatched_s, [t[0] for t in batched_tensor_list])

        for expected, actual in zip(nest.flatten(expected_element_0),
                                    nest.flatten(actual_element_0)):
            if sparse_tensor.is_sparse(expected):
                self.assertSparseValuesEqual(expected, actual)
            elif ragged_tensor.is_ragged(expected):
                self.assertRaggedEqual(expected, actual)
            else:
                self.assertAllEqual(expected, actual)
 def _map_fn(i):
     return sparse_tensor.SparseTensor(indices=[[0, 0], [1, 1]],
                                       values=(i * [1, -1]),
                                       dense_shape=[2, 2])
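
A plausible usage sketch for _map_fn above (the tf.data context is our assumption; the snippet itself does not say how it is called):

ds = dataset_ops.Dataset.range(10).map(_map_fn)
# Each element is a 2x2 SparseTensor whose diagonal holds [i, -i].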
Example #17
def _ExtractImagePatchesGrad(op, grad):
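  """Gradient for ExtractImagePatches, via a sparse patch/pixel matmul.

  Note: this snippet assumes the usual TF-internal imports (array_ops,
  sparse_tensor, sparse_ops, ops) plus `ceil` from the `math` module.
  """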

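  # rows_in/cols_in must be statically known, since they are used as Python
  # ints below; batch and channel sizes are re-read dynamically because they
  # may be unknown at graph-construction time.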
  batch_size, rows_in, cols_in, channels = [
    dim.value for dim in op.inputs[0].get_shape()
  ]
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  _, rows_out, cols_out, _ = [
    dim.value for dim in op.outputs[0].get_shape()
  ]
  _, ksize_r, ksize_c, _ = op.get_attr('ksizes')
  _, stride_r, stride_c, _ = op.get_attr('strides')
  _, rate_r, rate_c, _ = op.get_attr('rates')
  padding = op.get_attr('padding')

  ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
  ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

  if padding == b'SAME':
    rows_out = int(ceil(rows_in / stride_r))
    cols_out = int(ceil(cols_in / stride_c))
    pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
    pad_cols = ((cols_out - 1) * stride_c + ksize_c_eff - cols_in) // 2

  elif padding == b'VALID':
    rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
    cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_c))
    pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
    pad_cols = (cols_out - 1) * stride_c + ksize_c_eff - cols_in

  else:
    raise ValueError("Unsupported padding: %s" % padding)

  pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

  grad_expanded = array_ops.transpose(
    array_ops.reshape(grad, (batch_size, rows_out,
                             cols_out, ksize_r, ksize_c, channels)),
    (1, 2, 3, 4, 0, 5)
  )
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  row_steps = range(0, rows_out * stride_r, stride_r)
  col_steps = range(0, cols_out * stride_c, stride_c)

  idx = []
  for i in range(rows_out):
    for j in range(cols_out):
      r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
      r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

      idx.extend([(r * (cols_in) + c,
                   i * (cols_out * ksize_r * ksize_c) +
                   j * (ksize_r * ksize_c) +
                   ri * (ksize_c) + ci)
                  for (ri, r) in enumerate(range(r_low, r_high, rate_r))
                  for (ci, c) in enumerate(range(c_low, c_high, rate_c))
                  if 0 <= r < rows_in and 0 <= c < cols_in
      ])

  sp_shape = (rows_in * cols_in,
              rows_out * cols_out * ksize_r * ksize_c)

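  # Each nonzero of sp_mat links a flattened input pixel (row) to one
  # flattened patch position (column), so the matmul below sums, per pixel,
  # the gradients of all patch positions that pixel contributed to.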
  sp_mat = sparse_tensor.SparseTensor(
    array_ops.constant(idx, dtype=ops.dtypes.int64),
    array_ops.ones((len(idx),), dtype=ops.dtypes.float32),
    sp_shape
  )

  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  grad_out = array_ops.reshape(
    jac, (rows_in, cols_in, batch_size, channels)
  )
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return [grad_out]
Example #18
def _to_sparse_tensor(record):
    return sparse_tensor.SparseTensor(**record)
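
A minimal usage sketch for the helper above (the `record` dict is hypothetical; its keys must match SparseTensor's constructor arguments):

record = {"indices": [[0, 0], [2, 1]], "values": [1, 2], "dense_shape": [3, 4]}
st = _to_sparse_tensor(record)  # 3x4 SparseTensor with two nonzero entries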
Example #19
  def testIncompatibleNestedStructure(self):
    # Define three mutually incompatible nested values/structures, and assert
    # that:
    # 1. Using one structure to flatten a value with an incompatible structure
    #    fails.
    # 2. Using one structure to restructure a flattened value with an
    #    incompatible structure fails.

    value_0 = {
        "a": constant_op.constant(37.0),
        "b": constant_op.constant([1, 2, 3])
    }
    s_0 = structure.Structure.from_value(value_0)
    flat_s_0 = s_0._to_tensor_list(value_0)

    # `value_1` has compatible nested structure with `value_0`, but different
    # classes.
    value_1 = {
        "a":
            constant_op.constant(37.0),
        "b":
            sparse_tensor.SparseTensor(
                indices=[[0, 0]], values=[1], dense_shape=[1, 1])
    }
    s_1 = structure.Structure.from_value(value_1)
    flat_s_1 = s_1._to_tensor_list(value_1)

    # `value_2` has incompatible nested structure with `value_0` and `value_1`.
    value_2 = {
        "a":
            constant_op.constant(37.0),
        "b": (sparse_tensor.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
              sparse_tensor.SparseTensor(
                  indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
    }
    s_2 = structure.Structure.from_value(value_2)
    flat_s_2 = s_2._to_tensor_list(value_2)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.* not compatible with the nested structure "
        ".*TensorStructure"):
      s_0._to_tensor_list(value_1)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.*SparseTensor.* not compatible with the "
        "nested structure .*TensorStructure"):
      s_0._to_tensor_list(value_2)

    with self.assertRaisesRegexp(
        ValueError, "Tensor.* not compatible with the nested structure "
        ".*SparseTensorStructure"):
      s_1._to_tensor_list(value_0)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensor.*SparseTensor.* not compatible with the "
        "nested structure .*TensorStructure"):
      s_1._to_tensor_list(value_2)

    # NOTE(mrry): The repr of the dictionaries is not sorted, so the regexp
    # needs to account for "a" coming before or after "b". It might be worth
    # adding a deterministic repr for these error messages (among other
    # improvements).
    with self.assertRaisesRegexp(
        ValueError, "Tensor.*Tensor.* not compatible with the nested structure "
        ".*(TensorStructure.*SparseTensorStructure.*SparseTensorStructure|"
        "SparseTensorStructure.*SparseTensorStructure.*TensorStructure)"):
      s_2._to_tensor_list(value_0)

    with self.assertRaisesRegexp(
        ValueError, "(Tensor.*SparseTensor|SparseTensor.*Tensor).* "
        "not compatible with the nested structure .*"
        "(TensorStructure.*SparseTensorStructure.*SparseTensorStructure|"
        "SparseTensorStructure.*SparseTensorStructure.*TensorStructure)"):
      s_2._to_tensor_list(value_1)

    with self.assertRaisesRegexp(
        ValueError, r"Cannot convert.*with dtype.*int32.* and shape \(3,\)"):
      s_0._from_tensor_list(flat_s_1)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 3."):
      s_0._from_tensor_list(flat_s_2)

    with self.assertRaisesRegexp(
        ValueError, "SparseTensorStructure corresponds to a single tf.variant "
        "vector of length 3."):
      s_1._from_tensor_list(flat_s_0)

    with self.assertRaisesRegexp(
        ValueError, "Expected 2 flat values in NestedStructure but got 3."):
      s_1._from_tensor_list(flat_s_2)

    with self.assertRaisesRegexp(
        ValueError, "Expected 3 flat values in NestedStructure but got 2."):
      s_2._from_tensor_list(flat_s_0)

    with self.assertRaisesRegexp(
        ValueError, "Expected 3 flat values in NestedStructure but got 2."):
      s_2._from_tensor_list(flat_s_1)
Example #20
    def testPrepareFeaturesForSQSS(self):
        mode = model_fn_lib.ModeKeys.TRAIN
        seq_feature_name = 'seq_feature'
        sparse_seq_feature_name = 'wire_cast'
        ctx_feature_name = 'ctx_feature'
        input_key_column_name = 'input_key_column'
        sequence_length = 4
        embedding_dimension = 8

        features = {
            input_key_column_name:
            constant_op.constant('input0'),
            sparse_seq_feature_name:
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]],
                values=[
                    b'marlo', b'stringer', b'omar', b'stringer', b'marlo',
                    b'marlo', b'omar'
                ],
                dense_shape=[3, 2, 2]),
            seq_feature_name:
            constant_op.constant(1.0, shape=[sequence_length]),
            ctx_feature_name:
            constant_op.constant(2.0)
        }

        labels = constant_op.constant(5.0, shape=[sequence_length])

        wire_cast = feature_column.sparse_column_with_keys(
            'wire_cast', ['marlo', 'omar', 'stringer'])
        sequence_feature_columns = [
            feature_column.real_valued_column(seq_feature_name, dimension=1),
            feature_column.embedding_column(
                wire_cast,
                dimension=embedding_dimension,
                initializer=init_ops.ones_initializer())
        ]

        context_feature_columns = [
            feature_column.real_valued_column(ctx_feature_name, dimension=1)
        ]

        expected_input_key = b'input0'

        expected_sequence = {
            ssre.RNNKeys.LABELS_KEY:
            np.array([5., 5., 5., 5.]),
            seq_feature_name:
            np.array([1., 1., 1., 1.]),
            sparse_seq_feature_name:
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]],
                values=[
                    b'marlo', b'stringer', b'omar', b'stringer', b'marlo',
                    b'marlo', b'omar'
                ],
                dense_shape=[3, 2, 2]),
        }

        expected_context = {ctx_feature_name: 2.}

        input_key, sequence, context = ssre._prepare_features_for_sqss(
            features, labels, mode, input_key_column_name,
            sequence_feature_columns, context_feature_columns)

        def assert_equal(expected, got):
            self.assertEqual(sorted(expected), sorted(got))
            for k, v in expected.items():
                if isinstance(v, sparse_tensor.SparseTensor):
                    self.assertAllEqual(v.values.eval(), got[k].values)
                    self.assertAllEqual(v.indices.eval(), got[k].indices)
                    self.assertAllEqual(v.dense_shape.eval(),
                                        got[k].dense_shape)
                else:
                    self.assertAllEqual(v, got[k])

        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(data_flow_ops.initialize_all_tables())
            actual_input_key, actual_sequence, actual_context = sess.run(
                [input_key, sequence, context])
            self.assertEqual(expected_input_key, actual_input_key)
            assert_equal(expected_sequence, actual_sequence)
            assert_equal(expected_context, actual_context)
Example #21
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64):
  """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  ```python
  indicators = [
    [
      [0, 0, 1, 0],
      [0, 0, 0, 0]
    ], [
      [1, 0, 1, 1],
      [0, 0, 1, 0]
    ]
  ]
  sparse_ids = indicators_to_sparse_ids(indicators)
  ```

  `sparse_ids` in "jagged" format:
  [
    [
      [2],
      []
    ], [
      [0, 2, 3],
      [2]
    ]
  ]

  `sparse_ids` in `SparseTensor` format:
  ```python
  {
    indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]],
    values: [2, 0, 2, 3, 2],
    dense_shape: [2, 2, 3]
  }
  ```

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`.
      `ignore_value` values are ignored. For other values (typically, ones), the
      index along the last dimension is returned.
    ignore_value: Entries in `indicators` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the result
      shape is (2, 2, 3)). The values of this `SparseTensor` are in the range
      `[0, num_classes)` and correspond to the index of non-ignore values along
      the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
  if not dtype.is_integer:
    raise ValueError("Invalid dtype {} not integer.".format(dtype))
  with ops.name_scope(
      None, "indicators_to_sparse_ids", (indicators, ignore_value)):
    # Convert indicators to binary ones and zeros. We use int64 since
    # SparseTensor requires int64 indices.
    indicators = ops.convert_to_tensor(indicators, name="indicators")
    missing_indicators = math_ops.equal(
        indicators, _ignore_value_tensor(indicators.dtype, ignore_value),
        name="missing")
    zeros_like_indicators = array_ops.zeros_like(
        indicators, dtype=dtypes.int64, name="zeros")
    binary_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"),
        name="binary_indicators")

    # Use cumsum along the last dimension to generate per-row indexes.
    # Note that these are 1-based (since 0 indicates missing values), so they're
    # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
    # off-by-one, the max value is the size of the last dimension (i.e.,
    # last_index + 1).
    row_index_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators")
    result_last_dim = array_ops.reshape(
        math_ops.reduce_max(row_index_indicators), shape=(1,),
        name="result_last_dim")

    # Convert to a SparseTensor. The values of this SparseTensor are the last
    # indices of our result, and the last indices of this SparseTensor (i.e.,
    # the class IDs indicated by `indicators`) are the values of our result, so
    # we use tensor slicing and concat to swap them.
    sparse_row_index_indicators = dense_to_sparse_tensor(
        row_index_indicators, ignore_value=0)
    return sparse_tensor.SparseTensor(
        indices=array_ops.concat((
            sparse_row_index_indicators.indices[:, :-1],
            array_ops.reshape(sparse_row_index_indicators.values - 1, (-1, 1))
        ), axis=1, name="indices"),
        values=math_ops.cast(
            sparse_row_index_indicators.indices[:, -1], dtype=dtype,
            name="values"),
        dense_shape=array_ops.concat(
            (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim),
            axis=0, name="dense_shape"))
Example #22
 def make_sparse(x):
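     # Index, value, and dense_shape are all derived from the scalar x.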
     x_1d = array_ops.reshape(x, [1])
     x_2d = array_ops.reshape(x, [1, 1])
     return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)
Example #23
 def f(sparse_values, default_value):
   st = sparse_tensor.SparseTensor(
       indices=[[0, 3, 6], [1, 4, 7], [2, 5, 8]],
       values=sparse_values,
       dense_shape=[3, 6, 9])
   return sparse_ops.sparse_tensor_to_dense(st, default_value)
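
A hedged usage sketch for f above (argument values are illustrative only):

dense = f(sparse_values=[1.0, 2.0, 3.0], default_value=0.0)
# dense has shape [3, 6, 9]; entries (0,3,6), (1,4,7) and (2,5,8) hold the
# sparse values, and every other entry holds default_value.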
Example #24
    def testInitCrossedColumnWeightsFromCkpt(self):
        sparse_col_1 = fc.sparse_column_with_hash_bucket(column_name="col_1",
                                                         hash_bucket_size=4)
        sparse_col_2 = fc.sparse_column_with_hash_bucket(column_name="col_2",
                                                         hash_bucket_size=4)

        crossed_col = fc.crossed_column(columns=[sparse_col_1, sparse_col_2],
                                        hash_bucket_size=4)

        input_tensor = sparse_tensor_lib.SparseTensor(indices=[[0, 0], [1, 1],
                                                               [2, 2], [3, 3]],
                                                      values=[0, 1, 2, 3],
                                                      dense_shape=[4, 4])

        # Invoking 'weighted_sum_from_feature_columns' will create the crossed
        # column weights variable.
        with variable_scope.variable_scope("run_1"):
            with variable_scope.variable_scope(crossed_col.name):
                # Returns looked up column weights which is same as crossed column
                # weights as well as actual references to weights variables.
                _, col_weights, _ = (
                    feature_column_ops.weighted_sum_from_feature_columns(
                        {
                            sparse_col_1.name: input_tensor,
                            sparse_col_2.name: input_tensor
                        }, [crossed_col], 1))
                # Update the weights since default initializer initializes all weights
                # to 0.0.
                for weight in col_weights.values():
                    assign_op = state_ops.assign(weight[0], weight[0] + 0.5)

        save = saver.Saver()
        ckpt_dir_prefix = os.path.join(self.get_temp_dir(),
                                       "init_crossed_col_w_from_ckpt")
        ckpt_dir = tempfile.mkdtemp(prefix=ckpt_dir_prefix)
        checkpoint_path = os.path.join(ckpt_dir, "model.ckpt")

        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(assign_op)
            saved_col_weights = col_weights[crossed_col][0].eval()
            save.save(sess, checkpoint_path)

        crossed_col_initialized = fc.crossed_column(
            columns=[sparse_col_1, sparse_col_2],
            hash_bucket_size=4,
            ckpt_to_load_from=checkpoint_path,
            tensor_name_in_ckpt=("run_1/col_1_X_col_2/"
                                 "weighted_sum_from_feature_columns/"
                                 "col_1_X_col_2/weights"))

        with variable_scope.variable_scope("run_2"):
            # This will initialize the crossed column weights from provided checkpoint
            # and return a [4, 1] tensor which is same as weights variable. Since we
            # won't modify weights, this should be same as 'saved_col_weights'.
            _, col_weights, _ = (
                feature_column_ops.weighted_sum_from_feature_columns(
                    {
                        sparse_col_1.name: input_tensor,
                        sparse_col_2.name: input_tensor
                    }, [crossed_col_initialized], 1))
            col_weights_from_ckpt = col_weights[crossed_col_initialized][0]

        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            loaded_col_weights = col_weights_from_ckpt.eval()

        self.assertAllClose(saved_col_weights, loaded_col_weights)
Example #25
  def _test_sparse_set_difference_3d(self, dtype, invalid_indices=False):
    if invalid_indices:
      indices = constant_op.constant(
          [
              [0, 1, 0],
              [0, 1, 1],  # 0,1
              [1, 0, 0],  # 1,0
              [1, 1, 0],
              [1, 1, 1],
              [1, 1, 2],  # 1,1
              [0, 0, 0],
              [0, 0, 2],  # 0,0
              # 2,0
              [2, 1, 1]  # 2,1
              # 3,*
          ],
          dtypes.int64)
    else:
      indices = constant_op.constant(
          [
              [0, 0, 0],
              [0, 0, 2],  # 0,0
              [0, 1, 0],
              [0, 1, 1],  # 0,1
              [1, 0, 0],  # 1,0
              [1, 1, 0],
              [1, 1, 1],
              [1, 1, 2],  # 1,1
              # 2,0
              [2, 1, 1]  # 2,1
              # 3,*
          ],
          dtypes.int64)
    sp_a = sparse_tensor_lib.SparseTensor(
        indices,
        _constant(
            [
                1,
                9,  # 0,0
                3,
                3,  # 0,1
                1,  # 1,0
                9,
                7,
                8,  # 1,1
                # 2,0
                5  # 2,1
                # 3,*
            ],
            dtype),
        constant_op.constant([4, 2, 3], dtypes.int64))
    sp_b = sparse_tensor_lib.SparseTensor(
        constant_op.constant(
            [
                [0, 0, 0],
                [0, 0, 3],  # 0,0
                # 0,1
                [1, 0, 0],  # 1,0
                [1, 1, 0],
                [1, 1, 1],  # 1,1
                [2, 0, 1],  # 2,0
                [2, 1, 1],  # 2,1
                [3, 0, 0],  # 3,0
                [3, 1, 0]  # 3,1
            ],
            dtypes.int64),
        _constant(
            [
                1,
                3,  # 0,0
                # 0,1
                3,  # 1,0
                7,
                8,  # 1,1
                2,  # 2,0
                5,  # 2,1
                4,  # 3,0
                4  # 3,1
            ],
            dtype),
        constant_op.constant([4, 2, 4], dtypes.int64))

    if invalid_indices:
      with self.assertRaisesRegexp(errors_impl.OpError, "out of order"):
        self._set_difference(sp_a, sp_b, False)
      with self.assertRaisesRegexp(errors_impl.OpError, "out of order"):
        self._set_difference(sp_a, sp_b, True)
    else:
      # a-b
      expected_indices = [
          [0, 0, 0],  # 0,0
          [0, 1, 0],  # 0,1
          [1, 0, 0],  # 1,0
          [1, 1, 0],  # 1,1
          # 2,*
          # 3,*
      ]
      expected_values = _values(
          [
              9,  # 0,0
              3,  # 0,1
              1,  # 1,0
              9,  # 1,1
              # 2,*
              # 3,*
          ],
          dtype)
      expected_shape = [4, 2, 1]
      expected_counts = [
          [
              1,  # 0,0
              1  # 0,1
          ],
          [
              1,  # 1,0
              1  # 1,1
          ],
          [
              0,  # 2,0
              0  # 2,1
          ],
          [
              0,  # 3,0
              0  # 3,1
          ]
      ]

      difference = self._set_difference(sp_a, sp_b, True)
      self._assert_set_operation(
          expected_indices,
          expected_values,
          expected_shape,
          difference,
          dtype=dtype)
      self.assertAllEqual(expected_counts,
                          self._set_difference_count(sp_a, sp_b))

      # b-a
      expected_indices = [
          [0, 0, 0],  # 0,0
          # 0,1
          [1, 0, 0],  # 1,0
          # 1,1
          [2, 0, 0],  # 2,0
          # 2,1
          [3, 0, 0],  # 3,0
          [3, 1, 0]  # 3,1
      ]
      expected_values = _values(
          [
              3,  # 0,0
              # 0,1
              3,  # 1,0
              # 1,1
              2,  # 2,0
              # 2,1
              4,  # 3,0
              4,  # 3,1
          ],
          dtype)
      expected_shape = [4, 2, 1]
      expected_counts = [
          [
              1,  # 0,0
              0  # 0,1
          ],
          [
              1,  # 1,0
              0  # 1,1
          ],
          [
              1,  # 2,0
              0  # 2,1
          ],
          [
              1,  # 3,0
              1  # 3,1
          ]
      ]

      difference = self._set_difference(sp_a, sp_b, False)
      self._assert_set_operation(
          expected_indices,
          expected_values,
          expected_shape,
          difference,
          dtype=dtype)
      self.assertAllEqual(expected_counts,
                          self._set_difference_count(sp_a, sp_b, False))
Example #27
def ctc_loss_v2(labels, logits, label_length, logit_length,
                logits_time_major=True, unique=None,
                blank_index=None, name=None):
  """Computes CTC (Connectionist Temporal Classification) loss.

  This op implements the CTC loss as presented in the article:

  [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber.
  Connectionist Temporal Classification: Labeling Unsegmented Sequence Data
  with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA,
  pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf)

  Notes:
      - Same as the "Classic CTC" in TensorFlow 1.x's tf.nn.ctc_loss with
        preprocess_collapse_repeated=False, ctc_merge_repeated=True
      - Labels may be supplied either as a dense, zero-padded tensor with a
        vector of label sequence lengths, or as a SparseTensor.
      - On TPU and GPU:
          - Only dense padded labels are supported.
      - On CPU:
          - Caller may use SparseTensor or dense padded labels, but calling
            with a SparseTensor will be significantly faster.
      - The default blank label is 0 rather than num_classes - 1, unless
        overridden by blank_index.

  Args:
    labels: tensor of shape [batch_size, max_label_seq_length] or SparseTensor.
    logits: tensor of shape [frames, batch_size, num_labels]; if
      logits_time_major == False, shape is [batch_size, frames, num_labels].
    label_length: tensor of shape [batch_size] giving the length of each
      reference label sequence in labels, or None if labels is a SparseTensor.
    logit_length: tensor of shape [batch_size].
      Length of the input sequence in logits.
    logits_time_major: (optional) If True (default), logits is shaped
      [time, batch, logits]. If False, shape is [batch, time, logits].
    unique: (optional) Unique label indices as computed by
      ctc_unique_labels(labels). If supplied, enables a faster,
      memory-efficient implementation on TPU.
    blank_index: (optional) Set the class index to use for the blank label.
      Negative values will start from num_classes, i.e., -1 will reproduce the
      ctc_loss behavior of using num_classes - 1 for the blank symbol.
      There is some memory/performance overhead to switching from the default
      of 0, as an additional shifted copy of the logits may be created.
    name: A name for this `Op`. Defaults to "ctc_loss_dense".

  Returns:
    loss: tensor of shape [batch_size], negative log probabilities.
  """
  if isinstance(labels, sparse_tensor.SparseTensor):
    if blank_index is None:
      raise ValueError(
          "blank_index must be given when using SparseTensor labels.")

    if blank_index < 0:
      blank_index += _get_dim(logits, 2)

    # The classic ctc_loss kernel used below assumes the blank label is the
    # last class. If blank_index is not already the last class, rotate the
    # blank column of the logits to the end and shift label ids above
    # blank_index down by one to match.
    if blank_index != _get_dim(logits, 2) - 1:
      logits = array_ops.concat([
          logits[:, :, :blank_index],
          logits[:, :, blank_index+1:],
          logits[:, :, blank_index:blank_index+1],
      ], axis=2)
      labels = sparse_tensor.SparseTensor(
          labels.indices,
          array_ops.where(labels.values < blank_index,
                          labels.values,
                          labels.values - 1),
          labels.dense_shape)

    return ctc_loss(labels=labels,
                    inputs=logits,
                    sequence_length=logit_length,
                    time_major=logits_time_major)

  if blank_index is None:
    blank_index = 0

  return ctc_loss_dense(labels=labels,
                        logits=logits,
                        label_length=label_length,
                        logit_length=logit_length,
                        logits_time_major=logits_time_major,
                        unique=unique,
                        blank_index=blank_index,
                        name=name)
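
A minimal usage sketch of the dense-label path, assuming the TF 2.x public
export tf.nn.ctc_loss (which wraps this function); the shapes and label
values below are illustrative only:

import tensorflow as tf

batch_size, frames, num_labels = 2, 10, 5
# Time-major logits, as with logits_time_major=True (the default).
logits = tf.random.normal([frames, batch_size, num_labels])
# Zero-padded dense labels; with the default blank_index of 0, real label
# ids are 1..num_labels-1 and 0 is reserved for blank/padding.
labels = tf.constant([[1, 2, 3, 0], [2, 4, 0, 0]])
label_length = tf.constant([3, 2])
logit_length = tf.fill([batch_size], frames)

loss = tf.nn.ctc_loss(labels=labels, logits=logits,
                      label_length=label_length, logit_length=logit_length,
                      logits_time_major=True, blank_index=0)
print(loss.shape)  # (2,): one negative log probability per batch element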
Example #28
    def testIncompatibleNestedStructure(self):
        # Define three mutually incompatible nested values/structures, and assert
        # that:
        # 1. Using one structure to flatten a value with an incompatible structure
        #    fails.
        # 2. Using one structure to restructure a flattened value with an
        #    incompatible structure fails.

        value_0 = {
            "a": constant_op.constant(37.0),
            "b": constant_op.constant([1, 2, 3])
        }
        s_0 = structure.type_spec_from_value(value_0)
        flat_s_0 = structure.to_tensor_list(s_0, value_0)

        # `value_1` has compatible nested structure with `value_0`, but different
        # classes.
        value_1 = {
            "a":
            constant_op.constant(37.0),
            "b":
            sparse_tensor.SparseTensor(indices=[[0, 0]],
                                       values=[1],
                                       dense_shape=[1, 1])
        }
        s_1 = structure.type_spec_from_value(value_1)
        flat_s_1 = structure.to_tensor_list(s_1, value_1)

        # `value_2` has incompatible nested structure with `value_0` and `value_1`.
        value_2 = {
            "a":
            constant_op.constant(37.0),
            "b": (sparse_tensor.SparseTensor(indices=[[0, 0]],
                                             values=[1],
                                             dense_shape=[1, 1]),
                  sparse_tensor.SparseTensor(indices=[[3, 4]],
                                             values=[-1],
                                             dense_shape=[4, 5]))
        }
        s_2 = structure.type_spec_from_value(value_2)
        flat_s_2 = structure.to_tensor_list(s_2, value_2)

        with self.assertRaisesRegexp(
                ValueError,
                r"SparseTensor.* is not convertible to a tensor with "
                r"dtype.*int32.* and shape \(3,\)"):
            structure.to_tensor_list(s_0, value_1)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_0, value_2)

        with self.assertRaisesRegexp(
                TypeError, "Neither a SparseTensor nor SparseTensorValue"):
            structure.to_tensor_list(s_1, value_0)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_1, value_2)

        # NOTE(mrry): The repr of the dictionaries is not sorted, so the regexp
        # needs to account for "a" coming before or after "b". It might be worth
        # adding a deterministic repr for these error messages (among other
        # improvements).
        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_2, value_0)

        with self.assertRaisesRegexp(
                ValueError,
                "The two structures don't have the same nested structure."):
            structure.to_tensor_list(s_2, value_1)

        with self.assertRaisesRegexp(ValueError, r"Incompatible input:"):
            structure.from_tensor_list(s_0, flat_s_1)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 3."):
            structure.from_tensor_list(s_0, flat_s_2)

        with self.assertRaisesRegexp(ValueError, "Incompatible input: "):
            structure.from_tensor_list(s_1, flat_s_0)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 2 tensors but got 3."):
            structure.from_tensor_list(s_1, flat_s_2)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 3 tensors but got 2."):
            structure.from_tensor_list(s_2, flat_s_0)

        with self.assertRaisesRegexp(ValueError,
                                     "Expected 3 tensors but got 2."):
            structure.from_tensor_list(s_2, flat_s_1)
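
For contrast with the failure cases above, a hedged sketch of the compatible
round trip: when the spec and the value match, to_tensor_list and
from_tensor_list invert each other (the internal tensorflow.python.data.util
import paths below may change between releases):

from tensorflow.python.data.util import structure
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import sparse_tensor

value = {
    "a": constant_op.constant(37.0),
    "b": sparse_tensor.SparseTensor(indices=[[0, 0]],
                                    values=[1],
                                    dense_shape=[1, 1])
}
spec = structure.type_spec_from_value(value)
# Each SparseTensor flattens to a single (variant) tensor, which is why the
# test above expects 2 tensors for s_0/s_1 and 3 tensors for s_2.
flat = structure.to_tensor_list(spec, value)
restored = structure.from_tensor_list(spec, flat)  # same nested value back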
Example #29
def generator():
    yield sparse_tensor.SparseTensor(indices=[[0, 0], [1, 2]],
                                     values=constant_op.constant(
                                         [1, 2], dtype=dtypes.int64),
                                     dense_shape=[3, 4])
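
A hedged sketch of how such a generator is typically consumed, rewritten
against the public tf.* API: passing it to tf.data.Dataset.from_generator
with a SparseTensorSpec (the output_signature argument assumes TF 2.3+):

import tensorflow as tf

def generator():
    yield tf.SparseTensor(indices=[[0, 0], [1, 2]],
                          values=tf.constant([1, 2], dtype=tf.int64),
                          dense_shape=[3, 4])

dataset = tf.data.Dataset.from_generator(
    generator,
    output_signature=tf.SparseTensorSpec(shape=[3, 4], dtype=tf.int64))

for sp in dataset:
    print(tf.sparse.to_dense(sp))  # dense [3, 4] view of each sparse element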
Example #30
def queue_parsed_features(parsed_features,
                          keys=None,
                          feature_queue_capacity=100,
                          num_enqueue_threads=2,
                          name=None):
    """Speeds up parsing by using queues to do it asynchronously.

    This function adds the tensors in `parsed_features` to a queue, which
    allows the parsing (or any other expensive op before it) to be asynchronous
    with respect to the rest of the training graph. This greatly improves read
    latency and speeds up training, since the data will already be parsed and
    ready when each training step needs it.

    All queue runners are added to the queue runners collection and may be
    started via `start_queue_runners`.

    All ops are added to the default graph.

    Args:
      parsed_features: A dict of string key to `Tensor` or `SparseTensor`
        objects.
      keys: `Tensor` of string keys.
      feature_queue_capacity: Capacity of the parsed features queue.
      num_enqueue_threads: Number of threads used to enqueue the parsed example
        queue. Using multiple enqueue threads helps maintain a full queue when
        the subsequent computations are overall cheaper than parsing.
      name: Name of the resulting op.

    Returns:
      A tuple of:
      - `Tensor` corresponding to `keys` if provided, otherwise `None`.
      - A dict of string key to `Tensor` or `SparseTensor` objects
        corresponding to `parsed_features`.

    Raises:
      ValueError: for invalid inputs.
    """

    args = list(parsed_features.values())
    if keys is not None:
        args += [keys]

    with ops.name_scope(name, 'queue_parsed_features', args):
        # Collect the tensors to enqueue; the queue dtypes are derived from
        # this list below, one entry per enqueued item.
        tensors_to_enqueue = []
        # Each entry contains the key, and a boolean which indicates whether the
        # tensor was a sparse tensor.
        tensors_mapping = []
        # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing
        # sparse tensors into a queue. This could be handled somewhere else so
        # others can reuse it. Also, QueueBase could be extended to handle
        # sparse tensors directly.
        for key in sorted(parsed_features.keys()):
            tensor = parsed_features[key]
            if isinstance(tensor, sparse_tensor.SparseTensor):
                tensors_mapping.append((key, True))
                # A SparseTensor is enqueued as its three component tensors.
                # Note: `dense_shape` (a Tensor) must be used here, not
                # `.shape` (a TensorShape), or the enqueue would fail.
                tensors_to_enqueue.extend(
                    [tensor.indices, tensor.values, tensor.dense_shape])
            else:
                tensors_mapping.append((key, False))
                tensors_to_enqueue.append(tensor)

        if keys is not None:
            tensors_to_enqueue.append(keys)

        queue_dtypes = [x.dtype for x in tensors_to_enqueue]
        input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity,
                                              queue_dtypes)

        # Add a summary op to debug if our feature queue is full or not.
        summary.scalar(
            'queue/parsed_features/%s/fraction_of_%d_full' %
            (input_queue.name, feature_queue_capacity),
            math_ops.cast(input_queue.size(), dtypes.float32) *
            (1. / feature_queue_capacity))

        # Use a single QueueRunner with multiple threads to enqueue so the queue is
        # always full. The threads are coordinated so the last batch will not be
        # lost.
        enqueue_ops = [
            input_queue.enqueue(tensors_to_enqueue)
            for _ in range(num_enqueue_threads)
        ]
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(
                input_queue,
                enqueue_ops,
                queue_closed_exception_types=(errors.OutOfRangeError,
                                              errors.CancelledError)))

        dequeued_tensors = input_queue.dequeue()

        # Reset shapes on dequeued tensors.
        for i in range(len(tensors_to_enqueue)):
            dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

        # Recreate feature mapping according to the original dictionary.
        dequeued_parsed_features = {}
        index = 0
        for key, is_sparse_tensor in tensors_mapping:
            if is_sparse_tensor:
                # The three component tensors are (indices, values, dense_shape).
                dequeued_parsed_features[key] = sparse_tensor.SparseTensor(
                    dequeued_tensors[index], dequeued_tensors[index + 1],
                    dequeued_tensors[index + 2])
                index += 3
            else:
                dequeued_parsed_features[key] = dequeued_tensors[index]
                index += 1

        dequeued_keys = None
        if keys is not None:
            dequeued_keys = dequeued_tensors[-1]

        return dequeued_keys, dequeued_parsed_features
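
A hedged TF1-style usage sketch (graph mode with queue runners, an API since
superseded by tf.data); the feature spec and the placeholder standing in for
a real input pipeline are illustrative only:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

serialized = tf.placeholder(tf.string, shape=[None])  # stand-in input batch
features = {
    "age": tf.FixedLenFeature([], tf.int64),
    "tags": tf.VarLenFeature(tf.string),  # parses to a SparseTensor
}
parsed = tf.parse_example(serialized, features)

# Queue the parsed tensors so parsing overlaps with downstream training ops.
_, dequeued = queue_parsed_features(parsed,
                                    feature_queue_capacity=100,
                                    num_enqueue_threads=2)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # ... run training steps that consume `dequeued` ...
    coord.request_stop()
    coord.join(threads)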