Пример #1
0
  def testFromValueRowIdsWithBadNRows(self):
    """Verifies `from_value_rowids` raises ValueError for unusable arguments.

    Covers a negative `nrows`, an `nrows` that does not exceed the last row
    id, a rank-2 `value_rowids`, and a rank-1 `nrows`.
    """
    rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
    row_count = constant_op.constant(5, dtypes.int64)

    # Negative nrows.
    negative_msg = r'Expected nrows >= 0; got -2'
    with self.assertRaisesRegex(ValueError, negative_msg):
      RowPartition.from_value_rowids(
          value_rowids=array_ops.placeholder_with_default(rowids, None),
          nrows=-2)

    # nrows well below (last row id + 1); the last row id here is 4.
    too_small_msg = (
        r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=2, '
        r'value_rowids\[-1\]=4')
    with self.assertRaisesRegex(ValueError, too_small_msg):
      RowPartition.from_value_rowids(value_rowids=rowids, nrows=2)

    # nrows equal to the last row id is still one too few.
    off_by_one_msg = (
        r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=4, '
        r'value_rowids\[-1\]=4')
    with self.assertRaisesRegex(ValueError, off_by_one_msg):
      RowPartition.from_value_rowids(value_rowids=rowids, nrows=4)

    # value_rowids must be a vector.
    with self.assertRaisesRegex(ValueError, r'Shape \(7, 1\) must have rank 1'):
      RowPartition.from_value_rowids(
          value_rowids=array_ops.expand_dims(rowids, 1), nrows=row_count)

    # nrows must be a scalar.
    with self.assertRaisesRegex(ValueError, r'Shape \(1,\) must have rank 0'):
      RowPartition.from_value_rowids(
          value_rowids=rowids, nrows=array_ops.expand_dims(row_count, 0))
Пример #2
0
    def testRowPartitionConstructionErrors(self):
        """Checks the errors raised by calling `RowPartition(...)` directly."""
        splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
        factory_key = row_partition._row_partition_factory_key

        # Without the factory key the constructor refuses to run.
        private_msg = 'RowPartition constructor is private'
        with self.assertRaisesRegex(ValueError, private_msg):
            RowPartition(row_splits=splits)

        # A plain Python list is not accepted as the partitioning argument.
        not_tensor_msg = 'Row-partitioning argument must be a Tensor'
        with self.assertRaisesRegex(TypeError, not_tensor_msg):
            RowPartition(row_splits=[0, 2, 2, 5, 6, 7], internal=factory_key)

        # row_splits must be rank 1.
        bad_rank_msg = r'Shape \(6, 1\) must have rank 1'
        with self.assertRaisesRegex(ValueError, bad_rank_msg):
            RowPartition(row_splits=array_ops.expand_dims(splits, 1),
                         internal=factory_key)

        # Cached encodings must be tensors (or None), not lists.
        cached_msg = 'Cached value must be a Tensor or None.'
        with self.assertRaisesRegex(TypeError, cached_msg):
            RowPartition(row_splits=splits,
                         row_lengths=[2, 3, 4],
                         internal=factory_key)

        # int64 row_splits combined with int32 nrows is rejected.
        with self.assertRaisesRegex(ValueError, 'Inconsistent dtype'):
            RowPartition(
                row_splits=constant_op.constant([0, 3], dtypes.int64),
                nrows=constant_op.constant(1, dtypes.int32),
                internal=factory_key)
Пример #3
0
  def testMergePrecomputedEncodingsFastPaths(self):
    """Checks the cases where merging is expected to return the receiver."""
    partition = RowPartition.from_row_splits([0, 3, 8, 8])

    # Merging a partition with itself hands back the very same object.
    merged_with_self = partition.merge_precomputed_encodings(partition)
    self.assertIs(merged_with_self, partition)

    # A second partition built on the same row_splits tensor object also
    # merges to the original partition.
    twin = RowPartition.from_row_splits(partition.row_splits(), validate=False)
    merged_with_twin = partition.merge_precomputed_encodings(twin)
    self.assertIs(merged_with_twin, partition)
Пример #4
0
 def testMergePrecomputedEncodingsWithMatchingTensors(self):
   """Merging in either direction yields the partition with more encodings.

   `richer` is built from value_rowids and `poorer` reuses `richer`'s
   row_splits tensor, so where their encodings overlap they are the same
   tensor objects.
   """
   richer = RowPartition.from_value_rowids([0, 0, 3, 4, 4, 4])
   poorer = RowPartition.from_row_splits(richer.row_splits(), validate=False)

   richer_first = richer.merge_precomputed_encodings(poorer)
   self.assertIs(richer_first, richer)

   poorer_first = poorer.merge_precomputed_encodings(richer)
   self.assertIs(poorer_first, richer)

   # The two partitions are nevertheless distinct objects.
   self.assertIsNot(richer, poorer)
Пример #5
0
  def testTwoDeepStructuredTensor(self):
    """Converts a twice-partitioned StructuredTensor and inspects field `r`."""
    ragged = tf.RaggedTensor.from_value_rowids(
        tf.constant([[1, 2], [3, 4], [5, 6]]), [0, 0, 1])
    base = _make_structured_tensor([2], {"r": ragged})

    # Partition the outer dimension twice, one element per row each time.
    once = base.partition_outer_dimension(
        RowPartition.from_row_splits([0, 1, 2]))
    twice = once.partition_outer_dimension(
        RowPartition.from_row_splits([0, 1, 2]))

    converted = structured_tensor_to_prensor.structured_tensor_to_prensor(twice)
    leaf = converted.get_descendant(path.create_path("data.data.r.data"))
    leaf_node = leaf.node
    self.assertAllEqual(leaf_node.parent_index, [0, 0, 1, 1, 2, 2])
    self.assertAllEqual(leaf_node.values, [1, 2, 3, 4, 5, 6])
Пример #6
0
 def testFromUniformRowLengthBugConvertToTensor(self):
     """Regression test: int32 `nrows` mixed with int64 `nvals`/row length.

     This originally failed to run because nrows was dtypes.int32. The
     semantics of a RowPartition's dtype when preferred_dtype is unspecified
     may need revisiting. For reference, in convert_to_tensor `dtype` fixes
     the output type, whereas preferred_dtype/dtype_hint is only a suggestion
     (dtype_hint being the newer name).
     """
     int32_nrows = constant_op.constant(3, dtype=dtypes.int32)
     int64_nvals = constant_op.constant(12, dtype=dtypes.int64)
     int64_row_length = constant_op.constant(4, dtype=dtypes.int64)
     # The test passes as long as construction does not raise.
     RowPartition.from_uniform_row_length(
         int64_row_length, nvals=int64_nvals, nrows=int32_nrows)
Пример #7
0
 def testFromUniformRowLengthWithPlaceholders2(self):
   """Both `nvals` and the row length come from shape-less placeholders."""
   nvals_placeholder = array_ops.placeholder_with_default(6, None)
   row_length_placeholder = array_ops.placeholder_with_default(3, None)
   partition = RowPartition.from_uniform_row_length(
       nvals=nvals_placeholder, uniform_row_length=row_length_placeholder)
   evaluated_nvals = self.evaluate(partition.nvals())
   self.assertEqual(evaluated_nvals, 6)
Пример #8
0
 def testFromRowSplitsWithDifferentSplitTypes(self):
   """Checks the row_splits dtype produced for each kind of input."""
   # (input splits, dtype expected on the resulting row_splits tensor)
   cases = (
       ([0, 2, 2, 5, 6, 7], dtypes.int64),
       (np.array([0, 2, 2, 5, 6, 7], np.int64), dtypes.int64),
       (np.array([0, 2, 2, 5, 6, 7], np.int32), dtypes.int32),
       (constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64), dtypes.int64),
       (constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int32), dtypes.int32),
   )
   # Build every partition first, then run the assertions in the same order.
   partitions = [RowPartition.from_row_splits(splits) for splits, _ in cases]
   for partition, (_, expected_dtype) in zip(partitions, cases):
     self.assertEqual(partition.row_splits().dtype, expected_dtype)
Пример #9
0
def _get_specified_row_partition():
  """Returns a RowPartition with row_splits, nrows and nvals all supplied.

  Needed for merge_with_spec tests. Normally, nvals isn't set.
  """
  splits = constant_op.constant([0, 3, 8], dtype=dtypes.int64)
  num_rows = constant_op.constant(2, dtype=dtypes.int64)
  num_vals = constant_op.constant(8, dtype=dtypes.int64)
  # Calls the constructor directly, passing the module's factory key.
  return RowPartition(
      row_splits=splits,
      nrows=num_rows,
      nvals=num_vals,
      internal=row_partition._row_partition_factory_key)
Пример #10
0
 def testFromUniformRowLengthWithPlaceholders1(self):
   """`nvals` is a shape-less int64 placeholder; row length is a Python int."""
   six = constant_op.constant(6, dtype=dtypes.int64)
   nvals_placeholder = array_ops.placeholder_with_default(six, None)
   partition = RowPartition.from_uniform_row_length(
       nvals=nvals_placeholder, uniform_row_length=3)
   evaluated_nvals = self.evaluate(partition.nvals())
   self.assertEqual(evaluated_nvals, 6)
Пример #11
0
 def testFromValue(self):
   """Checks the spec `RowPartitionSpec.from_value` derives per factory."""
   # Built from row_splits: only nrows is expected in the spec.
   from_splits = RowPartition.from_row_splits([0, 2, 8, 8])
   self.assertEqual(
       RowPartitionSpec.from_value(from_splits), RowPartitionSpec(nrows=3))

   # Built from row_lengths: only nrows is expected in the spec.
   from_lengths = RowPartition.from_row_lengths([5, 3, 0, 2])
   self.assertEqual(
       RowPartitionSpec.from_value(from_lengths), RowPartitionSpec(nrows=4))

   # Built from value_rowids: nrows and nvals are both expected.
   from_rowids = RowPartition.from_value_rowids([0, 2, 2, 8])
   self.assertEqual(
       RowPartitionSpec.from_value(from_rowids),
       RowPartitionSpec(nrows=9, nvals=4))

   # Built from a uniform row length: nvals and the row length are expected.
   from_uniform = RowPartition.from_uniform_row_length(
       nvals=12, uniform_row_length=3)
   self.assertEqual(
       RowPartitionSpec.from_value(from_uniform),
       RowPartitionSpec(nvals=12, uniform_row_length=3))
Пример #12
0
 def testEmpty2DRagged(self):
   """Converts a field-less ragged StructuredTensor and checks parent ids."""
   # Two rows, holding 3 and 2 elements respectively.
   outer_partition = RowPartition.from_row_splits([0, 3, 5])
   empty_struct = structured_tensor.StructuredTensor.from_fields(
       fields={}, shape=[2, None], row_partitions=[outer_partition])
   converted = structured_tensor_to_prensor.structured_tensor_to_prensor(
       empty_struct)
   data_node = converted.get_child("data").node
   self.assertAllEqual(data_node.parent_index, [0, 0, 0, 1, 1])
Пример #13
0
 def testFromUniformRowPartitionNvalsStaticNoValidate(self):
     """Static nrows/row length/nvals are exposed with validation disabled."""
     partition = RowPartition.from_uniform_row_length(
         3, nrows=4, nvals=12, validate=False)
     self.assertAllEqual(4, partition.static_nrows)
     self.assertAllEqual(3, partition.static_uniform_row_length)
     self.assertAllEqual(12, partition.static_nvals)
def _expand_st_row_partitions(st, axis):
  """Create the row_partitions for expand_dims.

  Args:
    st: the value being expanded; its `shape.rank`, `rank`, `nrows()` and
      `row_partitions` are read.
    axis: the position of the new dimension.

  Returns:
    A tuple of row partitions: `st.row_partitions` with one additional
    uniform-row-length partition inserted, or `()` when `axis == 0` and
    `st` has rank 0.
  """
  if axis == 0:
    if st.shape.rank == 0:
      return ()
    # New outermost partition: a single row holding all existing rows.
    count = st.nrows()
    outermost = RowPartition.from_uniform_row_length(
        count, count, nrows=1, validate=False)
    return (outermost,) + st.row_partitions

  if axis == st.rank:
    # New innermost partition: every existing value becomes a row of length 1.
    if axis - 2 >= 0:
      count = st.row_partitions[axis - 2].nvals()
    else:
      count = st.nrows()
    existing = st.row_partitions
    innermost = RowPartition.from_uniform_row_length(
        1, count, nrows=count, validate=False)
    return existing + (innermost,)

  # New partition in the middle, spliced in before index `axis - 1`.
  if axis - 1 >= 0:
    count = st.row_partitions[axis - 1].nrows()
  else:
    count = st.nrows()
  before = st.row_partitions[:axis - 1]
  middle = RowPartition.from_uniform_row_length(
      1, count, nrows=count, validate=False)
  after = st.row_partitions[axis - 1:]
  return before + (middle,) + after
Пример #15
0
  def testFromRowSplits(self):
    """Builds from an int64 row_splits tensor and checks what is exposed."""
    splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
    partition = RowPartition.from_row_splits(splits, validate=False)
    self.assertEqual(partition.dtype, dtypes.int64)

    got_splits = partition.row_splits()
    got_nrows = partition.nrows()

    # The input tensor object itself is expected back.
    self.assertIs(got_splits, splits)
    # Six split points delimit five rows.
    self.assertAllEqual(got_nrows, 5)
Пример #16
0
def _child_node_to_structured_tensor(
        node: prensor.ChildNodeTensor,
        fields: Mapping[path.Step, prensor.Prensor],
        nrows: tf.Tensor) -> structured_tensor.StructuredTensor:
    """Build a StructuredTensor from `fields`, partitioned by `node`.

    Args:
      node: child node whose `parent_index` is used as the value row ids.
      fields: passed unchanged to `StructuredTensor.from_fields`.
      nrows: row count handed to `RowPartition.from_value_rowids`.

    Returns:
      The StructuredTensor obtained by partitioning the outer dimension of
      the `[None]`-shaped tensor built from `fields`.
    """
    flat = structured_tensor.StructuredTensor.from_fields(
        fields=fields, shape=tf.TensorShape([None]))
    parent_partition = RowPartition.from_value_rowids(
        value_rowids=node.parent_index, nrows=nrows)
    return flat.partition_outer_dimension(parent_partition)
 def testRowPartitionStr(self):
   """`repr` and `str` agree and show row_splits (eager and graph modes)."""
   partition = RowPartition.from_row_splits([0, 2, 5, 6, 6, 7], validate=False)
   if not context.executing_eagerly():
     # Graph mode: the symbolic tensor is shown instead of its values.
     expected = ('tf.RowPartition(row_splits='
                 'Tensor("RowPartitionFromRowSplits/row_splits:0", '
                 'shape=(6,), dtype=int64))')
   else:
     expected = 'tf.RowPartition(row_splits=[0 2 5 6 6 7])'
   self.assertEqual(repr(partition), expected)
   self.assertEqual(str(partition), expected)
Пример #18
0
  def testFromRowLengths(self):
    """Builds from an int64 row_lengths tensor and checks what is exposed."""
    lengths = constant_op.constant([2, 0, 3, 1, 1], dtypes.int64)
    partition = RowPartition.from_row_lengths(lengths, validate=False)
    self.assertEqual(partition.dtype, dtypes.int64)

    got_lengths = partition.row_lengths()
    got_nrows = partition.nrows()

    # The input row_lengths tensor object itself is expected back.
    self.assertIs(got_lengths, lengths)
    self.assertAllEqual(got_nrows, 5)
    self.assertAllEqual(got_lengths, lengths)
Пример #19
0
    def testRaggedTensorConstructionErrors(self):
        """Checks the errors raised by calling `RowPartition(...)` directly.

        Uses `assertRaisesRegex`; the `assertRaisesRegexp` spelling is a
        deprecated unittest alias that newer Python versions no longer ship.
        """
        row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)

        # Without `internal` the constructor refuses to run.
        # NOTE(review): the expected message names RaggedTensor although a
        # RowPartition is constructed -- confirm against the library version
        # under test.
        with self.assertRaisesRegex(ValueError,
                                    'RaggedTensor constructor is private'):
            RowPartition(row_splits=row_splits)

        # A plain Python list is not accepted as the partitioning argument.
        with self.assertRaisesRegex(
                TypeError, 'Row-partitioning argument must be a Tensor'):
            RowPartition(row_splits=[0, 2, 2, 5, 6, 7], internal=True)

        # row_splits must be rank 1.
        with self.assertRaisesRegex(ValueError,
                                    r'Shape \(6, 1\) must have rank 1'):
            RowPartition(row_splits=array_ops.expand_dims(row_splits, 1),
                         internal=True)

        # Cached encodings must be tensors (or None), not lists.
        with self.assertRaisesRegex(TypeError,
                                    'Cached value must be a Tensor or None.'):
            RowPartition(row_splits=row_splits,
                         cached_row_lengths=[2, 3, 4],
                         internal=True)
Пример #20
0
 def testRowPartitionStrUniformRowLength(self):
     """`repr` and `str` agree for a uniform-row-length partition."""
     partition = RowPartition.from_uniform_row_length(5, nvals=10, nrows=2)
     if not context.executing_eagerly():
         # Graph mode: nrows and the row length appear as symbolic tensors.
         expected = (
             'tf.RowPartition(nrows='
             'Tensor("RowPartitionFromUniformRowLength/'
             'nrows:0", shape=(), dtype=int64), '
             'uniform_row_length=Tensor("RowPartitionFromUniformRowLength/'
             'uniform_row_length:0", shape=(), dtype=int64))')
     else:
         expected = 'tf.RowPartition(nrows=2, uniform_row_length=5)'
     self.assertEqual(repr(partition), expected)
     self.assertEqual(str(partition), expected)
Пример #21
0
  def testFromValueRowIdsWithExplicitNRows(self):
    """Builds from row ids with an explicit `nrows` of 7 (largest id is 4)."""
    ids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
    explicit_nrows = constant_op.constant(7, dtypes.int64)

    partition = RowPartition.from_value_rowids(
        ids, explicit_nrows, validate=False)

    got_ids = partition.value_rowids()
    got_nrows = partition.nrows()
    got_splits = partition.row_splits()

    # Both input tensor objects are expected back unchanged.
    self.assertIs(got_ids, ids)
    self.assertIs(got_nrows, explicit_nrows)
    # Rows 5 and 6 hold no values, so the final split value repeats.
    self.assertAllEqual(got_splits, [0, 2, 2, 5, 6, 7, 7, 7])
Пример #22
0
  def testFromValueRowIdsWithDerivedNRowsDynamic(self):
    """Derives `nrows` from row ids that are wrapped in a placeholder.

    Here nrows is not known at graph creation time.
    """
    static_ids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
    dynamic_ids = array_ops.placeholder_with_default(static_ids, shape=None)

    partition = RowPartition.from_value_rowids(dynamic_ids, validate=False)

    got_ids = partition.value_rowids()
    got_nrows = partition.nrows()

    # The placeholder tensor object itself is expected back.
    self.assertIs(got_ids, dynamic_ids)
    self.assertAllEqual(got_ids, dynamic_ids)
    self.assertAllEqual(got_nrows, 5)
Пример #23
0
 def testRowPartitionStr(self):
   """`repr` and `str` agree and show row_splits (eager and graph modes)."""
   partition = RowPartition.from_row_splits([0, 2, 5, 6, 6, 7], validate=False)
   splits_type = 'int64'
   if not context.executing_eagerly():
     # Graph mode: the symbolic tensor is shown, with the dtype filled in.
     expected = ('tf.RowPartition(row_splits='
                 'Tensor("RowPartitionFromRowSplits/row_splits:0", '
                 'shape=(6,), dtype={}))').format(splits_type)
   else:
     expected = ('tf.RowPartition(row_splits=tf.Tensor([0 2 5 6 6 7], '
                 'shape=(6,), dtype=int64))')
   self.assertEqual(repr(partition), expected)
   self.assertEqual(str(partition), expected)
Пример #24
0
    def testFromRowLimits(self):
        """Builds from int64 row_limits and checks the derived encodings."""
        limits = constant_op.constant([2, 2, 5, 6, 7], dtypes.int64)
        partition = RowPartition.from_row_limits(limits, validate=False)
        self.assertEqual(partition.dtype, dtypes.int64)

        got_limits = partition.row_limits()
        # NOTE(review): row_splits is read as an attribute here, not called;
        # other tests in this file call row_splits() -- confirm which form
        # the RowPartition version under test provides.
        got_splits = partition.row_splits
        got_nrows = partition.nrows()

        self.assertAllEqual(got_nrows, 5)
        self.assertAllEqual(got_limits, limits)
        # The expected splits are the limits with a leading 0.
        self.assertAllEqual(got_splits, [0, 2, 2, 5, 6, 7])
def _expand_dims(st, axis):
    """tf.expand_dims, but works on StructuredTensor too.

    Note: for a StructuredTensor of rank >= 1 only non-negative axes 0 and 1
    are implemented; any other resolved axis raises a ValueError.

    Args:
      st: a Tensor, RaggedTensor, or StructuredTensor.
      axis: the axis to insert a dimension before.

    Returns:
      a tensor with one more dimension (see tf.expand_dims).

    Raises:
      ValueError: if the axis is not valid.
    """
    # Anything that is not a StructuredTensor is delegated to TensorFlow.
    if not isinstance(st, structured_tensor.StructuredTensor):
        return tf.expand_dims(st, axis)

    nn_axis = _expand_dims_nonnegative_axis(axis, st.rank)
    if st.rank == 0:
        return _expand_dims_scalar(st)

    if nn_axis == 0:
        # One outer row that contains every existing row.
        row_count = st.nrows()
        partition = RowPartition.from_uniform_row_length(row_count, row_count)
    elif nn_axis == 1:
        # Split the first dimension into rows of length 1.
        row_count = st.nrows()
        unit_length = tf.constant(1, dtype=row_count.dtype)
        partition = RowPartition.from_uniform_row_length(unit_length, row_count)
    else:
        # Note: this is unreachable in the current code.
        raise ValueError(
            "Unimplemented: non-negative axis > 1 for _expand_dims")
    return st.partition_outer_dimension(partition)
Пример #26
0
  def testFromValueRowIdsWithExplicitNRowsEqualToDefault(self):
    """Builds from row ids with an explicit `nrows` of 5 (largest id + 1)."""
    ids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
    explicit_nrows = constant_op.constant(5, dtypes.int64)

    partition = RowPartition.from_value_rowids(
        ids, explicit_nrows, validate=False)

    got_ids = partition.value_rowids()
    got_nrows = partition.nrows()
    got_splits = partition.row_splits()

    # Both input tensor objects are expected back unchanged.
    self.assertIs(got_ids, ids)
    self.assertIs(got_nrows, explicit_nrows)
    self.assertAllEqual(got_ids, ids)
    self.assertAllEqual(got_nrows, explicit_nrows)
    self.assertAllEqual(got_splits, [0, 2, 2, 5, 6, 7])
Пример #27
0
  def testFromValueRowIdsWithDerivedNRows(self):
    """Derives `nrows` from constant row ids.

    Here nrows is known at graph creation time.
    """
    ids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
    # TODO(martinz): add nrows
    partition = RowPartition.from_value_rowids(ids, validate=False)
    self.assertEqual(partition.dtype, dtypes.int64)

    got_splits = partition.row_splits()
    got_ids = partition.value_rowids()
    got_nrows = partition.nrows()

    # The input value_rowids tensor object itself is expected back.
    self.assertIs(got_ids, ids)
    self.assertAllEqual(got_ids, ids)
    self.assertAllEqual(got_nrows, 5)
    self.assertAllEqual(got_splits, [0, 2, 2, 5, 6, 7])
Пример #28
0
def _merge_dims(value, outer_axis, inner_axis):
  """Merges `outer_axis...inner_axis` of `value` into a single dimension.

  Args:
    value: a Tensor, RaggedTensor, or StructuredTensor.
    outer_axis: first axis of the merged range; must be less than
      `inner_axis`.
    inner_axis: last axis of the merged range (inclusive).

  Returns:
    For Tensor/RaggedTensor inputs, the result of `ragged_tensor.merge_dims`.
    For StructuredTensor inputs, a new StructuredTensor whose fields, shape,
    nrows and row partitions all reflect the merged dimensions.
  """
  assert outer_axis < inner_axis
  if isinstance(value, (ops.Tensor, ragged_tensor.RaggedTensor)):
    return ragged_tensor.merge_dims(value, outer_axis, inner_axis)
  else:
    assert isinstance(value, StructuredTensor)

    # Build the new fields: every field is merged over the same axes.
    fields = {
        k: _merge_dims(v, outer_axis, inner_axis)
        for (k, v) in value._fields.items()
    }

    # Build the new shape. The merged size is the product of all dimensions
    # from outer_axis through inner_axis *inclusive*, so the slice must end
    # at inner_axis + 1, which is also where the untouched tail begins.
    value_shape = value.shape
    shape = (
        value_shape[:outer_axis] +
        [value_shape[outer_axis:inner_axis + 1].num_elements()] +
        value_shape[inner_axis + 1:])

    # Build the new row_partitions & nrows
    if outer_axis == 0:
      if inner_axis == value.shape.rank - 1:
        # Every dimension is merged: no partitions remain, and the row count
        # is the value count of the innermost partition.
        partitions = ()
        nrows = value.row_partitions[-1].nvals()
      else:
        partitions = value.row_partitions[inner_axis:]
        nrows = partitions[0].nrows()
    else:
      # Use tf.gather to merge row_splits from the merged row partitions.
      merged_splits = value.row_partitions[outer_axis - 1].row_splits()
      for dim in range(outer_axis, inner_axis):
        merged_splits = array_ops.gather(value.row_partitions[dim].row_splits(),
                                         merged_splits)

      partitions = (
          value.row_partitions[:outer_axis - 1] +
          (RowPartition.from_row_splits(merged_splits),) +
          value.row_partitions[inner_axis:])
      nrows = partitions[0].nrows()

    return StructuredTensor(
        fields,
        shape,
        nrows,
        partitions,
        internal=_structured_tensor_factory_key)
Пример #29
0
  def testWithPrecomputedSplits(self):
    """Each `with_precomputed_*` call yields a partition with that encoding."""
    base = RowPartition.from_row_splits([0, 2, 8])

    # row_splits: only the result is checked, not the starting state.
    with_splits = base.with_precomputed_row_splits()
    self.assertTrue(with_splits.has_precomputed_row_splits())

    # row_lengths: absent on `base`, present on the derived partition.
    self.assertFalse(base.has_precomputed_row_lengths())
    with_lengths = base.with_precomputed_row_lengths()
    self.assertTrue(with_lengths.has_precomputed_row_lengths())

    # value_rowids: absent on `base`, present on the derived partition.
    self.assertFalse(base.has_precomputed_value_rowids())
    with_rowids = base.with_precomputed_value_rowids()
    self.assertTrue(with_rowids.has_precomputed_value_rowids())

    # nrows: absent on `base`, present on the derived partition.
    self.assertFalse(base.has_precomputed_nrows())
    with_nrows = base.with_precomputed_nrows()
    self.assertTrue(with_nrows.has_precomputed_nrows())
Пример #30
0
def _row_partitions_for_uniform_shape(shape, rank):
    """Returns row partitions for the given shape Tensor.

    Args:
      shape: A vector describing a uniform shape.
      rank: The number of dimensions to generate row partitions for

    Returns:
      A tuple of (rank-1) `RowPartition`s with uniform row length.
    """
    # Running products of the first `rank` dimensions: entry i is used as the
    # row count of partition i and entry i + 1 as its value count.
    running_sizes = math_ops.cumprod(shape[:rank])
    # pylint: disable=g-complex-comprehension
    return tuple(
        RowPartition.from_uniform_row_length(
            uniform_row_length=shape[dim + 1],
            nvals=running_sizes[dim + 1],
            nrows=running_sizes[dim])
        for dim in range(rank - 1))