def segment_ids_to_row_splits(segment_ids, num_segments=None, out_type=None,
                              name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and, for `i >= 1`,
  `splits[i] = splits[i-1] + count(segment_ids == i-1)`.  In other words,
  `splits` is the cumulative sum of the per-segment element counts, with a
  leading zero.  Example:

  >>> print(tf.ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]))
  tf.Tensor([0 3 3 5 6 9], shape=(6,), dtype=int64)

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`
      if `segment_ids` is a `Tensor`; otherwise to `num_segments.dtype` if
      `num_segments` is a `Tensor`; otherwise to `tf.int64`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
  # Resolve the output dtype before the inputs are converted below, because
  # the conversion casts them to int32 and would hide their original dtype.
  if out_type is None:
    if isinstance(segment_ids, ops.Tensor):
      out_type = segment_ids.dtype
    elif isinstance(num_segments, ops.Tensor):
      out_type = num_segments.dtype
    else:
      out_type = dtypes.int64
  else:
    out_type = dtypes.as_dtype(out_type)

  # Pass every tensor-like argument to the scope (not just `segment_ids`), so
  # that a tensor-valued `num_segments` is also taken into account when the
  # scope is set up.
  with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                      [segment_ids, num_segments]):
    # Note: we cast int64 tensors to int32, since bincount currently only
    # supports int32 inputs.
    segment_ids = ragged_util.convert_to_int_tensor(
        segment_ids, "segment_ids", dtype=dtypes.int32)
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments", dtype=dtypes.int32)
      num_segments.shape.assert_has_rank(0)

    # row_lengths[k] is the number of entries of `segment_ids` equal to k.
    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=out_type)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
def segment_ids_to_row_splits(segment_ids, num_segments=None, out_type=None,
                              name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and, for `i >= 1`,
  `splits[i] = splits[i-1] + count(segment_ids == i-1)`.  In other words,
  `splits` is the cumulative sum of the per-segment element counts, with a
  leading zero.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`
      if `segment_ids` is a `Tensor`; otherwise to `num_segments.dtype` if
      `num_segments` is a `Tensor`; otherwise to `tf.int64`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
  # Resolve the output dtype before the inputs are converted below, because
  # the conversion casts them to int32 and would hide their original dtype.
  if out_type is None:
    if isinstance(segment_ids, ops.Tensor):
      out_type = segment_ids.dtype
    elif isinstance(num_segments, ops.Tensor):
      out_type = num_segments.dtype
    else:
      out_type = dtypes.int64
  else:
    out_type = dtypes.as_dtype(out_type)

  # Pass every tensor-like argument to the scope (not just `segment_ids`), so
  # that a tensor-valued `num_segments` is also taken into account when the
  # scope is set up.
  with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                      [segment_ids, num_segments]):
    # Note: we cast int64 tensors to int32, since bincount currently only
    # supports int32 inputs.
    segment_ids = ragged_util.convert_to_int_tensor(
        segment_ids, "segment_ids", dtype=dtypes.int32)
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments", dtype=dtypes.int32)
      num_segments.shape.assert_has_rank(0)

    # row_lengths[k] is the number of entries of `segment_ids` equal to k.
    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=out_type)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
def from_dim_sizes(dim_sizes):
  """Builds a ragged shape from one size tensor per dimension.

  Each entry of `dim_sizes` describes a single dimension: a scalar means the
  dimension is uniform, a vector means the dimension is ragged.

  Args:
    dim_sizes: List of int64 scalars or vectors.

  Returns:
    A RaggedTensorDynamicShape.

  Raises:
    ValueError: If any entry's rank is neither 0 nor 1.
  """
  scope_name = 'RaggedTensorDynamicShapeFromDimensionSizes'
  with ops.name_scope(None, scope_name, [dim_sizes]):
    size_tensors = tuple([
        ragged_util.convert_to_int_tensor(
            raw_size, dtype=dtypes.int64, name='dim_sizes')
        for raw_size in dim_sizes
    ])

    # Everything up to and including the last vector-valued size belongs to
    # the partitioned dimensions; whatever follows is an inner dimension.
    num_partitioned = 0
    for position, size_tensor in enumerate(size_tensors, start=1):
      size_rank = size_tensor.shape.ndims
      if size_rank == 1:
        num_partitioned = position
      elif size_rank != 0:
        raise ValueError('Each dim_size must be a scalar or a vector')

    partitioned_sizes = size_tensors[:num_partitioned]
    inner_sizes = size_tensors[num_partitioned:]
    return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def from_dim_sizes(dim_sizes):
  """Builds a ragged shape from one size tensor per dimension.

  Each entry of `dim_sizes` describes a single dimension: a scalar means the
  dimension is uniform, a vector means the dimension is ragged.

  Args:
    dim_sizes: List of int64 scalars or vectors.

  Returns:
    A RaggedTensorDynamicShape.

  Raises:
    ValueError: If any entry's rank is neither 0 nor 1.
  """
  scope_name = 'RaggedTensorDynamicShapeFromDimensionSizes'
  with ops.name_scope(None, scope_name, [dim_sizes]):
    size_tensors = tuple([
        ragged_util.convert_to_int_tensor(
            raw_size, dtype=dtypes.int64, name='dim_sizes')
        for raw_size in dim_sizes
    ])

    # Everything up to and including the last vector-valued size belongs to
    # the partitioned dimensions; whatever follows is an inner dimension.
    num_partitioned = 0
    for position, size_tensor in enumerate(size_tensors, start=1):
      size_rank = size_tensor.shape.ndims
      if size_rank == 1:
        num_partitioned = position
      elif size_rank != 0:
        raise ValueError('Each dim_size must be a scalar or a vector')

    partitioned_sizes = size_tensors[:num_partitioned]
    inner_sizes = size_tensors[num_partitioned:]
    return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def __init__(self, partitioned_dim_sizes, inner_dim_sizes):
  """Creates a RaggedTensorDynamicShape.

  Args:
    partitioned_dim_sizes: A `list` of 0-D or 1-D integer `Tensor`, one for
      each partitioned dimension.  If dimension `d` is uniform, then
      `partitioned_dim_sizes[d]` must be an integer scalar, specifying the
      size of all slices across dimension `d`.  If dimension `d` is ragged,
      then `partitioned_dim_sizes[d]` must be an integer vector, specifying
      the size of each slice across dimension `d`.
    inner_dim_sizes: A 1-D integer `Tensor`, whose length is equal to the
      number of inner dimensions.  `inner_dim_sizes[n]` is the size of all
      slices across the `n`th inner dimension (which is the
      `(len(partitioned_dim_sizes)+n)`th dimension in the overall tensor).

  Raises:
    ValueError: If the rank of a partitioned dimension size is unknown, if
      the first partitioned dimension size is a vector, or if the last
      partitioned dimension size is a scalar.
  """
  assert isinstance(partitioned_dim_sizes, (list, tuple))

  scope_values = (partitioned_dim_sizes, inner_dim_sizes)
  with ops.name_scope(None, 'RaggedTensorDynamicShape', scope_values):
    partitioned = tuple([
        ragged_util.convert_to_int_tensor(
            raw_size, dtype=dtypes.int64, name='partitioned_dimension_size')
        for raw_size in partitioned_dim_sizes
    ])
    inner = ragged_util.convert_to_int_tensor(
        inner_dim_sizes, dtype=dtypes.int64, name='inner_dim_sizes')

    # Validate shapes.
    if partitioned:
      for axis, size_tensor in enumerate(partitioned):
        size_shape = size_tensor.shape
        if size_shape.ndims is None:
          raise ValueError(
              'rank of partitioned_dim_sizes[%d] is unknown' % axis)
        # Called for its rank check only; the returned shape is not needed.
        size_shape.with_rank_at_most(1)
      outermost, innermost = partitioned[0], partitioned[-1]
      if outermost.shape.ndims == 1:
        raise ValueError('outermost partitioned dimension must be uniform')
      if innermost.shape.ndims == 0:
        raise ValueError('innermost partitioned dimension must be ragged')
    inner.shape.assert_has_rank(1)

    self._partitioned_dim_sizes = partitioned
    self._inner_dim_sizes = inner
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Generates the RaggedTensor `splits` vector corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and, for `i >= 1`,
  `splits[i] = splits[i-1] + count(segment_ids == i-1)`.  In other words,
  `splits` is the cumulative sum of the per-segment element counts, with a
  leading zero.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  # Pass every tensor-like argument to the scope (not just `segment_ids`), so
  # that a tensor-valued `num_segments` is also taken into account when the
  # scope is set up.
  with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                      [segment_ids, num_segments]):
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments")
      num_segments.shape.assert_has_rank(0)

    # row_lengths[k] is the number of entries of `segment_ids` equal to k.
    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=dtypes.int64)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and, for `i >= 1`,
  `splits[i] = splits[i-1] + count(segment_ids == i-1)`.  In other words,
  `splits` is the cumulative sum of the per-segment element counts, with a
  leading zero.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  # Pass every tensor-like argument to the scope (not just `segment_ids`), so
  # that a tensor-valued `num_segments` is also taken into account when the
  # scope is set up.
  with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                      [segment_ids, num_segments]):
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments")
      num_segments.shape.assert_has_rank(0)

    # row_lengths[k] is the number of entries of `segment_ids` equal to k.
    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=dtypes.int64)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
def __init__(self, partitioned_dim_sizes, inner_dim_sizes):
  """Creates a RaggedTensorDynamicShape.

  Args:
    partitioned_dim_sizes: A `list` of 0-D or 1-D integer `Tensor`, one for
      each partitioned dimension.  If dimension `d` is uniform, then
      `partitioned_dim_sizes[d]` must be an integer scalar, specifying the
      size of all slices across dimension `d`.  If dimension `d` is ragged,
      then `partitioned_dim_sizes[d]` must be an integer vector, specifying
      the size of each slice across dimension `d`.
    inner_dim_sizes: A 1-D integer `Tensor`, whose length is equal to the
      number of inner dimensions.  `inner_dim_sizes[n]` is the size of all
      slices across the `n`th inner dimension (which is the
      `(len(partitioned_dim_sizes)+n)`th dimension in the overall tensor).

  Raises:
    ValueError: If the rank of a partitioned dimension size is unknown, if
      the first partitioned dimension size is a vector, or if the last
      partitioned dimension size is a scalar.
  """
  assert isinstance(partitioned_dim_sizes, (list, tuple))

  scope_values = (partitioned_dim_sizes, inner_dim_sizes)
  with ops.name_scope(None, 'RaggedTensorDynamicShape', scope_values):
    partitioned = tuple([
        ragged_util.convert_to_int_tensor(
            raw_size, dtype=dtypes.int64, name='partitioned_dimension_size')
        for raw_size in partitioned_dim_sizes
    ])
    inner = ragged_util.convert_to_int_tensor(
        inner_dim_sizes, dtype=dtypes.int64, name='inner_dim_sizes')

    # Validate shapes.
    if partitioned:
      for axis, size_tensor in enumerate(partitioned):
        size_shape = size_tensor.shape
        if size_shape.ndims is None:
          raise ValueError(
              'rank of partitioned_dim_sizes[%d] is unknown' % axis)
        # Called for its rank check only; the returned shape is not needed.
        size_shape.with_rank_at_most(1)
      outermost, innermost = partitioned[0], partitioned[-1]
      if outermost.shape.ndims == 1:
        raise ValueError('outermost partitioned dimension must be uniform')
      if innermost.shape.ndims == 0:
        raise ValueError('innermost partitioned dimension must be ragged')
    inner.shape.assert_has_rank(1)

    self._partitioned_dim_sizes = partitioned
    self._inner_dim_sizes = inner
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
  """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

  The values of `input` are replicated `multiples[i]` times along the
  `i`th dimension (for each dimension `i`).  For every dimension `axis` in
  `input`, the length of each output element in that dimension is the
  length of corresponding input element multiplied by `multiples[axis]`.

  Args:
    input: A `RaggedTensor`.
    multiples: A 1-D integer `Tensor`.  Length must be the same as the number
      of dimensions in `input`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.
    If `input` turns out not to be ragged, the result of `array_ops.tile` is
    returned instead.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> ragged.tile(rt, [3, 2])
    [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
    ```
  """
  with ops.name_scope(name, 'RaggedTile', [input, multiples]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    # Non-ragged inputs are handed straight to the dense tile op.
    if not ragged_tensor.is_ragged(input):
      return array_ops.tile(input, multiples, name)

    splits_dtype = input.row_splits.dtype
    multiples = ragged_util.convert_to_int_tensor(
        multiples, name='multiples', dtype=splits_dtype)
    multiples.shape.assert_has_rank(1)

    # If the constant value of `multiples` is available, then we can use it
    # to skip tiling dimensions where `multiples=1`.
    static_multiples = tensor_util.constant_value(multiples)

    tiled_values = _tile_ragged_values(input, multiples, static_multiples)
    tiled_splits = _tile_ragged_splits(input, multiples, static_multiples)
    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        tiled_values, tiled_splits, validate=False)
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
  """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

  The values of `input` are replicated `multiples[i]` times along the
  `i`th dimension (for each dimension `i`).  For every dimension `axis` in
  `input`, the length of each output element in that dimension is the
  length of corresponding input element multiplied by `multiples[axis]`.

  Args:
    input: A `RaggedTensor`.
    multiples: A 1-D integer `Tensor`.  Length must be the same as the number
      of dimensions in `input`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.
    If `input` turns out not to be ragged, the result of `array_ops.tile` is
    returned instead.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> ragged.tile(rt, [3, 2])
    [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
    ```
  """
  with ops.name_scope(name, 'RaggedTile', [input, multiples]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    # Non-ragged inputs are handed straight to the dense tile op.
    if not ragged_tensor.is_ragged(input):
      return array_ops.tile(input, multiples, name)

    splits_dtype = input.row_splits.dtype
    multiples = ragged_util.convert_to_int_tensor(
        multiples, name='multiples', dtype=splits_dtype)
    multiples.shape.assert_has_rank(1)

    # If the constant value of `multiples` is available, then we can use it
    # to skip tiling dimensions where `multiples=1`.
    static_multiples = tensor_util.constant_value(multiples)

    tiled_values = _tile_ragged_values(input, multiples, static_multiples)
    tiled_splits = _tile_ragged_splits(input, multiples, static_multiples)
    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        tiled_values, tiled_splits, validate=False)
def broadcast_dimension(self, axis, lengths):
  """Returns a shape that is broadcast-compatible with self & lengths.

  A runtime assertion is attached to the result.  What it checks depends on
  whether dimension `axis` of this shape is ragged and on the rank of
  `lengths`:

  * Uniform dimension, scalar `lengths`: `lengths == 1`, or
    `dimension_size(axis) == 1`, or `dimension_size(axis) == lengths`.
  * Uniform dimension, vector `lengths`: `dimension_size(axis) == 1`.
  * Ragged dimension, scalar `lengths`: `lengths == 1`.
  * Ragged dimension, vector `lengths`: every element of `lengths` equals
    the corresponding element of `dimension_size(axis)`.

  A ragged partitioned dimension is returned unchanged (apart from the
  attached check); every other case is delegated to one of the private
  `_broadcast_*` helpers.

  Args:
    axis: `int`.  The dimension to broadcast.
    lengths: 0-D or 1-D integer `Tensor`.

  Returns:
    A `RaggedTensorDynamicShape`.

  Raises:
    ValueError: If the rank of `lengths` is unknown or greater than one, or
      if `lengths` is a vector and `axis` is an inner dimension with
      `axis == 0`.
  """
  lengths = ragged_util.convert_to_int_tensor(
      lengths, name='lengths', dtype=self.dim_size_dtype)

  # Check whether lengths is a scalar (for uniform dimensions) or
  # vector (for ragged dimensions).
  lengths_rank = lengths.shape.ndims
  if lengths_rank is None:
    raise ValueError('lengths must have a known rank.')
  if lengths_rank > 1:
    raise ValueError('lengths must be a scalar or vector')
  lengths_is_scalar = (lengths_rank == 0)

  # Verify that the shapes are compatible.
  if self.is_ragged(axis):
    if lengths_is_scalar:
      is_compatible = math_ops.equal(lengths, 1)
    else:
      is_compatible = math_ops.reduce_all(
          math_ops.equal(lengths, self.dimension_size(axis)))
  else:
    axis_dim_size = self.dimension_size(axis)
    if lengths_is_scalar:
      is_compatible = (
          math_ops.equal(lengths, 1) | math_ops.equal(axis_dim_size, 1)
          | math_ops.equal(axis_dim_size, lengths))
    else:
      is_compatible = math_ops.equal(axis_dim_size, 1)

  failure_data = [
      'Unable to broadcast: dimension size mismatch in dimension', axis,
      'lengths=', lengths, 'dim_size=',
      self.dimension_size(axis)
  ]
  compatibility_check = control_flow_ops.Assert(
      is_compatible, data=failure_data, summarize=10)

  with ops.control_dependencies([compatibility_check]):
    # Partitioned dimensions:
    if axis < self.num_partitioned_dimensions:
      if not self.is_ragged(axis):
        return self._broadcast_uniform_partitioned_dimension(axis, lengths)
      # Use an identity op to make sure the check actually gets run.
      return RaggedTensorDynamicShape(
          self._partitioned_dim_sizes,
          array_ops.identity(self.inner_dim_sizes))

    # Inner dimensions:
    if lengths_is_scalar:
      return self._broadcast_inner_dimension_to_uniform(axis, lengths)
    if axis == 0:
      raise ValueError('Unable to broadcast: '
                       'outermost dimension must be uniform.')
    return self._broadcast_inner_dimension_to_ragged(axis, lengths)
def broadcast_dimension(self, axis, lengths):
  """Returns a shape that is broadcast-compatible with self & lengths.

  A runtime assertion is attached to the result.  What it checks depends on
  whether dimension `axis` of this shape is ragged and on the rank of
  `lengths`:

  * Uniform dimension, scalar `lengths`: `lengths == 1`, or
    `dimension_size(axis) == 1`, or `dimension_size(axis) == lengths`.
  * Uniform dimension, vector `lengths`: `dimension_size(axis) == 1`.
  * Ragged dimension, scalar `lengths`: `lengths == 1`.
  * Ragged dimension, vector `lengths`: every element of `lengths` equals
    the corresponding element of `dimension_size(axis)`.

  A ragged partitioned dimension is returned unchanged (apart from the
  attached check); every other case is delegated to one of the private
  `_broadcast_*` helpers.

  Args:
    axis: `int`.  The dimension to broadcast.
    lengths: 0-D or 1-D integer `Tensor`.

  Returns:
    A `RaggedTensorDynamicShape`.

  Raises:
    ValueError: If the rank of `lengths` is unknown or greater than one, or
      if `lengths` is a vector and `axis` is an inner dimension with
      `axis == 0`.
  """
  lengths = ragged_util.convert_to_int_tensor(
      lengths, name='lengths', dtype=self.dim_size_dtype)

  # Check whether lengths is a scalar (for uniform dimensions) or
  # vector (for ragged dimensions).
  lengths_rank = lengths.shape.ndims
  if lengths_rank is None:
    raise ValueError('lengths must have a known rank.')
  if lengths_rank > 1:
    raise ValueError('lengths must be a scalar or vector')
  lengths_is_scalar = (lengths_rank == 0)

  # Verify that the shapes are compatible.
  if self.is_ragged(axis):
    if lengths_is_scalar:
      is_compatible = math_ops.equal(lengths, 1)
    else:
      is_compatible = math_ops.reduce_all(
          math_ops.equal(lengths, self.dimension_size(axis)))
  else:
    axis_dim_size = self.dimension_size(axis)
    if lengths_is_scalar:
      is_compatible = (
          math_ops.equal(lengths, 1) | math_ops.equal(axis_dim_size, 1)
          | math_ops.equal(axis_dim_size, lengths))
    else:
      is_compatible = math_ops.equal(axis_dim_size, 1)

  failure_data = [
      'Unable to broadcast: dimension size mismatch in dimension', axis,
      'lengths=', lengths, 'dim_size=',
      self.dimension_size(axis)
  ]
  compatibility_check = control_flow_ops.Assert(
      is_compatible, data=failure_data, summarize=10)

  with ops.control_dependencies([compatibility_check]):
    # Partitioned dimensions:
    if axis < self.num_partitioned_dimensions:
      if not self.is_ragged(axis):
        return self._broadcast_uniform_partitioned_dimension(axis, lengths)
      # Use an identity op to make sure the check actually gets run.
      return RaggedTensorDynamicShape(
          self._partitioned_dim_sizes,
          array_ops.identity(self.inner_dim_sizes))

    # Inner dimensions:
    if lengths_is_scalar:
      return self._broadcast_inner_dimension_to_uniform(axis, lengths)
    if axis == 0:
      raise ValueError('Unable to broadcast: '
                       'outermost dimension must be uniform.')
    return self._broadcast_inner_dimension_to_ragged(axis, lengths)
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
  """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows
      in `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.

  Raises:
    ValueError: If both `lengths` and `padding` are specified, or if
      `ragged_rank` is not greater than zero.
    TypeError: If `ragged_rank` is not an `int`.
  """
  if not (lengths is None or padding is None):
    raise ValueError('Specify lengths or padding, but not both')
  if not isinstance(ragged_rank, int):
    raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
  if ragged_rank <= 0:
    raise ValueError(
        'ragged_rank must be greater than 0; got %s' % ragged_rank)

  with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
    tensor = ops.convert_to_tensor(tensor, name='tensor')
    tensor.shape.with_rank_at_least(ragged_rank + 1)
    dense_shape = array_ops.shape(tensor, out_type=dtypes.int64)
    num_cols = dense_shape[1]

    # Handle ragged_rank>1 via recursion:
    # If the output should have multiple ragged dimensions, then first
    # flatten the tensor to eliminate all but the last ragged dimension,
    # and recursively convert that flattened tensor.  Then add on the splits
    # for the dimensions that we flattened out.
    if ragged_rank > 1:
      # Flatten `tensor` to eliminate all but the last ragged dimension.
      leading_dim = constant_op.constant([-1], dtypes.int64)
      trailing_dims = dense_shape[ragged_rank:]
      flat_shape = array_ops.concat([leading_dim, trailing_dims], axis=0)
      flattened = array_ops.reshape(tensor, flat_shape)

      # Recursively convert the flattened tensor.
      values = from_tensor(flattened, lengths, padding)

      # The total number of elements in each dimension.  E.g., if
      # dense_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
      cumulative_sizes = math_ops.cumprod(dense_shape)

      # Construct splits tensors for the dimensions that were flattened.
      outer_splits = []
      for dim in range(1, ragged_rank):
        row_indices = math_ops.range(0, cumulative_sizes[dim - 1] + 1)
        outer_splits.append(row_indices * dense_shape[dim])
      return ragged_factory_ops.from_nested_row_splits(values, outer_splits)

    # If padding was specified, then use it to find row lengths.
    if padding is not None:
      padding = ops.convert_to_tensor(
          padding, name='padding', dtype=tensor.dtype)
      padding.shape.assert_is_compatible_with(tensor.shape[2:])

      # Find places where the padding is equal to the tensor.  (This will
      # broadcast `padding` across the outermost 2 dimensions of `tensor`,
      # so `matches_padding.shape = tensor.shape`.)
      matches_padding = math_ops.equal(padding, tensor)

      # If the padding isn't a scalar, then require that all values in the
      # padding match each item in the tensor.  After this block of code,
      # `is_padding.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
      # use reduce_all for both cases, because when you pass an empty `axis`
      # list to reduce_all, it reduces all axes; but we want it to reduce no
      # axes -- i.e., to be a no-op.)
      tensor_rank = array_ops.rank(tensor)
      inner_axes = math_ops.range(2, tensor_rank)

      def _reduce_inner_axes():
        return math_ops.reduce_all(matches_padding, axis=inner_axes)

      def _keep_as_is():
        return matches_padding

      is_padding = control_flow_ops.cond(
          tensor_rank > 2, _reduce_inner_axes, _keep_as_is)
      is_padding.set_shape(tensor_shape.TensorShape([None, None]))
      is_padding.set_shape(tensor.shape[:2])

      # Use is_padding to find the length of each row: for each non-padding
      # item in a row, calculate the length that the row needs to have to
      # include that item; and then take the max of those values (across each
      # row).
      is_real = math_ops.logical_not(is_padding)
      is_real = math_ops.cast(is_real, dtypes.int64)
      column_numbers = array_ops.expand_dims(
          math_ops.range(1, num_cols + 1), 0)
      required_length = is_real * column_numbers
      lengths = math_ops.reduce_max(required_length, axis=1)

    # If we have lengths (either directly supplied, or computed from paddings),
    # then use those to construct splits; and then use masking to get the
    # corresponding values.
    if lengths is not None:
      lengths = ragged_util.convert_to_int_tensor(
          lengths, 'lengths', dtypes.int64)
      lengths.shape.assert_has_rank(1)
      # Clamp every length into the range [0, num_cols].
      lengths = math_ops.minimum(lengths, num_cols)
      lengths = math_ops.maximum(lengths, 0)
      row_limits = math_ops.cumsum(lengths)
      first_split = array_ops.zeros([1], dtypes.int64)
      splits = array_ops.concat([first_split, row_limits], axis=0)
      keep_mask = array_ops.sequence_mask(lengths, maxlen=num_cols)
      values = array_ops.boolean_mask(tensor, keep_mask)
      return ragged_factory_ops.from_row_splits(values, splits)

    # If neither padding nor lengths were specified, then create a splits
    # vector that contains no default values, and reshape the input tensor
    # to form the values for the RaggedTensor.
    num_rows = dense_shape[0]
    num_values = num_rows * num_cols
    splits = math_ops.range(num_rows + 1) * num_cols
    values_shape = array_ops.concat([[num_values], dense_shape[2:]], axis=0)
    values = array_ops.reshape(tensor, values_shape)
    return ragged_factory_ops.from_row_splits(values, splits)
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
  """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows
      in `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.

  Raises:
    ValueError: If both `lengths` and `padding` are specified, or if
      `ragged_rank` is not greater than zero.
    TypeError: If `ragged_rank` is not an `int`.
  """
  if not (lengths is None or padding is None):
    raise ValueError('Specify lengths or padding, but not both')
  if not isinstance(ragged_rank, int):
    raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
  if ragged_rank <= 0:
    raise ValueError(
        'ragged_rank must be greater than 0; got %s' % ragged_rank)

  with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
    tensor = ops.convert_to_tensor(tensor, name='tensor')
    tensor.shape.with_rank_at_least(ragged_rank + 1)
    dense_shape = array_ops.shape(tensor, out_type=dtypes.int64)
    num_cols = dense_shape[1]

    # Handle ragged_rank>1 via recursion:
    # If the output should have multiple ragged dimensions, then first
    # flatten the tensor to eliminate all but the last ragged dimension,
    # and recursively convert that flattened tensor.  Then add on the splits
    # for the dimensions that we flattened out.
    if ragged_rank > 1:
      # Flatten `tensor` to eliminate all but the last ragged dimension.
      leading_dim = constant_op.constant([-1], dtypes.int64)
      trailing_dims = dense_shape[ragged_rank:]
      flat_shape = array_ops.concat([leading_dim, trailing_dims], axis=0)
      flattened = array_ops.reshape(tensor, flat_shape)

      # Recursively convert the flattened tensor.
      values = from_tensor(flattened, lengths, padding)

      # The total number of elements in each dimension.  E.g., if
      # dense_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
      cumulative_sizes = math_ops.cumprod(dense_shape)

      # Construct splits tensors for the dimensions that were flattened.
      outer_splits = []
      for dim in range(1, ragged_rank):
        row_indices = math_ops.range(0, cumulative_sizes[dim - 1] + 1)
        outer_splits.append(row_indices * dense_shape[dim])
      return ragged_factory_ops.from_nested_row_splits(values, outer_splits)

    # If padding was specified, then use it to find row lengths.
    if padding is not None:
      padding = ops.convert_to_tensor(
          padding, name='padding', dtype=tensor.dtype)
      padding.shape.assert_is_compatible_with(tensor.shape[2:])

      # Find places where the padding is equal to the tensor.  (This will
      # broadcast `padding` across the outermost 2 dimensions of `tensor`,
      # so `matches_padding.shape = tensor.shape`.)
      matches_padding = math_ops.equal(padding, tensor)

      # If the padding isn't a scalar, then require that all values in the
      # padding match each item in the tensor.  After this block of code,
      # `is_padding.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
      # use reduce_all for both cases, because when you pass an empty `axis`
      # list to reduce_all, it reduces all axes; but we want it to reduce no
      # axes -- i.e., to be a no-op.)
      tensor_rank = array_ops.rank(tensor)
      inner_axes = math_ops.range(2, tensor_rank)

      def _reduce_inner_axes():
        return math_ops.reduce_all(matches_padding, axis=inner_axes)

      def _keep_as_is():
        return matches_padding

      is_padding = control_flow_ops.cond(
          tensor_rank > 2, _reduce_inner_axes, _keep_as_is)
      is_padding.set_shape(tensor_shape.TensorShape([None, None]))
      is_padding.set_shape(tensor.shape[:2])

      # Use is_padding to find the length of each row: for each non-padding
      # item in a row, calculate the length that the row needs to have to
      # include that item; and then take the max of those values (across each
      # row).
      is_real = math_ops.logical_not(is_padding)
      is_real = math_ops.cast(is_real, dtypes.int64)
      column_numbers = array_ops.expand_dims(
          math_ops.range(1, num_cols + 1), 0)
      required_length = is_real * column_numbers
      lengths = math_ops.reduce_max(required_length, axis=1)

    # If we have lengths (either directly supplied, or computed from paddings),
    # then use those to construct splits; and then use masking to get the
    # corresponding values.
    if lengths is not None:
      lengths = ragged_util.convert_to_int_tensor(
          lengths, 'lengths', dtypes.int64)
      lengths.shape.assert_has_rank(1)
      # Clamp every length into the range [0, num_cols].
      lengths = math_ops.minimum(lengths, num_cols)
      lengths = math_ops.maximum(lengths, 0)
      row_limits = math_ops.cumsum(lengths)
      first_split = array_ops.zeros([1], dtypes.int64)
      splits = array_ops.concat([first_split, row_limits], axis=0)
      keep_mask = array_ops.sequence_mask(lengths, maxlen=num_cols)
      values = array_ops.boolean_mask(tensor, keep_mask)
      return ragged_factory_ops.from_row_splits(values, splits)

    # If neither padding nor lengths were specified, then create a splits
    # vector that contains no default values, and reshape the input tensor
    # to form the values for the RaggedTensor.
    num_rows = dense_shape[0]
    num_values = num_rows * num_cols
    splits = math_ops.range(num_rows + 1) * num_cols
    values_shape = array_ops.concat([[num_values], dense_shape[2:]], axis=0)
    values = array_ops.reshape(tensor, values_shape)
    return ragged_factory_ops.from_row_splits(values, splits)