def testBinaryElementwiseOp(self, x, y, op=math_ops.add, **extra_args):
    """Checks a binary elementwise op against the same op on dense values.

    `extra_args` may carry a `use_kwargs` entry naming which of `x` / `y`
    are passed to `op` by keyword; it is removed before `op` is called.
    Everything else in `extra_args` is forwarded to `op`.
    """
    keyword_names = extra_args.pop('use_kwargs', ())
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y)

    x_by_keyword = 'x' in keyword_names
    y_by_keyword = 'y' in keyword_names
    if x_by_keyword and y_by_keyword:
        result = op(x=x, y=y, **extra_args)
    elif y_by_keyword:
        result = op(x, y=y, **extra_args)
    else:
        result = op(x, y, **extra_args)

    def _flat(value):
        # Ragged values are compared through their flat_values; anything
        # else is used as-is.
        if isinstance(value, ragged_tensor.RaggedTensor):
            return value.flat_values
        return value

    # Run the wrapped op on the dense values, for comparison.
    expected_flat_values = array_ops.reshape(
        op(_flat(x), _flat(y), **extra_args), [-1])

    # The result is expected to have the same shape as `y`.
    self.assertSameShape(y, result)

    # Compare the flattened values of the result with the dense reference.
    result_flat_values = array_ops.reshape(_flat(result), [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
def testConvertNumpyArrayError(self, value, message, dtype=None,
                               preferred_dtype=None):
    """Checks that converting `value` raises a ValueError matching `message`.

    Args:
        value: The input handed to `convert_to_tensor_or_ragged_tensor`.
        message: Regular expression the ValueError message must match.
        dtype: Passed positionally as the conversion's second argument.
        preferred_dtype: Passed positionally as the third argument.
    """
    # `assertRaisesRegex` is the supported spelling; the `assertRaisesRegexp`
    # alias is deprecated and no longer exists in unittest on Python 3.12+.
    with self.assertRaisesRegex(ValueError, message):
        ragged_tensor.convert_to_tensor_or_ragged_tensor(
            value, dtype, preferred_dtype)
def testConvertTensorError(self, pylist, message, dtype=None,
                           preferred_dtype=None):
    """Checks that converting a constant tensor raises a matching ValueError.

    Args:
        pylist: Value used to build the constant tensor that is converted.
        message: Regular expression the ValueError message must match.
        dtype: Passed positionally as the conversion's second argument.
        preferred_dtype: Passed positionally as the third argument.
    """
    tensor = constant_op.constant(pylist)
    # `assertRaisesRegex` is the supported spelling; the `assertRaisesRegexp`
    # alias is deprecated and no longer exists in unittest on Python 3.12+.
    with self.assertRaisesRegex(ValueError, message):
        ragged_tensor.convert_to_tensor_or_ragged_tensor(
            tensor, dtype, preferred_dtype)
def testRaggedAddWithBroadcasting(self, x, y, expected, doc):
    """Checks that `x + y` matches `expected` in ragged_rank and in value.

    Both operands are converted with dtype int32 before being added.
    `doc` is accepted but not used by the body.
    """
    def _as_int32(value):
        return ragged_tensor.convert_to_tensor_or_ragged_tensor(
            value, dtype=dtypes.int32)

    # Values without a `ragged_rank` attribute count as ragged_rank 0.
    expected_rrank = getattr(expected, 'ragged_rank', 0)

    x = _as_int32(x)
    y = _as_int32(y)
    result = x + y

    self.assertEqual(expected_rrank, getattr(result, 'ragged_rank', 0))

    # Compare against a plain nested list when `expected` offers `tolist`.
    if hasattr(expected, 'tolist'):
        expected = expected.tolist()
    self.assertRaggedEqual(result, expected)
def testConvertRaggedTensorError(self, pylist, message, dtype=None,
                                 preferred_dtype=None):
    """Checks that converting a ragged constant raises a matching ValueError.

    Args:
        pylist: Value used to build the ragged constant that is converted.
        message: Regular expression the ValueError message must match.
        dtype: Passed positionally as the conversion's second argument.
        preferred_dtype: Passed positionally as the third argument.
    """
    rt = ragged_factory_ops.constant(pylist)
    # `assertRaisesRegex` is the supported spelling; the `assertRaisesRegexp`
    # alias is deprecated and no longer exists in unittest on Python 3.12+.
    with self.assertRaisesRegex(ValueError, message):
        ragged_tensor.convert_to_tensor_or_ragged_tensor(
            rt, dtype, preferred_dtype)
def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
                                **extra_args):
    """Checks a list-valued elementwise op against its dense equivalent.

    `extra_args` may carry a `use_kwargs` flag; when truthy the converted
    inputs are passed to `op` as `inputs=`. The flag is removed before `op`
    is called and the remaining `extra_args` are forwarded to `op`.
    """
    by_keyword = extra_args.pop('use_kwargs', False)
    inputs = [
        ragged_tensor.convert_to_tensor_or_ragged_tensor(item)
        for item in inputs
    ]
    result = (op(inputs=inputs, **extra_args) if by_keyword
              else op(inputs, **extra_args))

    def _flat(value):
        # Ragged values are compared through their flat_values; anything
        # else is used as-is.
        if isinstance(value, ragged_tensor.RaggedTensor):
            return value.flat_values
        return value

    # Run the wrapped op on the dense values, for comparison.
    dense_inputs = [_flat(item) for item in inputs]
    expected_flat_values = array_ops.reshape(
        op(dense_inputs, **extra_args), [-1])

    # The result is expected to have the same shape as the first input.
    self.assertSameShape(inputs[0], result)

    # Compare the flattened values of the result with the dense reference.
    result_flat_values = array_ops.reshape(_flat(result), [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
def string_split_v2(input, sep=None, maxsplit=-1, name=None):  # pylint: disable=redefined-builtin
    """Split elements of `input` based on `sep` into a `RaggedTensor`.

    Let N be the size of `input` (typically N will be the batch size). Each
    element of `input` is split on `sep`, and the split tokens are returned
    with one additional ragged dimension. Empty tokens are ignored.

    Example:

    ```python
    >>> tf.strings.split('hello world')
    <Tensor ['hello', 'world']>
    >>> tf.strings.split(['hello world', 'a b c'])
    <tf.RaggedTensor [['hello', 'world'], ['a', 'b', 'c']]>
    ```

    If `sep` is given, consecutive delimiters are not grouped together and
    are deemed to delimit empty strings. For example, `input` of
    `"1<>2<><>3"` and `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep`
    is None or an empty string, consecutive whitespace is regarded as a
    single separator, and the result will contain no empty strings at the
    start or end if the string has leading or trailing whitespace.

    Note that the above mentioned behavior matches python's str.split.

    Args:
        input: A string `Tensor` of rank `N`, the strings to split. If
            `rank(input)` is not known statically, then it is assumed to
            be `1`.
        sep: `0-D` string `Tensor`, the delimiter string.
        maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the
            result.
        name: A name for the operation (optional).

    Raises:
        ValueError: If sep is not a string.

    Returns:
        A `RaggedTensor` of rank `N+1`, the strings split according to the
        delimiter. For a rank-0 `input`, the single row of the rank-1 result
        is returned.
    """
    with ops.name_scope(name, "StringSplit", [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, dtype=dtypes.string, name="input")

        # Ragged input: split the flat values, then re-attach the original
        # row partitions around the result.
        if isinstance(input, ragged_tensor.RaggedTensor):
            split_flat_values = string_split_v2(
                input.flat_values, sep, maxsplit)
            return input.with_flat_values(split_flat_values)

        rank = input.shape.ndims

        # Scalar input: wrap it into a batch of one and unwrap the result.
        if rank == 0:
            batch_of_one = array_ops.stack([input])
            return string_split_v2(batch_of_one, sep, maxsplit)[0]

        # Rank 2 or more: go through the ragged path above.
        if rank is not None and rank != 1:
            as_ragged = ragged_tensor.RaggedTensor.from_tensor(input)
            return string_split_v2(as_ragged, sep, maxsplit)

        # Rank 1 (or statically unknown, which is treated as rank 1).
        sparse_result = string_ops.string_split_v2(
            input, sep=sep, maxsplit=maxsplit)
        return ragged_tensor.RaggedTensor.from_value_rowids(
            values=sparse_result.values,
            value_rowids=sparse_result.indices[:, 0],
            nrows=sparse_result.dense_shape[0],
            validate=False)
def broadcast_to(rt_input, shape, broadcast_inner_dimensions=True):
    """Broadcasts a potentially ragged tensor to a ragged shape.

    Tiles `rt_input` as necessary to match the given shape.

    Behavior is undefined if `rt_input` is not broadcast-compatible with
    `shape`.

    Args:
        rt_input: The potentially ragged tensor to broadcast.
        shape: A `RaggedTensorDynamicShape`.
        broadcast_inner_dimensions: If false, then inner dimensions will not
            be tiled.

    Returns:
        A potentially ragged tensor whose values are taken from `rt_input`,
        and whose shape matches `shape`.

    Raises:
        TypeError: If `shape` is not a `RaggedTensorDynamicShape`.
    """
    if not isinstance(shape, RaggedTensorDynamicShape):
        raise TypeError('shape must be a RaggedTensorDynamicShape')
    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)

    # A shape without partitioned dimensions is uniform; pick the matching
    # helper and delegate to it.
    target_is_uniform = shape.num_partitioned_dimensions == 0
    broadcaster = (_broadcast_to_uniform_shape if target_is_uniform
                   else _broadcast_to_ragged_shape)
    return broadcaster(rt_input, shape, broadcast_inner_dimensions)
def normalize_tensors(tensors):
    """Converts a nested structure of tensor-like objects to tensors.

    * `SparseTensor`-like inputs are converted to `SparseTensor`.
    * Ragged inputs go through `convert_to_tensor_or_ragged_tensor`.
    * `TensorArray` inputs are passed through.
    * Everything else is converted to a dense `Tensor`.

    Args:
        tensors: A nested structure of tensor-like, list, `SparseTensor`,
            `SparseTensorValue`, or `TensorArray` objects.

    Returns:
        A structure with the same nesting as `tensors`, holding the
        converted components.
    """
    def _normalize(index, component):
        # The order of these checks matters: sparse, then ragged, then
        # TensorArray, and finally the dense fallback.
        if sparse_tensor_lib.is_sparse(component):
            return sparse_tensor_lib.SparseTensor.from_value(component)
        if ragged_tensor.is_ragged(component):
            return ragged_tensor.convert_to_tensor_or_ragged_tensor(
                component, name="component_%d" % index)
        if isinstance(component, tensor_array_ops.TensorArray):
            return component
        return ops.convert_to_tensor(component, name="component_%d" % index)

    flat_tensors = nest.flatten(tensors)
    with ops.name_scope("normalize_tensors"):
        prepared = [
            _normalize(index, component)
            for index, component in enumerate(flat_tensors)
        ]
    return nest.pack_sequence_as(tensors, prepared)
def _replace_ragged_with_flat_values(value, nested_splits_lists):
    """Replace RaggedTensors with their flat_values, and record their splits.

    Walks `value`, looking inside lists, tuples, and dicts. Every ragged
    value found is replaced by its `flat_values`, and its
    `nested_row_splits` is appended to `nested_splits_lists`.

    Args:
        value: The value that should be transformed by replacing
            `RaggedTensors`.
        nested_splits_lists: An output parameter used to record the
            `nested_row_splits` for any `RaggedTensors` that were replaced.

    Returns:
        For a list, tuple, or dict: a new list, tuple, or dict with the
        replacement applied to each element (dict keys are kept). For a
        ragged value: its `flat_values`. Any other value is returned
        unchanged.
    """
    # Base case: a ragged value.
    if ragged_tensor.is_ragged(value):
        ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
        nested_splits_lists.append(ragged.nested_row_splits)
        return ragged.flat_values

    def recurse(item):
        return _replace_ragged_with_flat_values(item, nested_splits_lists)

    # Recursion cases: rebuild supported containers element by element.
    if isinstance(value, list):
        return [recurse(item) for item in value]
    if isinstance(value, tuple):
        return tuple(recurse(item) for item in value)
    if isinstance(value, dict):
        return {key: recurse(item) for (key, item) in value.items()}
    return value
def _unicode_decode(input, input_encoding, errors, replacement_char,
                    replace_control_characters, with_offsets):
    """Decodes each string into a sequence of codepoints.

    Args:
        input: The strings to decode; converted with
            `convert_to_tensor_or_ragged_tensor`. Its rank must be
            statically known.
        input_encoding: Forwarded unchanged to the decode op.
        errors: Forwarded unchanged to the decode op.
        replacement_char: Forwarded unchanged to the decode op.
        replace_control_characters: Forwarded unchanged to the decode op.
        with_offsets: If true, `unicode_decode_with_offsets` is used and the
            byte start offsets are returned as well.

    Returns:
        `codepoints` when `with_offsets` is false, otherwise the tuple
        `(codepoints, offsets)`. For rank-0 input these are the op's flat
        outputs; otherwise they are `RaggedTensor`s that add one ragged
        dimension to the shape of `input`.

    Raises:
        ValueError: If the rank of `input` is not statically known.
    """
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name="input")
    input_ndims = input.shape.ndims
    if input_ndims is None:
        raise ValueError("Rank of `input` must be statically known.")

    if input_ndims > 1:
        # Convert to a ragged tensor with ragged_rank = input_ndims - 1.
        if not ragged_tensor.is_ragged(input):
            input = ragged_tensor.RaggedTensor.from_tensor(
                input, ragged_rank=input_ndims - 1)
        elif input.ragged_rank < input_ndims - 1:
            # `flat_values` has rank `input_ndims - input.ragged_rank`. To
            # reach a total ragged_rank of `input_ndims - 1` it must
            # contribute `input_ndims - 1 - input.ragged_rank` ragged
            # dimensions, which is also the most `from_tensor` can produce
            # for a tensor of that rank.
            input = input.with_flat_values(
                ragged_tensor.RaggedTensor.from_tensor(
                    input.flat_values,
                    ragged_rank=input_ndims - input.ragged_rank - 1))

    # Reshape the input to a flat vector, and apply the gen_string_ops op.
    if ragged_tensor.is_ragged(input):
        flat_input = array_ops.reshape(input.flat_values, [-1])
    else:
        flat_input = array_ops.reshape(input, [-1])

    if with_offsets:
        decode_op = gen_string_ops.unicode_decode_with_offsets
    else:
        decode_op = gen_string_ops.unicode_decode
    flat_result = decode_op(
        input=flat_input,
        input_encoding=input_encoding,
        errors=errors,
        replacement_char=replacement_char,
        replace_control_characters=replace_control_characters)

    if input_ndims == 0:
        # Scalar input: the flat outputs already are the answer.
        codepoints = flat_result.char_values
        if with_offsets:
            offsets = flat_result.char_to_byte_starts
    else:
        codepoints = ragged_tensor.RaggedTensor.from_row_splits(
            flat_result.char_values, flat_result.row_splits, validate=False)
        if input_ndims > 1:
            # Re-attach the row partitions of the (now fully ragged) input.
            codepoints = input.with_flat_values(codepoints)
        if with_offsets:
            offsets = ragged_tensor.RaggedTensor.from_row_splits(
                flat_result.char_to_byte_starts, flat_result.row_splits,
                validate=False)
            if input_ndims > 1:
                offsets = input.with_flat_values(offsets)

    if with_offsets:
        return codepoints, offsets
    else:
        return codepoints
def testConvertNumpyArray(self, value, dtype=None, preferred_dtype=None,
                          expected_dtype=None):
    """Checks the dtype and contents of a converted `value`.

    When `expected_dtype` is not given it defaults to `dtype`, or to
    `value.dtype` if `dtype` is None as well.
    """
    if expected_dtype is None:
        if dtype is None:
            expected_dtype = value.dtype
        else:
            expected_dtype = dtype

    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        value, dtype, preferred_dtype)

    wanted_dtype = dtypes.as_dtype(expected_dtype)
    self.assertEqual(wanted_dtype, converted.dtype)
    self.assertAllEqual(value, converted)
def testConvertRaggedTensorValue(self, value, dtype=None,
                                 preferred_dtype=None, expected_dtype=None):
    """Checks ragged_rank, dtype and contents of a converted `value`.

    When `expected_dtype` is not given it defaults to `dtype`, or to
    `value.dtype` if `dtype` is None as well.
    """
    if expected_dtype is None:
        if dtype is None:
            expected_dtype = value.dtype
        else:
            expected_dtype = dtype

    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        value, dtype, preferred_dtype)

    self.assertEqual(value.ragged_rank, converted.ragged_rank)
    wanted_dtype = dtypes.as_dtype(expected_dtype)
    self.assertEqual(wanted_dtype, converted.dtype)
    self.assertEqual(value.to_list(), self.eval_to_list(converted))
def dropout_v2(x: ragged_tensor.Ragged,
               rate,
               noise_shape=None,
               seed=None,
               name=None):
    """Ragged dispatch target for tf.nn.dropout.

    Applies `nn_ops.dropout_v2` to the flat values of `x` and returns them
    wrapped in the row partitions of `x`.

    Raises:
        ValueError: If `noise_shape` is not None.
    """
    if noise_shape is not None:
        raise ValueError('noise_shape is not supported yet for RaggedTensor x')
    with ops.name_scope(name, 'RaggedNNDropout', [x, rate]):
        ragged_x = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            x, name='x')
        dropped_values = nn_ops.dropout_v2(
            ragged_x.flat_values, rate=rate, seed=seed)
        return ragged_x.with_flat_values(dropped_values)
def from_tensor(cls, rt_input):
    """Constructs a ragged shape for a potentially ragged tensor.

    Dense inputs produce `cls([], shape(rt_input))`. Ragged inputs produce a
    `RaggedTensorDynamicShape` whose partitioned sizes are `nrows()` followed
    by the nested row lengths, and whose inner sizes are the shape of
    `flat_values` without its first dimension.
    """
    with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor',
                        [rt_input]):
        rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
        if ragged_tensor.is_ragged(rt_input):
            outermost_size = (rt_input.nrows(),)
            partitioned_dim_sizes = (
                outermost_size + rt_input.nested_row_lengths())
            inner_dim_sizes = array_ops.shape(rt_input.flat_values)[1:]
            return RaggedTensorDynamicShape(partitioned_dim_sizes,
                                            inner_dim_sizes)
        return cls([], array_ops.shape(rt_input))
def test_Bidirectional_ragged_input(self, merge_mode):
    """Checks Bidirectional LSTM output on ragged input against a manual merge.

    The merged layer output is compared with `merge_mode` applied by hand to
    the forward output and the re-reversed backward output.
    """
    np.random.seed(100)
    rnn = keras.layers.LSTM
    units = 3

    # Four sequences of lengths 2, 1, 4 and 3; every timestep is [1, 1, 1].
    sequence_lengths = (2, 1, 4, 3)
    x = ragged_factory_ops.constant(
        [[[1, 1, 1] for _ in range(length)] for length in sequence_lengths],
        ragged_rank=1)
    x = math_ops.cast(x, 'float32')

    with self.cached_session():
        # Only these three modes define `merge_func`; there is no fallback.
        if merge_mode == 'ave':
            def merge_func(forward, backward):
                return (forward + backward) / 2
        elif merge_mode == 'concat':
            def merge_func(forward, backward):
                return ragged_concat_ops.concat((forward, backward), axis=-1)
        elif merge_mode == 'mul':
            def merge_func(forward, backward):
                return forward * backward

        inputs = keras.Input(
            shape=(None, 3), batch_size=4, dtype='float32', ragged=True)
        layer = keras.layers.Bidirectional(
            rnn(units, return_sequences=True), merge_mode=merge_mode)

        f_merged = keras.backend.function([inputs], layer(inputs))
        f_forward = keras.backend.function(
            [inputs], layer.forward_layer(inputs))
        # The backward layer's output is reversed along axis 1 before it is
        # merged with the forward output.
        backward_output = array_ops.reverse(
            layer.backward_layer(inputs), axis=[1])
        f_backward = keras.backend.function([inputs], backward_output)

        y_merged = f_merged(x)
        forward_value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            f_forward(x))
        backward_value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            f_backward(x))
        y_expected = merge_func(forward_value, backward_value)

        y_merged = ragged_tensor.convert_to_tensor_or_ragged_tensor(y_merged)
        self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
def tokenize_with_offsets(self, input):  # pylint: disable=redefined-builtin
    """Tokenizes a tensor of UTF-8 strings on whitespaces.

    The strings are split on ICU defined whitespace characters. These
    whitespace characters are dropped.

    Args:
        input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.

    Returns:
        A tuple `(tokens, start_offsets, limit_offsets)` where:

          * `tokens`: A `RaggedTensor` of tokenized text.
          * `start_offsets`: A `RaggedTensor` of the tokens' starting byte
            offset.
          * `limit_offsets`: A `RaggedTensor` of the tokens' ending byte
            offset.

    Raises:
        ValueError: If the rank of the input is not statically known.
    """
    # No name is supplied, so the default scope name is used.
    with ops.name_scope(None, "WhitespaceTokenize", [input]):
        input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        rank = input_tensor.shape.ndims
        if rank is None:
            raise ValueError("Rank of input_tensor must be statically known.")

        if ragged_tensor.is_ragged(input_tensor):
            if input_tensor.flat_values.shape.ndims > 1:
                # Multi-dimensional flat_values can be processed on their
                # own; the output keeps the nested splits of the input.
                inner = input_tensor.flat_values
                rewrap = input_tensor.with_flat_values
            else:
                # Otherwise recurse one ragged level down, into `values`.
                inner = input_tensor.values
                rewrap = input_tensor.with_values
            (tokens, starts, limits) = self.tokenize_with_offsets(inner)
            return (rewrap(tokens), rewrap(starts), rewrap(limits))

        if rank > 1:
            # Convert the dense tensor to ragged and process that.
            return self.tokenize_with_offsets(
                ragged_conversion_ops.from_tensor(input_tensor))

        if rank == 0:
            # Wrap the scalar into a batch of one, then strip the batch
            # dimension from each output.
            (tokens, starts, limits) = self.tokenize_with_offsets(
                array_ops.stack([input_tensor]))
            return tokens.values, starts.values, limits.values

        # A rank 1 tensor is the shape the wrapped op works on directly.
        return self._whitespace_tokenize_with_offsets_encode_decode_wrapper(
            input_tensor)
def from_tensor(cls, rt_input, dim_size_dtype=None):
    """Constructs a ragged shape for a potentially ragged tensor.

    Args:
        rt_input: The potentially ragged tensor whose shape is described.
        dim_size_dtype: Forwarded to the shape constructor as
            `dim_size_dtype`, for dense and ragged inputs alike.

    Returns:
        For dense input, `cls([], shape(rt_input), ...)`. For ragged input,
        a `RaggedTensorDynamicShape` whose partitioned sizes are `nrows()`
        followed by the nested row lengths, and whose inner sizes are the
        shape of `flat_values` without its first dimension.
    """
    with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor',
                        [rt_input]):
        rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
        if not ragged_tensor.is_ragged(rt_input):
            # Pass `dim_size_dtype` here too, so a requested dtype is not
            # silently ignored for dense inputs.
            return cls([], array_ops.shape(rt_input),
                       dim_size_dtype=dim_size_dtype)
        else:
            partitioned_dim_sizes = (
                (rt_input.nrows(),) + rt_input.nested_row_lengths())
            return RaggedTensorDynamicShape(
                partitioned_dim_sizes,
                array_ops.shape(rt_input.flat_values)[1:],
                dim_size_dtype=dim_size_dtype)
def _convert_to_structured_field_value(value):
    """Converts `value` to a Tensor, RaggedTensor, or StructuredTensor.

    Args:
        value: The field value to convert.

    Returns:
        `value` itself if it already is a `Tensor`, `RaggedTensor` or
        `StructuredTensor`; otherwise the result of the ragged or dense
        conversion.

    Raises:
        TypeError: If `value` is not ragged and `ops.convert_to_tensor`
            rejects it with a ValueError or TypeError.
    """
    if isinstance(value, (ops.Tensor, ragged_tensor.RaggedTensor,
                          StructuredTensor)):
        return value
    elif ragged_tensor.is_ragged(value):
        return ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
    else:
        try:
            return ops.convert_to_tensor(value)
        except (ValueError, TypeError):
            # Wrap `value` in a 1-tuple: with a bare `% value`, a tuple
            # would be unpacked as format arguments and formatting itself
            # would fail instead of producing this message.
            raise TypeError('Unexpected type for value in `fields`: %r' %
                            (value,))
def bitcast(
    input: ragged_tensor.RaggedOrDense,  # pylint: disable=redefined-builtin
    type,  # pylint: disable=redefined-builtin
    name=None) -> ragged_tensor.RaggedOrDense:
    """RaggedTensor dispatch override for tf.bitcast.

    Bitcasts the flat values of `input` to `type` and returns them wrapped
    in the row partitions of `input`.

    Raises:
        ValueError: If `type` is wider than the dtype of `input` and
            `input.flat_values` has rank below 2.
    """
    type = dtypes.as_dtype(type)
    with ops.name_scope(name, 'Bitcast', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, name='input')

        # Casting to a wider type is rejected unless flat_values has
        # rank >= 2.
        widening = input.dtype.size < type.size
        if widening and input.flat_values.shape.rank < 2:
            raise ValueError(
                '`input.flat_values` is required to have rank >= 2 when '
                'input.dtype.size < type.size. Actual rank: '
                f'{input.flat_values.shape.rank}')

        cast_values = array_ops.bitcast(input.flat_values, type)
        return input.with_flat_values(cast_values)
def ragged_binary_elementwise_op(op, x, y):
    """Binary elementwise api handler for RaggedTensors.

    Converts `x` and `y`, broadcasts them against each other when needed,
    applies `op` to their flat values, and wraps the result in the row
    partitions of `x` (or of `y` when `x` is not ragged). A plain `bool`
    returned by `op` is passed through unchanged.
    """
    x_is_ragged = ragged_tensor.is_ragged(x)
    y_is_ragged = ragged_tensor.is_ragged(y)

    # Convert args to tensors. `x` prefers the dtype of a ragged `y`; `y`
    # then prefers the dtype of the converted `x`.
    x_preferred_dtype = y.dtype if y_is_ragged else None
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        x, preferred_dtype=x_preferred_dtype)
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        y, preferred_dtype=x.dtype)

    both_ragged = x_is_ragged and y_is_ragged
    if both_ragged:
        x, y = ragged_tensor.match_row_splits_dtypes(x, y)

    # Perform broadcasting, when appropriate.
    if (both_ragged or
            (x_is_ragged and
             x.flat_values.shape.ndims <= y.shape.ndims) or
            (y_is_ragged and
             y.flat_values.shape.ndims <= x.shape.ndims)):
        dynamic_shape = ragged_tensor_shape.RaggedTensorDynamicShape
        bcast_shape = ragged_tensor_shape.broadcast_dynamic_shape(
            dynamic_shape.from_tensor(x), dynamic_shape.from_tensor(y))
        x = ragged_tensor_shape.broadcast_to(
            x, bcast_shape, broadcast_inner_dimensions=False)
        y = ragged_tensor_shape.broadcast_to(
            y, bcast_shape, broadcast_inner_dimensions=False)

    # Raggedness is re-checked here because broadcasting may have replaced
    # `x` and `y`.
    x_values = x.flat_values if ragged_tensor.is_ragged(x) else x
    y_values = y.flat_values if ragged_tensor.is_ragged(y) else y
    mapped_values = op(x_values, y_values)

    if isinstance(mapped_values, bool):
        return mapped_values  # Special case for tensor_equals.

    if ragged_tensor.is_ragged(x):
        return x.with_flat_values(mapped_values)
    return y.with_flat_values(mapped_values)
def string_bytes_split(input, name=None):  # pylint: disable=redefined-builtin
    """Split string elements of `input` into bytes.

    Examples:

    ```python
    >>> tf.strings.bytes_split('hello')
    ['h', 'e', 'l', 'l', 'o']
    >>> tf.strings.bytes_split(['hello', '123'])
    <RaggedTensor [['h', 'e', 'l', 'l', 'o'], ['1', '2', '3']]>
    ```

    Note that this op splits strings into bytes, not unicode characters. To
    split strings into unicode characters, use `tf.strings.unicode_split`.

    See also: `tf.io.decode_raw`, `tf.strings.split`,
    `tf.strings.unicode_split`.

    Args:
        input: A string `Tensor` or `RaggedTensor`: the strings to split.
            Must have a statically known rank (`N`).
        name: A name for the operation (optional).

    Returns:
        A `RaggedTensor` of rank `N+1`: the bytes that make up the source
        strings.

    Raises:
        ValueError: If a dense `input` has no statically known rank.
    """
    with ops.name_scope(name, "StringsByteSplit", [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, name="input")

        # Ragged input: split the flat values and keep the row partitions.
        if isinstance(input, ragged_tensor.RaggedTensor):
            split_flat_values = string_bytes_split(input.flat_values)
            return input.with_flat_values(split_flat_values)

        rank = input.shape.ndims
        if rank is None:
            raise ValueError("input must have a statically-known rank.")

        # Scalar input: wrap it into a batch of one and unwrap the result.
        if rank == 0:
            batch_of_one = array_ops.stack([input])
            return string_bytes_split(batch_of_one)[0]

        # Rank 2 or more: go through the ragged path above.
        if rank != 1:
            as_ragged = ragged_tensor.RaggedTensor.from_tensor(input)
            return string_bytes_split(as_ragged)

        # Rank 1: split with an empty delimiter and use the first column
        # of the resulting indices as row ids.
        indices, values, shape = gen_string_ops.string_split(
            input, delimiter="", skip_empty=False)
        return ragged_tensor.RaggedTensor.from_value_rowids(
            values=values,
            value_rowids=indices[:, 0],
            nrows=shape[0],
            validate=False)
def reverse(tensor: ragged_tensor.Ragged, axis, name=None):
    """Reverses a RaggedTensor along the specified axes.

    #### Example:

    >>> data = tf.ragged.constant([
    ...   [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10], [11, 12]]])
    >>> tf.reverse(data, axis=[0, 2])
    <tf.RaggedTensor [[[8, 7], [10, 9], [12, 11]], [[6, 5]], [[2, 1], [4, 3]]]>

    Args:
        tensor: A 'RaggedTensor' to reverse.
        axis: A list or tuple of 'int' or a constant 1D 'tf.Tensor'. The
            indices of the axes to reverse.
        name: A name prefix for the returned tensor (optional).

    Returns:
        A 'RaggedTensor'.

    Raises:
        TypeError: If `axis` is a tensor without a constant value, or is
            neither a tensor nor a list/tuple of ints.
    """
    # The two literals are concatenated, so the first one must end with a
    # space; without it the message read "...constant tensorwhen...".
    type_error_msg = ('`axis` must be a list of int or a constant tensor '
                      'when reversing axes in a ragged tensor')

    with ops.name_scope(name, 'Reverse', [tensor, axis]):
        if isinstance(axis, ops.Tensor):
            # Only tensors whose value is known at graph-construction time
            # are accepted.
            axis = tensor_util.constant_value(axis)
            if axis is None:
                raise TypeError(type_error_msg)
        elif not (isinstance(axis, (list, tuple)) and
                  all(isinstance(dim, int) for dim in axis)):
            raise TypeError(type_error_msg)

        tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            tensor, name='tensor')

        # Allow usage of negative values to specify innermost axes.
        axis = [
            array_ops.get_positive_axis(dim, tensor.shape.rank,
                                        'axis[%d]' % i, 'rank(tensor)')
            for i, dim in enumerate(axis)
        ]

        # We only need to slice up to the max axis. If the axis list
        # is empty, it should be 0.
        slices = [slice(None)] * (max(axis) + 1 if axis else 0)

        for dim in axis:
            slices[dim] = slice(None, None, -1)

        return tensor[tuple(slices)]
def __init__(self, shape, fields):
    """Creates a `StructuredTensor` from a dictionary of fields.

    Args:
        shape: A `TensorShape`: static information about the shape of the
            `StructuredTensor`. Must have a known `rank`.
        fields: A dictionary mapping from string to `Tensor`,
            `RaggedTensor`, or `StructuredTensor`, providing the values for
            individual fields in each structure. If `ndims > 0`, then every
            tensor in `fields` must have the same shape in the first
            `shape.rank` dimensions; and that shape must be compatible with
            `shape`.

    Raises:
        ValueError: If `shape` has unknown rank, or a field name does not
            match `_FIELD_NAME_RE`.
        TypeError: If `fields` is not a dict, a key is not a `str`, or a
            value cannot be converted to a tensor.
    """
    # The shape is normalized and validated once here; `rank` is reused
    # below when merging the field shapes.
    shape = tensor_shape.as_shape(shape)
    rank = shape.rank
    if rank is None:
        raise ValueError("StructuredTensor's shape must have known rank.")
    if not isinstance(fields, dict):
        raise TypeError('fields must be a dictionary, got %s' %
                        type(fields).__name__)

    self._fields = {}
    with ops.name_scope(None, 'StructuredTensor', fields.values()):
        for (key, value) in fields.items():
            if not isinstance(key, str):
                # 1-tuple on the right of `%`, so tuple keys format
                # correctly instead of being unpacked as format arguments.
                raise TypeError('Unexpected type for key in `fields`: %r' %
                                (key,))
            if not _FIELD_NAME_RE.match(key):
                raise ValueError(
                    'Field name %r is not currently allowed.' % key)
            if not isinstance(
                    value,
                    (ops.Tensor, ragged_tensor.RaggedTensor,
                     StructuredTensor)):
                if ragged_tensor.is_ragged(value):
                    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                        value)
                else:
                    try:
                        value = ops.convert_to_tensor(value)
                    except (ValueError, TypeError):
                        # Same 1-tuple precaution as for the key above.
                        raise TypeError(
                            'Unexpected type for value in `fields`: %r' %
                            (value,))
            self._fields[key] = value

    # Refine the static shape with the leading `rank` dimensions of every
    # field; `merge_with` is what checks their compatibility.
    self._static_shape = shape
    if rank > 0:
        for value in self._fields.values():
            self._static_shape = self._static_shape.merge_with(
                value.shape[:rank])
def _ragged_nn_dropout_v1(x, keep_prob=None, noise_shape=None, seed=None, name=None, rate=None): if noise_shape is not None: raise ValueError('noise_shape is not supported yet for RaggedTensor x') with ops.name_scope(name, 'RaggedNNDropout', [x, rate]): x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x') return x.with_flat_values( nn_ops.dropout(x.flat_values, keep_prob=keep_prob, seed=seed, rate=rate))
def detokenize(self, input, name=None):  # pylint: disable=redefined-builtin
    """Detokenizes input codepoints (integers) to UTF-8 strings.

    Args:
        input: A `RaggedTensor` or `Tensor` of codepoints (ints) with a rank
            of at least 1.
        name: The name argument that is passed to the op function. When
            None, the default scope name "UnicodeCharTokenize" is used.

    Returns:
        An N-1 dimensional string tensor of the detokenized text.
    """
    # `name` is passed to the scope as given; resetting it to None here
    # would silently discard the caller's argument.
    with ops.name_scope(name, "UnicodeCharTokenize", [input, self]):
        input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        return ragged_string_ops.unicode_encode(input_tensor, "UTF-8")
def tokenize(self, input, name=None):  # pylint: disable=redefined-builtin
    """Tokenizes a tensor of UTF-8 strings.

    Args:
        input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.
        name: The name argument that is passed to the op function.

    Returns:
        A `RaggedTensor` of tokenized text. The returned shape is the shape
        of the input tensor with an added ragged dimension for tokens of
        each string.

    Raises:
        ValueError: If the rank of the input is not statically known.
    """
    with ops.name_scope(name, "SentenceTokenizer", [input, self]):
        input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        rank = input_tensor.shape.ndims
        if rank is None:
            raise ValueError("Rank of input_tensor must be statically known.")

        if ragged_tensor.is_ragged(input_tensor):
            # Tokenize the flat values, then re-attach the row partitions.
            flat_tokens = self.tokenize(input_tensor.flat_values)
            return input_tensor.with_flat_values(flat_tokens)

        if rank > 1:
            # Convert the dense tensor to ragged and process that.
            return self.tokenize(
                ragged_conversion_ops.from_tensor(input_tensor))

        if rank == 0:
            # Wrap the scalar into a batch of one, then drop the batch
            # dimension from the result.
            batched_tokens = self.tokenize(array_ops.stack([input_tensor]))
            return batched_tokens.values

        # A rank 1 tensor is what the op consumes directly.
        (output_values, row_splits) = (
            gen_sentencepiece_tokenizer.sentencepiece_tokenize_op(
                self._model_resource.resource_handle,
                input_tensor,
                self.nbest_size,
                self.alpha,
                self.add_bos,
                self.add_eos,
                self.reverse,
                self.out_type,
                return_nbest=self.return_nbest))
        return RaggedTensor.from_nested_row_splits(
            flat_values=output_values,
            nested_row_splits=[row_splits],
            validate=False)
def normalize_element(element):
    """Normalizes a nested structure of element components.

    * Components matching `SparseTensorSpec` are converted to `SparseTensor`.
    * Components matching `RaggedTensorSpec` are converted to `RaggedTensor`.
    * Components matching `DatasetSpec` or `TensorArraySpec` are passed
      through.
    * Components matching `NoneTensorSpec` are replaced by `NoneTensor()`.
    * `CompositeTensor` components are passed through.
    * All other components are converted to `Tensor`.

    Args:
        element: A nested structure of individual components.

    Returns:
        A structure with the same nesting as `element`, holding the
        normalized components.
    """
    components = nest.flatten(element)
    normalized_components = []
    with ops.name_scope("normalize_element"):
        # Imported here to avoid circular dependency.
        from tensorflow.python.data.ops import dataset_ops  # pylint: disable=g-import-not-at-top

        for index, component in enumerate(components):
            try:
                spec = type_spec_from_value(component, use_fallback=False)
            except TypeError:
                # TypeError indicates it was not possible to compute a
                # `TypeSpec` for the value. As a fallback try converting the
                # value to a tensor.
                normalized = ops.convert_to_tensor(
                    component, name="component_%d" % index)
            else:
                # The order of these checks is significant.
                if isinstance(spec, sparse_tensor.SparseTensorSpec):
                    normalized = sparse_tensor.SparseTensor.from_value(
                        component)
                elif isinstance(spec, ragged_tensor.RaggedTensorSpec):
                    normalized = (
                        ragged_tensor.convert_to_tensor_or_ragged_tensor(
                            component, name="component_%d" % index))
                elif isinstance(spec, (tensor_array_ops.TensorArraySpec,
                                       dataset_ops.DatasetSpec)):
                    normalized = component
                elif isinstance(spec, NoneTensorSpec):
                    normalized = NoneTensor()
                elif isinstance(component, composite_tensor.CompositeTensor):
                    normalized = component
                else:
                    normalized = ops.convert_to_tensor(
                        component, name="component_%d" % index)
            normalized_components.append(normalized)
    return nest.pack_sequence_as(element, normalized_components)
def lookup(self, inputs):
    """Perform a table lookup.

    Sparse inputs go to `_sparse_lookup`. Everything else is first converted
    to a tensor or ragged tensor and then sent to `_ragged_lookup` or
    `_tensor_lookup`.
    """
    # Sparse tensors don't play nicely with tensor conversion, so they are
    # handled before attempting to convert lists or arrays to tensors.
    sparse_types = (sparse_tensor.SparseTensor,
                    sparse_tensor.SparseTensorValue)
    if isinstance(inputs, sparse_types):
        return self._sparse_lookup(inputs)

    # Try to convert lists/arrays to tensors or RaggedTensors.
    inputs = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs)

    # Run the lookup that matches the converted input.
    handler = (self._ragged_lookup if ragged_tensor.is_ragged(inputs)
               else self._tensor_lookup)
    return handler(inputs)
def tokenize_with_offsets(self, input_strs):
    """Tokenizes a tensor of UTF-8 strings into words with [start,end) offsets.

    Args:
        input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8
            strings.

    Returns:
        A tuple `(tokens, start_offsets, limit_offsets)` where:

          * `tokens` is a `RaggedTensor` of strings where
            `tokens[i1...iN, j]` is the string content of the `j-th` token
            in `input_strs[i1...iN]`.
          * `start_offsets` is a `RaggedTensor` of int64s where
            `start_offsets[i1...iN, j]` is the byte offset for the start of
            the `j-th` token in `input_strs[i1...iN]`.
          * `limit_offsets` is a `RaggedTensor` of int64s where
            `limit_offsets[i1...iN, j]` is the byte offset immediately after
            the end of the `j-th` token in `input_strs[i1...iN]`.

    Raises:
        ValueError: If the rank of `input_strs` is not statically known.
    """
    input_strs = ragged_tensor.convert_to_tensor_or_ragged_tensor(input_strs)
    rank = input_strs.shape.ndims
    if rank is None:
        raise ValueError('input must have a known rank.')

    # Currently, the hub_module accepts only rank 1 input tensors, and
    # outputs rank 2 tokens/starts/ends. Inputs of other ranks are first
    # brought into rank 1 form, and the outputs are converted back.
    if rank == 1:
        return self._predict_tokens(input_strs)

    if rank == 0:
        # Build a rank 1 input batch with one string, then strip the batch
        # dimension from each output.
        input_batch = array_ops.stack([input_strs])
        tokens, starts, ends = self._predict_tokens(input_batch)
        return tokens.flat_values, starts.flat_values, ends.flat_values

    # Rank 2 or more: make the input fully ragged, run on its flat values,
    # and re-attach the row partitions to every output.
    if not ragged_tensor.is_ragged(input_strs):
        input_strs = ragged_tensor.RaggedTensor.from_tensor(
            input_strs, ragged_rank=rank - 1)
    tokens, starts, limits = self._predict_tokens(input_strs.flat_values)
    return (input_strs.with_flat_values(tokens),
            input_strs.with_flat_values(starts),
            input_strs.with_flat_values(limits))
def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
    """Checks a unary elementwise op against the same op on dense values.

    `extra_args` are forwarded to `op` in both the ragged and the dense
    call.
    """
    def _flat(value):
        # Ragged values are compared through their flat_values; anything
        # else is used as-is.
        if isinstance(value, ragged_tensor.RaggedTensor):
            return value.flat_values
        return value

    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
    result = op(x, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    expected_flat_values = array_ops.reshape(
        op(_flat(x), **extra_args), [-1])

    # The result is expected to have the same shape as the input.
    self.assertSameShape(x, result)

    # Compare the flattened values of the result with the dense reference.
    result_flat_values = array_ops.reshape(_flat(result), [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
def down_sample(source, freq_vocab, replacement='', threshold=1e-3,
                min_freq=0, seed=None, name=None):
    """Randomly down-sample high frequency tokens in `source`.

    Items that are not kept by `sample_mask` are substituted with
    `replacement`.

    Args:
        source: string `Tensor` or `RaggedTensor` or `SparseTensor` of any
            shape, items to be sampled.
        freq_vocab: `Counter` with frequencies vocabulary.
        replacement: `string`, value to set instead of downsampled ones.
        threshold: `float`, items occurrence threshold.
        min_freq: `int`, items below that frequency will be treated as
            unique.
        seed: `int`, used to create a random seed (optional).
            See @{tf.random.set_seed} for behavior.
        name: `string`, a name for the operation (optional).

    Returns:
        A value of the same kind as the converted `source` (dense, ragged or
        sparse) in which items are either kept or replaced by `replacement`.

    Raises:
        RuntimeError: If the dtype of the converted `source` is not
            compatible with string.
    """
    with tf.name_scope(name or 'down_sample'):
        sparse_kinds = (sparse_tensor.SparseTensorValue,
                        sparse_tensor.SparseTensor)
        if isinstance(source, sparse_kinds):
            source = sparse_tensor.convert_to_tensor_or_sparse_tensor(
                source, dtype=tf.string, name=name)
        else:
            source = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                source, dtype=tf.string, name=name)

        if not tf.string.is_compatible_with(source.dtype):
            raise RuntimeError(
                '"Source" must have dtype compatible with "string". '
                'Actual: {}'.format(source.dtype))

        def _resample(values):
            # Recurse on the underlying values with the same settings (the
            # name is intentionally not forwarded).
            return down_sample(values, freq_vocab, replacement, threshold,
                               min_freq, seed)

        # Composite inputs: sample their values and rebuild the container.
        if isinstance(source, tf.SparseTensor):
            sampled_values = _resample(source.values)
            return tf.SparseTensor(
                values=sampled_values,
                indices=source.indices,
                dense_shape=source.dense_shape)
        if isinstance(source, tf.RaggedTensor):
            return source.with_flat_values(_resample(source.flat_values))

        # Dense input: build the keep-mask and substitute dropped items.
        keep = sample_mask(
            source=source,
            freq_vocab=freq_vocab,
            threshold=threshold,
            min_freq=min_freq,
            seed=seed,
        )
        return tf.where(keep, source, replacement)
def ragged_tensor_to_string(rt, summarize=None):
    """Returns a scalar string tensor with the contents of a RaggedTensor.

    Requires that `rt.shape.rank` is not `None`.

    Note: this converts the entire `RaggedTensor` into a single string scalar.
    If you want to convert individual elements, use `tf.strings.as_string(rt)`.

    >>> rt1 = tf.ragged.constant([[1, 2, 3], [4, 5]])
    >>> ragged_tensor_to_string(rt1).numpy()
    b'[[1, 2, 3], [4, 5]]'

    >>> rt2 = tf.ragged.constant([[['a'], ['b', 'c']], [['d', 'e', 'f'], []]])
    >>> ragged_tensor_to_string(rt2).numpy()
    b"[[['a'], ['b', 'c']], [['d', 'e', 'f'], []]]"

    >>> rt3 = tf.ragged.constant([[1], [2, 3, 4, 5, 6], [], [], [7], [8, 9]])
    >>> ragged_tensor_to_string(rt3, summarize=2).numpy()
    b'[[1], [2, 3, ..., 5, 6], ..., [7], [8, 9]]'

    Args:
      rt: The RaggedTensor that should be converted to a string.
      summarize: If specified, then only the first and last `summarize` elements
        within each dimension are included in the string. If `-1` or `None`,
        then all elements are included.

    Returns:
      The value produced by `_ragged_tensor_to_string` for the stringified
      elements of `rt`.

    Raises:
      ValueError: If `summarize` is not `None`, `-1` or a positive int, or if
        `rt.shape.rank` is `None`.
    """
    if summarize is not None and summarize != -1:
        if not isinstance(summarize, int) or summarize <= 0:
            raise ValueError(
                "Expected summarize to be -1 or a positive int, got %r" % summarize)

    with ops.name_scope(None, "AsString", [rt]):
        rt = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt)
        if rt.shape.rank is None:
            raise ValueError("RaggedTensor to_string requires that rt.shape.rank "
                             "is not None.")

        # Turn every element into its string form.
        flat = rt.flat_values
        if rt.dtype == dtypes.string:
            # Backslash-escape quotes and backslashes, then wrap each string
            # in single quotes.
            escaped = string_ops.regex_replace(flat, r"(['\\])", r"\\\1")
            flat_strings = "'" + escaped + "'"
        else:
            flat_strings = string_ops.as_string(flat)

        return _ragged_tensor_to_string(rt.with_flat_values(flat_strings), summarize)
def string_bytes_split(input, name=None):  # pylint: disable=redefined-builtin
    """Split string elements of `input` into bytes.

    Examples:

    ```python
    >>> string_bytes_split('hello')
    ['h', 'e', 'l', 'l', 'o']
    >>> string_bytes_split(['hello', '123'])
    [['h', 'e', 'l', 'l', 'o'], ['1', '2', '3']]  # a RaggedTensor
    ```

    Note that this op splits strings into bytes, not unicode characters. To
    split strings into unicode characters, use `tf.strings.unicode_split`.

    See also: `tf.io.decode_raw`, `tf.strings.split`, `tf.strings.unicode_split`.

    Args:
      input: A string `Tensor` or `RaggedTensor`: the strings to split. Must
        have a statically known rank (`N`).
      name: A name for the operation (optional).

    Returns:
      A `RaggedTensor` of rank `N+1`: the bytes that make up the source strings.

    Raises:
      ValueError: If the rank of a non-ragged `input` is not statically known.
    """
    with ops.name_scope(name, "StringsByteSplit", [input]):
        strings = ragged_tensor.convert_to_tensor_or_ragged_tensor(input, name="input")

        # Ragged input: split the flat values and keep the row partitions.
        if isinstance(strings, ragged_tensor.RaggedTensor):
            return strings.with_flat_values(string_bytes_split(strings.flat_values))

        rank = strings.shape.ndims
        if rank is None:
            raise ValueError("input must have a statically-known rank.")

        if rank == 0:
            # Scalar: wrap into a vector, split, then unwrap the single row.
            return string_bytes_split(array_ops.stack([strings]))[0]

        if rank == 1:
            # An empty delimiter makes the split op emit one value per byte.
            indices, values, shape = gen_string_ops.string_split(
                strings, delimiter="", skip_empty=False)
            return ragged_tensor.RaggedTensor.from_value_rowids(
                values=values,
                value_rowids=indices[:, 0],
                nrows=shape[0],
                validate=False)

        # Higher ranks: go through the ragged code path above.
        return string_bytes_split(ragged_tensor.RaggedTensor.from_tensor(strings))
def func(data):
    """Joins sliding windows of `width` items of `data` along `axis`.

    `name`, `width`, `axis` and `string_separator` are not parameters; they
    are read from the enclosing scope, which is not part of this block.

    Args:
      data: Value converted with `convert_to_tensor_or_ragged_tensor`.

    Returns:
      The result of `string_ops.reduce_join` over the stacked windows.
    """
    with ops.name_scope(name, 'NGrams', [data, width]):
        data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')

        # Window `k` covers positions [k, k - width + 1) along `axis`; the
        # last window has an open end, expressed as a `None` stop.
        windows = []
        for offset in range(width):
            tail = offset - width + 1
            window = slice(offset, tail if tail != 0 else None)
            if axis >= 0:
                selector = [slice(None)] * axis + [window]
            else:
                selector = [Ellipsis, window] + [slice(None)] * (-axis - 1)
            windows.append(data[selector])

        # Stack the windows on a new axis placed right after `axis`.
        stack_axis = axis if axis < 0 else axis + 1
        stacked = array_ops.stack(windows, stack_axis)

        return string_ops.reduce_join(
            stacked, axis=axis, separator=string_separator)
def cont_bow(source, window, seed=None, name=None):
    """Generates `Continuous bag-of-words` target and context pairs from batched list of tokens.

    Args:
        source: `2-D` string `Tensor` or `RaggedTensor`, batched lists of tokens [sentences, tokens].
        window: `int`, size of context before and after target token, must be > 0.
        seed: `int`, used to create a random seed (optional).
            See @{tf.random.set_seed} for behavior.
        name: `string`, a name for the operation (optional).

    Returns:
        `1-D` string `Tensor`: target tokens.
        `2-D` string `RaggedTensor`: context tokens.
        `2-D` int32 `RaggedTensor`: context positions.

    Raises:
        ValueError: If the rank of `source` is not 2 or its ragged rank is not 1.
    """
    with tf.name_scope(name or 'cont_bow'):
        tokens = ragged_tensor.convert_to_tensor_or_ragged_tensor(source, name='source')

        if tokens.shape.rank != 2:
            raise ValueError('Rank of `source` must equals 2')

        # Dense batches are turned into ragged ones with a single ragged dimension.
        if not ragged_tensor.is_ragged(tokens):
            tokens = ragged_tensor.RaggedTensor.from_tensor(tokens, ragged_rank=1)

        if tokens.ragged_rank != 1:
            raise ValueError('Ragged rank of `source` must equals 1')

        op_seed, op_seed2 = random_seed.get_seed(seed)

        op_outputs = tfmiss_ops.miss_cont_bow(
            source_values=tokens.values,
            source_splits=tokens.row_splits,
            window=window,
            seed=op_seed,
            seed2=op_seed2,
        )
        target, ctx_values, ctx_splits, ctx_positions = op_outputs

        # Context tokens and their positions share the same row splits.
        context = tf.RaggedTensor.from_row_splits(ctx_values, ctx_splits)
        position = tf.RaggedTensor.from_row_splits(ctx_positions, ctx_splits)

        return target, context, position
def reduce_variance(input_tensor: ragged_tensor.Ragged, axis=None, keepdims=False, name=None):
    """For docs, see: _RAGGED_REDUCE_DOCSTRING.

    Computes the variance as `mean(x ** 2) - mean(x) ** 2`, clipped at zero.

    Raises:
      ValueError: If `input_tensor` has a complex dtype.
    """
    with ops.name_scope(name, 'RaggedReduceVariance', [input_tensor, axis]):
        values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input_tensor, name='input_tensor')
        if values.dtype.is_complex:
            raise ValueError(
                'reduce_variance is not supported for RaggedTensors with complex dtypes.'
            )

        second_moment = reduce_mean(
            math_ops.square(values), axis=axis, keepdims=keepdims)
        first_moment = reduce_mean(values, axis=axis, keepdims=keepdims)
        variance = second_moment - math_ops.square(first_moment)

        # This formula is not numerically stable and may produce small
        # negative values, so the result is clipped to be >= 0.
        return math_ops.maximum(variance, 0)
def detokenize(self, input, name=None):  # pylint: disable=redefined-builtin
    """Detokenizes tokens into preprocessed text.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 string tokens with a rank of
        at least 1.
      name: The name argument that is passed to the op function.

    Returns:
      A N-1 dimensional string Tensor or RaggedTensor of the detokenized text.

    Raises:
      ValueError: If the rank of `input` is not statically known or is 0.
    """
    with ops.name_scope(name, "SentenceTokenizer", [input, self]):
        tokens_in = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        rank = tokens_in.shape.ndims
        if rank is None:
            raise ValueError("Rank of input_tensor must be statically known.")
        if rank == 0:
            raise ValueError("Rank of input_tensor must be at least 1.")

        if not ragged_tensor.is_ragged(tokens_in):
            if rank > 1:
                # Dense input of rank 2 or more: convert to ragged and retry.
                return self.detokenize(
                    ragged_conversion_ops.from_tensor(tokens_in))
            # Rank-1 input: add a batch dimension, detokenize, then reshape
            # the result to a scalar.
            batched = self.detokenize(array_ops.stack([tokens_in]))
            return array_ops.reshape(batched, [])

        if tokens_in.flat_values.shape.ndims > 1:
            # Multi-dimensional flat_values are processed on their own; the
            # output keeps the nested splits of the input.
            return tokens_in.with_flat_values(
                self.detokenize(tokens_in.flat_values))

        if tokens_in.ragged_rank > 1:
            # Peel one ragged dimension off and recurse on the values.
            return tokens_in.with_values(self.detokenize(tokens_in.values))

        # Rank-1 flat_values with a single ragged dimension: call the op.
        return gen_sentencepiece_tokenizer.sentencepiece_detokenize_op(
            self._model_resource.resource_handle,
            tokens_in.flat_values,
            tokens_in.row_splits,
            self.add_bos,
            self.add_eos,
            self.reverse)
def ragged_one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None):
    """Applies tf.one_hot along the values of a RaggedTensor.

    Args:
      indices: A `RaggedTensor` (or anything convertible to a `Tensor` or
        `RaggedTensor`) of indices.
      depth: Forwarded to `array_ops.one_hot`.
      on_value: Forwarded to `array_ops.one_hot`.
      off_value: Forwarded to `array_ops.one_hot`.
      axis: Optional axis for the one-hot dimension.
      dtype: Forwarded to `array_ops.one_hot`.
      name: A name for the operation (optional).

    Returns:
      For ragged `indices`, a `RaggedTensor` with the same row partitions whose
      flat values are the one-hot encoding of `indices.flat_values`. For
      non-ragged `indices`, the result of `array_ops.one_hot`.

    Raises:
      ValueError: If `indices` is ragged and the normalized `axis` is less than
        `indices.ragged_rank`.
    """
    with ops.name_scope(name, 'RaggedOneHot', [indices]):
        indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            indices, name='indices')

        # The conversion above may yield a plain Tensor, which has neither
        # `ragged_rank` nor `with_flat_values`; delegate to the dense op.
        if not ragged_tensor.is_ragged(indices):
            return array_ops.one_hot(indices, depth, on_value, off_value, axis,
                                     dtype, name)

        # NOTE(review): `axis` is normalized against the rank of `indices` and
        # then passed unchanged to the one_hot call on `flat_values`, whose
        # rank differs from that of `indices`. Confirm that this is the
        # intended axis semantics for ragged_rank > 1 and for non-negative
        # axis values.
        if axis is not None:
            axis = ragged_util.get_positive_axis(axis, indices.shape.ndims)
            if axis < indices.ragged_rank:
                raise ValueError(
                    'axis may not be less than indices.ragged_rank.')

        return indices.with_flat_values(
            array_ops.one_hot(indices.flat_values, depth, on_value, off_value,
                              axis, dtype, name))
def _replace_ragged_with_flat_values(value, partition_lists, flat_values_nrows):
    """Replace RaggedTensors with their flat_values, and record their partitions.

    Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their
    `flat_values` tensor. Looks inside lists, tuples, and dicts.

    Appends each `RaggedTensor`'s `RowPartition`s to `partition_lists`.

    Args:
      value: The value that should be transformed by replacing `RaggedTensors`.
      partition_lists: An output parameter used to record the row partitions
        for any `RaggedTensors` that were replaced.
      flat_values_nrows: An output parameter used to record the outer dimension
        size for each replacement `flat_values` (when known). Contains a list
        of int.

    Returns:
      A copy of `value` with nested `RaggedTensors` replaced by their
      `flat_values`.
    """
    if not ragged_tensor.is_ragged(value):
        # Containers are rebuilt element by element; anything else is
        # returned unchanged.
        def visit(item):
            return _replace_ragged_with_flat_values(item, partition_lists,
                                                    flat_values_nrows)

        if isinstance(value, list):
            return [visit(item) for item in value]
        if isinstance(value, tuple):
            return tuple(visit(item) for item in value)
        if isinstance(value, dict):
            return {key: visit(item) for key, item in value.items()}
        return value

    # Ragged value: record its partitions and (if statically known) the
    # outer dimension of its flat values.
    rt = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
    partition_lists.append(rt._nested_row_partitions)  # pylint: disable=protected-access
    outer_dim = tensor_shape.dimension_at_index(rt.flat_values.shape, 0).value
    if outer_dim is not None:
        flat_values_nrows.append(outer_dim)
    return rt.flat_values
def test_merge_with_ragged_input(self, layer):
    """Checks that `Add` gives equal results for ragged inputs and their dense form.

    NOTE(review): the `layer` argument is never used; the merge layer is
    always `keras.layers.Add`. Confirm whether the test was meant to exercise
    `layer` instead.
    """
    ragged_data = tf.ragged.constant(
        [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
    dense_data = ragged_data.to_tensor()

    def predict_sum(data, **input_kwargs):
        # Build a two-input model that adds its inputs, then feed `data` to
        # both inputs.
        first = keras.Input(shape=(None, ), **input_kwargs)
        second = keras.Input(shape=(None, ), **input_kwargs)
        summed = keras.layers.Add()([first, second])
        model = keras.models.Model(inputs=[first, second], outputs=summed)
        return model.predict([data, data], steps=1)

    ragged_prediction = predict_sum(ragged_data, ragged=True)
    out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        ragged_prediction).to_tensor()

    out_dense = predict_sum(dense_data)

    self.assertAllEqual(out_dense, out_ragged)
def gather(params, indices, validate_indices=None, name=None, axis=None, batch_dims=0):
    """tf.gather for structured tensors.

    Does not support (yet) checks on illegal axis values, et cetera.

    Indices must be a ragged or dense tensor.

    Args:
      params: a structured tensor to be gathered
      indices: a ragged tensor or tensor to gather by.
      validate_indices: whether to validate the indices
      name: the name of the op(s).
      axis: the axis in params to gather on. Defaults to `batch_dims`.
      batch_dims: the number of batch dimensions.

    Returns:
      the params reorganized according to indices.
    """
    with ops.name_scope('gather' if name is None else name):
        requested_axis = batch_dims if axis is None else axis
        params_rank = params.shape.rank
        axis = array_ops.get_positive_axis(requested_axis, params_rank)
        indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            indices, name='indices')

        def gather_leaf(leaf):
            # Applied through `_extend_op_single`; uses the normalized axis
            # and the converted indices from above.
            return array_ops.gather(
                leaf,
                indices,
                validate_indices=validate_indices,
                axis=axis,
                batch_dims=batch_dims,
                name=None)

        return _extend_op_single(params, gather_leaf)
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
    """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

    The values of `input` are replicated `multiples[i]` times along the
    `i`th dimension (for each dimension `i`). For every dimension `axis` in
    `input`, the length of each output element in that dimension is the
    length of corresponding input element multiplied by `multiples[axis]`.

    Args:
      input: A `RaggedTensor`.
      multiples: A 1-D integer `Tensor`. Length must be the same as the number
        of dimensions in `input`.
      name: A name for the operation (optional).

    Returns:
      A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.
      If `input` is not ragged, the result of `array_ops.tile` is returned.

    #### Example:
      ```python
      >>> rt = tf.ragged.constant([[1, 2], [3]])
      >>> ragged.tile(rt, [3, 2])
      [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
      ```
    """
    with ops.name_scope(name, 'RaggedTile', [input, multiples]):
        rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, name='input')

        # Dense tensors are handled by the regular tile op.
        if not ragged_tensor.is_ragged(rt_input):
            return array_ops.tile(rt_input, multiples, name)

        multiples = ragged_util.convert_to_int_tensor(
            multiples, name='multiples', dtype=rt_input.row_splits.dtype)
        multiples.shape.assert_has_rank(1)

        # When `multiples` has a constant value, the helpers can use it to
        # skip tiling dimensions where `multiples=1`.
        const_multiples = tensor_util.constant_value(multiples)

        tiled_values = _tile_ragged_values(rt_input, multiples, const_multiples)
        tiled_splits = _tile_ragged_splits(rt_input, multiples, const_multiples)
        return ragged_tensor.RaggedTensor.from_nested_row_splits(
            tiled_values, tiled_splits, validate=False)
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
    """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

    The values of `input` are replicated `multiples[i]` times along the
    `i`th dimension (for each dimension `i`). For every dimension `axis` in
    `input`, the length of each output element in that dimension is the
    length of corresponding input element multiplied by `multiples[axis]`.

    Args:
      input: A `RaggedTensor`.
      multiples: A 1-D integer `Tensor`. Length must be the same as the number
        of dimensions in `input`.
      name: A name for the operation (optional).

    Returns:
      A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.
      A non-ragged `input` is passed to `array_ops.tile` instead.

    #### Example:
      ```python
      >>> rt = tf.ragged.constant([[1, 2], [3]])
      >>> ragged.tile(rt, [3, 2])
      [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
      ```
    """
    with ops.name_scope(name, 'RaggedTile', [input, multiples]):
        converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, name='input')
        if ragged_tensor.is_ragged(converted):
            splits_dtype = converted.row_splits.dtype
            multiples = ragged_util.convert_to_int_tensor(
                multiples, name='multiples', dtype=splits_dtype)
            multiples.shape.assert_has_rank(1)

            # A statically known `multiples` lets the helpers skip tiling
            # dimensions where `multiples=1`.
            static_multiples = tensor_util.constant_value(multiples)

            new_values = _tile_ragged_values(converted, multiples, static_multiples)
            new_splits = _tile_ragged_splits(converted, multiples, static_multiples)
            return ragged_tensor.RaggedTensor.from_nested_row_splits(
                new_values, new_splits, validate=False)

        # Not ragged: use the dense tile op.
        return array_ops.tile(converted, multiples, name)
def char_ngrams(source, minn, maxn, itself, skip=None, name=None):
    """Split unicode strings into character ngrams.

    Args:
        source: `Tensor` or `RaggedTensor` of any shape, strings to split
        minn: Minimum length of character ngram
        maxn: Maximum length of character ngram
        itself: Strategy for source word preserving.
            One of `"asis"`, `"never"`, `"always"`, `"alone"`.
        skip: list of strings to pass without changes or None.
        name: A name for the operation (optional).

    Returns:
        `Tensor` if rank(source) is 0, `RaggedTensor` with an additional dimension otherwise.

    Raises:
        ValueError: If the rank of `source` is not statically known.
    """
    with tf.name_scope(name or 'char_ngrams'):
        strings = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            source, name='source', dtype=tf.string)

        rank = strings.shape.rank
        if rank is None:
            raise ValueError('Rank of `source` must be statically known.')

        # Dense tensors of rank 2 or more become fully ragged, so that only
        # rank-0 and rank-1 dense tensors reach the op below.
        if not isinstance(strings, tf.RaggedTensor) and rank > 1:
            strings = ragged_tensor.RaggedTensor.from_tensor(
                strings, ragged_rank=rank - 1)

        # Ragged input: process the flat values and keep the row partitions.
        if isinstance(strings, tf.RaggedTensor):
            flat_ngrams = char_ngrams(strings.flat_values, minn, maxn, itself, skip)
            return strings.with_flat_values(flat_ngrams)

        ngram_values, ngram_splits = tfmiss_ops.miss_char_ngrams(
            source=strings,
            minn=minn,
            maxn=maxn,
            itself=itself.upper(),
            skip=skip or [],
        )

        if strings.shape.rank == 0:
            return ngram_values

        return tf.RaggedTensor.from_row_splits(ngram_values, ngram_splits)
def normalize_utf8_with_offsets_map(input, normalization_form="NFKC", name=None):
    """Normalizes each UTF-8 string in the input tensor using the specified rule.

    Returns normalized strings and an offset map used by another operation to
    map post-normalized string offsets to pre-normalized string offsets.

    See http://unicode.org/reports/tr15/

    Args:
      input: A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.)
      normalization_form: One of the following string values ('NFC', 'NFKC',
        'NFD', 'NFKD'). Default is 'NFKC'. NOTE: `NFD` and `NFKD` for
        `normalize_utf8_with_offsets_map` will not be available until the
        tf.text release w/ ICU 69 (scheduled after 4/2021).
      name: The name for this op (optional).

    Returns:
      A tuple of (results, offsets_map) where:

      results: A `Tensor` or `RaggedTensor` of type string, with normalized
        contents.
      offsets_map: A `Tensor` or `RaggedTensor` of type `variant`, used to map
        the post-normalized string offsets to pre-normalized string offsets. It
        has the same shape as the results tensor. offsets_map is an input to
        `find_source_offsets` op.
    """
    with ops.name_scope(name, "NormalizeUTF8WithOffsets", [input]):
        strings = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, dtype=dtypes.string)

        if not ragged_tensor.is_ragged(strings):
            return gen_normalize_ops.normalize_utf8_with_offsets_map(
                strings, normalization_form)

        # Ragged input: run the op on the flat values and re-attach the row
        # partitions of the input to both outputs.
        normalized, offsets = gen_normalize_ops.normalize_utf8_with_offsets_map(
            strings.flat_values, normalization_form)
        ragged_result = strings.with_flat_values(normalized)
        ragged_offsets = strings.with_flat_values(offsets)
        return ragged_result, ragged_offsets
def _ragged_segment_aggregate(unsorted_segment_op, data, segment_ids, num_segments, name=None):
    """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

    Returns a RaggedTensor `output` with `num_segments` rows, where the row
    `output[i]` is formed by combining all rows of `data` whose corresponding
    `segment_id` is `i`. The values in each row are combined using
    `unsorted_segment_op`.

    The length of the row `output[i]` will be the maximum of the lengths of
    all rows of `data` whose corresponding `segment_id` is `i`. If no `data`
    rows correspond to a given segment ID, then the output row for that segment
    ID will be empty.

    Args:
      unsorted_segment_op: The tensorflow `op` that should be used to combine
        values in each row. Must have the same signature and basic behavior as
        `unsorted_segment_sum`, `unsorted_segment_max`, etc.
      data: A `RaggedTensor` containing the values to be combined.
      segment_ids: A `Tensor` or `RaggedTensor`. Must have type `int64` or
        `int32`. `segment_ids.shape` must be a prefix of `data.shape`.
        `segment_ids` is not required to be sorted.
      num_segments: An `int32` or `int64` scalar.
      name: A name prefix for the returned tensor (optional).

    Returns:
      A `RaggedTensor` containing the aggregated values. The returned tensor
      has the same dtype as `data`, and its shape is
      `[num_segments] + data.shape[segment_ids.rank:]`. When neither `data`
      nor `segment_ids` is ragged, the result of `unsorted_segment_op` is
      returned directly.

    Raises:
      ValueError: If segment_ids.shape is not a prefix of data.shape.
    """
    # Fully dense inputs need no ragged handling.
    if not ragged_tensor.is_ragged(data) and not ragged_tensor.is_ragged(segment_ids):
        return unsorted_segment_op(data, segment_ids, num_segments, name)

    with ops.name_scope(name, 'RaggedSegment',
                        [data, segment_ids, num_segments]) as name:
        data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
        segment_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            segment_ids, name='segment_ids')

        if ragged_tensor.is_ragged(segment_ids):
            if not ragged_tensor.is_ragged(data):
                raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                                 'but segment_ids is ragged and data is not.')
            # Both are ragged: require identical outer splits, then strip
            # the outer dimension from both and recurse.
            splits_match = check_ops.assert_equal(
                segment_ids.row_splits,
                data.row_splits,
                message='segment_ids.shape must be a prefix of data.shape')
            with ops.control_dependencies([splits_match]):
                return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                                 segment_ids.values, num_segments,
                                                 name)

        segment_ids = math_ops.cast(segment_ids, dtypes.int64)

        # Length of every row of `data`.
        splits = data.row_splits
        row_lengths = splits[1:] - splits[:-1]

        # Each output row is as long as the longest data row assigned to its
        # segment; the result is clamped so that it is never below 0.
        longest_per_segment = math_ops.unsorted_segment_max(
            row_lengths, segment_ids, num_segments)
        out_row_lengths = math_ops.maximum(longest_per_segment, 0)
        assert out_row_lengths.dtype == dtypes.int64

        # Row splits of the output: a leading 0 followed by the running sum
        # of the output row lengths.
        leading_zero = array_ops.zeros([1], dtypes.int64)
        out_splits = array_ops.concat(
            [leading_zero, math_ops.cumsum(out_row_lengths)], axis=0)

        # For each data row, the [start, limit) range of output value
        # positions that its values are aggregated into.
        row_starts = array_ops.gather(out_splits, segment_ids)
        row_limits = row_starts + row_lengths

        # Target output position of every value of `data.values`.
        # NOTE(review): `range` must resolve to a ragged range op here (its
        # result is read through `.values`), not the Python builtin; confirm
        # against the file's imports.
        value_targets = range(row_starts, row_limits).values

        # Aggregate the values, using the target positions as segment ids.
        out_values = _ragged_segment_aggregate(
            unsorted_segment_op, data.values, value_targets, out_splits[-1])
        return ragged_tensor.RaggedTensor.from_row_splits(out_values, out_splits)
def unicode_encode(input, output_encoding, errors="replace", replacement_char=65533, name=None):
    r"""Encodes each sequence of Unicode code points in `input` into a string.

    `result[i1...iN]` is the string formed by concatenating the Unicode
    codepoints `input[i1...iN, :]`, encoded using `output_encoding`.

    Args:
      input: An `N+1` dimensional potentially ragged integer tensor with shape
        `[D1...DN, num_chars]`.
      output_encoding: Unicode encoding that should be used to encode each
        codepoint sequence. Can be `"UTF-8"`, `"UTF-16-BE"`, or `"UTF-32-BE"`.
      errors: Specifies the response when an invalid codepoint is encountered
        (optional). One of:
              * `'replace'`: Replace invalid codepoint with the
                `replacement_char`. (default)
              * `'ignore'`: Skip invalid codepoints.
              * `'strict'`: Raise an exception for any invalid codepoint.
      replacement_char: The replacement character codepoint to be used in place
        of any invalid input when `errors='replace'`. Any valid unicode
        codepoint may be used. The default value is the default unicode
        replacement character, U+FFFD (decimal 65533).
      name: A name for the operation (optional).

    Returns:
      A `N` dimensional `string` tensor with shape `[D1...DN]`.

    Raises:
      ValueError: If the rank of `input` is not statically known or is 0.

    #### Example:
      ```python
      >>> input = [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
      >>> unicode_encode(input, 'UTF-8')
      ['G\xc3\xb6\xc3\xb6dnight', '\xf0\x9f\x98\x8a']
      ```
    """
    with ops.name_scope(name, "UnicodeEncode", [input]):
        input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        if input_tensor.shape.ndims is None:
            raise ValueError("Rank of input_tensor must be statically known.")

        if ragged_tensor.is_ragged(input_tensor):
            if input_tensor.flat_values.shape.ndims > 1:
                # If the flat_values of our ragged tensor is multi-dimensional,
                # we can process it separately and our output will have the
                # same nested splits as our input.
                return input_tensor.with_flat_values(
                    unicode_encode(input_tensor.flat_values, output_encoding,
                                   errors, replacement_char))
            elif input_tensor.ragged_rank > 1:
                # Recursively process the values of the ragged tensor.
                return input_tensor.with_values(
                    unicode_encode(input_tensor.values, output_encoding, errors,
                                   replacement_char))
            else:
                # Our ragged tensor is of the correct shape (rank 1 flat_values
                # tensor with ragged_rank of 1) so we can process it as normal.
                return gen_string_ops.unicode_encode(
                    input_values=input_tensor.values,
                    input_splits=input_tensor.row_splits,
                    output_encoding=output_encoding,
                    errors=errors,
                    replacement_char=replacement_char)
        else:
            if input_tensor.shape.ndims == 2:
                # The input tensor is of the correct 2-D shape, it's just not
                # ragged.
                return unicode_encode(
                    ragged_tensor.RaggedTensor.from_tensor(input_tensor),
                    output_encoding, errors, replacement_char)
            elif input_tensor.shape.ndims > 2:
                # We need to initially flatten the input tensor to 2-D, and
                # then can reshape the output of our processed flattened
                # tensor.
                flat_input_tensor = array_ops.reshape(
                    input_tensor,
                    array_ops.stack([-1, array_ops.shape(input_tensor)[-1]]))
                flat_output_tensor = unicode_encode(flat_input_tensor,
                                                    output_encoding, errors,
                                                    replacement_char)
                # A static shape with unknown dimensions cannot be used as a
                # reshape target, so fall back to the dynamic shape in that
                # case (the flattening above already relies on it).
                output_shape = input_tensor.shape[:-1]
                if not output_shape.is_fully_defined():
                    output_shape = array_ops.shape(input_tensor)[:-1]
                return array_ops.reshape(flat_output_tensor, output_shape)
            elif input_tensor.shape.ndims == 0:
                raise ValueError("input_tensor's rank must be at least 1.")
            else:
                # Our input tensor is rank 1, so we create a ragged tensor with
                # an added dimension to create the correct input shape & type,
                # and then remove the additional dimension from the output and
                # return the string scalar.
                ragged_input_tensor = ragged_tensor.RaggedTensor.from_row_splits(
                    input_tensor,
                    array_ops.stack(
                        [0, array_ops.shape(input_tensor, out_type=dtypes.int32)[0]]),
                    validate=False)
                output_tensor = unicode_encode(ragged_input_tensor,
                                               output_encoding, errors,
                                               replacement_char)
                return array_ops.reshape(output_tensor, [])
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
    """Ragged compatible squeeze.

    If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

    If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
    where `N` is the number of elements in the squeezed dimensions.

    Args:
      input: A potentially ragged tensor. The input to squeeze.
      axis: An optional list of ints. Defaults to `None`. If the `input` is
        ragged, it only squeezes the dimensions listed. It fails if `input` is
        ragged and axis is []. If `input` is not ragged it calls tf.squeeze.
        Note that it is an error to squeeze a dimension that is not 1. It must
        be in the range of [-rank(input), rank(input)).
      name: A name for the operation (optional).

    Returns:
      A potentially ragged tensor. Contains the same data as input,
      but has one or more dimensions of size 1 removed.

    Raises:
      ValueError: If `input` is ragged and `axis` is `None`.
      TypeError: If `input` is ragged and `axis` is neither an int nor a list
        or tuple of ints.
    """
    with ops.name_scope(name, 'RaggedSqueeze', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        if isinstance(input, ops.Tensor):
            return array_ops.squeeze(input, axis, name)

        if axis is None:
            raise ValueError('Ragged.squeeze must have an axis argument.')
        if isinstance(axis, int):
            axis = [axis]
        elif ((not isinstance(axis, (list, tuple))) or
              (not all(isinstance(d, int) for d in axis))):
            raise TypeError('Axis must be a list or tuple of integers.')

        # Normalize all the dims in axis to be positive, then split them into
        # dims that live inside flat_values (re-based onto flat_values) and
        # dims covered by the ragged partitions.
        axis = [ragged_util.get_positive_axis(d, input.shape.ndims) for d in axis]
        dense_dims = []
        ragged_dims = []
        for dim in axis:
            if dim > input.ragged_rank:
                dense_dims.append(dim - input.ragged_rank)
            else:
                ragged_dims.append(dim)

        # Make sure the specified ragged dimensions are squeezable.
        assertion_list = []
        scalar_tensor_one = constant_op.constant(1, dtype=input.row_splits.dtype)
        for i, r in enumerate(input.nested_row_lengths()):
            # `r` holds the row lengths for dimension (i + 1).
            if i + 1 in ragged_dims:
                assertion_list.append(
                    control_flow_ops.Assert(
                        math_ops.reduce_all(math_ops.equal(r, scalar_tensor_one)),
                        ['the given axis (axis = %d) is not squeezable!' % (i + 1)]))
        if 0 in ragged_dims:
            # A single outer row means the outermost row_splits has exactly
            # two entries.
            scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
            assertion_list.append(
                control_flow_ops.Assert(
                    math_ops.equal(
                        array_ops.size(input.row_splits), scalar_tensor_two),
                    ['the given axis (axis = 0) is not squeezable!']))

        # Till now, we are sure that the ragged dimensions are squeezable.
        squeezed_rt = control_flow_ops.with_dependencies(assertion_list,
                                                         input.flat_values)

        if dense_dims:
            # Gives error if the dense dimension is not squeezable.
            squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

        # Keep the row_splits of every ragged dimension that is not squeezed;
        # each row_splits tensor is for dimension #(i+1).
        remaining_row_splits = [
            row_split
            for i, row_split in enumerate(input.nested_row_splits)
            if (i + 1) not in ragged_dims
        ]

        # Take care of the first row if it is to be squeezed.
        if remaining_row_splits and 0 in ragged_dims:
            remaining_row_splits.pop(0)

        squeezed_rt = RaggedTensor.from_nested_row_splits(squeezed_rt,
                                                          remaining_row_splits)

        # Corner case: when removing all the ragged dimensions and the output
        # is a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
        if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
            squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

        return squeezed_rt
def _ragged_reduce_aggregate(reduce_op, unsorted_segment_op, rt_input, axis, keepdims, name=None):
    """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

    Reduces `rt_input` along the dimensions given in `axis`. The rank of the
    tensor is reduced by 1 for each entry in `axis`. If `axis` is not
    specified, then all dimensions are reduced, and a scalar value is returned.

    This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
    if not, then reducing multiple axes will return incorrect results. (In
    particular, reducing multiple axes is currently implemented by reducing the
    axes one at a time.)

    Args:
      reduce_op: The tensorflow `op` that should be used to reduce values in
        uniform dimensions. Must have the same signature and basic behavior as
        `reduce_sum`, `reduce_max`, etc.
      unsorted_segment_op: The tensorflow `op` that should be used to combine
        values in ragged dimensions. Must have the same signature and basic
        behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
      rt_input: A `Tensor` or `RaggedTensor` containing the values to be
        reduced.
      axis: The axis or axes to reduce. May be `None` (to reduce all axes), an
        `int` (to reduce a single axis), a `list` or `tuple` of `int` (to
        reduce a given set of axes), or a `Tensor` with a constant value.
        Negative values count from the last axis.
      keepdims: If true, retains reduced dimensions with length 1. Only
        supported when `rt_input` is not ragged, in which case it is forwarded
        to `reduce_op`.
      name: A name prefix for the returned tensor (optional).

    Returns:
      A `RaggedTensor` containing the reduced values. The returned tensor
      has the same dtype as `data`, and its shape is given by removing the
      dimensions specified in `axis` from `rt_input.shape`. The `ragged_rank`
      of the returned tensor is given by subtracting any ragged dimensions
      specified in `axis` from `rt_input.ragged_rank`.

    Raises:
      ValueError: If `axis` contains a `Tensor` whose value is not constant,
        or if `keepdims` is true and `rt_input` is ragged.
    """
    if not ragged_tensor.is_ragged(rt_input):
        # Forward `keepdims` so that it is honored for dense inputs instead
        # of being silently dropped.
        return reduce_op(rt_input, axis, keepdims=keepdims, name=name)

    if keepdims:
        raise ValueError('keepdims=True is not supported for RaggedTensors.')

    if isinstance(axis, ops.Tensor):
        axis = tensor_util.constant_value(axis)
        if axis is None:
            raise ValueError('axis must be known at graph construction time.')
        if isinstance(axis, np.ndarray):
            axis = axis.tolist()

    # When reducing all axes, just ignore splits & reduce the inner values.
    if axis is None:
        return reduce_op(rt_input.flat_values, None, name=name)

    with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
        if isinstance(axis, (tuple, list)):
            if not axis:
                return rt_input
            elif len(axis) == 1:
                axis = axis[0]
            else:
                # Axes are reduced one at a time, innermost first. Negative
                # axes must be made positive against the ORIGINAL rank before
                # sorting: sorting raw values would order them incorrectly,
                # and a negative axis would refer to a different dimension
                # once the rank has shrunk.
                rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                    rt_input, name='rt_input')
                ndims = rt_input.shape.ndims
                axis = sorted(
                    ragged_util.get_positive_axis(a, ndims) for a in axis)
                # Reducing one axis at a time is less efficient, and only
                # works for associative ops. (In particular, it does not work
                # for reduce_mean.) However, reducing multiple axes at once
                # will probably require a nontrivial c++ op.
                inner_reduced = _ragged_reduce_aggregate(
                    reduce_op, unsorted_segment_op, rt_input, axis[-1], keepdims)
                return _ragged_reduce_aggregate(
                    reduce_op, unsorted_segment_op, inner_reduced, axis[:-1],
                    keepdims)

        rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            rt_input, name='rt_input')

        axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)

        if axis == 0:
            # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
            row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
            num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
            # NOTE(review): `range` must resolve to a ragged range op here
            # (its result is read through `.values`), not the Python builtin;
            # confirm against the file's imports.
            segment_ids = range(row_lengths).values
            return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                             segment_ids, num_segments)
        elif axis == 1:
            # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
            num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
            segment_ids = segment_id_ops.row_splits_to_segment_ids(
                rt_input.row_splits)
            return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                             segment_ids, num_segments)
        else:
            # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
            #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
            return rt_input.with_values(
                _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                         rt_input.values, axis - 1, keepdims))
def _ragged_stack_concat_helper(rt_inputs, axis, stack_values): """Helper function to concatenate or stack ragged tensors. Args: rt_inputs: A list of RaggedTensors or Tensors to combine. axis: The axis along which to concatenate or stack. stack_values: A boolean -- if true, then stack values; otherwise, concatenate them. Returns: A RaggedTensor. Raises: ValueError: If rt_inputs is empty, or if axis is out of range. """ # Validate parameters. if not rt_inputs: raise ValueError('rt_inputs may not be empty.') # Convert input tensors. rt_inputs = [ ragged_tensor.convert_to_tensor_or_ragged_tensor( rt_input, name='rt_input') for rt_input in rt_inputs ] row_splits_dtype, rt_inputs = ragged_tensor.match_row_splits_dtypes( *rt_inputs, return_dtype=True) rt_inputs = list(rt_inputs) # Special case: if there's only one input, then return it as-is. if len(rt_inputs) == 1: if stack_values: return ragged_array_ops.expand_dims(rt_inputs[0], axis=axis) else: return rt_inputs[0] # Check the rank (number of dimensions) of the input tensors. ndims = None for rt in rt_inputs: if ndims is None: ndims = rt.shape.ndims else: rt.shape.assert_has_rank(ndims) out_ndims = ndims if (ndims is None or not stack_values) else ndims + 1 axis = ragged_util.get_positive_axis(axis, out_ndims) # If all the inputs are Tensors, and we're combining the final dimension, # then we can delegate to the tf.stack/tf.concat operation, and return a # Tensor. if all(not ragged_tensor.is_ragged(rt) for rt in rt_inputs): if ndims is not None and (axis == out_ndims - 1 or axis == ndims - 1): if stack_values: return array_ops.stack(rt_inputs, axis) else: return array_ops.concat(rt_inputs, axis) # Convert any Tensor inputs to RaggedTensors. This makes it # possible to concatenate Tensors and RaggedTensors together. 
for i in range(len(rt_inputs)): if not ragged_tensor.is_ragged(rt_inputs[i]): rt_inputs[i] = ragged_tensor.RaggedTensor.from_tensor( rt_inputs[i], ragged_rank=1, row_splits_dtype=row_splits_dtype) # Convert the input tensors to all have the same ragged_rank. ragged_rank = max(max(rt.ragged_rank for rt in rt_inputs), 1) rt_inputs = [_increase_ragged_rank_to(rt, ragged_rank, row_splits_dtype) for rt in rt_inputs] if axis == 0: return _ragged_stack_concat_axis_0(rt_inputs, stack_values) elif axis == 1: return _ragged_stack_concat_axis_1(rt_inputs, stack_values) else: # axis > 1: recurse. values = [rt.values for rt in rt_inputs] splits = [[rt_input.row_splits] for rt_input in rt_inputs] with ops.control_dependencies(ragged_util.assert_splits_match(splits)): return ragged_tensor.RaggedTensor.from_row_splits( _ragged_stack_concat_helper(values, axis - 1, stack_values), splits[0][0], validate=False)
def where(condition, x=None, y=None, name=None):
  """Return the elements, either from `x` or `y`, depending on the `condition`.

  : If both `x` and `y` are `None`:
    Returns the coordinates of true elements of `condition`. The coordinates
    are returned in a 2-D tensor with shape
    `[num_true_values, dim_size(condition)]`, where `result[i]` is the
    coordinates of the `i`th true value (in row-major order).

  : If both `x` and `y` are non-`None`:
    Returns a tensor formed by selecting values from `x` where condition is
    true, and from `y` when condition is false.  In particular:

    : If `condition`, `x`, and `y` all have the same shape:

      * `result[i1...iN] = x[i1...iN]` if `condition[i1...iN]` is true.
      * `result[i1...iN] = y[i1...iN]` if `condition[i1...iN]` is false.

    : Otherwise:

      * `condition` must be a vector.
      * `x` and `y` must have the same number of dimensions.
      * The outermost dimensions of `condition`, `x`, and `y` must all have
        the same size.
      * `result[i] = x[i]` if `condition[i]` is true.
      * `result[i] = y[i]` if `condition[i]` is false.

  Args:
    condition: A potentially ragged tensor of type `bool`
    x: A potentially ragged tensor (optional).
    y: A potentially ragged tensor (optional).  Must be specified if `x` is
      specified.  Must have the same rank and type as `x`.
    name: A name of the operation (optional)

  Returns:
    : If both `x` and `y` are `None`:
      A `Tensor` with shape `(num_true, dim_size(condition))`.
    : Otherwise:
      A potentially ragged tensor with the same type, rank, and outermost
      dimension size as `x` and `y`.
      `result.ragged_rank = max(x.ragged_rank, y.ragged_rank)`.

  Raises:
    ValueError: When exactly one of `x` or `y` is non-`None`; or when
      `condition`, `x`, and `y` have incompatible shapes.

  #### Examples:
    ```python
    >>> # Coordinates where condition is true.
    >>> condition = tf.ragged.constant_value(
    ...     [[True, False, True], [False, True]])
    >>> ragged.where(condition)
    [[0, 0], [0, 2], [1, 1]]

    >>> # Elementwise selection between x and y, based on condition.
    >>> condition = tf.ragged.constant_value(
    ...     [[True, False, True], [False, True]])
    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
    >>> ragged.where(condition, x, y)
    [['A', 'b', 'C'], ['d', 'E']]

    >>> # Row selection between x and y, based on condition.
    >>> condition = [True, False]
    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
    >>> ragged.where(condition, x, y)
    [['A', 'B', 'C'], ['d', 'e']]
    ```
  """
  # `x` and `y` select between two modes, so they must be given together.
  if (x is None) != (y is None):
    raise ValueError('x and y must be either both None or both non-None')

  # The caller's `name` goes first and 'RaggedWhere' is only the fallback,
  # matching the (name, default_name, values) order used by the other ragged
  # ops.  With the arguments the other way round, `name` was never applied.
  with ops.name_scope(name, 'RaggedWhere', [condition, x, y]):
    condition = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        condition, name='condition')
    if x is None:
      # Coordinate mode: report where `condition` is true.
      return _coordinate_where(condition)

    # Selection mode: choose between `x` and `y`.
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, name='y')
    return _elementwise_where(condition, x, y)
def gather(params, indices, validate_indices=None, axis=0, batch_dims=0,
           name=None):
  """Gathers ragged slices from `params` axis `0` according to `indices`.

  The output satisfies:

  ```python
  output.shape = indices.shape + params.shape[1:]
  output.ragged_rank = indices.shape.ndims + params.ragged_rank
  output[i...j, d0...dn] = params[indices[i...j], d0...dn]
  ```

  Either of `params` and `indices` may be ragged.  `indices` must have dtype
  `int32` or `int64`.  If any index is out of bounds, then an error is
  returned.

  Examples:

  ```python
  >>> params = tf.constant(['a', 'b', 'c', 'd', 'e'])
  >>> indices = tf.constant([3, 1, 2, 1, 0])
  >>> ragged_params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
  >>> ragged_indices = tf.ragged.constant([[3, 1, 2], [1], [], [0]])

  >>> print(ragged.gather(params, ragged_indices))
  [['d', 'b', 'c'], ['b'], [], ['a']]

  >>> print(ragged.gather(ragged_params, indices))
  [['e'], ['d'], [], ['d'], ['a', 'b', 'c']]

  >>> print(ragged.gather(ragged_params, ragged_indices))
  [[['e'], ['d'], []], [['d']], [], [['a', 'b', 'c']]]
  ```

  Args:
    params: The potentially ragged tensor from which to gather values. Must be
      at least rank 1.
    indices: The potentially ragged tensor indicating which values to gather.
      Must have dtype `int32` or `int64`.  Values must be in the range
      `[0, params.shape[0])`.
    validate_indices: Ignored.
    axis: Must be zero.
    batch_dims: Must be zero.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor`, where `output.dtype=params.dtype` and
    `output.shape=indices.shape + params.shape[1:]` and
    `output.ragged_rank=indices.shape.ndims + params.ragged_rank`.
    When neither input is ragged after conversion, the result of
    `array_ops.gather` is returned instead.

  Raises:
    ValueError: If `axis` or `batch_dims` is not the int `0`, or if
      indices.shape.ndims is not known statically.
  """
  del validate_indices  # Accepted but unused.

  if not (isinstance(axis, int) and axis == 0):
    raise ValueError('axis != 0 is not supported for ragged gather yet.')
  if not (isinstance(batch_dims, int) and batch_dims == 0):
    raise ValueError('batch_dims != 0 is not supported for ragged gather yet.')

  with ops.name_scope(name, 'RaggedGather', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)

    # Ragged indices: gather with the inner values, then reattach the
    # row partitioning of `indices`.
    if ragged_tensor.is_ragged(indices):
      inner_gathered = gather(params, indices.values)
      return indices.with_values(inner_gathered)

    # Both inputs dense: the regular gather op is enough.
    if not ragged_tensor.is_ragged(params):
      return array_ops.gather(params, indices)

    # Ragged params with dense indices: use the ragged_gather kernel.
    indices = ops.convert_to_tensor(indices)
    indices_rank = indices.shape.ndims
    if indices_rank is None:
      raise ValueError('indices.shape.ndims must be known statically')

    nested_splits = params.nested_row_splits
    output_ragged_rank = indices_rank + len(nested_splits) - 1
    gathered = gen_ragged_array_ops.ragged_gather(
        indices=indices,
        params_dense_values=params.flat_values,
        params_nested_splits=nested_splits,
        OUTPUT_RAGGED_RANK=output_ragged_rank)

    # Compose the RaggedTensor from splits & values.
    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        gathered.output_dense_values,
        gathered.output_nested_splits,
        validate=False)
def map_fn(fn,
           elems,
           dtype=None,
           parallel_iterations=None,
           back_prop=True,
           swap_memory=False,
           infer_shape=True,
           name=None):
  """map on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `map_fn` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the
  tensors unpacked from `elems`. `dtype` is the data type of the return
  value of `fn`. Users must provide `dtype` if it is different from
  the data type of `elems`.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Furthermore, `fn` may emit a different structure than its input.  For
  example, `fn` may look like: `fn = lambda t1: return (t1 + 1, t1 - 1)`.  In
  this case, the `dtype` parameter is not optional: `dtype` must be a type or
  (possibly nested) tuple of types matching the output of `fn`.

  To apply a functional operation to the nonzero elements of a SparseTensor
  one of the following methods is recommended. First, if the function is
  expressible as TensorFlow ops, use

  ```python
    result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
  ```

  If, however, the function is not expressible as a TensorFlow op, then use

  ```python
  result = SparseTensor(
    input.indices, map_fn(fn, input.values), input.dense_shape)
  ```

  instead.

  When executing eagerly, map_fn does not execute in parallel even if
  `parallel_iterations` is set to a value > 1. You can still get the
  performance benefits of running a function in parallel by using the
  `tf.contrib.eager.defun` decorator,

  ```python
  # Assume the function being used in map_fn is fn.
  # To ensure map_fn calls fn in parallel, use the defun decorator.
  @tf.contrib.eager.defun
  def func(tensor):
    return tf.map_fn(fn, tensor)
  ```

  Note that if you use the defun decorator, any non-TensorFlow Python code
  that you may have written in your function won't get executed. See
  `tf.contrib.eager.defun` for more details. The recommendation would be to
  debug without defun but switch to defun to get performance benefits of
  running map_fn in parallel.

  Args:
    fn: The callable to be performed.  It accepts one argument, which will have
      the same (possibly nested) structure as `elems`.  Its output must have
      the same structure as `dtype` if one is provided, otherwise it must have
      the same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which will
      be unpacked along their first dimension.  The nested sequence of the
      resulting slices will be applied to `fn`.
    dtype: (optional) The output type(s) of `fn`.  If `fn` returns a structure
      of Tensors differing from the structure of `elems`, then `dtype` is not
      optional and must have the same structure as the output of `fn`. Use
      `RaggedTensorType` to declare an output of type `RaggedTensor`.
    parallel_iterations: (optional) The number of iterations allowed to run in
      parallel. When graph building, the default value is 10. While executing
      eagerly, the default value is set to 1.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A possibly nested sequence of potentially ragged tensors.  Each
    tensor packs the results of applying `fn` to tensors unpacked from `elems`
    along the first dimension, from first to last.

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `dtype` do not match, or if elems is a SparseTensor.
    ValueError: if the lengths of the output of `fn` and `dtype` do not match,
      or if an element of `elems` is statically known to be a scalar.

  #### Examples:

    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    squares = map_fn(lambda x: x * x, elems)
    # squares == [1, 4, 9, 16, 25, 36]
    ```

    ```python
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
    # alternate == [-1, 2, -3]
    ```

    ```python
    elems = np.array([1, 2, 3])
    alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
    # alternates[0] == [1, 2, 3]
    # alternates[1] == [-1, -2, -3]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]])
    mean = map_fn(tf.reduce_mean, elems)
    # mean == [2, 4, 6]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]], dtype=tf.int64)
    out = map_fn(fn=lambda x: x+1, elems=elems,
      dtype=ragged.RaggedTensorType(type=tf.int64, ragged_rank=0))
    # out = ragged.constant([[2, 3, 4], [5, 6], [7, 8]])
    ```
  """
  if not callable(fn):
    raise TypeError("fn must be callable.")

  if isinstance(elems, sparse_tensor.SparseTensor):
    raise TypeError(
        "To perform a map on the values of a sparse tensor use either "
        " SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
        " SparseTensor(input.indices, map_fn(fn, input.values), "
        "input.dense_shape)")

  in_graph_mode = not context.executing_eagerly()
  # Set the default number of parallel_iterations depending on graph/eager mode.
  if in_graph_mode and not parallel_iterations:
    parallel_iterations = 10
  elif not in_graph_mode and not parallel_iterations:
    parallel_iterations = 1

  if not in_graph_mode and parallel_iterations > 1:
    logging.log_first_n(logging.WARN, "Setting parallel_iterations > 1 has no "
                        "effect when executing eagerly. Consider calling map_fn"
                        " with tf.contrib.eager.defun to execute fn in "
                        "parallel.", 1)
    parallel_iterations = 1

  input_is_sequence = nest.is_sequence(elems)

  def input_flatten(x):
    return nest.flatten(x) if input_is_sequence else [x]

  def input_pack(x):
    return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

  elems_flat = input_flatten(elems)

  with ops.name_scope(name, "map", elems_flat):
    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    varscope = None
    restore_caching_device = False
    if in_graph_mode:
      # Any get_variable calls in fn will cache the first call locally
      # and not issue repeated network I/O requests for each iteration.
      varscope = vs.get_variable_scope()
      if varscope.caching_device is None:
        # TODO(ebrevdo): Change to using colocate_with here and in other
        # methods.
        varscope.set_caching_device(lambda op: op.device)
        restore_caching_device = True

    # Everything up to and including the loop construction runs under the
    # temporary caching device.  The `finally` clause makes sure the override
    # is undone even when conversion, validation, or `fn` raises, so a failed
    # call does not leave the variable scope modified.
    try:
      elems_flat = [
          ragged_tensor.convert_to_tensor_or_ragged_tensor(elem, name="elem")
          for elem in elems_flat
      ]

      # We can either infer the output, or we can assume that it will be the
      # same as the input structure.
      dtype = dtype or input_pack([elem.dtype for elem in elems_flat])

      # Find the number of iterations, n may be known statically.
      if isinstance(elems_flat[0], ragged_tensor.RaggedTensor):
        n = elems_flat[0].nrows(out_type=dtypes.int32)
      else:
        static_shape = elems_flat[0].shape
        if static_shape.ndims is not None and static_shape.ndims < 1:
          if len(elems_flat) == 1:
            raise ValueError(
                "elems must be a 1+ dimensional Tensor, not a scalar")
          else:
            raise ValueError(
                "elements in elems must be 1+ dimensional Tensors, not scalars")
        n = (tensor_shape.dimension_value(static_shape[0]) or
             array_ops.shape(elems_flat[0])[0])

      n = math_ops.cast(n, dtype=dtypes.int32)

      # Create a flat list of TAs.

      # Flatten the dtype structure to a list.
      dtype_flat = nest.flatten(dtype)

      # decompose to components
      dtype_components = [_maybe_decompose_dtype(d) for d in dtype_flat]
      dtype_components_flat = nest.flatten(dtype_components)

      # Create TensorArrays.
      accs_ta = [
          tensor_array_ops.TensorArray(
              dtype=t, dynamic_size=False, infer_shape=infer_shape, size=n)
          for t in dtype_components_flat
      ]

      i = constant_op.constant(0, dtype=dtypes.int32)

      def compute(i, tas):
        """The loop body of map_fn.

        Args:
          i: the loop counter
          tas: the flat TensorArray accumulator list

        Returns:
          (i + 1, tas): the updated counter + updated TensorArrays

        Raises:
          TypeError: if dtype and packed_fn_values structure do not match
          ValueError: if dtype and packed_fn_values lengths do not match
        """
        # Get Tensors or RaggedTensors sliced at i, then pack it back to the
        # original structure.
        packed_values = input_pack([elem_flat[i] for elem_flat in elems_flat])
        packed_fn_values = fn(packed_values)

        # Check that the structure of the output matches what was declared or
        # inferred.
        # nest.assert_same_structure(dtype or elems, packed_fn_values)

        # Flatten and decompose to a list of Tensors
        flat_fn_values = nest.flatten(packed_fn_values)

        # If we declared that we are expecting a RaggedTensor output, but we
        # get a Tensor output. We should try to convert it to a RaggedTensor.
        flat_fn_composite_tensors = list(
            _convert_declared(flat_fn_values, dtype_flat))

        flat_fn_components = [
            _maybe_decompose_tensor(t) for t in flat_fn_composite_tensors
        ]
        flat_fn_tensors = nest.flatten(flat_fn_components)

        # Write to TAs.
        tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_fn_tensors)]

        return (i + 1, tas)

      _, r_a = control_flow_ops.while_loop(
          lambda i, _: i < n, compute, (i, accs_ta),
          parallel_iterations=parallel_iterations,
          back_prop=back_prop,
          swap_memory=swap_memory,
          maximum_iterations=n)
    finally:
      # TODO(akshayka): Remove the in_graph_mode check once caching devices are
      # supported in Eager
      if restore_caching_device:
        varscope.set_caching_device(None)

    # Pack back into a list of components
    results_as_components = nest.pack_sequence_as(dtype_components, r_a)

    # Stack TensorArrays for Tensor outputs, and concat RaggedTensor outputs.
    def _stack_or_concat(e):
      if isinstance(e, _RaggedTensorComponents):
        return _concat_ragged_tensor_components(e)
      else:
        result = e.stack()
        return result

    results_flat_components = [
        _stack_or_concat(e) for e in results_as_components
    ]

    results_packed = [
        _maybe_recompose_tensor(c) for c in results_flat_components
    ]
    results_packed = nest.pack_sequence_as(dtype, results_packed)
    return results_packed
def gather_nd(params, indices, batch_dims=0, name=None):
  """Gather slices from `params` using `n`-dimensional indices.

  This operation is similar to `gather`, but it uses the innermost dimension
  of `indices` to define a slice into `params`.  In particular, if:

  * `indices` has shape `[A1...AN, I]`
  * `params` has shape `[B1...BM]`

  Then:

  * `result` has shape `[A1...AN, B_{I+1}...BM]`.
  * `result[a1...aN] = params[indices[a1...aN, :]]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BM]`.
    indices: A potentially ragged tensor with shape `[A1...AN, I]`.  Its rank
      and the size of its innermost dimension must be statically known, and
      its innermost dimension may not be ragged.
    batch_dims: Must be zero.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[A1...AN, B_{I+1}...BM]`.

  Raises:
    ValueError: If `batch_dims` is not the int `0`; if the rank of `indices`
      is unknown or zero; if the innermost dimension of `indices` is ragged
      or has unknown size.

  #### Examples:
    ```python
    >>> params = tf.compat.v1.ragged.constant_value(
    ...     [ [ ['000', '001'], ['010'              ]          ],
    ...       [ ['100'       ], ['110', '111', '112'], ['120'] ],
    ...       [ [            ], ['210'              ]          ] ])

    >>> # Gather 2D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2], [0]])
    [ [ [            ], ['210'] ]
      [ ['000', '001'], ['010'] ] ]

    >>> # Gather 1D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2, 1], [0, 0]])
    [['210'], ['000', '001']]

    >>> # Gather scalars from a 3D tensor
    >>> ragged.gather_nd(params, [[0, 0, 1], [1, 1, 2]])
    ['001', '112']
    ```
  """
  if not (isinstance(batch_dims, int) and batch_dims == 0):
    raise ValueError('batch_dims != 0 is not supported for ragged gather yet.')

  # With no ragged input, the dense op applies directly.
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.gather_nd(params, indices, name)

  with ops.name_scope(name, 'RaggedGatherNd', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)

    indices_shape = indices.shape
    indices_ndims = indices_shape.ndims
    if indices_ndims is None:
      raise ValueError('indices.rank be statically known.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')
    if (ragged_tensor.is_ragged(indices) and
        indices_ndims == indices.ragged_rank + 1):
      raise ValueError('The innermost dimension of indices may not be ragged')

    # `index_size` is the "n" in "gather_nd": how many dimensions of `params`
    # each index tuple addresses.
    index_size = tensor_shape.dimension_value(indices_shape[-1])
    if index_size is None:
      raise ValueError('indices.shape[-1] must be statically known.')

    # More than 2 index dimensions: recurse on the flat values.  Dense indices
    # are made ragged first, and the result is converted back to dense when
    # its raggedness comes only from that conversion.
    if indices_ndims > 2:
      started_dense = not ragged_tensor.is_ragged(indices)
      if started_dense:
        indices = ragged_tensor.RaggedTensor.from_tensor(
            indices,
            ragged_rank=indices_ndims - 2,
            row_splits_dtype=params.row_splits.dtype)
      inner_result = gather_nd(params, indices.flat_values)
      result = indices.with_flat_values(inner_result)
      if (started_dense and ragged_tensor.is_ragged(result) and
          result.ragged_rank == indices_ndims - 2):
        result = ragged_tensor.RaggedTensor.to_tensor(result)
      return result

    # From here on indices_ndims <= 2.  The innermost dimension of indices is
    # not ragged, so `indices` is dense and therefore `params` is the ragged
    # input.
    assert not ragged_tensor.is_ragged(indices)
    assert ragged_tensor.is_ragged(params)

    # Corner case: an empty index tuple selects all of `params`, so the
    # result is `params` tiled once per index tuple.
    if index_size == 0:
      params_ndims = params.ragged_rank + array_ops.rank(params.flat_values)
      for _ in range(indices_ndims - 1):
        params = ragged_array_ops.expand_dims(params, axis=0)
      multiples = array_ops.concat(
          [
              array_ops.shape(indices)[:-1],
              array_ops.ones([params_ndims], dtypes.int32),
          ],
          axis=0)
      return ragged_array_ops.tile(params, multiples)

    # Index tuples of size 1 are plain indices once flattened.
    if index_size == 1:
      return gather(params, array_ops.reshape(indices, [-1]))

    # General case: peel dimensions off `params` one at a time, rewriting the
    # index tuples as offsets into the progressively flattened params, and
    # finish with a ragged gather on those offsets.
    indices = math_ops.cast(indices, params.row_splits.dtype)

    # Collapse the outermost 2 dimensions of the index tuples & params.
    flat_indices = (
        array_ops.gather(params.row_splits, indices[..., 0]) +
        indices[..., 1])
    flat_params = params.values

    # Collapse any remaining dimensions.
    for dim in range(2, index_size):
      if not ragged_tensor.is_ragged(flat_params):
        # The rest of params is dense: append the unused index components
        # and let the dense gather_nd finish the job.
        flat_indices = array_ops.expand_dims(flat_indices, axis=1)
        flat_indices = array_ops.concat(
            [flat_indices, indices[..., dim:]], axis=1)
        return array_ops.gather_nd(flat_params, flat_indices)

      flat_indices = (
          array_ops.gather(flat_params.row_starts(), flat_indices) +
          indices[..., dim])
      flat_params = flat_params.values

    # Gather using the flattened index tuples and params.
    return gather(flat_params, flat_indices)
def strings_split_v1(input=None, sep=None, maxsplit=-1,  # pylint: disable=redefined-builtin
                     result_type="SparseTensor", source=None, name=None):
  """Split elements of `input` based on `sep`.

  Let N be the size of `input` (typically N will be the batch size). Split each
  element of `input` based on `sep` and return a `SparseTensor` or
  `RaggedTensor` containing the split tokens. Empty tokens are ignored.

  Examples:

  ```python
  >>> tf.strings.split(['hello world', 'a b c'])
  tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                  values=['hello', 'world', 'a', 'b', 'c']
                  dense_shape=[2, 3])

  >>> tf.strings.split(['hello world', 'a b c'], result_type="RaggedTensor")
  <tf.RaggedTensor [['hello', 'world'], ['a', 'b', 'c']]>
  ```

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, `input` of `"1<>2<><>3"` and
  `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace are regarded as a single separator, and the
  result will contain no empty strings at the start or end if the string has
  leading or trailing whitespace.

  Note that the above mentioned behavior matches python's str.split.

  Args:
    input: A string `Tensor` of rank `N`, the strings to split.  If
      `rank(input)` is not known statically, then it is assumed to be `1`.
    sep: `0-D` string `Tensor`, the delimiter character.
    maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result.
    result_type: The tensor type for the result: one of `"RaggedTensor"` or
      `"SparseTensor"`.
    source: alias for "input" argument.
    name: A name for the operation (optional).

  Raises:
    ValueError: If sep is not a string, or if `result_type` is neither
      `"RaggedTensor"` nor `"SparseTensor"`.

  Returns:
    A `SparseTensor` or `RaggedTensor` of rank `N+1`, the strings split
    according to the delimiter.
  """
  # `source` is the deprecated spelling of `input`; resolve the two first.
  input = deprecation.deprecated_argument_lookup(
      "input", input, "source", source)

  with ops.name_scope(name, "StringSplit", [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, dtype=dtypes.string, name="input")

    wants_sparse = result_type == "SparseTensor"

    # Rank-1 input with a sparse result maps directly onto the string op.
    if wants_sparse and input.shape.rank == 1:
      return string_ops.string_split_v2(input, sep=sep, maxsplit=maxsplit)

    # Otherwise split into a RaggedTensor and convert if needed.
    ragged_result = string_split_v2(input, sep=sep, maxsplit=maxsplit)
    if wants_sparse:
      return ragged_result.to_sparse()
    if result_type != "RaggedTensor":
      raise ValueError("result_type must be 'RaggedTensor' or 'SparseTensor'.")
    return ragged_result
def batch_gather(params, indices, name=None):
  """Gathers slices from `params` according to `indices` with batch dims.

  This operation is similar to `gather`, but it assumes that the leading `N`
  dimensions of `indices` and `params` are batch dimensions, and performs a
  gather within each batch.  In particular, when using this operation with `N`
  batch dimensions `B1...BN`:

  * `indices` has shape `[B1...BN, I]`
  * `params` has shape `[B1...BN, P1...PM]`.
  * `result` has shape `[B1...BN, I, P2...PM]`.
  * `result[b1...bN, i, p2...pM] =
    params[b1...bN, indices[b1...bN, i], p2...pM]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BN, P1...PM]` (`N>=0`,
      `M>0`).
    indices: A potentially ragged tensor with shape `[B1...BN, I]` (`N>=0`).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[B1...BN, I, P2...PM]`.
    `result.ragged_rank = max(indices.ragged_rank, params.ragged_rank)`.

  Raises:
    ValueError: If the rank of `indices` is unknown or zero, or if the batch
      shape implied by `indices` does not match `params`.

  #### Example:
    ```python
    >>> params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
    >>> indices = tf.ragged.constant([[1, 2, 0], [], [], [0, 0]])
    >>> tf.compat.v1.batch_gather(params, indices)
    [['b', 'c', 'a'], [], [], ['e', 'e']]
    ```
  """
  # Nothing ragged: defer to the dense implementation.
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.batch_gather(params, indices, name)

  shape_mismatch = 'batch shape from indices does not match params shape'

  with ops.name_scope(name, 'RaggedBatchGather', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)

    indices_ndims = indices.shape.ndims
    if indices_ndims is None:
      raise ValueError(
          'batch_gather does not allow indices with unknown shape.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')

    params_is_ragged = ragged_tensor.is_ragged(params)

    if not ragged_tensor.is_ragged(indices):
      # Dense indices, so params is the ragged input.
      if indices_ndims == 1:
        return ragged_gather_ops.gather(params, indices)
      if indices_ndims != 2:
        raise ValueError(shape_mismatch)
      # Shift the batch-local indices so they address params.values.
      row_offsets = array_ops.expand_dims(params.row_starts(), 1)
      global_indices = math_ops.cast(indices, row_offsets.dtype) + row_offsets
      return ragged_gather_ops.gather(params.values, global_indices)

    if indices_ndims > 2:
      # The outermost ragged dimension is a batch dimension: check that both
      # inputs partition it identically, then recurse one level down.
      if not params_is_ragged:
        raise ValueError(shape_mismatch)
      splits_match = check_ops.assert_equal(params.row_splits,
                                            indices.row_splits)
      with ops.control_dependencies([splits_match]):
        return ragged_tensor.RaggedTensor.from_row_splits(
            batch_gather(params.values, indices.values),
            indices.row_splits,
            validate=False)

    # Remaining case: `indices` is a 2D ragged tensor with 1 ragged dimension.
    # Make sure `params` is ragged too and has at least 2 dimensions.
    if not params_is_ragged:
      params_rank = params.shape.ndims
      if params_rank is not None and params_rank < 2:
        raise ValueError(shape_mismatch)
      params = ragged_tensor.RaggedTensor.from_tensor(
          params, ragged_rank=1, row_splits_dtype=indices.row_splits.dtype)

    # Turn within-batch indices into indices into params.values by adding the
    # start offset of the matching params row, then gather and restore the
    # row partitioning of `indices`.
    indices_per_row = indices.row_lengths()
    row_starts = params.row_starts()
    row_offsets = ragged_util.repeat(row_starts, indices_per_row, axis=0)
    global_index_values = (
        math_ops.cast(indices.values, row_offsets.dtype) + row_offsets)
    return ragged_tensor.RaggedTensor.from_row_splits(
        ragged_gather_ops.gather(params.values, global_index_values),
        indices.row_splits,
        validate=False)
def testElementwiseOpBroadcast(self, x, y, expected):
  """Checks that `x + y`, with both operands converted as int32, equals `expected`."""
  lhs, rhs = [
      ragged_tensor.convert_to_tensor_or_ragged_tensor(
          operand, dtype=dtypes.int32) for operand in (x, y)
  ]
  self.assertRaggedEqual(lhs + rhs, expected)
def batch_gather_with_default(params, indices, default_value='', name=None):
  """Same as `batch_gather` but inserts `default_value` for invalid indices.

  This operation is similar to `batch_gather` except that it will substitute
  the value for invalid indices with `default_value` as the contents.
  See `batch_gather` for more details.

  An index is invalid when it is negative or when it is greater than or equal
  to the number of elements available along the gathered dimension.

  Args:
    params: A potentially ragged tensor with shape `[B1...BN, P1...PM]` (`N>=0`,
      `M>0`).
    indices: A potentially ragged tensor with shape `[B1...BN, I]` (`N>=0`).
    default_value: A value to be inserted in places where `indices` are out of
      bounds. Must be the same dtype as params and either a scalar or rank 1.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[B1...BN, I, P2...PM]`.
    `result.ragged_rank = max(indices.ragged_rank, params.ragged_rank)`.

  Raises:
    ValueError: If `default_value` is neither a scalar nor a vector, or if the
      rank of `indices` or `params` is not statically known.

  #### Example:
    ```python
    >>> params = tf.ragged.constant([
          ['a', 'b', 'c'],
          ['d'],
          [],
          ['e']])
    >>> indices = tf.ragged.constant([[1, 2, -1], [], [], [0, 10]])
    >>> batch_gather_with_default(params, indices, 'FOO')
    [['b', 'c', 'FOO'], [], [], ['e', 'FOO']]
    ```
  """
  with ops.name_scope(name, 'RaggedBatchGatherWithDefault'):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params',
    )
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices',
    )
    default_value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        default_value, name='default_value',
    )
    # TODO(hterry): lift this restriction and support default_values
    #               of rank > 1
    # Compare by value, not identity: `is not 0` depends on CPython's small-int
    # cache and triggers a SyntaxWarning on Python 3.8+. An unknown rank
    # (`None`) is still rejected, exactly as before.
    if default_value.shape.ndims not in (0, 1):
      raise ValueError('"default_value" must be a scalar or vector')
    if indices.shape.ndims is None:
      raise ValueError('Indices must have a known rank.')
    if params.shape.ndims is None:
      raise ValueError('Params must have a known rank.')

    num_batch_dimensions = indices.shape.ndims - 1

    # The logic for this works as follows:
    # - create a padded params, where:
    #    padded_params[b1...bn, 0] = default_value
    #    padded_params[b1...bn, i] = params[b1...bn, i-1] (i>0)
    # - create an `upper_bounds` Tensor that contains the number of elements
    #   in each innermost rank. Broadcast `upper_bounds` to be the same shape
    #   as `indices`.
    # - check to see which index in `indices` are out of bounds and substitute
    #   it with the index containing `default_value` (the first).
    # - call batch_gather with the indices adjusted.
    with ops.control_dependencies([
        check_ops.assert_greater_equal(array_ops.rank(params),
                                       array_ops.rank(indices))]):
      if ragged_tensor.is_ragged(params):
        row_lengths = ragged_array_ops.expand_dims(
            params.row_lengths(axis=num_batch_dimensions),
            axis=-1)
        upper_bounds = math_ops.cast(row_lengths, indices.dtype)

        pad_shape = _get_pad_shape(params, indices)

        pad = ragged_tensor_shape.broadcast_to(
            default_value, pad_shape)
      else:
        params_shape = array_ops.shape(params)
        pad_shape = array_ops.concat([
            params_shape[:num_batch_dimensions],
            [1],
            params_shape[num_batch_dimensions + 1:params.shape.ndims]
        ], 0)
        upper_bounds = params_shape[num_batch_dimensions]
        pad = array_ops.broadcast_to(default_value, pad_shape)

      # Add `default_value` as the first value in the innermost (ragged) rank.
      pad = math_ops.cast(pad, params.dtype)
      padded_params = array_ops.concat(
          [pad, params], axis=num_batch_dimensions)

      # Adjust the indices by substituting out-of-bound indices to the
      # default-value index (which is the first element).
      shifted_indices = indices + 1
      # `upper_bounds` is an element count, so the largest valid index is
      # `upper_bounds - 1`. An index equal to the count must be treated as
      # out of bounds; otherwise its shifted value would point one past the
      # end of `padded_params`.
      is_out_of_bounds = (indices < 0) | (indices >= upper_bounds)
      adjusted_indices = ragged_where_op.where(
          is_out_of_bounds,
          x=array_ops.zeros_like(indices), y=shifted_indices,
      )
      return array_ops.batch_gather(
          params=padded_params, indices=adjusted_indices, name=name)