def _flat_map_fn(x):
  # Densify the ragged element first, then slice the dense tensor into a
  # dataset of its rows.
  dense = ragged_conversion_ops.to_tensor(x)
  return dataset_ops.Dataset.from_tensor_slices(dense)
def find_source_offsets(offsets_map, input_offsets, name=None):
  """Maps the input post-normalized string offsets to pre-normalized offsets.

  Returns the source (i.e. pre-normalized) string offsets mapped from the
  input post-normalized string offsets using the input offsets_map, which is
  an output from the `normalize_utf8_with_offsets_map` op. offsets_map can be
  indexed or sliced along with the input_offsets.

  Example usage:

  >>> post_normalized_str, offsets_map = normalize_utf8_with_offsets_map(
  ...     ["株式会社", "KADOKAWA"])
  >>> find_source_offsets(offsets_map, [[0, 1, 2], [0, 1, 2]])
  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=array([[0, 1, 2], [0, 3, 6]])>

  >>> find_source_offsets(offsets_map[1], [[0, 1, 2]])  # indexed offsets_map
  <tf.Tensor: shape=(1, 3), dtype=int64, numpy=array([[0, 3, 6]])>

  Args:
    offsets_map: A `Tensor` or `RaggedTensor` of type `variant`, used to map
      the post-normalized string offsets to pre-normalized string offsets.
      offsets_map is an output from `normalize_utf8_with_offsets_map` function.
    input_offsets: A `Tensor` or `RaggedTensor` of type int64 representing
      the post-normalized string offsets.
    name: The name for this op (optional).

  Returns:
    results: A `Tensor` or `RaggedTensor` of type int64, with pre-normalized
      string offsets.
  """
  with ops.name_scope(name, "FindSourceOffsets", [offsets_map, input_offsets]):
    offsets_map_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        offsets_map, dtype=dtypes.variant)
    input_offsets_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input_offsets, dtype=dtypes.int64)
    if ragged_tensor.is_ragged(input_offsets_tensor):
      # The kernel consumes flat values plus row splits, so flatten the
      # offsets_map whether it is ragged or dense before dispatching.
      if ragged_tensor.is_ragged(offsets_map_tensor):
        offsets_map_values = offsets_map_tensor.flat_values
      else:
        offsets_map_values = array_ops.reshape(offsets_map_tensor, [-1])
      output_values = gen_normalize_ops.find_source_offsets(
          offsets_map=offsets_map_values,
          input_offsets_values=input_offsets_tensor.flat_values,
          input_offsets_splits=input_offsets_tensor.nested_row_splits[-1])
      # Re-attach the input's ragged structure to the kernel's flat output.
      return input_offsets_tensor.with_flat_values(output_values)
    else:
      if input_offsets_tensor.shape.ndims > 1:
        # Dense input of rank > 1: convert to ragged, recurse into the ragged
        # branch above, then densify the result again.
        output_offsets = find_source_offsets(
            offsets_map,
            ragged_conversion_ops.from_tensor(
                input_offsets_tensor,
                ragged_rank=input_offsets_tensor.shape.ndims - 1))
        return ragged_conversion_ops.to_tensor(output_offsets)
      elif input_offsets_tensor.shape.ndims == 0:
        # Scalar input: add a leading dimension, recurse, then index it away.
        output_offsets = find_source_offsets(
            offsets_map, array_ops.expand_dims(input_offsets_tensor, 0))
        return output_offsets[0]
      else:
        # Rank-1 input: add a leading dimension, recurse, then squeeze it out.
        output_offsets = find_source_offsets(
            offsets_map, array_ops.expand_dims(input_offsets_tensor, 0))
        return array_ops.squeeze(output_offsets, [0])
def test_passing_empty(self, input_list, squeeze_ranks=None):
  """Ragged squeeze of (possibly empty) input matches dense tf.squeeze."""
  ragged_input = ragged_factory_ops.constant(input_list)
  dense_input = constant_op.constant(input_list)
  ragged_result = ragged_squeeze_op.squeeze(ragged_input, squeeze_ranks)
  dense_result = array_ops.squeeze(dense_input, squeeze_ranks)
  self.assertRaggedEqual(
      ragged_conversion_ops.to_tensor(ragged_result), dense_result)
def gather_nd(params, indices, name=None):
  """Gather slices from `params` using `n`-dimensional indices.

  This operation is similar to `gather`, but it uses the innermost dimension
  of `indices` to define a slice into `params`.  In particular, if:

  * `indices` has shape `[A1...AN, I]`
  * `params` has shape `[B1...BM]`

  Then:

  * `result` has shape `[A1...AN, B_{I+1}...BM]`.
  * `result[a1...aN] = params[indices[a1...aN, :]]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BM]`.
    indices: A potentially ragged tensor with shape `[A1...AN, I]`.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[A1...AN, B_{I+1}...BM]`.

  #### Examples:
    ```python
    >>> params = tf.ragged.constant_value(
    ...     [ [ ['000', '001'], ['010'              ]          ],
    ...       [ ['100'       ], ['110', '111', '112'], ['120'] ],
    ...       [ [             ], ['210'              ]          ] ])

    >>> # Gather 2D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2], [0]])
    [ [ [            ], ['210'] ]
      [ ['000', '001'], ['010'] ] ]

    >>> # Gather 1D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2, 1], [0, 0]])
    [['210'], ['000', '001']]

    >>> # Gather scalars from a 3D tensor
    >>> ragged.gather_nd(params, [[0, 0, 1], [1, 1, 2]])
    ['001', '112']
    ```
  """
  # Fast path: with no ragged inputs, defer entirely to the dense op.
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.gather_nd(params, indices, name)

  with ops.name_scope(name, 'RaggedGatherNd', [params, indices]):

    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    indices_shape = indices.shape
    indices_ndims = indices_shape.ndims
    if indices_ndims is None:
      # Fixed error message: was missing the word "must".
      raise ValueError('indices.rank must be statically known.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')
    if (ragged_tensor.is_ragged(indices) and
        indices_ndims == indices.ragged_rank + 1):
      raise ValueError('The innermost dimension of indices may not be ragged')

    # `index_size` is the "n" in "gather_nd" -- i.e., the number of dimensions
    # that each index slices into.
    index_size = tensor_shape.dimension_value(indices_shape[-1])
    if index_size is None:
      raise ValueError('indices.shape[-1] must be statically known.')

    # If `indices` has more than 2 dimensions, then recurse.  If `indices` is
    # dense, then we convert it to ragged before recursing, and then convert
    # the result back to `dense` if appropriate.
    if indices_ndims > 2:
      indices_is_dense = not ragged_tensor.is_ragged(indices)
      if indices_is_dense:
        indices = ragged_conversion_ops.from_tensor(
            indices, ragged_rank=indices_ndims - 2)
      result = indices.with_flat_values(gather_nd(params, indices.flat_values))
      if (indices_is_dense and ragged_tensor.is_ragged(result) and
          result.ragged_rank == indices_ndims - 2):
        result = ragged_conversion_ops.to_tensor(result)
      return result

    # indices_ndims <= 2, and the innermost dimension of indices may not be
    # ragged, so `indices` must not be ragged.
    assert not ragged_tensor.is_ragged(indices)
    assert ragged_tensor.is_ragged(params)

    # Handle corner case: An empty index tuple selects the entire `params`
    # value.  So if `index_size` is zero, then tile `params`.
    if index_size == 0:
      params_ndims = params.ragged_rank + array_ops.rank(params.flat_values)
      for dim in range(indices_ndims - 1):
        params = expand_dims(params, axis=0)
      multiples = array_ops.concat([
          array_ops.shape(indices)[:-1],
          array_ops.ones([params_ndims], dtypes.int32)
      ],
                                   axis=0)
      return tile(params, multiples)

    # When index_size=1, we can just flatten the index tuples and use gather.
    elif index_size == 1:
      flattened_index_tuples = array_ops.reshape(indices, [-1])
      return gather(params, flattened_index_tuples)

    # Otherwise, params is a RaggedTensor, and indices is a 1D or 2D Tensor.
    # Flatten both the index tuples and the params, such that the flattened
    # index tuples point to the correct values in the flattened params; and
    # then use ragged.gather on the flattened index tuples & params.
    else:
      # `math_ops.to_int64` is deprecated; use an explicit cast instead.
      indices = math_ops.cast(indices, dtypes.int64)

      # Flatten the outermost 2 dimensions of the index tuples & params.
      flattened_index_tuples = array_ops.gather(params.row_splits,
                                                indices[..., 0])
      flattened_index_tuples += indices[..., 1]
      flattened_params = params.values

      # Flatten any remaining dimensions.
      for dim in range(2, index_size):
        if not ragged_tensor.is_ragged(flattened_params):
          flattened_index_tuples = array_ops.expand_dims(
              flattened_index_tuples, axis=1)
          flattened_index_tuples = array_ops.concat(
              [flattened_index_tuples, indices[..., dim:]], axis=1)
          return array_ops.gather_nd(flattened_params, flattened_index_tuples)

        flattened_index_tuples = array_ops.gather(
            flattened_params.row_starts(), flattened_index_tuples)
        flattened_index_tuples += indices[..., dim]
        flattened_params = flattened_params.values

      # Gather using the flattened index tuples and params.
      return gather(flattened_params, flattened_index_tuples)
def test_passing_simple_from_dense(self, input_list, squeeze_ranks=None):
  """Ragged squeeze of a tensor built from dense matches tf.squeeze."""
  dense = constant_op.constant(input_list)
  ragged = ragged_conversion_ops.from_tensor(dense)
  squeezed_ragged = ragged_squeeze_op.squeeze(ragged, squeeze_ranks)
  squeezed_dense = array_ops.squeeze(dense, squeeze_ranks)
  self.assertRaggedEqual(
      ragged_conversion_ops.to_tensor(squeezed_ragged), squeezed_dense)
def _flat_map_fn(x):
  """Converts a ragged element to dense and re-slices it into a Dataset."""
  as_dense = ragged_conversion_ops.to_tensor(x)
  return dataset_ops.Dataset.from_tensor_slices(as_dense)
def gather_nd(params, indices, name=None):
  """Gather slices from `params` using `n`-dimensional indices.

  This operation is similar to `gather`, but it uses the innermost dimension
  of `indices` to define a slice into `params`.  In particular, if:

  * `indices` has shape `[A1...AN, I]`
  * `params` has shape `[B1...BM]`

  Then:

  * `result` has shape `[A1...AN, B_{I+1}...BM]`.
  * `result[a1...aN] = params[indices[a1...aN, :]]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BM]`.
    indices: A potentially ragged tensor with shape `[A1...AN, I]`.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[A1...AN, B_{I+1}...BM]`.

  #### Examples:
    ```python
    >>> params = tf.ragged.constant_value(
    ...     [ [ ['000', '001'], ['010'              ]          ],
    ...       [ ['100'       ], ['110', '111', '112'], ['120'] ],
    ...       [ [             ], ['210'              ]          ] ])

    >>> # Gather 2D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2], [0]])
    [ [ [            ], ['210'] ]
      [ ['000', '001'], ['010'] ] ]

    >>> # Gather 1D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2, 1], [0, 0]])
    [['210'], ['000', '001']]

    >>> # Gather scalars from a 3D tensor
    >>> ragged.gather_nd(params, [[0, 0, 1], [1, 1, 2]])
    ['001', '112']
    ```
  """
  # Fast path: with no ragged inputs, defer entirely to the dense op.
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.gather_nd(params, indices, name)

  with ops.name_scope(name, 'RaggedGatherNd', [params, indices]):

    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    indices_shape = indices.shape
    indices_ndims = indices_shape.ndims
    if indices_ndims is None:
      # Fixed error message: was missing the word "must".
      raise ValueError('indices.rank must be statically known.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')
    if (ragged_tensor.is_ragged(indices) and
        indices_ndims == indices.ragged_rank + 1):
      raise ValueError('The innermost dimension of indices may not be ragged')

    # `index_size` is the "n" in "gather_nd" -- i.e., the number of dimensions
    # that each index slices into.
    index_size = tensor_shape.dimension_value(indices_shape[-1])
    if index_size is None:
      raise ValueError('indices.shape[-1] must be statically known.')

    # If `indices` has more than 2 dimensions, then recurse.  If `indices` is
    # dense, then we convert it to ragged before recursing, and then convert
    # the result back to `dense` if appropriate.
    if indices_ndims > 2:
      indices_is_dense = not ragged_tensor.is_ragged(indices)
      if indices_is_dense:
        indices = ragged_conversion_ops.from_tensor(
            indices, ragged_rank=indices_ndims - 2)
      result = indices.with_flat_values(gather_nd(params, indices.flat_values))
      if (indices_is_dense and ragged_tensor.is_ragged(result) and
          result.ragged_rank == indices_ndims - 2):
        result = ragged_conversion_ops.to_tensor(result)
      return result

    # indices_ndims <= 2, and the innermost dimension of indices may not be
    # ragged, so `indices` must not be ragged.
    assert not ragged_tensor.is_ragged(indices)
    assert ragged_tensor.is_ragged(params)

    # Handle corner case: An empty index tuple selects the entire `params`
    # value.  So if `index_size` is zero, then tile `params`.
    if index_size == 0:
      params_ndims = params.ragged_rank + array_ops.rank(params.flat_values)
      for dim in range(indices_ndims - 1):
        params = ragged_array_ops.expand_dims(params, axis=0)
      multiples = array_ops.concat([
          array_ops.shape(indices)[:-1],
          array_ops.ones([params_ndims], dtypes.int32)
      ],
                                   axis=0)
      return ragged_array_ops.tile(params, multiples)

    # When index_size=1, we can just flatten the index tuples and use gather.
    elif index_size == 1:
      flattened_index_tuples = array_ops.reshape(indices, [-1])
      return gather(params, flattened_index_tuples)

    # Otherwise, params is a RaggedTensor, and indices is a 1D or 2D Tensor.
    # Flatten both the index tuples and the params, such that the flattened
    # index tuples point to the correct values in the flattened params; and
    # then use ragged.gather on the flattened index tuples & params.
    else:
      # `math_ops.to_int64` is deprecated; use an explicit cast instead.
      indices = math_ops.cast(indices, dtypes.int64)

      # Flatten the outermost 2 dimensions of the index tuples & params.
      flattened_index_tuples = array_ops.gather(params.row_splits,
                                                indices[..., 0])
      flattened_index_tuples += indices[..., 1]
      flattened_params = params.values

      # Flatten any remaining dimensions.
      for dim in range(2, index_size):
        if not ragged_tensor.is_ragged(flattened_params):
          flattened_index_tuples = array_ops.expand_dims(
              flattened_index_tuples, axis=1)
          flattened_index_tuples = array_ops.concat(
              [flattened_index_tuples, indices[..., dim:]], axis=1)
          return array_ops.gather_nd(flattened_params, flattened_index_tuples)

        flattened_index_tuples = array_ops.gather(
            flattened_params.row_starts(), flattened_index_tuples)
        flattened_index_tuples += indices[..., dim]
        flattened_params = flattened_params.values

      # Gather using the flattened index tuples and params.
      return gather(flattened_params, flattened_index_tuples)
def test_passing_empty(self, input_list, squeeze_ranks=None):
  """Checks ragged squeeze against dense squeeze on empty-ish inputs."""
  squeezed_ragged = ragged_squeeze_op.squeeze(
      ragged_factory_ops.constant(input_list), squeeze_ranks)
  squeezed_dense = array_ops.squeeze(
      constant_op.constant(input_list), squeeze_ranks)
  self.assertRaggedEqual(
      ragged_conversion_ops.to_tensor(squeezed_ragged), squeezed_dense)
def test_passing_simple_from_dense(self, input_list, squeeze_ranks=None):
  """Checks that squeezing via the ragged path agrees with the dense path."""
  dense_input = constant_op.constant(input_list)
  ragged_input = ragged_conversion_ops.from_tensor(dense_input)
  result_ragged = ragged_squeeze_op.squeeze(ragged_input, squeeze_ranks)
  result_dense = array_ops.squeeze(dense_input, squeeze_ranks)
  self.assertRaggedEqual(
      ragged_conversion_ops.to_tensor(result_ragged), result_dense)