Example #1
    def testConstruction(self):
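        # Each RaggedTensor factory method below should preserve the composite
        # WrappedTensor values type and round-trip its row partition.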
        tensor_values = constant_op.constant(
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
        values = WrappedTensor(tensor_values)

        row_splits = constant_op.constant([0, 2, 2, 5, 6, 8], dtypes.int64)
        rt = RaggedTensor.from_row_splits(values, row_splits)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_splits, row_splits)

        row_starts = constant_op.constant([0, 2, 2, 5, 6], dtypes.int64)
        rt = RaggedTensor.from_row_starts(values, row_starts)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_starts(), row_starts)

        row_limits = constant_op.constant([2, 2, 5, 6, 8], dtypes.int64)
        rt = RaggedTensor.from_row_limits(values, row_limits)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_limits(), row_limits)

        row_lengths = constant_op.constant([2, 0, 3, 1, 2], dtypes.int64)
        rt = RaggedTensor.from_row_lengths(values, row_lengths)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_lengths(), row_lengths)

        rt = RaggedTensor.from_uniform_row_length(values, 4)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.uniform_row_length, 4)
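Note: the factory methods exercised above are just different encodings of the same row partition. A minimal sketch using the public `tf.ragged` API (assuming TensorFlow 2.x and plain `tf.Tensor` values rather than the test's `WrappedTensor`) showing that they describe the same ragged tensor:

```python
import tensorflow as tf

values = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Four equivalent encodings of the same partition into rows of lengths [2, 0, 3, 1, 2].
from_splits = tf.RaggedTensor.from_row_splits(values, [0, 2, 2, 5, 6, 8])
from_starts = tf.RaggedTensor.from_row_starts(values, [0, 2, 2, 5, 6])
from_limits = tf.RaggedTensor.from_row_limits(values, [2, 2, 5, 6, 8])
from_lengths = tf.RaggedTensor.from_row_lengths(values, [2, 0, 3, 1, 2])

print(from_splits.to_list())
# [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g', b'h']]
assert from_starts.to_list() == from_splits.to_list()
assert from_limits.to_list() == from_splits.to_list()
assert from_lengths.to_list() == from_splits.to_list()

# A uniform row length of 4 instead yields two rows of four values each.
print(tf.RaggedTensor.from_uniform_row_length(values, 4).to_list())
# [[b'a', b'b', b'c', b'd'], [b'e', b'f', b'g', b'h']]
```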
Example #2
    def tokenize_with_offsets(self, input):  # pylint: disable=redefined-builtin
        """Tokenizes utf-8 encoded tokens into subword pieces based off of a vocab.

    ### Example:

    ```python
    >>> tokens = [["they're", "the", "greatest"]],
    >>> tokenizer = WordpieceTokenizer(vocab, token_out_type=tf.string)
    >>> result = tokenizer.tokenize_with_offsets(tokens)
    >>> result[0].to_list()  # subwords
    [[['they', "##'", '##re'], ['the'], ['great', '##est']]]
    >>> result[1].to_list()  # offset starts
    [[[0, 4, 5], [0], [0, 5]]]
    >>> result[2].to_list()  # offset limits
    [[[4, 5, 7], [3], [5, 8]]]
    ```

    Args:
      input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings.

    Returns:
      A tuple of `RaggedTensor`s `tokens`, `start_offsets`, and `limit_offsets`
      where:
        * `tokens[i1...iN, j]` is the string contents, or ID in the
          vocab_lookup_table representing that string, of the `j`th token in
          `input[i1...iN]`
        * `start_offsets[i1...iN, j]` is the byte offset for the start of the
          `j`th token in `input[i1...iN]`
        * `limit_offsets[i1...iN, j]` is the byte offset for the end of the
          `j`th token in `input[i1...iN]`
    """
        name = None
        if not isinstance(self._vocab_lookup_table,
                          lookup_ops.LookupInterface):
            raise TypeError('vocab_lookup_table must be a LookupInterface')
        with ops.name_scope(
                name, 'WordpieceTokenizeWithOffsets',
            [input, self._vocab_lookup_table, self._suffix_indicator]):
            # Check that the types are expected and the ragged rank is appropriate.
            tokens = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
            rank = tokens.shape.ndims
            if rank is None:
                raise ValueError('input must have a known rank.')

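            # Scalar input: tokenize it as a batch of one, then strip the
            # added batch dimension from the results.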
            if rank == 0:
                wordpieces, starts, limits = self.tokenize_with_offsets(
                    array_ops.stack([tokens]))
                return wordpieces.values, starts.values, limits.values

            elif rank > 1:
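                # Higher-rank input: tokenize the flat values, then rebuild
                # the outer ragged structure around each result.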
                if not ragged_tensor.is_ragged(tokens):
                    tokens = ragged_tensor.RaggedTensor.from_tensor(
                        tokens, ragged_rank=rank - 1)
                wordpieces, starts, limits = self.tokenize_with_offsets(
                    tokens.flat_values)
                wordpieces = wordpieces.with_row_splits_dtype(
                    tokens.row_splits.dtype)
                starts = starts.with_row_splits_dtype(tokens.row_splits.dtype)
                limits = limits.with_row_splits_dtype(tokens.row_splits.dtype)
                return (tokens.with_flat_values(wordpieces),
                        tokens.with_flat_values(starts),
                        tokens.with_flat_values(limits))

            # Tokenize the tokens into subwords
            values, row_lengths, starts, limits = (
                gen_wordpiece_tokenizer.wordpiece_tokenize_with_offsets(
                    input_values=tokens,
                    vocab_lookup_table=(
                        self._vocab_lookup_table.resource_handle),
                    suffix_indicator=self._suffix_indicator,
                    use_unknown_token=self._use_unknown_token,
                    max_bytes_per_word=self._max_bytes_per_word,
                    unknown_token=self._unknown_token,
                ))

            # If ids are desired, look them up in the vocab table. Otherwise just
            # return the string values.
            if self._token_out_type == dtypes.int64:
                values = self._vocab_lookup_table.lookup(values)

            wordpieces = RaggedTensor.from_row_lengths(values, row_lengths)
            starts = RaggedTensor.from_row_lengths(starts, row_lengths)
            limits = RaggedTensor.from_row_lengths(limits, row_lengths)

            return wordpieces, starts, limits
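For context, a hedged end-to-end usage sketch of this method (the in-memory vocab, the `tf.lookup.StaticVocabularyTable` setup, and the inputs below are illustrative assumptions, not part of the original example):

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Illustrative in-memory vocab; a real vocab would typically be loaded from a file.
vocab = ["[UNK]", "they", "##'", "##re", "the", "great", "##est"]
init = tf.lookup.KeyValueTensorInitializer(
    keys=vocab,
    values=tf.range(len(vocab), dtype=tf.int64),
    key_dtype=tf.string,
    value_dtype=tf.int64)
vocab_table = tf.lookup.StaticVocabularyTable(init, num_oov_buckets=1)

tokenizer = tf_text.WordpieceTokenizer(vocab_table, token_out_type=tf.string)
subwords, starts, limits = tokenizer.tokenize_with_offsets(
    [["they're", "the", "greatest"]])

print(subwords.to_list())
# [[[b'they', b"##'", b'##re'], [b'the'], [b'great', b'##est']]]
print(starts.to_list())   # [[[0, 4, 5], [0], [0, 5]]]
print(limits.to_list())   # [[[4, 5, 7], [3], [5, 8]]]
```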