from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets # pylint: disable=g-bad-import-order from tensorflow.python.framework import load_library from tensorflow.python.platform import resource_loader gen_wordpiece_tokenizer = load_library.load_op_library( resource_loader.get_path_to_datafile('_wordpiece_tokenizer.so')) _tf_text_wordpiece_tokenizer_op_create_counter = monitoring.Counter( '/nlx/api/python/wordpiece_tokenizer_create_counter', 'Counter for number of WordpieceTokenizers created in Python.') class WordpieceTokenizer(TokenizerWithOffsets): """Tokenizes a tensor of UTF-8 string tokens into subword pieces.""" def __init__(self, vocab_lookup_table, suffix_indicator='##', max_bytes_per_word=100, max_chars_per_token=None, token_out_type=dtypes.int64, unknown_token='[UNK]', split_unknown_characters=False): """Initializes the WordpieceTokenizer.
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Python TFLite metrics helper.""" from typing import Optional, Text import uuid from tensorflow.lite.python import metrics_interface from tensorflow.lite.python.metrics_wrapper import _pywrap_tensorflow_lite_metrics_wrapper as _metrics_wrapper from tensorflow.lite.python.metrics_wrapper import converter_error_data_pb2 from tensorflow.python.eager import monitoring _counter_debugger_creation = monitoring.Counter( '/tensorflow/lite/quantization_debugger/created', 'Counter for the number of debugger created.') _counter_interpreter_creation = monitoring.Counter( '/tensorflow/lite/interpreter/created', 'Counter for number of interpreter created in Python.', 'language') # The following are conversion metrics. Attempt and success are kept separated # instead of using a single metric with a label because the converter may # raise exceptions if conversion failed. That may lead to cases when we are # unable to capture the conversion attempt. Increasing attempt count at the # beginning of conversion process and the success count at the end is more # suitable in these cases. _counter_conversion_attempt = monitoring.Counter( '/tensorflow/lite/convert/attempt', 'Counter for number of conversion attempts.')
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Splitter that uses a Hub module.""" import tensorflow_hub as hub from tensorflow.python.eager import monitoring from tensorflow.python.ops import array_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow_text.python.ops.splitter import SplitterWithOffsets _tf_text_hub_module_splitter_create_counter = monitoring.Counter( '/nlx/api/python/hub_module_splitter_create_counter', 'Counter for number of HubModuleSplitters created in Python.') class HubModuleSplitter(SplitterWithOffsets): """Splitter that uses a Hub module. The TensorFlow graph from the module performs the real work. The Python code from this class handles the details of interfacing with that module, as well as the support for ragged tensors and high-rank tensors. The Hub module should be supported by `hub.load() <https://www.tensorflow.org/hub/api_docs/python/hub/load>`_ If a v1 module, it should have a graph variant with an empty set of tags; we consider that graph variant to be the module and ignore everything else. The module should have a signature named `default` that takes a `text` input (a rank-1 tensor of
b"Stack", b"StridedSlice", b"StridedSliceGrad", b"TensorListConcatV2", b"TensorListGather", b"TensorListGetItem", b"TensorListPopBack", b"TensorListStack", b"Transpose", b"Unpack", ) _state = threading.local() _check_numerics_callback_create_counter = monitoring.Counter( "/tensorflow/api/python/debugging/check_numerics_callback_create_counter", "Counter for number of times the check_numerics op callback is created.") def limit_string_length(string, max_len=50): """Limit the length of input string. Args: string: Input string. max_len: (int or None) If int, the length limit. If None, no limit. Returns: Possibly length-limited string. """ if max_len is None or len(string) <= max_len: return string
import copy from tensorflow.python.eager import monitoring from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import string_ops from tensorflow_text.python.ops import regex_split_ops from tensorflow_text.python.ops.normalize_ops import case_fold_utf8 from tensorflow_text.python.ops.normalize_ops import normalize_utf8 from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets from tensorflow_text.python.ops.wordpiece_tokenizer import WordpieceTokenizer _tf_text_bert_tokenizer_op_create_counter = monitoring.Counter( "/nlx/api/python/bert_tokenizer_create_counter", "Counter for number of BertTokenizers created in Python.") _DELIM_REGEX = [ r"\s+", r"|".join([ r"[!-/]", r"[:-@]", r"[\[-`]", r"[{-~]", r"[\p{P}]", ]), r"|".join([ r"[\x{4E00}-\x{9FFF}]", r"[\x{3400}-\x{4DBF}]", r"[\x{20000}-\x{2A6DF}]",
from __future__ import print_function from tensorflow.python.eager import monitoring from tensorflow.python.framework import ops from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets # pylint: disable=g-bad-import-order from tensorflow.python.framework import load_library from tensorflow.python.platform import resource_loader gen_split_merge_from_logits_tokenizer = load_library.load_op_library( resource_loader.get_path_to_datafile( '_split_merge_from_logits_tokenizer.so')) _tf_text_split_merge_from_logits_tokenizer_op_create_counter = monitoring.Counter( '/nlx/api/python/split_merge_from_logits_tokenizer_create_counter', 'Counter for number of SplitMergeFromLogitsTokenizer instances ' 'created in Python.') class SplitMergeFromLogitsTokenizer(TokenizerWithOffsets): """Tokenizes a tensor of UTF-8 string into words according to logits.""" def __init__(self, force_split_at_break_character=True): """Initializes a new instance. Args: force_split_at_break_character: a bool that indicates whether to force start a new word after an ICU-defined whitespace character. Regardless of this parameter, we never include a whitespace into a token, and we always ignore the split/merge action for the whitespace character itself. This parameter indicates what happens after a whitespace. -if force_split_at_break_character is true, create a new word starting
from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets # pylint: disable=g-bad-import-order from tensorflow.python.framework import load_library from tensorflow.python.platform import resource_loader gen_split_merge_tokenizer = load_library.load_op_library( resource_loader.get_path_to_datafile('_split_merge_tokenizer.so')) _tf_text_split_merge_tokenizer_op_create_counter = monitoring.Counter( '/nlx/api/python/split_merge_tokenizer_create_counter', 'Counter for number of SplitMergeTokenizers created in Python.') class SplitMergeTokenizer(TokenizerWithOffsets): """Tokenizes a tensor of UTF-8 string into words according to labels.""" def __init__(self): """Initializes a new instance. """ super(SplitMergeTokenizer, self).__init__() _tf_text_split_merge_tokenizer_op_create_counter.get_cell( ).increase_by(1) def tokenize( self, input, # pylint: disable=redefined-builtin
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops.ragged import ragged_conversion_ops
from tensorflow.python.ops.ragged import ragged_string_ops
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets

# pylint: disable=g-bad-import-order
from tensorflow.python.framework import load_library
from tensorflow.python.platform import resource_loader
gen_unicode_script_tokenizer = load_library.load_op_library(
    resource_loader.get_path_to_datafile('_unicode_script_tokenizer.so'))

_tf_text_unicode_script_tokenizer_create_counter = monitoring.Counter(
    "/nlx/api/python/unicode_script_tokenizer_create_counter",
    "Counter for number of UnicodeScriptTokenizers created in Python.")


class UnicodeScriptTokenizer(TokenizerWithOffsets):
  """Tokenizes a tensor of UTF-8 strings on Unicode script boundaries."""

  def __init__(self, keep_whitespace=False):
    """Initializes a new instance.

    Args:
      keep_whitespace: A boolean that specifies whether to emit whitespace
        tokens (default `False`).
    """
    super(UnicodeScriptTokenizer, self).__init__()
    # Record this construction in the module-level creation counter.
    _tf_text_unicode_script_tokenizer_create_counter.get_cell(
    ).increase_by(1)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Global streamz counters.""" from tensorflow.python.eager import monitoring progressive_policy_creation_counter = monitoring.Counter( "/tensorflow/training/fast_training/progressive_policy_creation", "Counter for the number of ProgressivePolicy creations.") stack_vars_to_vars_call_counter = monitoring.Counter( "/tensorflow/training/fast_training/tf_vars_to_vars", "Counter for the number of low-level stacking API calls.")
def test_same_counter(self):
  """Creating a second counter under an already-used name must raise."""
  metric_name = 'test/same_counter'
  metric_description = 'test counter'
  # The first counter stays bound to a local, exactly as before, so the object
  # is still referenced while the duplicate creation is attempted.
  first_counter = monitoring.Counter(metric_name, metric_description)  # pylint: disable=unused-variable
  with self.assertRaises(errors.AlreadyExistsError):
    monitoring.Counter(metric_name, metric_description)
def test_counter(self):
  """Increments on a counter's cell accumulate into its reported value."""
  counter = monitoring.Counter('test/counter', 'test counter')
  running_total = 0
  # Apply each increment in turn and check the cumulative value (1, then 6).
  for step in (1, 5):
    counter.get_cell().increase_by(step)
    running_total += step
    self.assertEqual(counter.get_cell().value(), running_total)
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.ragged import ragged_conversion_ops
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
from tensorflow.python.training.tracking import tracking
from tensorflow_text.python.ops.tokenization import Detokenizer
from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets
from tensorflow.python.framework import load_library
from tensorflow.python.platform import resource_loader
gen_sentencepiece_tokenizer = load_library.load_op_library(
    resource_loader.get_path_to_datafile('_sentencepiece_tokenizer.so'))  # pylint: disable=g-bad-import-order

_tf_text_sentencepiece_tokenizer_op_create_counter = monitoring.Counter(
    "/nlx/api/python/sentencepiece_tokenizer_create_counter",
    "Counter for number of SentencepieceTokenizers created in Python.")


class _SentencepieceModelResource(tracking.TrackableResource):
  """Trackable holder for the model resource tensor (SavedModel support)."""

  def __init__(self, model, name):
    """Stores the arguments and immediately creates the resource.

    Args:
      model: Value forwarded as `model` to the sentencepiece op.
      name: Name handed to `ops.name_scope` when the resource is created.
    """
    super(_SentencepieceModelResource, self).__init__()
    self._model = model
    self._name = name
    # Reading `resource_handle` is what creates the resource; the value itself
    # is not used here.
    _ = self.resource_handle

  def _create_resource(self):
    """Returns the sentencepiece op output, built under the stored name scope."""
    scope = ops.name_scope(
        self._name, "SentenceTokenizerInitializer", [self._model])
    with scope:
      return gen_sentencepiece_tokenizer.sentencepiece_op(model=self._model)