def test_ragged_to_tensor(self):
  """Checks RaggedTensor.to_tensor through the TFLite custom-op interpreter."""

  @tf.function
  def make_dense_from_ragged():
    # Constant ragged input: four rows of lengths 0, 6, 9 and 9.
    ragged = tf.RaggedTensor.from_row_splits(
        values=[
            13, 36, 83, 131, 13, 36, 4, 3127, 152, 130, 30, 2424, 168, 1644,
            1524, 4, 3127, 152, 130, 30, 2424, 168, 1644, 636
        ],
        row_splits=[0, 0, 6, 15, 24])
    return ragged.to_tensor()

  concrete = make_dense_from_ragged.get_concrete_function()
  converter = tf.lite.TFLiteConverter.from_concrete_functions(
      [concrete], make_dense_from_ragged)
  converter.allow_custom_ops = True
  model_bytes = converter.convert()

  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model_bytes,
      custom_op_registerers=["TFLite_RaggedTensorToTensorRegisterer"])
  interpreter.allocate_tensors()
  interpreter.invoke()
  output_details = interpreter.get_output_details()

  # Each row is right-padded with zeros to the longest row (9 values).
  expected = [[0, 0, 0, 0, 0, 0, 0, 0, 0],
              [13, 36, 83, 131, 13, 36, 0, 0, 0],
              [4, 3127, 152, 130, 30, 2424, 168, 1644, 1524],
              [4, 3127, 152, 130, 30, 2424, 168, 1644, 636]]
  self.assertAllEqual(
      interpreter.get_tensor(output_details[0]["index"]), expected)
def testToRaggedEquivalence(self, test_case):
  """Compares TF whitespace-tokenizer ragged output against the TFLite model.

  Args:
    test_case: (possibly nested) list of strings fed to both implementations.
  """
  tf_output = _call_whitespace_tokenizer_to_ragged(test_case)

  # `np.str` was a deprecated alias for the builtin `str` and was removed in
  # NumPy 1.24; `str` produces the same unicode dtype.
  np_test_case = np.array(test_case, dtype=str)
  rank = len(np_test_case.shape)

  model_filename = resource_loader.get_path_to_datafile(
      'testdata/whitespace_tokenizer_to_ragged_{}d_input.tflite'.format(rank))
  with open(model_filename, 'rb') as file:
    model = file.read()
  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model,
      custom_op_registerers=['AddWhitespaceTokenizerCustomOp'])
  interpreter.resize_tensor_input(0, np_test_case.shape)
  interpreter.allocate_tensors()
  interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
                         np_test_case)
  interpreter.invoke()

  # Traverse the nested row_splits/values of the ragged tensor: output 0 is
  # the flat values; outputs 1..rank are the nested row_splits, outermost
  # first.
  for i in range(rank):
    tflite_output_cur_row_splits = interpreter.get_tensor(
        interpreter.get_output_details()[1 + i]['index'])
    self.assertEqual(tf_output.row_splits.numpy().tolist(),
                     tflite_output_cur_row_splits.tolist())
    tf_output = tf_output.values

  tflite_output_values = interpreter.get_tensor(
      interpreter.get_output_details()[0]['index'])
  self.assertEqual(tf_output.numpy().tolist(), tflite_output_values.tolist())
def test_tflite_opt_sentence_tokenizer_vocab_size(self):
  """Check that can convert a Keras model to TFLite and it produces the same result for vocabulary size."""

  class TokenizerLayer(tf.keras.layers.Layer):
    """Wraps a SentencepieceTokenizer so its vocab_size is a model output."""

    def __init__(self, sentencepiece_model, **kwargs):
      super(TokenizerLayer, self).__init__(**kwargs)
      self.sp = sentencepiece_tokenizer.SentencepieceTokenizer(
          sentencepiece_model)

    def call(self, input_tensor, **kwargs):
      return self.sp.vocab_size()

  keras_model = tf.keras.models.Sequential(
      [TokenizerLayer(self.sentencepiece_model)])
  input_data = np.array([[""]])
  tf_result = keras_model.predict(input_data)

  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
  converter.allow_custom_ops = True
  flatbuffer = converter.convert()

  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=flatbuffer,
      custom_op_registerers=["TFLite_SentencepieceTokenizerRegisterer"])
  interpreter.allocate_tensors()
  interpreter.set_tensor(interpreter.get_input_details()[0]["index"],
                         input_data)
  interpreter.invoke()

  expected_result = 4000
  output_details = interpreter.get_output_details()
  self.assertEqual(tf_result, expected_result)
  self.assertAllEqual(interpreter.get_tensor(output_details[0]["index"]),
                      expected_result)
def test_latency(self):
  """Benchmarks the custom ngrams op against the flex-delegate fallback.

  The original body duplicated the whole benchmark loop for the two
  configurations; the shared logic now lives in one nested helper.
  """

  def _mean_latency(flex, invokes):
    """Returns mean per-invoke latency in seconds over all TEST_CASES.

    Args:
      flex: build the model for the flex delegate (True) or the custom op
        (False); also selects the matching interpreter class.
      invokes: number of invocations timed per test case.
    """
    total = 0.0
    for test_case in TEST_CASES:
      input_tensor = tf.ragged.constant(test_case)
      rank = input_tensor.shape.rank
      model = self._make_model(rank, 3, ragged_tensor=True, flex=flex)
      if flex:
        interpreter = interpreter_wrapper.Interpreter(model_content=model)
      else:
        interpreter = interpreter_wrapper.InterpreterWithCustomOps(
            model_content=model,
            custom_op_registerers=['AddNgramsCustomOp'])
      signature_fn = interpreter.get_signature_runner()
      # Flat values plus one row-splits tensor per ragged dimension.
      signature_kwargs = {'values': input_tensor.flat_values.numpy()}
      for r in range(rank - 1):
        signature_kwargs[f'args_{r}'] = input_tensor.nested_row_splits[
            r].numpy()
      start_time = timeit.default_timer()
      for _ in range(invokes):
        _ = signature_fn(**signature_kwargs)
      total += timeit.default_timer() - start_time
    return total / (invokes * len(TEST_CASES))

  latency_op = _mean_latency(
      flex=False, invokes=INVOKES_FOR_SINGLE_OP_BENCHMARK)
  latency_flex = _mean_latency(
      flex=True, invokes=INVOKES_FOR_FLEX_DELEGATE_BENCHMARK)
  logging.info('Latency (single op): %fms', latency_op * 1000.0)
  logging.info('Latency (flex delegate): %fms', latency_flex * 1000.0)
def testToTensorEquivalence(self, test_case):
  """Compares TF whitespace-tokenizer dense output against the TFLite model.

  Args:
    test_case: (possibly nested) list of strings fed to both implementations.
  """
  tf_output = _call_whitespace_tokenizer_to_tensor(test_case)

  model_filename = resource_loader.get_path_to_datafile(
      'testdata/whitespace_tokenizer_to_tensor.tflite')
  with open(model_filename, 'rb') as file:
    model = file.read()
  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model,
      custom_op_registerers=['AddWhitespaceTokenizerCustomOp'])

  # `np.str` was a deprecated alias for the builtin `str` and was removed in
  # NumPy 1.24; `str` produces the same unicode dtype.
  np_test_case = np.array(test_case, dtype=str)
  interpreter.resize_tensor_input(0, np_test_case.shape)
  interpreter.allocate_tensors()
  interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
                         np_test_case)
  interpreter.invoke()

  tflite_output = interpreter.get_tensor(
      interpreter.get_output_details()[0]['index'])
  self.assertEqual(tf_output.numpy().tolist(), tflite_output.tolist())
def main(argv):
  """Runs the language-id model on the command-line text and prints scores.

  Args:
    argv: program arguments; argv[1:] are joined into the input sentence.
  """
  with open(FLAGS.model, 'rb') as file:
    model = file.read()

  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model,
      custom_op_registerers=[
          'AddWhitespaceTokenizerCustomOp',
          'AddNgramsCustomOp',
          'AddSgnnProjectionCustomOp',
      ])
  interpreter.resize_tensor_input(0, [1, 1])
  interpreter.allocate_tensors()

  input_string = ' '.join(argv[1:])
  print('Input: "{}"'.format(input_string))
  # `np.str` was a deprecated alias for the builtin `str` and was removed in
  # NumPy 1.24; `str` produces the same unicode dtype.
  input_array = np.array([[input_string]], dtype=str)
  interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
                         input_array)
  interpreter.invoke()

  # One score per language in LANGIDS for each (single) input row.
  output = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
  for x in range(output.shape[0]):
    for y in range(output.shape[1]):
      print('{:>3s}: {:.4f}'.format(LANGIDS[y], output[x][y]))
def test_tflite_opt_sentence_detokenizer(self):
  """Check that can convert a Keras model to TFLite and it produces the same result for tokenization."""

  class DeTokenizerLayer(tf.keras.layers.Layer):
    """Wraps a SentencepieceTokenizer so detokenize is a model output."""

    def __init__(self, sentencepiece_model, **kwargs):
      super(DeTokenizerLayer, self).__init__(**kwargs)
      self.sp = sentencepiece_tokenizer.SentencepieceTokenizer(
          sentencepiece_model)

    def call(self, input_tensor, **kwargs):
      return self.sp.detokenize(input_tensor)

  keras_model = tf.keras.models.Sequential(
      [DeTokenizerLayer(self.sentencepiece_model)])
  input_data = np.array([[
      13, 36, 83, 131, 13, 36, 4, 3127, 152, 130, 30, 2424, 168, 1644, 1524,
      4, 3127, 152, 130, 30, 2424, 168, 1644, 636
  ]],
                        dtype=np.int32)
  tf_result = keras_model.predict(input_data)

  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
  converter.allow_custom_ops = True
  flatbuffer = converter.convert()

  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=flatbuffer,
      custom_op_registerers=["TFLite_SentencepieceTokenizerRegisterer"])
  interpreter.allocate_tensors()
  interpreter.set_tensor(interpreter.get_input_details()[0]["index"],
                         input_data)
  interpreter.invoke()

  expected_result = [
      "to be or not to be ignored by length text1 ignored by length text2"
  ]
  output_details = interpreter.get_output_details()
  self.assertAllEqual(tf_result, expected_result)
  self.assertAllEqual(interpreter.get_tensor(output_details[0]["index"]),
                      expected_result)
def test_width_2_ragged_tensor_equivalence(self, test_case):
  """Checks the width-2 ngrams custom op against tf_text on ragged input."""
  input_tensor = tf.ragged.constant(test_case)
  tf_output = tf_text.ngrams(
      input_tensor, 2, reduction_type=tf_text.Reduction.STRING_JOIN)

  rank = input_tensor.shape.rank
  model = self._make_model(rank, 2, ragged_tensor=True, flex=False)
  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model, custom_op_registerers=['AddNgramsCustomOp'])
  signature_fn = interpreter.get_signature_runner()

  # Flat values plus one row-splits tensor per ragged dimension.
  signature_kwargs = {'values': input_tensor.flat_values.numpy()}
  for dim in range(rank - 1):
    signature_kwargs[f'args_{dim}'] = input_tensor.nested_row_splits[
        dim].numpy()
  output = signature_fn(**signature_kwargs)

  # output_0 carries the flat values; output_1..output_{rank-1} carry the
  # nested row_splits, outermost first.
  self.assertEqual(tf_output.flat_values.numpy().tolist(),
                   output['output_0'].tolist())
  for dim in range(rank - 1):
    self.assertEqual(tf_output.nested_row_splits[dim].numpy().tolist(),
                     output[f'output_{dim + 1}'].tolist())
def testSingleOpLatency(self):
  """Benchmarks the whitespace tokenizer custom op and logs mean latency."""
  model_filename = resource_loader.get_path_to_datafile(
      'testdata/whitespace_tokenizer_to_tensor.tflite')
  with open(model_filename, 'rb') as file:
    model = file.read()
  interpreter = interpreter_wrapper.InterpreterWithCustomOps(
      model_content=model,
      custom_op_registerers=['AddWhitespaceTokenizerCustomOp'])

  latency = 0.0
  for test_case in TEST_CASES:
    # `np.str` was a deprecated alias for the builtin `str` and was removed
    # in NumPy 1.24; `str` produces the same unicode dtype.
    np_test_case = np.array(test_case, dtype=str)
    interpreter.resize_tensor_input(0, np_test_case.shape)
    interpreter.allocate_tensors()
    interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
                           np_test_case)
    start_time = timeit.default_timer()
    for _ in range(INVOKES_FOR_SINGLE_OP_BENCHMARK):
      interpreter.invoke()
    latency = latency + timeit.default_timer() - start_time

  # Mean seconds per invoke across every test case.
  latency = latency / (INVOKES_FOR_SINGLE_OP_BENCHMARK * len(TEST_CASES))
  logging.info('Latency: %fms', latency * 1000.0)