def testClassifyModel(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_multihead_examples(example_path)
  model_path = self._get_output_data_dir('model')
  self._build_multihead_model(model_path)
  prediction_log_path = self._get_output_data_dir('predictions')
  self._run_inference_with_beam(
      example_path,
      model_spec_pb2.InferenceEndpoint(
          saved_model_spec=model_spec_pb2.SavedModelSpec(
              model_path=model_path, signature_name=['classify_sum'])),
      prediction_log_path)

  results = self._get_results(prediction_log_path)
  self.assertLen(results, 2)
  classify_log = results[0].classify_log
  self.assertLen(classify_log.request.input.example_list.examples, 1)
  self.assertEqual(classify_log.request.input.example_list.examples[0],
                   self._multihead_examples[0])
  self.assertLen(classify_log.response.result.classifications, 1)
  self.assertLen(classify_log.response.result.classifications[0].classes, 1)
  self.assertAlmostEqual(
      classify_log.response.result.classifications[0].classes[0].score, 1.0)
def testEstimatorModelPredict(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_predict_examples(example_path)
  model_path = self._get_output_data_dir('model')
  self._build_predict_model(model_path)
  prediction_log_path = self._get_output_data_dir('predictions')
  self._run_inference_with_beam(
      example_path,
      model_spec_pb2.InferenceEndpoint(
          saved_model_spec=model_spec_pb2.SavedModelSpec(
              model_path=model_path)),
      prediction_log_path)

  results = self._get_results(prediction_log_path)
  self.assertLen(results, 2)
  self.assertEqual(
      results[0].predict_log.request.inputs[
          run_inference._DEFAULT_INPUT_KEY].string_val[0],
      self._predict_examples[0].SerializeToString())
  self.assertEqual(results[0].predict_log.response.outputs['y'].dtype,
                   tf.float32)
  self.assertLen(
      results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2)
  self.assertEqual(
      results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size,
      1)
  self.assertEqual(
      results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size,
      1)
def testModelPathInvalid(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_predict_examples(example_path)
  prediction_log_path = self._get_output_data_dir('predictions')
  with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'):
    self._run_inference_with_beam(
        example_path,
        model_spec_pb2.InferenceEndpoint(
            saved_model_spec=model_spec_pb2.SavedModelSpec(
                model_path=self._get_output_data_dir())),
        prediction_log_path)
def testKerasModelPredict(self):
  inputs = tf.keras.Input(shape=(1,), name='input1')
  output1 = tf.keras.layers.Dense(
      1, activation=tf.nn.sigmoid, name='output1')(inputs)
  output2 = tf.keras.layers.Dense(
      1, activation=tf.nn.sigmoid, name='output2')(inputs)
  inference_model = tf.keras.models.Model(inputs, [output1, output2])

  class TestKerasModel(tf.keras.Model):

    def __init__(self, inference_model):
      super(TestKerasModel, self).__init__(name='test_keras_model')
      self.inference_model = inference_model

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs')
    ])
    def call(self, serialized_example):
      features = {
          'input1':
              tf.compat.v1.io.FixedLenFeature(
                  [1], dtype=tf.float32, default_value=0)
      }
      input_tensor_dict = tf.io.parse_example(serialized_example, features)
      # Use the model stored on the instance rather than the enclosing
      # closure variable, which would leave self.inference_model unused.
      return self.inference_model(input_tensor_dict['input1'])

  model = TestKerasModel(inference_model)
  model.compile(
      optimizer=tf.keras.optimizers.Adam(lr=.001),
      loss=tf.keras.losses.binary_crossentropy,
      metrics=['accuracy'])

  model_path = self._get_output_data_dir('model')
  tf.compat.v1.keras.experimental.export_saved_model(
      model, model_path, serving_only=True)

  example_path = self._get_output_data_dir('examples')
  self._prepare_predict_examples(example_path)
  prediction_log_path = self._get_output_data_dir('predictions')
  self._run_inference_with_beam(
      example_path,
      model_spec_pb2.InferenceEndpoint(
          saved_model_spec=model_spec_pb2.SavedModelSpec(
              model_path=model_path)),
      prediction_log_path)

  results = self._get_results(prediction_log_path)
  self.assertLen(results, 2)
def testTelemetry(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_multihead_examples(example_path)
  model_path = self._get_output_data_dir('model')
  self._build_multihead_model(model_path)
  inference_endpoint = model_spec_pb2.InferenceEndpoint(
      saved_model_spec=model_spec_pb2.SavedModelSpec(
          model_path=model_path, signature_name=['classify_sum']))
  pipeline = beam.Pipeline()
  _ = (
      pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
      | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
      | 'RunInference' >> run_inference.RunInference(inference_endpoint))
  run_result = pipeline.run()
  run_result.wait_until_finish()

  num_inferences = run_result.metrics().query(
      MetricsFilter().with_name('num_inferences'))
  self.assertTrue(num_inferences['counters'])
  self.assertEqual(num_inferences['counters'][0].result, 2)
  num_instances = run_result.metrics().query(
      MetricsFilter().with_name('num_instances'))
  self.assertTrue(num_instances['counters'])
  self.assertEqual(num_instances['counters'][0].result, 2)
  inference_request_batch_size = run_result.metrics().query(
      MetricsFilter().with_name('inference_request_batch_size'))
  self.assertTrue(inference_request_batch_size['distributions'])
  self.assertEqual(
      inference_request_batch_size['distributions'][0].result.sum, 2)
  inference_request_batch_byte_size = run_result.metrics().query(
      MetricsFilter().with_name('inference_request_batch_byte_size'))
  self.assertTrue(inference_request_batch_byte_size['distributions'])
  self.assertEqual(
      inference_request_batch_byte_size['distributions'][0].result.sum,
      sum(element.ByteSize() for element in self._multihead_examples))
  inference_batch_latency_micro_secs = run_result.metrics().query(
      MetricsFilter().with_name('inference_batch_latency_micro_secs'))
  self.assertTrue(inference_batch_latency_micro_secs['distributions'])
  self.assertGreaterEqual(
      inference_batch_latency_micro_secs['distributions'][0].result.sum, 0)
  load_model_latency_milli_secs = run_result.metrics().query(
      MetricsFilter().with_name('load_model_latency_milli_secs'))
  self.assertTrue(load_model_latency_milli_secs['distributions'])
  self.assertGreaterEqual(
      load_model_latency_milli_secs['distributions'][0].result.sum, 0)
def _run_model_inference(self, model_path: Text,
                         example_uris: Mapping[Text, Text],
                         output_path: Text,
                         model_spec: bulk_inferrer_pb2.ModelSpec) -> None:
  """Runs model inference on given example data.

  Args:
    model_path: Path to model.
    example_uris: Mapping of example split name to example uri.
    output_path: Path to output generated prediction logs.
    model_spec: bulk_inferrer_pb2.ModelSpec instance.

  Returns:
    None
  """
  saved_model_spec = model_spec_pb2.SavedModelSpec(
      model_path=model_path,
      tag=model_spec.tag,
      signature_name=model_spec.model_signature_name)
  inference_endpoint = model_spec_pb2.InferenceEndpoint()
  inference_endpoint.saved_model_spec.CopyFrom(saved_model_spec)
  with self._make_beam_pipeline() as pipeline:
    data_list = []
    for split, example_uri in example_uris.items():
      data = (
          pipeline
          | 'ReadData[{}]'.format(split) >> beam.io.ReadFromTFRecord(
              file_pattern=io_utils.all_files_pattern(example_uri)))
      data_list.append(data)
    _ = (
        data_list
        | 'FlattenExamples' >> beam.Flatten(pipeline=pipeline)
        | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
        | 'RunInference' >> run_inference.RunInference(inference_endpoint)
        | 'WritePredictionLogs' >> beam.io.WriteToTFRecord(
            output_path,
            file_name_suffix='.gz',
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
  logging.info('Inference result written to %s.', output_path)
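# A minimal sketch, not part of the original tests: the single-split flow
# that _run_model_inference above generalizes over multiple splits. The
# method name and arguments are illustrative; it assumes only the aliases
# this module already uses (beam, tf, run_inference, model_spec_pb2,
# prediction_log_pb2).
def _sketch_single_split_inference(self, example_uri: Text, model_path: Text,
                                   output_path: Text) -> None:
  """Illustrative single-split variant of _run_model_inference."""
  inference_endpoint = model_spec_pb2.InferenceEndpoint(
      saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))
  with self._make_beam_pipeline() as pipeline:
    _ = (
        pipeline
        # Read serialized tf.Examples from a single TFRecord location.
        | 'ReadData' >> beam.io.ReadFromTFRecord(file_pattern=example_uri)
        | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
        | 'RunInference' >> run_inference.RunInference(inference_endpoint)
        # Write one PredictionLog proto per input example.
        | 'WritePredictionLogs' >> beam.io.WriteToTFRecord(
            output_path,
            file_name_suffix='.gz',
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))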
def testMultiInferenceModel(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_multihead_examples(example_path)
  model_path = self._get_output_data_dir('model')
  self._build_multihead_model(model_path)
  prediction_log_path = self._get_output_data_dir('predictions')
  self._run_inference_with_beam(
      example_path,
      model_spec_pb2.InferenceEndpoint(
          saved_model_spec=model_spec_pb2.SavedModelSpec(
              model_path=model_path,
              signature_name=['regress_diff', 'classify_sum'])),
      prediction_log_path)

  results = self._get_results(prediction_log_path)
  self.assertLen(results, 2)
  multi_inference_log = results[0].multi_inference_log
  self.assertLen(multi_inference_log.request.input.example_list.examples, 1)
  self.assertEqual(multi_inference_log.request.input.example_list.examples[0],
                   self._multihead_examples[0])
  self.assertLen(multi_inference_log.response.results, 2)
  signature_names = []
  for result in multi_inference_log.response.results:
    signature_names.append(result.model_spec.signature_name)
  self.assertIn('regress_diff', signature_names)
  self.assertIn('classify_sum', signature_names)
  result = multi_inference_log.response.results[0]
  self.assertEqual(result.model_spec.signature_name, 'regress_diff')
  self.assertLen(result.regression_result.regressions, 1)
  self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6)
  result = multi_inference_log.response.results[1]
  self.assertEqual(result.model_spec.signature_name, 'classify_sum')
  self.assertLen(result.classification_result.classifications, 1)
  self.assertLen(result.classification_result.classifications[0].classes, 1)
  self.assertAlmostEqual(
      result.classification_result.classifications[0].classes[0].score, 1.0)