def testDoWithOutputExamplesSpecifiedSplits(self):
  self._exec_properties['data_spec'] = proto_utils.proto_to_json(
      text_format.Parse(
          """
          example_splits: 'unlabelled'
          """, bulk_inferrer_pb2.DataSpec()))
  self._exec_properties['output_example_spec'] = proto_utils.proto_to_json(
      text_format.Parse(
          """
          output_columns_spec {
            classify_output {
              label_column: 'classify_label'
              score_column: 'classify_score'
            }
          }
          """, bulk_inferrer_pb2.OutputExampleSpec()))

  # Run executor.
  bulk_inferrer = executor.Executor(self._context)
  bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                   self._exec_properties)

  # Check outputs.
  self.assertTrue(fileio.exists(self._output_examples_dir))
  self._verify_example_split('unlabelled')
  self.assertFalse(
      fileio.exists(os.path.join(self._output_examples_dir, 'unlabelled2')))
def test_convert_for_regress_invalid_output_example_spec(self):
  prediction_log = text_format.Parse(
      """
      regress_log {
        request {
          input {
            example_list {
              examples {
                features {
                  feature: {
                    key: "regress_input"
                    value: { bytes_list: { value: "feature" } }
                  }
                }
              }
            }
          }
        }
        response { result { regressions { value: 0.7 } } }
      }
      """, prediction_log_pb2.PredictionLog())
  output_example_spec = text_format.Parse(
      """
      output_columns_spec {
      }
      """, bulk_inferrer_pb2.OutputExampleSpec())
  with self.assertRaises(ValueError):
    utils.convert(prediction_log, output_example_spec)
def testConstructOutputExample(self):
  bulk_inferrer = component.CloudAIBulkInferrerComponent(
      examples=self._examples,
      model=self._model,
      model_blessing=self._model_blessing,
      output_example_spec=bulk_inferrer_pb2.OutputExampleSpec())
  self.assertEqual('Examples',
                   bulk_inferrer.outputs['output_examples'].type_name)
  self.assertNotIn('inference_result', bulk_inferrer.outputs.keys())
def testConstructOutputExample(self):
  bulk_inferrer = component.BulkInferrer(
      examples=self._examples,
      model=self._model,
      model_blessing=self._model_blessing,
      output_example_spec=bulk_inferrer_pb2.OutputExampleSpec())
  self.assertEqual(
      'Examples',
      bulk_inferrer.outputs[
          standard_component_specs.OUTPUT_EXAMPLES_KEY].type_name)
  self.assertNotIn('inference_result', bulk_inferrer.outputs.keys())
def test_convert_for_predict_invalid_output_example_spec(self):
  example = text_format.Parse(
      """
      features {
        feature {
          key: "predict_input"
          value: { bytes_list: { value: "feature" } }
        }
      }""", tf.train.Example())
  prediction_log = text_format.Parse(
      """
      predict_log {
        request {
          inputs {
            key: "%s"
            value {
              dtype: DT_STRING
              tensor_shape { dim { size: 1 } }
            }
          }
        }
        response {
          outputs {
            key: "output_float"
            value {
              dtype: DT_FLOAT
              tensor_shape { dim { size: 1 } dim { size: 2 } }
              float_val: 0.1
              float_val: 0.2
            }
          }
          outputs {
            key: "output_bytes"
            value {
              dtype: DT_STRING
              tensor_shape { dim { size: 1 } }
              string_val: "prediction"
            }
          }
        }
      }
      """ % (utils.INPUT_KEY), prediction_log_pb2.PredictionLog())
  # The closing quote cannot be parsed correctly when the `string_val` field
  # is set directly with a serialized string quoted in text format, so append
  # the serialized example programmatically instead.
  prediction_log.predict_log.request.inputs[
      utils.INPUT_KEY].string_val.append(example.SerializeToString())
  output_example_spec = text_format.Parse(
      """
      output_columns_spec {
      }
      """, bulk_inferrer_pb2.OutputExampleSpec())
  with self.assertRaises(ValueError):
    utils.convert(prediction_log, output_example_spec)
def test_convert_for_regress(self):
  prediction_log = text_format.Parse(
      """
      regress_log {
        request {
          input {
            example_list {
              examples {
                features {
                  feature: {
                    key: "regress_input"
                    value: { bytes_list: { value: "feature" } }
                  }
                }
              }
            }
          }
        }
        response { result { regressions { value: 0.7 } } }
      }
      """, prediction_log_pb2.PredictionLog())
  output_example_spec = text_format.Parse(
      """
      output_columns_spec {
        regress_output {
          value_column: 'regress_value'
        }
      }
      """, bulk_inferrer_pb2.OutputExampleSpec())
  expected_example = text_format.Parse(
      """
      features {
        feature: {
          key: "regress_input"
          value: { bytes_list: { value: "feature" } }
        }
        feature: {
          key: "regress_value"
          value: { float_list: { value: 0.7 } }
        }
      }
      """, tf.train.Example())
  self.assertProtoEquals(expected_example,
                         utils.convert(prediction_log, output_example_spec))
def testConstructInferenceResultAndOutputExample(self):
  with self.assertRaises(ValueError):
    component.BulkInferrer(
        examples=self._examples,
        model=self._model,
        model_blessing=self._model_blessing,
        output_examples=channel_utils.as_channel(
            [standard_artifacts.Examples()]))
  with self.assertRaises(ValueError):
    component.BulkInferrer(
        examples=self._examples,
        model=self._model,
        model_blessing=self._model_blessing,
        output_example_spec=bulk_inferrer_pb2.OutputExampleSpec(),
        inference_result=channel_utils.as_channel(
            [standard_artifacts.InferenceResult()]))
def testDoWithOutputExamplesAllSplits(self):
  self._exec_properties[
      standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY] = (
          proto_utils.proto_to_json(
              text_format.Parse(
                  """
                  output_columns_spec {
                    classify_output {
                      label_column: 'classify_label'
                      score_column: 'classify_score'
                    }
                  }
                  """, bulk_inferrer_pb2.OutputExampleSpec())))

  # Run executor.
  bulk_inferrer = executor.Executor(self._context)
  bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                   self._exec_properties)

  # Check outputs.
  self.assertTrue(fileio.exists(self._output_examples_dir))
  self._verify_example_split('unlabelled')
  self._verify_example_split('unlabelled2')
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model: exported model.
      - model_blessing: model blessing result, optional.
    output_dict: Output dict from output key to a list of Artifacts.
      - inference_result: bulk inference results.
      - output_examples: input examples with inference results attached.
    exec_properties: A dict of execution properties.
      - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
      - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.
      - output_example_spec: JSON string of
        bulk_inferrer_pb2.OutputExampleSpec instance, optional.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  if output_dict.get(standard_component_specs.INFERENCE_RESULT_KEY):
    inference_result = artifact_utils.get_single_instance(
        output_dict[standard_component_specs.INFERENCE_RESULT_KEY])
  else:
    inference_result = None
  if output_dict.get(standard_component_specs.OUTPUT_EXAMPLES_KEY):
    output_examples = artifact_utils.get_single_instance(
        output_dict[standard_component_specs.OUTPUT_EXAMPLES_KEY])
  else:
    output_examples = None

  if standard_component_specs.EXAMPLES_KEY not in input_dict:
    raise ValueError('\'examples\' is missing in input dict.')
  if standard_component_specs.MODEL_KEY not in input_dict:
    raise ValueError('Input models are not valid; a model needs to be '
                     'specified.')

  if standard_component_specs.MODEL_BLESSING_KEY in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict[standard_component_specs.MODEL_BLESSING_KEY])
    if not model_utils.is_model_blessed(model_blessing):
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
  else:
    logging.info(
        'Model blessing is not provided; the exported model will be used.')

  model = artifact_utils.get_single_instance(
      input_dict[standard_component_specs.MODEL_KEY])
  model_path = path_utils.serving_model_path(
      model.uri, path_utils.is_old_model_artifact(model))
  logging.info('Use exported model from %s.', model_path)

  data_spec = bulk_inferrer_pb2.DataSpec()
  proto_utils.json_to_proto(
      exec_properties[standard_component_specs.DATA_SPEC_KEY], data_spec)

  output_example_spec = bulk_inferrer_pb2.OutputExampleSpec()
  if exec_properties.get(standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY):
    proto_utils.json_to_proto(
        exec_properties[standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY],
        output_example_spec)

  self._run_model_inference(
      data_spec, output_example_spec,
      input_dict[standard_component_specs.EXAMPLES_KEY], output_examples,
      inference_result, self._get_inference_spec(model_path, exec_properties))
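# A minimal sketch of how the execution properties consumed by `Do` above
# could be assembled. The split name and output column names are hypothetical
# placeholders; the proto-to-JSON round-trip mirrors the tests earlier in this
# section.
from google.protobuf import text_format
from tfx.proto import bulk_inferrer_pb2
from tfx.utils import proto_utils

exec_properties = {
    'data_spec': proto_utils.proto_to_json(
        text_format.Parse("example_splits: 'unlabelled'",
                          bulk_inferrer_pb2.DataSpec())),
    'output_example_spec': proto_utils.proto_to_json(
        text_format.Parse(
            """
            output_columns_spec {
              classify_output {
                label_column: 'classify_label'
                score_column: 'classify_score'
              }
            }
            """, bulk_inferrer_pb2.OutputExampleSpec())),
}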
def test_convert_for_multi_inference(self):
  prediction_log = text_format.Parse(
      """
      multi_inference_log {
        request {
          input {
            example_list {
              examples {
                features {
                  feature: {
                    key: "input"
                    value: { bytes_list: { value: "feature" } }
                  }
                }
              }
            }
          }
        }
        response {
          results {
            model_spec { signature_name: 'classification' }
            classification_result {
              classifications {
                classes { label: '1' score: 0.6 }
                classes { label: '0' score: 0.4 }
              }
            }
          }
          results {
            model_spec { signature_name: 'regression' }
            regression_result {
              regressions { value: 0.7 }
            }
          }
        }
      }
      """, prediction_log_pb2.PredictionLog())
  output_example_spec = text_format.Parse(
      """
      output_columns_spec {
        signature_name: 'classification'
        classify_output {
          label_column: 'classify_label'
          score_column: 'classify_score'
        }
      }
      output_columns_spec {
        signature_name: 'regression'
        regress_output {
          value_column: 'regress_value'
        }
      }
      """, bulk_inferrer_pb2.OutputExampleSpec())
  expected_example = text_format.Parse(
      """
      features {
        feature: {
          key: "input"
          value: { bytes_list: { value: "feature" } }
        }
        feature: {
          key: "classify_label"
          value: { bytes_list: { value: "1" value: "0" } }
        }
        feature: {
          key: "classify_score"
          value: { float_list: { value: 0.6 value: 0.4 } }
        }
        feature: {
          key: "regress_value"
          value: { float_list: { value: 0.7 } }
        }
      }
      """, tf.train.Example())
  self.assertProtoEquals(expected_example,
                         utils.convert(prediction_log, output_example_spec))
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model: exported model.
      - model_blessing: model blessing result, optional.
    output_dict: Output dict from output key to a list of Artifacts.
      - predictions: bulk inference results.
    exec_properties: A dict of execution properties.
      - source: import path of the inferrer step class to load.
      - args: keyword arguments used to instantiate the inferrer step.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  source = exec_properties[StepKeys.SOURCE]
  args = exec_properties[StepKeys.ARGS]
  c = source_utils.load_source_path_class(source)
  inferrer_step: BaseInferrer = c(**args)

  output_examples = artifact_utils.get_single_instance(
      output_dict[PREDICTIONS])

  if EXAMPLES not in input_dict:
    raise ValueError('\'examples\' is missing in input dict.')
  if MODEL not in input_dict:
    raise ValueError('Input models are not valid; a model needs to be '
                     'specified.')

  if MODEL_BLESSING in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict[MODEL_BLESSING])
    if not model_utils.is_model_blessed(model_blessing):
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
  else:
    logging.info(
        'Model blessing is not provided; the exported model will be used.')

  model = artifact_utils.get_single_instance(input_dict[MODEL])
  model_path = path_utils.serving_model_path(model.uri)
  logging.info('Use exported model from %s.', model_path)

  output_example_spec = bulk_inferrer_pb2.OutputExampleSpec(
      output_columns_spec=[
          bulk_inferrer_pb2.OutputColumnsSpec(
              predict_output=bulk_inferrer_pb2.PredictOutput(
                  output_columns=[
                      bulk_inferrer_pb2.PredictOutputCol(
                          output_key=x,
                          output_column=f'{x}_label',
                      ) for x in inferrer_step.get_labels()
                  ]))
      ])

  model_spec = bulk_inferrer_pb2.ModelSpec()
  saved_model_spec = model_spec_pb2.SavedModelSpec(
      model_path=model_path,
      tag=model_spec.tag,
      signature_name=model_spec.model_signature_name)
  inference_spec = model_spec_pb2.InferenceSpecType()
  inference_spec.saved_model_spec.CopyFrom(saved_model_spec)

  self._run_model_inference(output_example_spec, input_dict[EXAMPLES],
                            output_examples, inference_spec, inferrer_step)
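# A minimal sketch of an inferrer step that could be loaded via
# `StepKeys.SOURCE` in the executor above. `BaseInferrer` and its full
# interface come from this codebase; only `get_labels` is exercised by the
# executor, and the class name, constructor signature, and label names here
# are hypothetical.
class MyInferrer(BaseInferrer):

  def __init__(self, labels: List[Text]):
    super().__init__()
    self._labels = labels

  def get_labels(self) -> List[Text]:
    # Output keys of the model's predict signature; each key `x` becomes an
    # '{x}_label' column in the output examples.
    return self._labels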
def Do(self, input_dict: Dict[str, List[types.Artifact]],
       output_dict: Dict[str, List[types.Artifact]],
       exec_properties: Dict[str, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  This function creates a new model (if necessary) and a new model version
  before inference, and cleans up resources after inference. It provides
  re-executability because it cleans up (only) the model resources created
  during the process, even if the inference job failed.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model: exported model.
      - model_blessing: model blessing result.
    output_dict: Output dict from output key to a list of Artifacts.
      - inference_result: bulk inference results.
      - output_examples: input examples with inference results attached.
    exec_properties: A dict of execution properties.
      - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.
      - output_example_spec: JSON string of
        bulk_inferrer_pb2.OutputExampleSpec instance, optional.
      - custom_config: custom_config.ai_platform_serving_args needs to
        contain the serving job parameters sent to Google Cloud AI Platform.
        For the full set of parameters, refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.models

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  if output_dict.get('inference_result'):
    inference_result = artifact_utils.get_single_instance(
        output_dict['inference_result'])
  else:
    inference_result = None
  if output_dict.get('output_examples'):
    output_examples = artifact_utils.get_single_instance(
        output_dict['output_examples'])
  else:
    output_examples = None

  if 'examples' not in input_dict:
    raise ValueError('`examples` is missing in input dict.')
  if 'model' not in input_dict:
    raise ValueError('Input models are not valid; a model needs to be '
                     'specified.')

  if 'model_blessing' in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict['model_blessing'])
    if not model_utils.is_model_blessed(model_blessing):
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
  else:
    logging.info(
        'Model blessing is not provided; the exported model will be used.')

  if _CUSTOM_CONFIG_KEY not in exec_properties:
    raise ValueError('Input exec properties are not valid, {} '
                     'needs to be specified.'.format(_CUSTOM_CONFIG_KEY))

  custom_config = json_utils.loads(
      exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
  if custom_config is not None and not isinstance(custom_config, dict):
    raise ValueError('custom_config in execution properties needs to be a '
                     'dict.')
  ai_platform_serving_args = custom_config.get(SERVING_ARGS_KEY)
  if not ai_platform_serving_args:
    raise ValueError(
        '`ai_platform_serving_args` is missing in `custom_config`')
  service_name, api_version = runner.get_service_name_and_api_version(
      ai_platform_serving_args)
  executor_class_path = '%s.%s' % (self.__class__.__module__,
                                   self.__class__.__name__)
  with telemetry_utils.scoped_labels(
      {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
    job_labels = telemetry_utils.make_labels_dict()
  model = artifact_utils.get_single_instance(input_dict['model'])
  model_path = path_utils.serving_model_path(
      model.uri, path_utils.is_old_model_artifact(model))
  logging.info('Use exported model from %s.', model_path)
  # Use the model artifact uri to generate the model version, to guarantee a
  # 1:1 mapping from model version to model.
  model_version = 'version_' + hashlib.sha256(model.uri.encode()).hexdigest()
  inference_spec = self._get_inference_spec(model_path, model_version,
                                            ai_platform_serving_args)
  data_spec = bulk_inferrer_pb2.DataSpec()
  proto_utils.json_to_proto(exec_properties['data_spec'], data_spec)
  output_example_spec = bulk_inferrer_pb2.OutputExampleSpec()
  if exec_properties.get('output_example_spec'):
    proto_utils.json_to_proto(exec_properties['output_example_spec'],
                              output_example_spec)
  endpoint = custom_config.get(constants.ENDPOINT_ARGS_KEY)
  if endpoint and 'regions' in ai_platform_serving_args:
    raise ValueError(
        '`endpoint` and `ai_platform_serving_args.regions` cannot be set '
        'simultaneously.')
  api = discovery.build(
      service_name,
      api_version,
      requestBuilder=telemetry_utils.TFXHttpRequest,
      client_options=client_options.ClientOptions(api_endpoint=endpoint),
  )
  new_model_endpoint_created = False
  try:
    new_model_endpoint_created = (
        runner.create_model_for_aip_prediction_if_not_exist(
            job_labels, ai_platform_serving_args, api))
    runner.deploy_model_for_aip_prediction(
        serving_path=model_path,
        model_version_name=model_version,
        ai_platform_serving_args=ai_platform_serving_args,
        api=api,
        labels=job_labels,
        skip_model_endpoint_creation=True,
        set_default=False,
    )
    self._run_model_inference(data_spec, output_example_spec,
                              input_dict['examples'], output_examples,
                              inference_result, inference_spec)
  except Exception as e:
    logging.error('Error in executing CloudAIBulkInferrerComponent: %s',
                  str(e))
    raise
  finally:
    # Guarantee newly created resources are cleaned up even if the inference
    # job failed: clean up the newly deployed model.
    runner.delete_model_from_aip_if_exists(
        model_version_name=model_version,
        ai_platform_serving_args=ai_platform_serving_args,
        api=api,
        delete_model_endpoint=new_model_endpoint_created)
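# A minimal sketch of the `custom_config` execution property expected by the
# Cloud AI executor above, assuming _CUSTOM_CONFIG_KEY is 'custom_config' and
# SERVING_ARGS_KEY is 'ai_platform_serving_args'. The project, model, and
# region values are hypothetical placeholders.
from tfx.utils import json_utils

custom_config = {
    'ai_platform_serving_args': {
        'model_name': 'my_model',        # hypothetical
        'project_id': 'my-gcp-project',  # hypothetical
        'regions': ['us-central1'],      # hypothetical
    },
}
exec_properties = {'custom_config': json_utils.dumps(custom_config)}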