def testProtoExecPropertyMessageFieldTextFormat(self):
  # Access a message type proto field
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: EXEC_PROPERTY
            key: "proto_property"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
        proto_field_path: ".tensorflow_serving"
        serialization_format: TEXT_FORMAT
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  # If proto_field_path points to a message type field, the message will
  # be rendered using text_format.
  self.assertEqual(
      placeholder_utils.resolve_placeholder_expression(
          pb, self._resolution_context),
      "tags: \"latest\"\ntags: \"1.15.0-gpu\"\n")
def testProtoExecPropertyPrimitiveField(self):
  # Access a non-message type proto field
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: EXEC_PROPERTY
            key: "proto_property"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
        proto_field_path: ".tensorflow_serving"
        proto_field_path: ".tags"
        proto_field_path: "[1]"
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  # Prepare FileDescriptorSet
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  self.assertEqual(
      placeholder_utils.resolve_placeholder_expression(
          pb, self._resolution_context),
      "1.15.0-gpu")
def testProtoRuntimeInfoNoneAccess(self):
  # Access a missing platform config.
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: RUNTIME_INFO
            key: "platform_config"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
        proto_field_path: ".tensorflow_serving"
        proto_field_path: ".tags"
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  # Prepare FileDescriptorSet
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  self.assertIsNone(
      placeholder_utils.resolve_placeholder_expression(
          pb, self._none_resolution_context))
def testProtoExecPropertyInvalidField(self):
  # Access a field that does not exist on the proto.
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: EXEC_PROPERTY
            key: "proto_property"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
        proto_field_path: ".some_invalid_field"
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  # Prepare FileDescriptorSet
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  with self.assertRaises(AttributeError):
    placeholder_utils.resolve_placeholder_expression(
        pb, self._resolution_context)
def setUp(self):
  super(PlaceholderUtilsTest, self).setUp()
  examples = [standard_artifacts.Examples()]
  examples[0].uri = "/tmp"
  examples[0].split_names = artifact_utils.encode_split_names(
      ["train", "eval"])

  serving_spec = infra_validator_pb2.ServingSpec()
  serving_spec.tensorflow_serving.tags.extend(["latest", "1.15.0-gpu"])

  self._resolution_context = placeholder_utils.ResolutionContext(
      exec_info=data_types.ExecutionInfo(
          input_dict={
              "model": [standard_artifacts.Model()],
              "examples": examples,
          },
          output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
          exec_properties={
              "proto_property":
                  json_format.MessageToJson(
                      message=serving_spec,
                      sort_keys=True,
                      preserving_proto_field_name=True,
                      indent=0)
          },
          execution_output_uri="test_executor_output_uri",
          stateful_working_dir="test_stateful_working_dir",
          pipeline_node=pipeline_pb2.PipelineNode(
              node_info=pipeline_pb2.NodeInfo(
                  type=metadata_store_pb2.ExecutionType(
                      name="infra_validator"))),
          pipeline_info=pipeline_pb2.PipelineInfo(id="test_pipeline_id")))
def testProtoWithoutSerializationFormat(self):
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: EXEC_PROPERTY
            key: "proto_property"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  # Prepare FileDescriptorSet
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  with self.assertRaises(ValueError):
    placeholder_utils.resolve_placeholder_expression(
        pb, self._resolution_context)
def get_pipeline(pipeline_def: ftfx.PipelineDef) -> ftfx.PipelineDef:
    current_dir = os.path.dirname(os.path.realpath(__file__))
    user_code_file = os.path.join(current_dir, 'model_code.py')
    logging.info(
        f'Using {user_code_file} for preprocessing, training and tuning functions'
    )

    return pipeline_def.from_csv(os.path.join(current_dir, 'data')) \
        .generate_statistics() \
        .infer_schema(infer_feature_shape=True) \
        .validate_input_data() \
        .preprocess(user_code_file) \
        .tune(user_code_file,
              train_args=trainer_pb2.TrainArgs(num_steps=5),
              eval_args=trainer_pb2.EvalArgs(num_steps=3)) \
        .train(user_code_file,
               train_args=trainer_pb2.TrainArgs(num_steps=10),
               eval_args=trainer_pb2.EvalArgs(num_steps=5)) \
        .evaluate_model(eval_config=_get_eval_config()) \
        .infra_validate(
            serving_spec=infra_validator_pb2.ServingSpec(
                tensorflow_serving=infra_validator_pb2.TensorFlowServing(
                    tags=['latest']),
                local_docker=infra_validator_pb2.LocalDockerConfig()),
            request_spec=infra_validator_pb2.RequestSpec(
                tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec())) \
        .push_to(relative_push_uri='serving') \
        .bulk_infer(example_provider_component=ftfx.input_builders.from_csv(
            uri=os.path.join(current_dir, 'to_infer'),
            name='bulk_infer_example_gen')) \
        .add_custom_component(name='tips_printer',
                              component=tips_printer_build_fn)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Contract for running InfraValidator Executor.

  Args:
    input_dict:
      - `model`: Single `Model` artifact that we're validating.
      - `examples`: `Examples` artifacts to be used for test requests.
    output_dict:
      - `blessing`: Single `InfraBlessing` artifact containing the validated
        result. It is an empty file with the name either of INFRA_BLESSED or
        INFRA_NOT_BLESSED.
    exec_properties:
      - `serving_spec`: Serialized `ServingSpec` configuration.
      - `validation_spec`: Serialized `ValidationSpec` configuration.
      - `request_spec`: Serialized `RequestSpec` configuration.
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  model = artifact_utils.get_single_instance(input_dict[_MODEL_KEY])
  blessing = artifact_utils.get_single_instance(output_dict[_BLESSING_KEY])

  if input_dict.get(_EXAMPLES_KEY):
    examples = artifact_utils.get_single_instance(input_dict[_EXAMPLES_KEY])
  else:
    examples = None

  serving_spec = infra_validator_pb2.ServingSpec()
  json_format.Parse(exec_properties[_SERVING_SPEC_KEY], serving_spec)
  if not serving_spec.model_name:
    serving_spec.model_name = _DEFAULT_MODEL_NAME

  validation_spec = infra_validator_pb2.ValidationSpec()
  if exec_properties.get(_VALIDATION_SPEC_KEY):
    json_format.Parse(exec_properties[_VALIDATION_SPEC_KEY], validation_spec)
  if not validation_spec.num_tries:
    validation_spec.num_tries = _DEFAULT_NUM_TRIES
  if not validation_spec.max_loading_time_seconds:
    validation_spec.max_loading_time_seconds = _DEFAULT_MAX_LOADING_TIME_SEC

  if exec_properties.get(_REQUEST_SPEC_KEY):
    request_spec = infra_validator_pb2.RequestSpec()
    json_format.Parse(exec_properties[_REQUEST_SPEC_KEY], request_spec)
  else:
    request_spec = None

  with self._InstallGracefulShutdownHandler():
    self._Do(
        model=model,
        examples=examples,
        blessing=blessing,
        serving_spec=serving_spec,
        validation_spec=validation_spec,
        request_spec=request_spec,
    )
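# For context on how the serialized `serving_spec` exec property consumed by
# Do() above is typically produced: the setUp() snippets in this section build
# a ServingSpec proto and serialize it with json_format. A minimal sketch
# (assumed, not part of the original executor code; the tag and config values
# are illustrative):
from google.protobuf import json_format
from tfx.proto import infra_validator_pb2

serving_spec = infra_validator_pb2.ServingSpec()
serving_spec.tensorflow_serving.tags.extend(['latest'])
serving_spec.local_docker.SetInParent()  # run the model server in local Docker

exec_properties = {
    'serving_spec': json_format.MessageToJson(
        message=serving_spec, preserving_proto_field_name=True),
}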
def testParseServingBinaries_TensorFlowServing_DefaultImageName(self):
  spec = infra_validator_pb2.ServingSpec(
      tensorflow_serving=infra_validator_pb2.TensorFlowServing(
          tags=['latest']))
  result = serving_bins.parse_serving_binaries(spec)

  self.assertLen(result, 1)
  self.assertIsInstance(result[0], serving_bins.TensorFlowServing)
  self.assertEqual(result[0].image, 'tensorflow/serving:latest')
def _CreateKubernetesRunner(self, k8s_config_dict=None):
  self._serving_spec = infra_validator_pb2.ServingSpec()
  json_format.ParseDict({
      'tensorflow_serving': {
          'tags': ['1.15.0']
      },
      'kubernetes': k8s_config_dict or {},
      'model_name': self._model_name,
  }, self._serving_spec)
  serving_binary = serving_bins.parse_serving_binaries(self._serving_spec)[0]

  return kubernetes_runner.KubernetesRunner(
      model_path=path_utils.serving_model_path(self._model.uri),
      serving_binary=serving_binary,
      serving_spec=self._serving_spec)
def testEnableCache(self):
  model = standard_artifacts.Model()
  serving_spec = infra_validator_pb2.ServingSpec()
  validation_spec = infra_validator_pb2.ValidationSpec()

  infra_validator_1 = component.InfraValidator(
      model=channel_utils.as_channel([model]),
      serving_spec=serving_spec,
      validation_spec=validation_spec)
  self.assertEqual(None, infra_validator_1.enable_cache)

  infra_validator_2 = component.InfraValidator(
      model=channel_utils.as_channel([model]),
      serving_spec=serving_spec,
      validation_spec=validation_spec,
      enable_cache=True)
  self.assertEqual(True, infra_validator_2.enable_cache)
def testConstruct(self):
  model = standard_artifacts.Model()
  serving_spec = infra_validator_pb2.ServingSpec()
  infra_validator = component.InfraValidator(
      model=channel_utils.as_channel([model]),
      serving_spec=serving_spec)

  # Check channels have been created with proper type.
  self.assertEqual(standard_artifacts.Model,
                   infra_validator.inputs['model'].type)
  self.assertEqual(standard_artifacts.InfraBlessing,
                   infra_validator.outputs['blessing'].type)

  # Check exec_properties have been populated.
  self.assertEqual(
      '{}',  # Empty dictionary
      infra_validator.exec_properties['serving_spec'])
def testParseServingBinaries_TensorFlowServing(self):
  spec = infra_validator_pb2.ServingSpec(
      tensorflow_serving=infra_validator_pb2.TensorFlowServing(
          image_name='gcr.io/my_project/my_serving_image',
          tags=['t1', 't2'],
          digests=['sha256:d1', 'sha256:d2']))
  result = serving_bins.parse_serving_binaries(spec)

  self.assertLen(result, 4)
  for item in result:
    self.assertIsInstance(item, serving_bins.TensorFlowServing)
  self.assertCountEqual([item.image for item in result], [
      'gcr.io/my_project/my_serving_image:t1',
      'gcr.io/my_project/my_serving_image:t2',
      'gcr.io/my_project/my_serving_image@sha256:d1',
      'gcr.io/my_project/my_serving_image@sha256:d2',
  ])
def setUp(self):
  super(PlaceholderUtilsTest, self).setUp()
  examples = [standard_artifacts.Examples()]
  examples[0].uri = "/tmp"
  examples[0].split_names = artifact_utils.encode_split_names(
      ["train", "eval"])

  self._serving_spec = infra_validator_pb2.ServingSpec()
  self._serving_spec.tensorflow_serving.tags.extend(
      ["latest", "1.15.0-gpu"])

  self._resolution_context = placeholder_utils.ResolutionContext(
      exec_info=data_types.ExecutionInfo(
          input_dict={
              "model": [standard_artifacts.Model()],
              "examples": examples,
          },
          output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
          exec_properties={
              "proto_property": proto_utils.proto_to_json(self._serving_spec)
          },
          execution_output_uri="test_executor_output_uri",
          stateful_working_dir="test_stateful_working_dir",
          pipeline_node=pipeline_pb2.PipelineNode(
              node_info=pipeline_pb2.NodeInfo(
                  type=metadata_store_pb2.ExecutionType(
                      name="infra_validator"))),
          pipeline_info=pipeline_pb2.PipelineInfo(id="test_pipeline_id")),
      executor_spec=executable_spec_pb2.PythonClassExecutableSpec(
          class_path="test_class_path"),
  )

  # Resolution context to simulate missing optional values.
  self._none_resolution_context = placeholder_utils.ResolutionContext(
      exec_info=data_types.ExecutionInfo(
          input_dict={},
          output_dict={},
          exec_properties={},
          pipeline_node=pipeline_pb2.PipelineNode(
              node_info=pipeline_pb2.NodeInfo(
                  type=metadata_store_pb2.ExecutionType(
                      name="infra_validator"))),
          pipeline_info=pipeline_pb2.PipelineInfo(id="test_pipeline_id")),
      executor_spec=None,
      platform_config=None)
def setUp(self):
  super(PlaceholderUtilsTest, self).setUp()
  examples = [standard_artifacts.Examples()]
  examples[0].uri = "/tmp"
  examples[0].split_names = artifact_utils.encode_split_names(
      ["train", "eval"])

  serving_spec = infra_validator_pb2.ServingSpec()
  serving_spec.tensorflow_serving.tags.extend(["latest", "1.15.0-gpu"])

  self._resolution_context = placeholder_utils.ResolutionContext(
      input_dict={
          "model": [standard_artifacts.Model()],
          "examples": examples,
      },
      output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
      exec_properties={
          "proto_property": serving_spec.SerializeToString(),
          "double_list_property": [0.7, 0.8, 0.9],
      })
def setUp(self):
  super(PlaceholderUtilsTest, self).setUp()
  examples = [standard_artifacts.Examples()]
  examples[0].uri = "/tmp"
  examples[0].split_names = artifact_utils.encode_split_names(
      ["train", "eval"])

  serving_spec = infra_validator_pb2.ServingSpec()
  serving_spec.tensorflow_serving.tags.extend(["latest", "1.15.0-gpu"])

  self._resolution_context = placeholder_utils.ResolutionContext(
      input_dict={
          "model": [standard_artifacts.Model()],
          "examples": examples,
      },
      output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
      exec_properties={
          "proto_property":
              json_format.MessageToJson(
                  message=serving_spec,
                  sort_keys=True,
                  preserving_proto_field_name=True)
      })
def testConstruct(self):
  model = standard_artifacts.Model()
  serving_spec = infra_validator_pb2.ServingSpec()
  validation_spec = infra_validator_pb2.ValidationSpec()
  infra_validator = component.InfraValidator(
      model=channel_utils.as_channel([model]),
      serving_spec=serving_spec,
      validation_spec=validation_spec)

  # Check channels have been created with proper type.
  self.assertEqual(
      standard_artifacts.Model,
      infra_validator.inputs[standard_component_specs.MODEL_KEY].type)
  self.assertEqual(
      standard_artifacts.InfraBlessing,
      infra_validator.outputs[standard_component_specs.BLESSING_KEY].type)

  # Check exec_properties have been populated.
  self.assertIn(standard_component_specs.SERVING_SPEC_KEY,
                infra_validator.exec_properties)
  self.assertIn(standard_component_specs.VALIDATION_SPEC_KEY,
                infra_validator.exec_properties)
def testProtoSerializationJSON(self):
  placeholder_expression = """
    operator {
      proto_op {
        expression {
          placeholder {
            type: EXEC_PROPERTY
            key: "proto_property"
          }
        }
        proto_schema {
          message_type: "tfx.components.infra_validator.ServingSpec"
        }
        serialization_format: JSON
      }
    }
  """
  pb = text_format.Parse(placeholder_expression,
                         placeholder_pb2.PlaceholderExpression())
  # Prepare FileDescriptorSet
  fd = descriptor_pb2.FileDescriptorProto()
  infra_validator_pb2.ServingSpec().DESCRIPTOR.file.CopyToProto(fd)
  pb.operator.proto_op.proto_schema.file_descriptors.file.append(fd)

  expected_json_serialization = """\
{
  "tensorflow_serving": {
    "tags": [
      "latest",
      "1.15.0-gpu"
    ]
  }
}"""

  self.assertEqual(
      placeholder_utils.resolve_placeholder_expression(
          pb, self._resolution_context), expected_json_serialization)
def create_e2e_components(
    pipeline_root: Text,
    csv_input_location: Text,
    transform_module: Text,
    trainer_module: Text,
) -> List[BaseComponent]:
  """Creates components for a simple Chicago Taxi TFX pipeline for testing.

  Args:
    pipeline_root: The root of the pipeline output.
    csv_input_location: The location of the input data directory.
    transform_module: The location of the transform module file.
    trainer_module: The location of the trainer module file.

  Returns:
    A list of TFX components that constitutes an end-to-end test pipeline.
  """
  example_gen = CsvExampleGen(input_base=csv_input_location)
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=transform_module)
  latest_model_resolver = resolver.Resolver(
      strategy_class=latest_artifact_strategy.LatestArtifactStrategy,
      latest_model=Channel(type=Model)).with_id('latest_model_resolver')
  trainer = Trainer(
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      base_model=latest_model_resolver.outputs['latest_model'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10),
      eval_args=trainer_pb2.EvalArgs(num_steps=5),
      module_file=trainer_module,
  )
  # Set the TFMA config for Model Evaluation and Validation.
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      metrics_specs=[
          tfma.MetricsSpec(
              metrics=[tfma.MetricConfig(class_name='ExampleCount')],
              thresholds={
                  'accuracy':
                      tfma.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.5}),
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      eval_config=eval_config)
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          kubernetes=infra_validator_pb2.KubernetesConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec()))
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=os.path.join(pipeline_root, 'model_serving'))))

  return [
      example_gen,
      statistics_gen,
      schema_gen,
      example_validator,
      transform,
      latest_model_resolver,
      trainer,
      evaluator,
      infra_validator,
      pusher,
  ]
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=data_root)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.6}),
                          # Change threshold will be ignored if there is no
                          # baseline model resolved from MLMD (first run).
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  # Performs infra validation of a candidate model to prevent unservable model
  # from being pushed.
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          local_docker=infra_validator_pb2.LocalDockerConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec()))

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      infra_blessing=infra_validator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          model_resolver,
          evaluator,
          infra_validator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
def create_pipeline(pipeline_name: Text,
                    pipeline_root: Text,
                    data_root_uri: data_types.RuntimeParameter,
                    train_steps: data_types.RuntimeParameter,
                    eval_steps: data_types.RuntimeParameter,
                    enable_tuning: bool,
                    ai_platform_training_args: Dict[Text, Text],
                    ai_platform_serving_args: Dict[Text, Text],
                    beam_pipeline_args: List[Text],
                    enable_cache: Optional[bool] = False) -> pipeline.Pipeline:
    """Trains and deploys the Keras Covertype Classifier with TFX and Kubeflow Pipeline on Google Cloud.

    Args:
      pipeline_name: name of the TFX pipeline being created.
      pipeline_root: root directory of the pipeline. Should be a valid GCS path.
      data_root_uri: uri of the dataset.
      train_steps: runtime parameter for number of model training steps for the
        Trainer component.
      eval_steps: runtime parameter for number of model evaluation steps for the
        Trainer component.
      enable_tuning: If True, the hyperparameter tuning through CloudTuner is
        enabled.
      ai_platform_training_args: Args of CAIP training job. Please refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#Job
        for detailed description.
      ai_platform_serving_args: Args of CAIP model deployment. Please refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.models
        for detailed description.
      beam_pipeline_args: Optional list of beam pipeline options. Please refer to
        https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options.
        When this argument is not provided, the default is to use GCP
        DataflowRunner with 50GB disk size as specified in this function. If an
        empty list is passed in, default specified by Beam will be used, which
        can be found at
        https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options
      enable_cache: Optional boolean

    Returns:
      A TFX pipeline object.
    """

    # Brings data into the pipeline and splits the data into training and eval splits.
    output = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(splits=[
            example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=4),
            example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
        ]))

    examplegen = CsvExampleGen(input_base=data_root_uri, output_config=output)

    # Computes statistics over data for visualization and example validation.
    statisticsgen = StatisticsGen(examples=examplegen.outputs.examples)

    # Generates schema based on statistics files. Even though we use a
    # user-provided schema, we still want to generate the schema of the newest
    # data for tracking and comparison.
    schemagen = SchemaGen(statistics=statisticsgen.outputs.statistics)

    # Import a user-provided schema.
    import_schema = ImporterNode(instance_name='import_user_schema',
                                 source_uri=SCHEMA_FOLDER,
                                 artifact_type=Schema)

    # Performs anomaly detection based on statistics and data schema.
    examplevalidator = ExampleValidator(
        statistics=statisticsgen.outputs.statistics,
        schema=import_schema.outputs.result)

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=examplegen.outputs.examples,
                          schema=import_schema.outputs.result,
                          module_file=TRANSFORM_MODULE_FILE)

    # Tunes the hyperparameters for model training based on user-provided Python
    # function. Note that once the hyperparameters are tuned, you can drop the
    # Tuner component from pipeline and feed Trainer with tuned hyperparameters.
    if enable_tuning:
        # The Tuner component launches 1 AI Platform Training job for flock management.
        # For example, 3 workers (defined by num_parallel_trials) in the flock
        # management AI Platform Training job, each runs Tuner.Executor.
        tuner = Tuner(
            module_file=TRAIN_MODULE_FILE,
            examples=transform.outputs.transformed_examples,
            transform_graph=transform.outputs.transform_graph,
            train_args={'num_steps': train_steps},
            eval_args={'num_steps': eval_steps},
            tune_args=tuner_pb2.TuneArgs(
                # num_parallel_trials=3 means that 3 search loops are running in parallel.
                num_parallel_trials=3),
            custom_config={
                # Configures Cloud AI Platform-specific configs. For details, see
                # https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs#traininginput.
                ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                    ai_platform_training_args
            })

    # Trains the model using a user provided trainer function.
    trainer = Trainer(
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            ai_platform_trainer_executor.GenericExecutor),
        module_file=TRAIN_MODULE_FILE,
        transformed_examples=transform.outputs.transformed_examples,
        schema=import_schema.outputs.result,
        transform_graph=transform.outputs.transform_graph,
        hyperparameters=(tuner.outputs.best_hyperparameters
                         if enable_tuning else None),
        train_args={'num_steps': train_steps},
        eval_args={'num_steps': eval_steps},
        custom_config={'ai_platform_training_args': ai_platform_training_args})

    # Get the latest blessed model for model validation.
    resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute evaluation statistics over features of a model.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': 0.5},
            upper_bound={'value': 0.99}),
    )

    metrics_specs = tfma.MetricsSpec(metrics=[
        tfma.MetricConfig(class_name='SparseCategoricalAccuracy',
                          threshold=accuracy_threshold),
        tfma.MetricConfig(class_name='ExampleCount')
    ])

    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='Cover_Type')],
        metrics_specs=[metrics_specs],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['Wilderness_Area'])
        ])

    evaluator = Evaluator(examples=examplegen.outputs.examples,
                          model=trainer.outputs.model,
                          baseline_model=resolver.outputs.model,
                          eval_config=eval_config)

    # Validate model can be loaded and queried in sand-boxed environment
    # mirroring production.
    serving_config = infra_validator_pb2.ServingSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServing(
            tags=['latest']),
        kubernetes=infra_validator_pb2.KubernetesConfig(),
    )

    validation_config = infra_validator_pb2.ValidationSpec(
        max_loading_time_seconds=60,
        num_tries=3,
    )

    request_config = infra_validator_pb2.RequestSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec(),
        num_examples=3,
    )

    infravalidator = InfraValidator(
        model=trainer.outputs.model,
        examples=examplegen.outputs.examples,
        serving_spec=serving_config,
        validation_spec=validation_config,
        request_spec=request_config,
    )

    # Checks whether the model passed the validation steps and pushes the model
    # to CAIP Prediction if checks are passed.
    pusher = Pusher(
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            ai_platform_pusher_executor.Executor),
        model=trainer.outputs.model,
        model_blessing=evaluator.outputs.blessing,
        infra_blessing=infravalidator.outputs.blessing,
        custom_config={
            ai_platform_pusher_executor.SERVING_ARGS_KEY:
                ai_platform_serving_args
        })

    components = [
        examplegen, statisticsgen, schemagen, import_schema, examplevalidator,
        transform, trainer, resolver, evaluator, infravalidator, pusher
    ]
    if enable_tuning:
        components.append(tuner)

    return pipeline.Pipeline(pipeline_name=pipeline_name,
                             pipeline_root=pipeline_root,
                             components=components,
                             enable_cache=enable_cache,
                             beam_pipeline_args=beam_pipeline_args)
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, accuracy_threshold: float,
                     serving_model_dir: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text],
                     make_warmup: bool) -> pipeline.Pipeline:
  """Implements the penguin pipeline with TFX."""
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=data_root)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that trains a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=2000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='species')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='SparseCategoricalAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': accuracy_threshold}),
                      # Change threshold will be ignored if there is no
                      # baseline model resolved from MLMD (first run).
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  # Performs infra validation of a candidate model to prevent unservable model
  # from being pushed. This config will launch a model server of the latest
  # TensorFlow Serving image in a local docker engine.
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          local_docker=infra_validator_pb2.LocalDockerConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec(),
          # If this flag is set, InfraValidator will produce a model with
          # warmup requests (in its outputs['blessing']).
          make_warmup=make_warmup))

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  if make_warmup:
    # If InfraValidator.request_spec.make_warmup = True, its output contains
    # a model so that Pusher can push 'infra_blessing' input instead of
    # 'model' input.
    pusher = Pusher(
        model_blessing=evaluator.outputs['blessing'],
        infra_blessing=infra_validator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=serving_model_dir)))
  else:
    # Otherwise, 'infra_blessing' does not contain a model and is used as a
    # conditional checker just like 'model_blessing' does. This is the typical
    # use case.
    pusher = Pusher(
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        infra_blessing=infra_validator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          model_resolver,
          evaluator,
          infra_validator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
def create_pipeline(pipeline_name: Text,
                    pipeline_root: Text,
                    data_root_uri: data_types.RuntimeParameter,
                    train_steps: data_types.RuntimeParameter,
                    eval_steps: data_types.RuntimeParameter,
                    ai_platform_training_args: Dict[Text, Text],
                    ai_platform_serving_args: Dict[Text, Text],
                    beam_pipeline_args: List[Text],
                    enable_cache: Optional[bool] = False) -> pipeline.Pipeline:
    """Trains and deploys the Covertype classifier."""

    # Brings data into the pipeline and splits the data into training and eval splits.
    examples = external_input(data_root_uri)
    output_config = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(splits=[
            example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=4),
            example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
        ]))
    generate_examples = CsvExampleGen(input=examples)

    # Computes statistics over data for visualization and example validation.
    generate_statistics = StatisticsGen(
        examples=generate_examples.outputs.examples)

    # Import a user-provided schema.
    import_schema = ImporterNode(instance_name='import_user_schema',
                                 source_uri=SCHEMA_FOLDER,
                                 artifact_type=Schema)

    # Generates schema based on statistics files. Even though we use a
    # user-provided schema, we still want to generate the schema of the newest
    # data for tracking and comparison.
    infer_schema = SchemaGen(statistics=generate_statistics.outputs.statistics)

    # Performs anomaly detection based on statistics and data schema.
    validate_stats = ExampleValidator(
        statistics=generate_statistics.outputs.statistics,
        schema=import_schema.outputs.result)

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=generate_examples.outputs.examples,
                          schema=import_schema.outputs.result,
                          module_file=TRANSFORM_MODULE_FILE)

    # Trains the model using a user provided trainer function.
    train = Trainer(
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            ai_platform_trainer_executor.GenericExecutor),
        # custom_executor_spec=executor_spec.ExecutorClassSpec(trainer_executor.GenericExecutor),
        module_file=TRAIN_MODULE_FILE,
        transformed_examples=transform.outputs.transformed_examples,
        schema=import_schema.outputs.result,
        transform_graph=transform.outputs.transform_graph,
        train_args={'num_steps': train_steps},
        eval_args={'num_steps': eval_steps},
        custom_config={'ai_platform_training_args': ai_platform_training_args})

    # Get the latest blessed model for model validation.
    resolve = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute evaluation statistics over features of a model.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': 0.5},
            upper_bound={'value': 0.99}),
        change_threshold=tfma.GenericChangeThreshold(
            absolute={'value': 0.0001},
            direction=tfma.MetricDirection.HIGHER_IS_BETTER),
    )

    metrics_specs = tfma.MetricsSpec(metrics=[
        tfma.MetricConfig(class_name='SparseCategoricalAccuracy',
                          threshold=accuracy_threshold),
        tfma.MetricConfig(class_name='ExampleCount')
    ])

    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='Cover_Type')],
        metrics_specs=[metrics_specs],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['Wilderness_Area'])
        ])

    analyze = Evaluator(examples=generate_examples.outputs.examples,
                        model=train.outputs.model,
                        baseline_model=resolve.outputs.model,
                        eval_config=eval_config)

    # Validate model can be loaded and queried in sand-boxed environment
    # mirroring production.
    serving_config = infra_validator_pb2.ServingSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServing(
            tags=['latest']),
        kubernetes=infra_validator_pb2.KubernetesConfig(),
    )

    validation_config = infra_validator_pb2.ValidationSpec(
        max_loading_time_seconds=60,
        num_tries=3,
    )

    request_config = infra_validator_pb2.RequestSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec(),
        num_examples=3,
    )

    infra_validate = InfraValidator(
        model=train.outputs['model'],
        examples=generate_examples.outputs['examples'],
        serving_spec=serving_config,
        validation_spec=validation_config,
        request_spec=request_config,
    )

    # Checks whether the model passed the validation steps and pushes the model
    # to a file destination if check passed.
    deploy = Pusher(
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            ai_platform_pusher_executor.Executor),
        model=train.outputs['model'],
        model_blessing=analyze.outputs['blessing'],
        infra_blessing=infra_validate.outputs['blessing'],
        custom_config={
            ai_platform_pusher_executor.SERVING_ARGS_KEY:
                ai_platform_serving_args
        })

    return pipeline.Pipeline(pipeline_name=pipeline_name,
                             pipeline_root=pipeline_root,
                             components=[
                                 generate_examples, generate_statistics,
                                 import_schema, infer_schema, validate_stats,
                                 transform, train, resolve, analyze,
                                 infra_validate, deploy
                             ],
                             enable_cache=enable_cache,
                             beam_pipeline_args=beam_pipeline_args)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Contract for running InfraValidator Executor.

  Args:
    input_dict:
      - `model`: Single `Model` artifact that we're validating.
      - `examples`: `Examples` artifacts to be used for test requests.
    output_dict:
      - `blessing`: Single `InfraBlessing` artifact containing the validated
        result. It is an empty file with the name either of INFRA_BLESSED or
        INFRA_NOT_BLESSED.
    exec_properties:
      - `serving_spec`: Serialized `ServingSpec` configuration.
      - `validation_spec`: Serialized `ValidationSpec` configuration.
      - `request_spec`: Serialized `RequestSpec` configuration.
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  model = artifact_utils.get_single_instance(input_dict['model'])
  blessing = artifact_utils.get_single_instance(output_dict['blessing'])

  serving_spec = infra_validator_pb2.ServingSpec()
  json_format.Parse(exec_properties['serving_spec'], serving_spec)
  if not serving_spec.model_name:
    serving_spec.model_name = _DEFAULT_MODEL_NAME

  validation_spec = infra_validator_pb2.ValidationSpec()
  if 'validation_spec' in exec_properties:
    json_format.Parse(exec_properties['validation_spec'], validation_spec)
  if not validation_spec.num_tries:
    validation_spec.num_tries = _DEFAULT_NUM_TRIES
  if not validation_spec.max_loading_time_seconds:
    validation_spec.max_loading_time_seconds = _DEFAULT_MAX_LOADING_TIME_SEC

  if _is_query_mode(input_dict, exec_properties):
    logging.info('InfraValidator will be run in LOAD_AND_QUERY mode.')
    request_spec = infra_validator_pb2.RequestSpec()
    json_format.Parse(exec_properties['request_spec'], request_spec)
    examples = artifact_utils.get_single_instance(input_dict['examples'])
    requests = request_builder.build_requests(
        model_name=os.path.basename(
            os.path.dirname(path_utils.serving_model_path(model.uri))),
        examples=examples,
        request_spec=request_spec)
  else:
    logging.info('InfraValidator will be run in LOAD_ONLY mode.')
    requests = []

  model_path = self._PrepareModelPath(model.uri, serving_spec)
  try:
    # TODO(jjong): Make logic parallel.
    all_passed = True
    for serving_binary in serving_bins.parse_serving_binaries(serving_spec):
      all_passed &= self._ValidateWithRetry(
          model_path=model_path,
          serving_binary=serving_binary,
          serving_spec=serving_spec,
          validation_spec=validation_spec,
          requests=requests)
  finally:
    io_utils.delete_dir(self._get_tmp_dir())

  if all_passed:
    _mark_blessed(blessing)
  else:
    _mark_not_blessed(blessing)
def create_pipeline(pipeline_name: Text,
                    pipeline_root: Text,
                    data_root_uri,
                    trainer_config: TrainerConfig,
                    tuner_config: TunerConfig,
                    pusher_config: PusherConfig,
                    runtime_parameters_config: RuntimeParametersConfig = None,
                    str_runtime_parameters_supported=False,
                    int_runtime_parameters_supported=False,
                    local_run: bool = True,
                    beam_pipeline_args: Optional[List[Text]] = None,
                    enable_cache: Optional[bool] = True,
                    code_folder='',
                    metadata_connection_config: Optional[
                        metadata_store_pb2.ConnectionConfig] = None
                    ) -> pipeline.Pipeline:
    """Trains and deploys the Keras Titanic Classifier with TFX and Kubeflow Pipeline on Google Cloud.

    Args:
      pipeline_name: name of the TFX pipeline being created.
      pipeline_root: root directory of the pipeline. Should be a valid GCS path.
      data_root_uri: uri of the dataset.
      trainer_config: TrainerConfig with train/eval steps, epochs, batch sizes
        and optional AI Platform training args.
      tuner_config: TunerConfig controlling whether tuning is enabled, tuner
        steps, max trials and optional AI Platform tuner args.
      pusher_config: PusherConfig with the serving model directory or AI
        Platform serving args.
      runtime_parameters_config: optional RuntimeParametersConfig providing
        runtime parameters for the data root and train/eval steps.
      str_runtime_parameters_supported: whether string RuntimeParameters are
        supported by the orchestrator.
      int_runtime_parameters_supported: whether integer RuntimeParameters are
        supported by the orchestrator.
      local_run: if True, configure the pipeline for local execution (local
        Docker for InfraValidator, filesystem push destination).
      beam_pipeline_args: Optional list of beam pipeline options. Please refer to
        https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options.
      enable_cache: Optional boolean
      code_folder: folder containing the user-provided schema, preprocessing and
        model modules, and hyperparameters.
      metadata_connection_config: optional ML Metadata connection config.

    Returns:
      A TFX pipeline object.
""" #pydevd_pycharm.settrace('localhost', port=9091, stdoutToServer=True, stderrToServer=True) absl.logging.info('pipeline_name: %s' % pipeline_name) absl.logging.info('pipeline root: %s' % pipeline_root) absl.logging.info('data_root_uri for training: %s' % data_root_uri) absl.logging.info('train_steps for training: %s' % trainer_config.train_steps) absl.logging.info('tuner_steps for tuning: %s' % tuner_config.tuner_steps) absl.logging.info('eval_steps for evaluating: %s' % trainer_config.eval_steps) absl.logging.info('os default list dir: %s' % os.listdir('.')) schema_proper_folder = os.path.join(os.sep, code_folder, SCHEMA_FOLDER) absl.logging.info('schema_proper_folder: %s' % schema_proper_folder) preprocessing_proper_file = os.path.join(os.sep, code_folder, TRANSFORM_MODULE_FILE) absl.logging.info('preprocessing_proper_file: %s' % preprocessing_proper_file) model_proper_file = os.path.join(os.sep, code_folder, TRAIN_MODULE_FILE) absl.logging.info('model_proper_file: %s' % model_proper_file) hyperparameters_proper_folder = os.path.join(os.sep, code_folder, HYPERPARAMETERS_FOLDER) absl.logging.info('hyperparameters_proper_folder: %s' % hyperparameters_proper_folder) # Brings data into the pipeline and splits the data into training and eval splits output_config = example_gen_pb2.Output( split_config=example_gen_pb2.SplitConfig(splits=[ example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=4), example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1) ])) # examples = external_input(data_root_uri) if str_runtime_parameters_supported and runtime_parameters_config is not None: data_root_uri = runtime_parameters_config.data_root_runtime examplegen = CsvExampleGen(input_base=data_root_uri, output_config=output_config) # examplegen = CsvExampleGen(input_base=data_root_uri) # Computes statistics over data for visualization and example validation. statisticsgen = StatisticsGen(examples=examplegen.outputs.examples) # Generates schema based on statistics files. Even though, we use user-provided schema # we still want to generate the schema of the newest data for tracking and comparison schemagen = SchemaGen(statistics=statisticsgen.outputs.statistics) # Import a user-provided schema import_schema = Importer( source_uri=schema_proper_folder, artifact_type=Schema).with_id('import_user_schema') # Performs anomaly detection based on statistics and data schema. examplevalidator = ExampleValidator( statistics=statisticsgen.outputs.statistics, schema=import_schema.outputs.result) # Performs transformations and feature engineering in training and serving. transform = Transform( examples=examplegen.outputs.examples, schema=import_schema.outputs.result, module_file=preprocessing_proper_file) # Tunes the hyperparameters for model training based on user-provided Python # function. Note that once the hyperparameters are tuned, you can drop the # Tuner component from pipeline and feed Trainer with tuned hyperparameters. 
    hparams_importer = Importer(
        source_uri=hyperparameters_proper_folder,
        artifact_type=HyperParameters).with_id('import_hparams')

    # Apparently only str RuntimeParameters are supported in Airflow :/
    if int_runtime_parameters_supported and runtime_parameters_config is not None:
        train_steps = runtime_parameters_config.train_steps_runtime
        eval_steps = runtime_parameters_config.eval_steps_runtime
    else:
        train_steps = trainer_config.train_steps
        eval_steps = trainer_config.eval_steps

    absl.logging.info('train_steps: %s' % train_steps)
    absl.logging.info('eval_steps: %s' % eval_steps)

    if tuner_config.enable_tuning:
        tuner_args = {
            'module_file': model_proper_file,
            'examples': transform.outputs.transformed_examples,
            'transform_graph': transform.outputs.transform_graph,
            'train_args': {'num_steps': tuner_config.tuner_steps},
            'eval_args': {'num_steps': tuner_config.eval_tuner_steps},
            'custom_config': {'max_trials': tuner_config.max_trials,
                              'is_local_run': local_run}
            # 'tune_args': tuner_pb2.TuneArgs(num_parallel_trials=3),
        }

        if tuner_config.ai_platform_tuner_args is not None:
            tuner_args.update({
                'custom_config': {
                    ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                        tuner_config.ai_platform_tuner_args
                },
                'tune_args': tuner_pb2.TuneArgs(num_parallel_trials=3)
            })

        absl.logging.info("tuner_args: " + str(tuner_args))
        tuner = Tuner(**tuner_args)

    hyperparameters = (tuner.outputs.best_hyperparameters
                       if tuner_config.enable_tuning
                       else hparams_importer.outputs['result'])

    # Trains the model using a user provided trainer function.
    trainer_args = {
        'module_file': model_proper_file,
        'transformed_examples': transform.outputs.transformed_examples,
        'schema': import_schema.outputs.result,
        'transform_graph': transform.outputs.transform_graph,
        # train_args={'num_steps': train_steps},
        'train_args': {'num_steps': train_steps},
        'eval_args': {'num_steps': eval_steps},
        # 'hyperparameters': tuner.outputs.best_hyperparameters if tunerConfig.enable_tuning else None,
        'hyperparameters': hyperparameters,
        'custom_config': {
            'epochs': trainer_config.epochs,
            'train_batch_size': trainer_config.train_batch_size,
            'eval_batch_size': trainer_config.eval_batch_size,
        }
    }

    if trainer_config.ai_platform_training_args is not None:
        trainer_args['custom_config'].update({
            ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                trainer_config.ai_platform_training_args,
        })
        trainer_args.update({
            'custom_executor_spec':
                executor_spec.ExecutorClassSpec(
                    ai_platform_trainer_executor.GenericExecutor),
            # 'custom_config': {
            #     ai_platform_trainer_executor.TRAINING_ARGS_KEY:
            #         ai_platform_training_args,
            # }
        })
    else:
        trainer_args.update({
            'custom_executor_spec':
                executor_spec.ExecutorClassSpec(trainer_executor.GenericExecutor),
            # executor_spec.ExecutorClassSpec(custom_trainer_executor.CustomGenericExecutor),  # for debugging purposes
        })

    trainer = Trainer(**trainer_args)

    # Get the latest blessed model for model validation.
    model_resolver = resolver.Resolver(
        # instance_name='latest_blessed_model_resolver',  # instance_name is deprecated, use with_id()
        strategy_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing)
    ).with_id('latest_blessed_model_resolver')

    # Uses TFMA to compute evaluation statistics over features of a model.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': 0.5},
            upper_bound={'value': 0.995}),
    )

    metrics_specs = tfma.MetricsSpec(metrics=[
        tfma.MetricConfig(class_name='BinaryAccuracy',
                          threshold=accuracy_threshold),
        tfma.MetricConfig(class_name='ExampleCount')
    ])

    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='Survived')],
        metrics_specs=[metrics_specs],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['Sex']),
            tfma.SlicingSpec(feature_keys=['Age']),
            tfma.SlicingSpec(feature_keys=['Parch'])
        ])

    evaluator = Evaluator(
        examples=examplegen.outputs.examples,
        model=trainer.outputs.model,
        baseline_model=model_resolver.outputs.model,
        eval_config=eval_config)

    # Validate model can be loaded and queried in sand-boxed environment
    # mirroring production.
    serving_config = None
    if local_run:
        serving_config = infra_validator_pb2.ServingSpec(
            tensorflow_serving=infra_validator_pb2.TensorFlowServing(
                tags=['latest']),
            local_docker=infra_validator_pb2.LocalDockerConfig()  # Running on local docker.
        )
    else:
        serving_config = infra_validator_pb2.ServingSpec(
            tensorflow_serving=infra_validator_pb2.TensorFlowServing(
                tags=['latest']),
            kubernetes=infra_validator_pb2.KubernetesConfig()  # Running on K8s.
        )

    validation_config = infra_validator_pb2.ValidationSpec(
        max_loading_time_seconds=60,
        num_tries=3,
    )

    request_config = infra_validator_pb2.RequestSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec(),
        num_examples=3,
    )

    infravalidator = InfraValidator(
        model=trainer.outputs.model,
        examples=examplegen.outputs.examples,
        serving_spec=serving_config,
        validation_spec=validation_config,
        request_spec=request_config,
    )

    # Checks whether the model passed the validation steps and pushes the model
    # to CAIP Prediction if checks are passed.
    pusher_args = {
        'model': trainer.outputs.model,
        'model_blessing': evaluator.outputs.blessing,
        'infra_blessing': infravalidator.outputs.blessing
    }

    if local_run:
        pusher_args.update({
            'push_destination': pusher_pb2.PushDestination(
                filesystem=pusher_pb2.PushDestination.Filesystem(
                    base_directory=pusher_config.serving_model_dir))
        })

    if pusher_config.ai_platform_serving_args is not None:
        pusher_args.update({
            'custom_executor_spec':
                executor_spec.ExecutorClassSpec(ai_platform_pusher_executor.Executor),
            'custom_config': {
                ai_platform_pusher_executor.SERVING_ARGS_KEY:
                    pusher_config.ai_platform_serving_args
            },
        })

    pusher = Pusher(**pusher_args)  # pylint: disable=unused-variable

    components = [
        examplegen, statisticsgen, schemagen, import_schema, examplevalidator,
        transform, trainer, model_resolver, evaluator, infravalidator, pusher
    ]
    if tuner_config.enable_tuning:
        components.append(tuner)
    else:
        components.append(hparams_importer)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=enable_cache,
        metadata_connection_config=metadata_connection_config,
        beam_pipeline_args=beam_pipeline_args
    )
def create_pipeline(pipeline_name: Text,
                    pipeline_root: Text,
                    project_id: Text,
                    bq_dataset_name: Text,
                    min_item_frequency: data_types.RuntimeParameter,
                    max_group_size: data_types.RuntimeParameter,
                    dimensions: data_types.RuntimeParameter,
                    num_leaves: data_types.RuntimeParameter,
                    eval_min_recall: data_types.RuntimeParameter,
                    eval_max_latency: data_types.RuntimeParameter,
                    ai_platform_training_args: Dict[Text, Text],
                    beam_pipeline_args: List[Text],
                    model_regisrty_uri: Text,
                    metadata_connection_config: Optional[
                        metadata_store_pb2.ConnectionConfig] = None,
                    enable_cache: Optional[bool] = False) -> pipeline.Pipeline:
  """Implements the online news pipeline with TFX."""

  local_executor_spec = executor_spec.ExecutorClassSpec(
      trainer_executor.GenericExecutor)
  caip_executor_spec = executor_spec.ExecutorClassSpec(
      ai_platform_trainer_executor.GenericExecutor)

  # Compute the PMI.
  pmi_computer = bq_components.compute_pmi(
      project_id=project_id,
      bq_dataset=bq_dataset_name,
      min_item_frequency=min_item_frequency,
      max_group_size=max_group_size)

  # Train the BQML Matrix Factorization model.
  bqml_trainer = bq_components.train_item_matching_model(
      project_id=project_id,
      bq_dataset=bq_dataset_name,
      item_cooc=pmi_computer.outputs.item_cooc,
      dimensions=dimensions,
  )

  # Extract the embeddings from the BQML model to a table.
  embeddings_extractor = bq_components.extract_embeddings(
      project_id=project_id,
      bq_dataset=bq_dataset_name,
      bq_model=bqml_trainer.outputs.bq_model)

  # Export embeddings from BigQuery to Cloud Storage.
  embeddings_exporter = BigQueryExampleGen(
      query=f'''
        SELECT item_Id, embedding, bias,
        FROM {bq_dataset_name}.item_embeddings
      ''',
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(splits=[
              example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=1)
          ])))

  # Add dependency from embeddings_exporter to embeddings_extractor.
  embeddings_exporter.add_upstream_node(embeddings_extractor)

  # Import embeddings schema.
  schema_importer = tfx.components.ImporterNode(
      source_uri=SCHEMA_DIR,
      artifact_type=tfx.types.standard_artifacts.Schema,
      instance_name='ImportSchema',
  )

  # Generate stats for the embeddings for validation.
  stats_generator = tfx.components.StatisticsGen(
      examples=embeddings_exporter.outputs.examples,
  )

  # Validate the embeddings stats against the schema.
  stats_validator = tfx.components.ExampleValidator(
      statistics=stats_generator.outputs.statistics,
      schema=schema_importer.outputs.result,
  )

  # Create an embedding lookup SavedModel.
  embedding_lookup_creator = tfx.components.Trainer(
      custom_executor_spec=local_executor_spec,
      module_file=LOOKUP_CREATOR_MODULE,
      train_args={'splits': ['train'], 'num_steps': 0},
      eval_args={'splits': ['train'], 'num_steps': 0},
      schema=schema_importer.outputs.result,
      examples=embeddings_exporter.outputs.examples)
  embedding_lookup_creator.id = 'CreateEmbeddingLookup'

  # Add dependency from stats_validator to embedding_lookup_creator.
  embedding_lookup_creator.add_upstream_node(stats_validator)

  # Infra-validate the embedding lookup model.
  infra_validator = tfx.components.InfraValidator(
      model=embedding_lookup_creator.outputs.model,
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          local_docker=infra_validator_pb2.LocalDockerConfig(),
      ),
      validation_spec=infra_validator_pb2.ValidationSpec(
          max_loading_time_seconds=60,
          num_tries=3,
      ))

  # Push the embedding lookup model to model registry location.
  embedding_lookup_pusher = tfx.components.Pusher(
      model=embedding_lookup_creator.outputs.model,
      infra_blessing=infra_validator.outputs.blessing,
      push_destination=tfx.proto.pusher_pb2.PushDestination(
          filesystem=tfx.proto.pusher_pb2.PushDestination.Filesystem(
              base_directory=os.path.join(model_regisrty_uri,
                                          EMBEDDING_LOOKUP_MODEL_NAME))))
  embedding_lookup_pusher.id = 'PushEmbeddingLookup'

  # Build the ScaNN index.
  scann_indexer = tfx.components.Trainer(
      custom_executor_spec=caip_executor_spec
      if ai_platform_training_args else local_executor_spec,
      module_file=SCANN_INDEXER_MODULE,
      train_args={'splits': ['train'], 'num_steps': num_leaves},
      eval_args={'splits': ['train'], 'num_steps': 0},
      schema=schema_importer.outputs.result,
      examples=embeddings_exporter.outputs.examples,
      custom_config={'ai_platform_training_args': ai_platform_training_args})
  scann_indexer.id = 'BuildScaNNIndex'

  # Add dependency from stats_validator to scann_indexer.
  scann_indexer.add_upstream_node(stats_validator)

  # Evaluate and validate the ScaNN index.
  index_evaluator = scann_evaluator.IndexEvaluator(
      examples=embeddings_exporter.outputs.examples,
      schema=schema_importer.outputs.result,
      model=scann_indexer.outputs.model,
      min_recall=eval_min_recall,
      max_latency=eval_max_latency)

  # Push the ScaNN index to model registry location.
  scann_index_pusher = tfx.components.Pusher(
      model=scann_indexer.outputs.model,
      model_blessing=index_evaluator.outputs.blessing,
      push_destination=tfx.proto.pusher_pb2.PushDestination(
          filesystem=tfx.proto.pusher_pb2.PushDestination.Filesystem(
              base_directory=os.path.join(model_regisrty_uri,
                                          SCANN_INDEX_MODEL_NAME))))
  scann_index_pusher.id = 'PushScaNNIndex'

  components = [
      pmi_computer,
      bqml_trainer,
      embeddings_extractor,
      embeddings_exporter,
      schema_importer,
      stats_generator,
      stats_validator,
      embedding_lookup_creator,
      infra_validator,
      embedding_lookup_pusher,
      scann_indexer,
      index_evaluator,
      scann_index_pusher
  ]

  print('The pipeline consists of the following components:')
  print([component.id for component in components])

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components,
      beam_pipeline_args=beam_pipeline_args,
      metadata_connection_config=metadata_connection_config,
      enable_cache=enable_cache)
def _create_serving_spec(payload: Dict[Text, Any]):
  result = infra_validator_pb2.ServingSpec()
  json_format.ParseDict(payload, result)
  return result
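# --- Usage sketch (not part of the original source) ---
# Illustrates how the _create_serving_spec() helper above turns a plain dict
# (for example, one loaded from a JSON pipeline parameter) into the ServingSpec
# proto that InfraValidator expects. The field values are hypothetical and
# resemble the Kubernetes-based serving spec used further below.
serving_spec = _create_serving_spec({
    'tensorflow_serving': {'tags': ['latest']},
    'kubernetes': {},
})
print(serving_spec.tensorflow_serving.tags)  # ['latest']
print(serving_spec.HasField('kubernetes'))   # True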
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the Chicago taxi pipeline with TFX and Kubeflow Pipelines."""
  examples = external_input(data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses a user-provided Python function that implements a model using TF-Learn
  # to train a model on Google Cloud AI Platform.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000),
  )

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'binary_accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.6}),
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config)

  # Performs infra validation of a candidate model to prevent an unservable
  # model from being pushed. In order to use the InfraValidator component, the
  # persistent volume and its claim that the pipeline is using must have a
  # ReadWriteMany access mode.
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          kubernetes=infra_validator_pb2.KubernetesConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec()
      ))

  # Checks whether the model passed the validation steps and pushes the model
  # to Google Cloud AI Platform if the check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      infra_blessing=infra_validator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          model_resolver,
          evaluator,
          infra_validator,
          pusher,
      ],
      # TODO(b/142684737): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers],
  )
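# --- Usage sketch (not part of the original source) ---
# One possible way to compile the Kubeflow variant of _create_pipeline() above
# into a Kubeflow Pipelines package with KubeflowDagRunner. All paths and the
# pipeline name are hypothetical placeholders; the runner writes a
# <pipeline_name>.tar.gz that can be uploaded to a Kubeflow Pipelines cluster.
from tfx.orchestration.kubeflow import kubeflow_dag_runner

runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    kubeflow_metadata_config=kubeflow_dag_runner
    .get_default_kubeflow_metadata_config())

kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
    _create_pipeline(
        pipeline_name='chicago_taxi_pipeline_kubeflow',   # hypothetical
        pipeline_root='gs://my-bucket/tfx_taxi',          # hypothetical
        data_root='gs://my-bucket/taxi/data',             # hypothetical
        module_file='gs://my-bucket/taxi/taxi_utils.py',  # hypothetical
        serving_model_dir='gs://my-bucket/taxi/serving',  # hypothetical
        direct_num_workers=0))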
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the Iris flowers pipeline with TFX."""
  examples = external_input(data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses a user-provided Python function that trains a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=2000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='variety')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='SparseCategoricalAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.6}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config)

  # Performs infra validation of a candidate model to prevent an unservable
  # model from being pushed. This config launches a model server from the
  # latest TensorFlow Serving image in a local Docker engine.
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          local_docker=infra_validator_pb2.LocalDockerConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec()
      ))

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if the check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      infra_blessing=infra_validator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          model_resolver,
          evaluator,
          infra_validator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      # TODO(b/142684737): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers],
  )
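# --- Usage sketch (not part of the original source) ---
# A possible local run of the Iris pipeline function above with BeamDagRunner,
# storing ML Metadata in a local SQLite file via metadata_path. It mirrors the
# earlier Beam sketch; all paths and the pipeline name are hypothetical.
import os
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

_iris_root = os.path.join(os.environ['HOME'], 'tfx_iris')  # hypothetical root
BeamDagRunner().run(
    _create_pipeline(
        pipeline_name='iris_pipeline_local',
        pipeline_root=os.path.join(_iris_root, 'pipelines'),
        data_root=os.path.join(_iris_root, 'data'),
        module_file=os.path.join(_iris_root, 'iris_utils.py'),
        serving_model_dir=os.path.join(_iris_root, 'serving_model'),
        metadata_path=os.path.join(_iris_root, 'metadata.db'),
        direct_num_workers=0))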
def _create_serving_spec(serving_spec_dict: Dict[Text, Any]):
  serving_spec = infra_validator_pb2.ServingSpec()
  json_format.ParseDict(serving_spec_dict, serving_spec)
  return serving_spec