def _init_model(self, multi_model, validation):
  # The benchmark runner will instantiate this class twice - once to determine
  # the benchmarks to run, and once to actually run them. However, Keras
  # freezes if we try to load the same model twice. As such, we have to pull
  # the model loading out of the constructor into a separate method which we
  # call before each benchmark.
  if multi_model:
    metric_specs = metric_specs_util.specs_from_metrics(
        [tf.keras.metrics.AUC(name="auc", num_thresholds=10000)],
        model_names=["candidate", "baseline"])
    if validation:
      # Only one metric, adding a threshold for all slices.
      metric_specs[0].metrics[0].threshold.CopyFrom(
          tfma.MetricThreshold(
              value_threshold=tfma.GenericValueThreshold(
                  lower_bound={"value": 0.5}, upper_bound={"value": 0.5}),
              change_threshold=tfma.GenericChangeThreshold(
                  absolute={"value": -0.001},
                  direction=tfma.MetricDirection.HIGHER_IS_BETTER)))
    self._eval_config = tfma.EvalConfig(
        model_specs=[
            tfma.ModelSpec(name="candidate", label_key="tips"),
            tfma.ModelSpec(name="baseline", label_key="tips",
                           is_baseline=True)
        ],
        metrics_specs=metric_specs)
    self._eval_shared_models = {
        "candidate":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config,
                model_name="candidate"),
        "baseline":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config,
                model_name="baseline")
    }
  else:
    metric_specs = metric_specs_util.specs_from_metrics(
        [tf.keras.metrics.AUC(name="auc", num_thresholds=10000)])
    if validation:
      # Only one metric, adding a threshold for all slices.
      metric_specs[0].metrics[0].threshold.CopyFrom(
          tfma.MetricThreshold(
              value_threshold=tfma.GenericValueThreshold(
                  lower_bound={"value": 0.5}, upper_bound={"value": 0.5})))
    self._eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key="tips")],
        metrics_specs=metric_specs)
    self._eval_shared_models = {
        "":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config)
    }
def build_config(self, use_defaults=False):
  # SLICING SPEC
  slicing_specs = [tfma.SlicingSpec()]
  if self.slices:
    slicing_specs.extend(
        [tfma.SlicingSpec(feature_keys=e) for e in self.slices])

  # MODEL SPEC
  metric_labels = sorted(set(self.metrics.keys()))
  model_specs = [
      tfma.ModelSpec(signature_name='zen_eval',
                     label_keys=self.output_mapping)
  ]

  # METRIC SPEC
  baseline = [tfma.MetricConfig(class_name='ExampleCount')]
  metrics_specs = []
  for key in metric_labels:
    metrics = baseline.copy()
    metrics.extend([
        tfma.MetricConfig(class_name=to_camel_case(m))
        for m in self.metrics[key]
    ])
    metrics_specs.append(
        tfma.MetricsSpec(output_names=[key], metrics=metrics))

  return tfma.EvalConfig(
      model_specs=model_specs,
      slicing_specs=slicing_specs,
      metrics_specs=metrics_specs,
      options=tfma.Options(
          include_default_metrics=BoolValue(value=use_defaults)))
def build_config(self):
  # SLICING SPEC
  slicing_specs = [tfma.SlicingSpec()]
  if self.slices:
    slicing_specs.extend(
        [tfma.SlicingSpec(feature_keys=e) for e in self.slices])

  # MODEL SPEC
  model_specs = [
      tfma.ModelSpec(label_key=self.label_key,
                     prediction_key=self.prediction_key)
  ]

  # METRIC SPEC
  baseline = [tfma.MetricConfig(class_name='ExampleCount')]
  for key in self.metrics:
    baseline.append(tfma.MetricConfig(class_name=to_camel_case(key)))
  metrics_specs = [tfma.MetricsSpec(metrics=baseline)]

  return tfma.EvalConfig(
      model_specs=model_specs,
      slicing_specs=slicing_specs,
      metrics_specs=metrics_specs,
      options=tfma.Options(include_default_metrics=BoolValue(value=False)))
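# Both build_config variants above call a to_camel_case helper that is not
# shown in this section; it presumably converts snake_case metric names such
# as 'binary_accuracy' into the class names TFMA expects ('BinaryAccuracy').
# A minimal sketch of such a helper (an assumption, not the original code):
def to_camel_case(snake_case_name):
  # 'mean_absolute_error' -> 'MeanAbsoluteError'. Acronyms such as 'auc'
  # would need special-casing, since this yields 'Auc' rather than 'AUC'.
  return ''.join(part.title() for part in snake_case_name.split('_'))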
def evaluate_model(classifier, validate_tf_file, tfma_eval_result_path,
                   selected_slice, label, feature_map):
  """Evaluate Model using TensorFlow Model Analysis.

  Args:
    classifier: Trained classifier model to be evaluated.
    validate_tf_file: File containing validation TFRecordDataset.
    tfma_eval_result_path: Directory path where eval results will be written.
    selected_slice: Feature for slicing the data.
    label: Groundtruth label.
    feature_map: Dict of feature names to their data type.
  """

  def eval_input_receiver_fn():
    """Eval Input Receiver function."""
    serialized_tf_example = tf.compat.v1.placeholder(
        dtype=tf.string, shape=[None], name='input_example_placeholder')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.io.parse_example(serialized_tf_example, feature_map)
    features['weight'] = tf.ones_like(features[label])
    return tfma.export.EvalInputReceiver(
        features=features,
        receiver_tensors=receiver_tensors,
        labels=features[label])

  tfma_export_dir = tfma.export.export_eval_savedmodel(
      estimator=classifier,
      export_dir_base=os.path.join(tempfile.gettempdir(), 'tfma_eval_model'),
      eval_input_receiver_fn=eval_input_receiver_fn)

  # Define slices that you want the evaluation to run on.
  slice_spec = [
      tfma.slicer.SingleSliceSpec(),  # Overall slice
      tfma.slicer.SingleSliceSpec(columns=[selected_slice]),
  ]

  # Add the fairness metrics.
  add_metrics_callbacks = [
      tfma.post_export_metrics.fairness_indicators(
          thresholds=[0.1, 0.3, 0.5, 0.7, 0.9], labels_key=label)
  ]

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=tfma_export_dir,
      add_metrics_callbacks=add_metrics_callbacks)

  eval_config = tfma.EvalConfig(
      input_data_specs=[tfma.InputDataSpec(location=validate_tf_file)],
      model_specs=[tfma.ModelSpec(location=tfma_export_dir)],
      output_data_specs=[
          tfma.OutputDataSpec(default_location=tfma_eval_result_path)
      ],
      slicing_specs=[s.to_proto() for s in slice_spec])

  # Run the fairness evaluation.
  tfma.run_model_analysis(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
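# Once evaluate_model has written results to tfma_eval_result_path, they can
# be loaded back and, in a notebook, rendered with the Fairness Indicators
# widget. A minimal sketch using TFMA's public loading/rendering APIs; the
# path variable is reused from the call above.
import tensorflow_model_analysis as tfma
from tensorflow_model_analysis.addons.fairness.view import widget_view

eval_result = tfma.load_eval_result(output_path=tfma_eval_result_path)
widget_view.render_fairness_indicator(eval_result=eval_result)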
def build(self, context: Context) -> BaseNode:
  from tfx.components import Evaluator
  import tensorflow_model_analysis as tfma

  threshold = {
      'binary_accuracy':
          tfma.config.MetricThreshold(
              value_threshold=tfma.GenericValueThreshold(
                  lower_bound={'value': 0.6}),
              change_threshold=tfma.GenericChangeThreshold(
                  direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                  absolute={'value': -1e-10}))
  }
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
      ],
      metrics_specs=[tfma.MetricsSpec(thresholds=threshold)])
  component = Evaluator(
      examples=context.get(self._config.inputs.examples),
      model=context.get(self._config.inputs.model),
      baseline_model=context.get(self._config.inputs.baseline_model),
      eval_config=eval_config,
      instance_name=context.abs_current_url_friendly)
  put_outputs_to_context(context, self._config.outputs, component)
  return component
def testGetEvalResultsRoute(self):
  model_location = self._exportEvalSavedModel(
      linear_classifier.simple_linear_classifier)
  examples = [
      self._makeExample(age=3.0, language="english", label=1.0),
      self._makeExample(age=3.0, language="chinese", label=0.0),
      self._makeExample(age=4.0, language="english", label=1.0),
      self._makeExample(age=5.0, language="chinese", label=1.0),
      self._makeExample(age=5.0, language="hindi", label=1.0)
  ]
  data_location = self._writeTFExamplesToTFRecords(examples)
  eval_config = tfma.EvalConfig(
      input_data_specs=[tfma.InputDataSpec(location=data_location)],
      model_specs=[tfma.ModelSpec(location=model_location)],
      output_data_specs=[
          tfma.OutputDataSpec(default_location=self._eval_result_output_dir)
      ])
  _ = tfma.run_model_analysis(
      eval_config=eval_config,
      eval_shared_model=tfma.default_eval_shared_model(
          eval_saved_model_path=model_location, example_weight_key="age"))

  response = self._server.get(
      "/data/plugin/fairness_indicators/get_evaluation_result?run=.")
  self.assertEqual(200, response.status_code)
def create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                    module_file: Text, serving_model_dir: Text,
                    metadata_path: Text,
                    direct_num_workers: int) -> pipeline.Pipeline:
  output = example_gen_pb2.Output(
      split_config=example_gen_pb2.SplitConfig(splits=[
          example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=3),
          example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
      ]))
  examples = tfrecord_input(data_root)
  example_gen = ImportExampleGen(input=examples, output_config=output)
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)
  trainer = Trainer(
      module_file=module_file,
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=infer_schema.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=100),
      eval_args=trainer_pb2.EvalArgs(num_steps=50))
  eval_config = tfma.EvalConfig(slicing_specs=[tfma.SlicingSpec()])
  model_analyzer = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      eval_config=eval_config)
  model_validator = ModelValidator(
      examples=example_gen.outputs['examples'], model=trainer.outputs['model'])
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=model_analyzer.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          infer_schema,
          validate_stats,
          transform,
          trainer,
          model_analyzer,
          model_validator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
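# A sketch of launching the pipeline defined above on the local Beam runner;
# all paths here are placeholder values, not part of the original snippet.
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

BeamDagRunner().run(
    create_pipeline(
        pipeline_name='my_pipeline',
        pipeline_root='/tmp/pipeline_root',
        data_root='/tmp/data',
        module_file='module.py',
        serving_model_dir='/tmp/serving_model',
        metadata_path='/tmp/metadata.db',
        direct_num_workers=1))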
def testConstructWithEvalConfig(self):
  examples = standard_artifacts.Examples()
  model_exports = standard_artifacts.Model()
  evaluator = component.Evaluator(
      examples=channel_utils.as_channel([examples]),
      model_exports=channel_utils.as_channel([model_exports]),
      eval_config=tfma.EvalConfig(
          slicing_specs=[tfma.SlicingSpec(feature_keys=['trip_start_hour'])]))
  self.assertEqual(standard_artifacts.ModelEvaluation.TYPE_NAME,
                   evaluator.outputs['output'].type_name)
def _init_model(self, multi_model):
  # The benchmark runner will instantiate this class twice - once to determine
  # the benchmarks to run, and once to actually run them. However, Keras
  # freezes if we try to load the same model twice. As such, we have to pull
  # the model loading out of the constructor into a separate method which we
  # call before each benchmark.
  if multi_model:
    self._eval_config = tfma.EvalConfig(
        model_specs=[
            tfma.ModelSpec(name="candidate", label_key="tips"),
            tfma.ModelSpec(name="baseline", label_key="tips",
                           is_baseline=True)
        ],
        metrics_specs=metric_specs.specs_from_metrics(
            [
                tf.keras.metrics.AUC(name="auc", num_thresholds=10000),
            ],
            model_names=["candidate", "baseline"]))
    self._eval_shared_models = {
        "candidate":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config,
                model_name="candidate"),
        "baseline":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config,
                model_name="baseline")
    }
  else:
    self._eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key="tips")],
        metrics_specs=metric_specs.specs_from_metrics([
            tf.keras.metrics.AUC(name="auc", num_thresholds=10000),
        ]))
    self._eval_shared_models = {
        "":
            tfma.default_eval_shared_model(
                self._dataset.trained_saved_model_path(),
                eval_config=self._eval_config)
    }
def _init_model(self):
  # The benchmark runner will instantiate this class twice - once to determine
  # the benchmarks to run, and once to actually run them. However, Keras
  # freezes if we try to load the same model twice. As such, we have to pull
  # the model loading out of the constructor into a separate method which we
  # call before each benchmark.
  self._eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key="tips")],
      metrics_specs=metric_specs.example_count_specs())
  self._eval_shared_model = tfma.default_eval_shared_model(
      self._dataset.trained_saved_model_path(), eval_config=self._eval_config)
def __init__(self, dataset, **kwargs):
  # Benchmark runners may pass extraneous arguments we don't care about.
  del kwargs
  super(TFMAV2BenchmarkBase, self).__init__()
  self._dataset = dataset
  self._eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key="tips")],
      metrics_specs=metric_specs.example_count_specs())
  self._eval_shared_model = tfma.default_eval_shared_model(
      self._dataset.trained_saved_model_path(), eval_config=self._eval_config)
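# A sketch of how a benchmark built on this base class might time a full TFMA
# run with the config and shared model prepared above. The tfrecords_path()
# accessor and the report_benchmark call are assumptions based on typical
# tf.test.Benchmark-style runners, not the exact benchmark API.
import time

class TFMAV2ExampleCountBenchmark(TFMAV2BenchmarkBase):

  def benchmarkRunModelAnalysis(self):
    start = time.time()
    tfma.run_model_analysis(
        eval_config=self._eval_config,
        eval_shared_model=self._eval_shared_model,
        data_location=self._dataset.tfrecords_path())  # hypothetical accessor
    self.report_benchmark(iters=1, wall_time=time.time() - start)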
def testMakeSklearnPredictExtractorWithMultiModels(self):
  """Tests that predictions are made from extracts for multiple models."""
  eval_config = tfma.EvalConfig(model_specs=[
      tfma.ModelSpec(name='model1'),
      tfma.ModelSpec(name='model2'),
  ])
  eval_export_dir_1 = os.path.join(self._eval_export_dir, '1')
  self._create_sklearn_model(eval_export_dir_1)
  eval_shared_model_1 = sklearn_predict_extractor.custom_eval_shared_model(
      eval_saved_model_path=eval_export_dir_1,
      model_name='model1',
      eval_config=eval_config)
  eval_export_dir_2 = os.path.join(self._eval_export_dir, '2')
  self._create_sklearn_model(eval_export_dir_2)
  eval_shared_model_2 = sklearn_predict_extractor.custom_eval_shared_model(
      eval_saved_model_path=eval_export_dir_2,
      model_name='model2',
      eval_config=eval_config)
  feature_extractor = tfma.extractors.FeaturesExtractor(eval_config)
  prediction_extractor = (
      sklearn_predict_extractor._make_sklearn_predict_extractor(
          eval_shared_model={
              'model1': eval_shared_model_1,
              'model2': eval_shared_model_2,
          }))
  with beam.Pipeline() as pipeline:
    predict_extracts = (
        pipeline
        | 'Create' >> beam.Create(
            [e.SerializeToString() for e in self._examples])
        | 'BatchExamples' >> self._tfx_io.BeamSource()
        | 'InputsToExtracts' >> tfma.BatchedInputsToExtracts()  # pylint: disable=no-value-for-parameter
        | feature_extractor.stage_name >> feature_extractor.ptransform
        | prediction_extractor.stage_name >> prediction_extractor.ptransform)

    def check_result(actual):
      try:
        for item in actual:
          self.assertEqual(item['labels'].shape, item['predictions'].shape)
          self.assertIn('model1', item['predictions'][0])
          self.assertIn('model2', item['predictions'][0])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(predict_extracts, check_result)
def get_eval_results(model_location,
                     base_dir,
                     eval_subdir,
                     validate_tfrecord_file,
                     slice_selection='religion',
                     compute_confidence_intervals=True):
  """Get Fairness Indicators eval results."""
  # Define slices that you want the evaluation to run on.
  eval_config = text_format.Parse(
      """
    model_specs {
      label_key: '%s'
    }
    metrics_specs {
      metrics {class_name: "AUC"}
      metrics {class_name: "ExampleCount"}
      metrics {class_name: "Accuracy"}
      metrics {
         class_name: "FairnessIndicators"
         config: '{"thresholds": [0.4, 0.4125, 0.425, 0.4375, 0.45, 0.4675, 0.475, 0.4875, 0.5]}'
      }
    }
    slicing_specs {
      feature_keys: '%s'
    }
    slicing_specs {}
    options {
      compute_confidence_intervals { value: %s }
      disabled_outputs{values: "analysis"}
    }
    """ % (LABEL, slice_selection,
           'true' if compute_confidence_intervals else 'false'),
      tfma.EvalConfig())

  base_dir = tempfile.mkdtemp(prefix='saved_eval_results')
  tfma_eval_result_path = os.path.join(base_dir, eval_subdir)

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=model_location, tags=[tf.saved_model.SERVING])

  # Run the fairness evaluation.
  return tfma.run_model_analysis(
      eval_shared_model=eval_shared_model,
      data_location=validate_tfrecord_file,
      file_format='tfrecords',
      eval_config=eval_config,
      output_path=tfma_eval_result_path,
      extractors=None)
def _get_eval_config() -> tfma.EvalConfig:
  return tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key=LABEL_KEY)],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='BinaryAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.01}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': 1e-10})))
          ])
      ])
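# A minimal sketch of feeding the config above into a standalone TFMA run.
# MODEL_PATH, EXAMPLES_PATH and OUTPUT_PATH are placeholder constants, not
# values from the original snippet.
import tensorflow_model_analysis as tfma

eval_result = tfma.run_model_analysis(
    eval_shared_model=tfma.default_eval_shared_model(
        eval_saved_model_path=MODEL_PATH, eval_config=_get_eval_config()),
    eval_config=_get_eval_config(),
    data_location=EXAMPLES_PATH,
    output_path=OUTPUT_PATH)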
def test_get_eval_results_called_correctly(self, mock_run_model_analysis,
                                           mock_shared_model):
  mock_model = 'model'
  mock_shared_model.return_value = mock_model
  model_location = 'saved_model'
  eval_results_path = 'eval_results'
  data_file = 'data'
  util.get_eval_results(model_location, eval_results_path, data_file)
  mock_shared_model.assert_called_once_with(
      eval_saved_model_path=model_location, tags=[tf.saved_model.SERVING])
  expected_eval_config = text_format.Parse(
      """
    model_specs {
      label_key: 'toxicity'
    }
    metrics_specs {
      metrics {class_name: "AUC"}
      metrics {class_name: "ExampleCount"}
      metrics {class_name: "Accuracy"}
      metrics {
         class_name: "FairnessIndicators"
         config: '{"thresholds": [0.4, 0.4125, 0.425, 0.4375, 0.45, 0.4675, 0.475, 0.4875, 0.5]}'
      }
    }
    slicing_specs {
      feature_keys: 'religion'
    }
    slicing_specs {}
    options {
      compute_confidence_intervals { value: true }
      disabled_outputs{values: "analysis"}
    }
    """, tfma.EvalConfig())
  mock_run_model_analysis.assert_called_once_with(
      eval_shared_model=mock_model,
      data_location=data_file,
      file_format='tfrecords',
      eval_config=expected_eval_config,
      output_path=eval_results_path,
      extractors=None)
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  examples = external_input(data_root)
  example_gen = CsvExampleGen(input=examples)
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)
  trainer = Trainer(
      module_file=module_file,
      examples=transform.outputs['transformed_examples'],
      schema=infer_schema.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))
  eval_config = tfma.EvalConfig(slicing_specs=[tfma.SlicingSpec()])
  model_analyzer = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      eval_config=eval_config)
  model_validator = ModelValidator(
      examples=example_gen.outputs['examples'], model=trainer.outputs['model'])
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=model_validator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats, transform,
          trainer, model_analyzer, model_validator, pusher
      ],
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
def setUp(self):
  super().setUp()
  self._eval_export_dir = os.path.join(self._getTempDir(), 'eval_export')
  self._create_sklearn_model(self._eval_export_dir)
  self._eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec()])
  self._eval_shared_model = (
      sklearn_predict_extractor.custom_eval_shared_model(
          eval_saved_model_path=self._eval_export_dir,
          model_name=None,
          eval_config=self._eval_config))
  self._schema = text_format.Parse(
      """
      feature {
        name: "age"
        type: FLOAT
      }
      feature {
        name: "language"
        type: FLOAT
      }
      feature {
        name: "label"
        type: INT
      }
      """, schema_pb2.Schema())
  self._tfx_io = test_util.InMemoryTFExampleRecord(
      schema=self._schema, raw_record_column_name=tfma.ARROW_INPUT_COLUMN)
  self._tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
      arrow_schema=self._tfx_io.ArrowSchema(),
      tensor_representations=self._tfx_io.TensorRepresentations())
  self._examples = [
      self._makeExample(age=3.0, language=1.0, label=1),
      self._makeExample(age=3.0, language=0.0, label=0),
      self._makeExample(age=4.0, language=1.0, label=1),
      self._makeExample(age=5.0, language=0.0, label=0),
  ]
def get_accuracy_eval_config(accuracy_threshold):
  threshold = tfma.MetricThreshold(
      value_threshold=tfma.GenericValueThreshold(
          lower_bound={'value': accuracy_threshold},
          upper_bound={'value': 0.99}),
      change_threshold=tfma.GenericChangeThreshold(
          absolute={'value': 0.0001},
          direction=tfma.MetricDirection.HIGHER_IS_BETTER))

  metrics_specs = tfma.MetricsSpec(metrics=[
      tfma.MetricConfig(class_name='BinaryAccuracy', threshold=threshold),
      tfma.MetricConfig(class_name='ExampleCount')
  ])

  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='income_bracket')],
      metrics_specs=[metrics_specs],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['occupation'])
      ])
  return eval_config
def get_eval_config():
  model_specs = [
      tfma.ModelSpec(
          signature_name='serving_default',
          label_key='income_bracket',
          example_weight_key='fnlwgt')
  ]
  metrics_specs = [
      tfma.MetricsSpec(metrics=[
          tfma.MetricConfig(class_name='BinaryAccuracy'),
          tfma.MetricConfig(class_name='ExampleCount')
      ])
  ]
  slicing_specs = [
      tfma.SlicingSpec(),
      tfma.SlicingSpec(feature_keys=['occupation'])
  ]
  eval_config = tfma.EvalConfig(
      model_specs=model_specs,
      metrics_specs=metrics_specs,
      slicing_specs=slicing_specs)
  return eval_config
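# The two config builders above differ mainly in whether validation thresholds
# are attached. A sketch of wiring the thresholded config into a TFX Evaluator;
# the example_gen, trainer and model_resolver handles are assumed to exist as
# in the pipeline definitions later in this section.
from tfx.components import Evaluator

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=get_accuracy_eval_config(accuracy_threshold=0.7))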
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  # examples = external_input(data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  # example_gen = CsvExampleGen(input=examples)
  example_gen = CsvExampleGen(input_base=data_root)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.6}),
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config)

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen, statistics_gen, schema_gen, example_validator,
          transform, trainer, model_resolver, evaluator, pusher
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, module_file_lite: Text,
                     serving_model_dir: Text, serving_model_dir_lite: Text,
                     metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the handwritten digit classification example using TFX."""
  # Brings data into the pipeline.
  example_gen = ImportExampleGen(input_base=data_root)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  def _create_trainer(module_file, instance_name):
    return Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        train_args=trainer_pb2.TrainArgs(num_steps=5000),
        eval_args=trainer_pb2.EvalArgs(num_steps=100),
        instance_name=instance_name)

  # Uses user-provided Python function that trains a Keras model.
  trainer = _create_trainer(module_file, 'mnist')

  # Trains the same model as the one above, but converts it into a TFLite one.
  trainer_lite = _create_trainer(module_file_lite, 'mnist_lite')

  # TODO(b/150949276): Add resolver back once it supports two trainers.

  # Uses TFMA to compute evaluation statistics over features of a model and
  # performs quality validation of a candidate model.
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='image_class')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='SparseCategoricalAccuracy',
                  threshold=tfma.config.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.8})))
          ])
      ])

  eval_config_lite = tfma.EvalConfig()
  eval_config_lite.CopyFrom(eval_config)
  # Informs the evaluator that the model is a TFLite model.
  eval_config_lite.model_specs[0].model_type = 'tf_lite'

  # Uses TFMA to compute the evaluation statistics over features of a model.
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      eval_config=eval_config,
      instance_name='mnist')

  # Uses TFMA to compute the evaluation statistics over features of a TFLite
  # model.
  evaluator_lite = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer_lite.outputs['model'],
      eval_config=eval_config_lite,
      instance_name='mnist_lite')

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)),
      instance_name='mnist')

  # Checks whether the TFLite model passed the validation steps and pushes the
  # model to a file destination if check passed.
  pusher_lite = Pusher(
      model=trainer_lite.outputs['model'],
      model_blessing=evaluator_lite.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir_lite)),
      instance_name='mnist_lite')

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          trainer_lite,
          evaluator,
          evaluator_lite,
          pusher,
          pusher_lite,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
class ExecutorTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('evaluation_w_eval_config', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(slicing_specs=[
                      tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                      tfma.SlicingSpec(
                          feature_keys=['trip_start_day', 'trip_miles']),
                  ]))
      }),
      ('evaluation_w_module_file', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(slicing_specs=[
                      tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                      tfma.SlicingSpec(
                          feature_keys=['trip_start_day', 'trip_miles']),
                  ])),
          'module_file': None
      }),
      ('evaluation_w_module_path', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(slicing_specs=[
                      tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                      tfma.SlicingSpec(
                          feature_keys=['trip_start_day', 'trip_miles']),
                  ])),
          'module_path': evaluator_module.__name__,
      }))
  def testEvaluation(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    baseline_model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
    schema = standard_artifacts.Schema()
    schema.uri = os.path.join(source_data_dir, 'schema_gen')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
        constants.SCHEMA_KEY: [schema],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    # Test multiple splits.
    exec_properties[constants.EXAMPLE_SPLITS_KEY] = json_utils.dumps(
        ['train', 'eval'])
    if 'module_file' in exec_properties:
      exec_properties['module_file'] = os.path.join(source_data_dir,
                                                    'module_file',
                                                    'evaluator_module.py')

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertFalse(
        fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))

  @parameterized.named_parameters(('legacy_feature_slicing', {
      'feature_slicing_spec':
          proto_utils.proto_to_json(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ])),
  }))
  def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    try:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered. This may raise an ImportError if the
      # currently-installed version of TFMA does not support fairness
      # indicators.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      exec_properties['fairness_indicator_thresholds'] = [
          0.1, 0.3, 0.5, 0.7, 0.9
      ]
    except ImportError:
      logging.warning(
          'Not testing fairness indicators because a compatible TFMA version '
          'is not installed.')

    # List needs to be serialized before being passed into Do function.
    exec_properties[constants.EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertFalse(
        fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))

  @parameterized.named_parameters(
      ('eval_config_w_validation', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(
                      model_specs=[
                          tfma.ModelSpec(label_key='tips'),
                      ],
                      metrics_specs=[
                          tfma.MetricsSpec(metrics=[
                              tfma.config.MetricConfig(
                                  class_name='ExampleCount',
                                  # Count > 0, OK.
                                  threshold=tfma.config.MetricThreshold(
                                      value_threshold=tfma
                                      .GenericValueThreshold(
                                          lower_bound={'value': 0}))),
                          ]),
                      ],
                      slicing_specs=[tfma.SlicingSpec()]))
      }, True, True),
      ('eval_config_w_validation_fail', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(
                      model_specs=[
                          tfma.ModelSpec(
                              name='baseline1',
                              label_key='tips',
                              is_baseline=True),
                          tfma.ModelSpec(name='candidate1', label_key='tips'),
                      ],
                      metrics_specs=[
                          tfma.MetricsSpec(metrics=[
                              tfma.config.MetricConfig(
                                  class_name='ExampleCount',
                                  # Count < -1, NOT OK.
                                  threshold=tfma.config.MetricThreshold(
                                      value_threshold=tfma
                                      .GenericValueThreshold(
                                          upper_bound={'value': -1}))),
                          ]),
                      ],
                      slicing_specs=[tfma.SlicingSpec()]))
      }, False, True),
      ('no_baseline_model_ignore_change_threshold_validation_pass', {
          'eval_config':
              proto_utils.proto_to_json(
                  tfma.EvalConfig(
                      model_specs=[
                          tfma.ModelSpec(
                              name='baseline',
                              label_key='tips',
                              is_baseline=True),
                          tfma.ModelSpec(name='candidate', label_key='tips'),
                      ],
                      metrics_specs=[
                          tfma.MetricsSpec(metrics=[
                              tfma.config.MetricConfig(
                                  class_name='ExampleCount',
                                  # Count > 0, OK.
                                  threshold=tfma.config.MetricThreshold(
                                      value_threshold=tfma
                                      .GenericValueThreshold(
                                          lower_bound={'value': 0}))),
                              tfma.config.MetricConfig(
                                  class_name='Accuracy',
                                  # Should be ignored due to no baseline.
                                  threshold=tfma.config.MetricThreshold(
                                      change_threshold=tfma
                                      .GenericChangeThreshold(
                                          relative={'value': 0},
                                          direction=tfma.MetricDirection
                                          .LOWER_IS_BETTER))),
                          ]),
                      ],
                      slicing_specs=[tfma.SlicingSpec()]))
      }, True, False))
  def testDoValidation(self, exec_properties, blessed, has_baseline):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    baseline_model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
    schema = standard_artifacts.Schema()
    schema.uri = os.path.join(source_data_dir, 'schema_gen')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
        constants.SCHEMA_KEY: [schema],
    }
    if has_baseline:
      input_dict[constants.BASELINE_MODEL_KEY] = [baseline_model]

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    # List needs to be serialized before being passed into Do function.
    exec_properties[constants.EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertTrue(
        fileio.exists(os.path.join(eval_output.uri, 'validations')))
    if blessed:
      self.assertTrue(
          fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
    else:
      self.assertTrue(
          fileio.exists(os.path.join(blessing_output.uri, 'NOT_BLESSED')))
def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    # TODO(step 7): (Optional) Uncomment here to use BigQuery as a data source.
    # query: Text,
    preprocessing_fn: Text,
    run_fn: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    eval_accuracy_threshold: float,
    serving_model_dir: Text,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  components = []

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=external_input(data_path))
  # TODO(step 7): (Optional) Uncomment here to use BigQuery as a data source.
  # example_gen = BigQueryExampleGen(query=query)
  components.append(example_gen)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # TODO(step 5): Uncomment here to add StatisticsGen to the pipeline.
  # components.append(statistics_gen)

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)
  # TODO(step 5): Uncomment here to add SchemaGen to the pipeline.
  # components.append(schema_gen)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(  # pylint: disable=unused-variable
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])
  # TODO(step 5): Uncomment here to add ExampleValidator to the pipeline.
  # components.append(example_validator)

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      preprocessing_fn=preprocessing_fn)
  # TODO(step 6): Uncomment here to add Transform to the pipeline.
  # components.append(transform)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer_args = {
      'run_fn': run_fn,
      'transformed_examples': transform.outputs['transformed_examples'],
      'schema': schema_gen.outputs['schema'],
      'transform_graph': transform.outputs['transform_graph'],
      'train_args': train_args,
      'eval_args': eval_args,
      'custom_executor_spec':
          executor_spec.ExecutorClassSpec(trainer_executor.GenericExecutor),
  }
  if ai_platform_training_args is not None:
    trainer_args.update({
        'custom_executor_spec':
            executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.GenericExecutor),
        'custom_config': {
            ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                ai_platform_training_args,
        }
    })
  trainer = Trainer(**trainer_args)
  # TODO(step 6): Uncomment here to add Trainer to the pipeline.
  # components.append(trainer)

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))
  # TODO(step 6): Uncomment here to add ResolverNode to the pipeline.
  # components.append(model_resolver)

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='tips')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='BinaryAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': eval_accuracy_threshold}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config)
  # TODO(step 6): Uncomment here to add Evaluator to the pipeline.
  # components.append(evaluator)

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher_args = {
      'model': trainer.outputs['model'],
      'model_blessing': evaluator.outputs['blessing'],
      'push_destination':
          pusher_pb2.PushDestination(
              filesystem=pusher_pb2.PushDestination.Filesystem(
                  base_directory=serving_model_dir)),
  }
  if ai_platform_serving_args is not None:
    pusher_args.update({
        'custom_executor_spec':
            executor_spec.ExecutorClassSpec(
                ai_platform_pusher_executor.Executor),
        'custom_config': {
            ai_platform_pusher_executor.SERVING_ARGS_KEY:
                ai_platform_serving_args
        },
    })
  pusher = Pusher(**pusher_args)  # pylint: disable=unused-variable
  # TODO(step 6): Uncomment here to add Pusher to the pipeline.
  # components.append(pusher)

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components,
      # TODO(step 8): Change this value to control caching of execution
      # results.
      enable_cache=True,
      metadata_connection_config=metadata_connection_config,
      beam_pipeline_args=beam_pipeline_args,
  )
def generate_pipeline(pipeline_name, pipeline_root, train_data, test_data,
                      train_steps, eval_steps, pusher_target, runner):
  module_file = 'util.py'  # util.py is a file in the same folder

  # RuntimeParameter is only supported on KubeflowDagRunner currently.
  if runner == 'kubeflow':
    pipeline_root_param = os.path.join('gs://{{kfp-default-bucket}}',
                                       pipeline_name, '{{workflow.uid}}')
    train_data_param = data_types.RuntimeParameter(
        name='train-data',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/train',
        ptype=Text)
    test_data_param = data_types.RuntimeParameter(
        name='test-data',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/test',
        ptype=Text)
    pusher_target_param = data_types.RuntimeParameter(
        name='pusher-destination',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/serving',
        ptype=Text)
  else:
    pipeline_root_param = pipeline_root
    train_data_param = train_data
    test_data_param = test_data
    pusher_target_param = pusher_target

  examples = external_input(train_data_param)
  example_gen = CsvExampleGen(input=examples, instance_name="train")

  test_examples = external_input(test_data_param)
  test_example_gen = CsvExampleGen(
      input=test_examples,
      output_config={
          'split_config': {
              'splits': [{
                  'name': 'test',
                  'hash_buckets': 1
              }]
          }
      },
      instance_name="test")

  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)  # infer_feature_shape controls sparse or dense

  # Transform is too slow on my side.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  trainer = Trainer(
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file,
      train_args=trainer_pb2.TrainArgs(num_steps=train_steps),
      eval_args=trainer_pb2.EvalArgs(num_steps=eval_steps),
      instance_name="train",
      enable_cache=False)

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  # Adding more slicing specs, e.g.
  # tfma.SlicingSpec(feature_keys=['var_0', 'var_1']), stops the Evaluator
  # from outputting a BLESSED status; it appears to be a bug in TFMA.
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='target')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'binary_accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.4}),
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      # baseline_model=model_resolver.outputs['model'],
      # Change threshold will be ignored if there is no baseline (first run).
      eval_config=eval_config,
      instance_name="eval5")

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination={
          'filesystem': {
              'base_directory': pusher_target_param
          }
      })

  bulk_inferrer = BulkInferrer(
      examples=test_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      # model_blessing=evaluator.outputs['blessing'],
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec(),
      instance_name="bulkInferrer")

  hello = component.HelloComponent(
      input_data=bulk_inferrer.outputs['inference_result'],
      instance_name='csvGen')

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root_param,
      components=[
          example_gen, statistics_gen, schema_gen, transform, trainer,
          model_resolver, evaluator, pusher, hello, test_example_gen,
          bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          os.path.join(pipeline_root, 'metadata.sqlite')),
      beam_pipeline_args=['--direct_num_workers=0'])
class ExecutorTest(tf.test.TestCase, absl.testing.parameterized.TestCase):

  # TODO(jinhuang): add test for eval_saved_model when supported.
  @absl.testing.parameterized.named_parameters(
      ('eval_config', {
          'eval_config':
              json_format.MessageToJson(
                  tfma.EvalConfig(slicing_specs=[
                      tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                      tfma.SlicingSpec(
                          feature_keys=['trip_start_day', 'trip_miles']),
                  ]),
                  preserving_proto_field_name=True)
      }),
      ('eval_config_w_baseline', {
          'eval_config':
              json_format.MessageToJson(
                  tfma.EvalConfig(
                      model_specs=[
                          tfma.ModelSpec(name='baseline', is_baseline=True),
                          tfma.ModelSpec(name='candidate'),
                      ],
                      slicing_specs=[
                          tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                          tfma.SlicingSpec(
                              feature_keys=['trip_start_day', 'trip_miles']),
                      ]),
                  preserving_proto_field_name=True)
      }),
      ('legacy_feature_slicing', {
          'feature_slicing_spec':
              json_format.MessageToJson(
                  evaluator_pb2.FeatureSlicingSpec(specs=[
                      evaluator_pb2.SingleSlicingSpec(
                          column_for_slicing=['trip_start_hour']),
                      evaluator_pb2.SingleSlicingSpec(
                          column_for_slicing=['trip_start_day', 'trip_miles']),
                  ]),
                  preserving_proto_field_name=True),
      }))
  def testDo(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    baseline_model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
    input_dict = {
        executor.EXAMPLES_KEY: [examples],
        executor.MODEL_KEY: [model],
        executor.BASELINE_MODEL_KEY: [baseline_model],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    output_dict = {
        executor.EVALUATION_KEY: [eval_output],
    }

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))

  @absl.testing.parameterized.named_parameters(('legacy_feature_slicing', {
      'feature_slicing_spec':
          json_format.MessageToJson(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ]),
              preserving_proto_field_name=True),
  }))
  def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    input_dict = {
        executor.EXAMPLES_KEY: [examples],
        executor.MODEL_KEY: [model],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    output_dict = {executor.EVALUATION_KEY: [eval_output]}

    try:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered. This may raise an ImportError if the
      # currently-installed version of TFMA does not support fairness
      # indicators.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      exec_properties['fairness_indicator_thresholds'] = [
          0.1, 0.3, 0.5, 0.7, 0.9
      ]
    except ImportError:
      absl.logging.warning(
          'Not testing fairness indicators because a compatible TFMA version '
          'is not installed.')

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  # Brings training data into the pipeline or otherwise joins/converts
  # training data.
  training_example_gen = CsvExampleGen(
      input_base=training_data_root, instance_name='training_example_gen')

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(
      input_data=training_example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=training_example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.6}),
                          # Change threshold will be ignored if there is no
                          # baseline model resolved from MLMD (first run).
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=training_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  # Brings inference data into the pipeline.
  inference_example_gen = CsvExampleGen(
      input_base=inference_data_root,
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(splits=[
              example_gen_pb2.SplitConfig.Split(
                  name='unlabelled', hash_buckets=100)
          ])),
      instance_name='inference_example_gen')

  # Performs offline batch inference over inference examples.
  bulk_inferrer = BulkInferrer(
      examples=inference_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      # Empty data_spec.example_splits will result in using all splits.
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec())

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          training_example_gen, inference_example_gen, statistics_gen,
          schema_gen, example_validator, transform, trainer, model_resolver,
          evaluator, bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the imdb sentiment analysis pipeline with TFX."""
  output = example_gen_pb2.Output(
      split_config=example_gen_pb2.SplitConfig(splits=[
          example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=9),
          example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
      ]))

  # Brings data into the pipeline.
  example_gen = CsvExampleGen(input_base=data_root, output_config=output)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that trains a model.
  trainer = Trainer(
      module_file=module_file,
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=500),
      eval_args=trainer_pb2.EvalArgs(num_steps=200))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='label')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='BinaryAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          # Increase this threshold when training on complete
                          # dataset.
                          lower_bound={'value': 0.01}),
                      # Change threshold will be ignored if there is no
                      # baseline model resolved from MLMD (first run).
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-2})))
          ])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  components = [
      example_gen,
      statistics_gen,
      schema_gen,
      example_validator,
      transform,
      trainer,
      model_resolver,
      evaluator,
      pusher,
  ]
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      enable_cache=True,
      beam_pipeline_args=beam_pipeline_args)
def _create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_root: Text,
    trainer_module_file: Text,
    evaluator_module_file: Text,
    serving_model_dir: Text,
    metadata_path: Text,
    beam_pipeline_args: List[Text],
) -> tfx.dsl.Pipeline:
  """Implements the Penguin pipeline with TFX."""
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = tfx.components.CsvExampleGen(
      input_base=os.path.join(data_root, 'labelled'))

  # Computes statistics over data for visualization and example validation.
  statistics_gen = tfx.components.StatisticsGen(
      examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = tfx.components.SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = tfx.components.ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # TODO(humichael): Handle applying transformation component in Milestone 3.

  # Uses a user-provided Python function that trains a model. num_steps is
  # not provided during evaluation because the scikit-learn model loads and
  # evaluates the entire test set at once.
  trainer = tfx.components.Trainer(
      module_file=trainer_module_file,
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      train_args=tfx.proto.TrainArgs(num_steps=2000),
      eval_args=tfx.proto.EvalArgs())

  # Gets the latest blessed model for model validation.
  model_resolver = tfx.dsl.Resolver(
      strategy_class=tfx.dsl.experimental.LatestBlessedModelStrategy,
      model=tfx.dsl.Channel(type=tfx.types.standard_artifacts.Model),
      model_blessing=tfx.dsl.Channel(
          type=tfx.types.standard_artifacts.ModelBlessing)).with_id(
              'latest_blessed_model_resolver')

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='species')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='Accuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.6}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = tfx.components.Evaluator(
      module_file=evaluator_module_file,
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  pusher = tfx.components.Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=tfx.proto.PushDestination(
          filesystem=tfx.proto.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          trainer,
          model_resolver,
          evaluator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=tfx.orchestration.metadata
      .sqlite_metadata_connection_config(metadata_path),
      beam_pipeline_args=beam_pipeline_args,
  )
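# Pipelines written against the tfx.v1-style API above are commonly driven by
# the local orchestrator rather than the Beam runner. A sketch, again with
# hypothetical placeholder values:
from tfx import v1 as tfx

tfx.orchestration.LocalDagRunner().run(
    _create_pipeline(
        pipeline_name='penguin_sklearn',  # hypothetical name
        pipeline_root='/tmp/tfx/pipelines/penguin_sklearn',  # hypothetical
        data_root='/tmp/penguin/data',  # hypothetical
        trainer_module_file='/tmp/penguin/penguin_trainer.py',  # hypothetical
        evaluator_module_file='/tmp/penguin/penguin_evaluator.py',  # hypothetical
        serving_model_dir='/tmp/penguin/serving_model',  # hypothetical
        metadata_path='/tmp/tfx/metadata.db',  # hypothetical
        beam_pipeline_args=[]))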
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the Iris flowers pipeline with TFX."""
  examples = external_input(data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                         infer_feature_shape=True)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(examples=example_gen.outputs['examples'],
                        schema=schema_gen.outputs['schema'],
                        module_file=module_file)

  # Uses a user-provided Python function that trains a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=2000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5))

  # Gets the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='variety')],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='SparseCategoricalAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': 0.6}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # The change threshold is ignored if there is no baseline (first run).
      eval_config=eval_config)

  # Performs infra validation of a candidate model to prevent an unservable
  # model from being pushed. This config launches a model server using the
  # latest TensorFlow Serving image in a local Docker engine.
  infra_validator = InfraValidator(
      model=trainer.outputs['model'],
      examples=example_gen.outputs['examples'],
      serving_spec=infra_validator_pb2.ServingSpec(
          tensorflow_serving=infra_validator_pb2.TensorFlowServing(
              tags=['latest']),
          local_docker=infra_validator_pb2.LocalDockerConfig()),
      request_spec=infra_validator_pb2.RequestSpec(
          tensorflow_serving=infra_validator_pb2
          .TensorFlowServingRequestSpec()))

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if the check passed.
  pusher = Pusher(model=trainer.outputs['model'],
                  model_blessing=evaluator.outputs['blessing'],
                  infra_blessing=infra_validator.outputs['blessing'],
                  push_destination=pusher_pb2.PushDestination(
                      filesystem=pusher_pb2.PushDestination.Filesystem(
                          base_directory=serving_model_dir)))

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen,
          statistics_gen,
          schema_gen,
          example_validator,
          transform,
          trainer,
          model_resolver,
          evaluator,
          infra_validator,
          pusher,
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      # TODO(b/142684737): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers],
  )
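# InfraValidator also accepts a ValidationSpec for bounding how long and how
# often validation is attempted. A hedged sketch of extending the Iris
# configuration above; the numeric values are illustrative assumptions:
infra_validator = InfraValidator(
    model=trainer.outputs['model'],
    examples=example_gen.outputs['examples'],
    serving_spec=infra_validator_pb2.ServingSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServing(
            tags=['latest']),
        local_docker=infra_validator_pb2.LocalDockerConfig()),
    validation_spec=infra_validator_pb2.ValidationSpec(
        max_loading_time_seconds=60,  # give up if the model fails to load
        num_tries=3),  # retry transient serving failures
    request_spec=infra_validator_pb2.RequestSpec(
        tensorflow_serving=infra_validator_pb2.TensorFlowServingRequestSpec(),
        num_examples=5))  # number of sample requests sent to the server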
def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    preprocessing_fn: Text,
    run_fn: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    eval_accuracy_threshold: float,
    serving_model_dir: Text,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
) -> pipeline.Pipeline:
  """Implements the penguin pipeline with TFX."""
  components = []

  # Brings data into the pipeline or otherwise joins/converts training data.
  # TODO(step 2): Might use another ExampleGen class for your data.
  example_gen = CsvExampleGen(input_base=data_path)
  components.append(example_gen)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  components.append(statistics_gen)

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                         infer_feature_shape=True)
  components.append(schema_gen)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(  # pylint: disable=unused-variable
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])
  components.append(example_validator)

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(  # pylint: disable=unused-variable
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      preprocessing_fn=preprocessing_fn)
  # TODO(step 3): Uncomment here to add Transform to the pipeline.
  # components.append(transform)

  # Uses a user-provided Python function that implements a model using
  # TensorFlow.
  trainer = Trainer(
      run_fn=run_fn,
      examples=example_gen.outputs['examples'],
      # Use outputs of Transform as training inputs if Transform is used.
      # examples=transform.outputs['transformed_examples'],
      # transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      train_args=train_args,
      eval_args=eval_args)
  # TODO(step 4): Uncomment here to add Trainer to the pipeline.
  # components.append(trainer)

  # Gets the latest blessed model for model validation.
  model_resolver = resolver.Resolver(
      strategy_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(
          type=ModelBlessing)).with_id('latest_blessed_model_resolver')
  # TODO(step 5): Uncomment here to add Resolver to the pipeline.
  # components.append(model_resolver)

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key=features.LABEL_KEY)],
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(metrics=[
              tfma.MetricConfig(
                  class_name='SparseCategoricalAccuracy',
                  threshold=tfma.MetricThreshold(
                      value_threshold=tfma.GenericValueThreshold(
                          lower_bound={'value': eval_accuracy_threshold}),
                      change_threshold=tfma.GenericChangeThreshold(
                          direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                          absolute={'value': -1e-10})))
          ])
      ])
  evaluator = Evaluator(  # pylint: disable=unused-variable
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      # The change threshold is ignored if there is no baseline (first run).
      eval_config=eval_config)
  # TODO(step 5): Uncomment here to add Evaluator to the pipeline.
  # components.append(evaluator)

  # Pushes the model to a file destination if the check passed.
  pusher = Pusher(  # pylint: disable=unused-variable
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination=pusher_pb2.PushDestination(
          filesystem=pusher_pb2.PushDestination.Filesystem(
              base_directory=serving_model_dir)))
  # TODO(step 5): Uncomment here to add Pusher to the pipeline.
  # components.append(pusher)

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=components,
      # Change this value to control caching of execution results. The
      # default is `False`.
      # enable_cache=True,
      metadata_connection_config=metadata_connection_config,
      beam_pipeline_args=beam_pipeline_args,
  )
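# A sketch of instantiating the template pipeline above for a local run with
# a SQLite-backed ML Metadata store. Every literal below is a hypothetical
# placeholder, not a value from the original template:
from tfx.orchestration import metadata
from tfx.proto import trainer_pb2

pipeline_instance = create_pipeline(
    pipeline_name='penguin_template',  # hypothetical name
    pipeline_root='/tmp/tfx/pipelines/penguin_template',  # hypothetical
    data_path='/tmp/penguin/data',  # hypothetical
    preprocessing_fn='models.preprocessing.preprocessing_fn',  # hypothetical
    run_fn='models.model.run_fn',  # hypothetical
    train_args=trainer_pb2.TrainArgs(num_steps=100),
    eval_args=trainer_pb2.EvalArgs(num_steps=15),
    eval_accuracy_threshold=0.6,
    serving_model_dir='/tmp/penguin/serving_model',  # hypothetical
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        '/tmp/tfx/metadata.db'))  # hypothetical path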