def __init__(self,
             examples: types.Channel,
             schema: types.Channel,
             model: types.Channel,
             min_recall: float,
             max_latency: float,
             evaluation: Optional[types.Channel] = None,
             blessing: Optional[types.Channel] = None,
             instance_name=None):
  blessing = blessing or types.Channel(
      type=standard_artifacts.ModelBlessing,
      artifacts=[standard_artifacts.ModelBlessing()])
  evaluation = evaluation or types.Channel(
      type=standard_artifacts.ModelEvaluation,
      artifacts=[standard_artifacts.ModelEvaluation()])
  spec = IndexEvaluatorSpec(
      examples=examples,
      schema=schema,
      model=model,
      evaluation=evaluation,
      blessing=blessing,
      min_recall=min_recall,
      max_latency=max_latency)
  super().__init__(spec=spec, instance_name=instance_name)
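# Hypothetical usage sketch (not from the source): assuming the constructor
# above belongs to a component class named `IndexEvaluator` that pairs with
# `IndexEvaluatorSpec`, it could be wired into a pipeline roughly as follows.
# `example_gen`, `schema_gen`, and `trainer` stand in for upstream components,
# and the threshold values are purely illustrative.
index_evaluator = IndexEvaluator(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    model=trainer.outputs['model'],
    min_recall=0.9,     # bless only if recall stays at or above this value
    max_latency=0.01)   # bless only if query latency stays below this bound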
def testDoValidation(self, exec_properties, blessed, has_baseline):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      EXAMPLES_KEY: [examples],
      MODEL_KEY: [model],
      SCHEMA_KEY: [schema],
  }
  if has_baseline:
    input_dict[BASELINE_MODEL_KEY] = [baseline_model]

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      EVALUATION_KEY: [eval_output],
      BLESSING_KEY: [blessing_output],
  }

  # List needs to be serialized before being passed into Do function.
  exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'validations')))
  if blessed:
    self.assertTrue(
        fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
  else:
    self.assertTrue(
        fileio.exists(os.path.join(blessing_output.uri, 'NOT_BLESSED')))
def __init__(self,
             examples: types.Channel = None,
             model_exports: types.Channel = None,
             feature_slicing_spec: Optional[
                 evaluator_pb2.FeatureSlicingSpec] = None,
             output: Optional[types.Channel] = None,
             model: Optional[types.Channel] = None,
             name: Optional[Text] = None):
  """Construct an Evaluator component.

  Args:
    examples: A Channel of 'ExamplesPath' type, usually produced by an
      ExampleGen component (required).
    model_exports: A Channel of 'ModelExportPath' type, usually produced by a
      Trainer component (required).
    feature_slicing_spec: Optional evaluator_pb2.FeatureSlicingSpec instance
      that describes how to slice the data.
    output: Optional Channel of 'ModelEvalPath' for the evaluation result.
    model: Forwards compatibility alias for the 'model_exports' argument.
    name: Optional unique name. Necessary if multiple Evaluator components
      are declared in the same pipeline.
  """
  model_exports = model_exports or model
  output = output or types.Channel(
      type=standard_artifacts.ModelEvaluation,
      artifacts=[standard_artifacts.ModelEvaluation()])
  spec = EvaluatorSpec(
      examples=examples,
      model_exports=model_exports,
      feature_slicing_spec=(feature_slicing_spec or
                            evaluator_pb2.FeatureSlicingSpec()),
      output=output)
  super(Evaluator, self).__init__(spec=spec, name=name)
def testInvalidBenchmarkNameThrows(self):
  with self.assertRaises(ValueError):
    BenchmarkResultPublisher(
        '',
        channel_utils.as_channel([standard_artifacts.ModelEvaluation()]),
        run=1,
        num_runs=2)
def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  input_dict = {
      EXAMPLES_KEY: [examples],
      MODEL_KEY: [model],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      EVALUATION_KEY: [eval_output],
      BLESSING_KEY: [blessing_output],
  }

  try:
    # Need to import the following module so that the fairness indicator
    # post-export metric is registered. This may raise an ImportError if the
    # currently-installed version of TFMA does not support fairness
    # indicators.
    import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
    exec_properties['fairness_indicator_thresholds'] = [
        0.1, 0.3, 0.5, 0.7, 0.9
    ]
  except ImportError:
    logging.warning(
        'Not testing fairness indicators because a compatible TFMA version '
        'is not installed.')

  # List needs to be serialized before being passed into Do function.
  exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertFalse(
      fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
def testMissingBenchmarkResultConstruction(self):
  publisher = BenchmarkResultPublisher(
      'test',
      channel_utils.as_channel([standard_artifacts.ModelEvaluation()]),
      run=1,
      num_runs=2)

  self.assertEqual('NitroML.BenchmarkResult',
                   publisher.outputs['benchmark_result'].type_name)
def testEvaluation(self, exec_properties, model_agnostic=False):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  baseline_model = standard_artifacts.Model()
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      EXAMPLES_KEY: [examples],
      SCHEMA_KEY: [schema],
  }
  if not model_agnostic:
    model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    input_dict[MODEL_KEY] = [model]

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      EVALUATION_KEY: [eval_output],
      BLESSING_KEY: [blessing_output],
  }

  # Test multiple splits.
  exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(['train', 'eval'])

  if MODULE_FILE_KEY in exec_properties:
    exec_properties[MODULE_FILE_KEY] = os.path.join(
        source_data_dir, 'module_file', 'evaluator_module.py')

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertFalse(
      fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
def testContextProperties(self):
  publisher = BenchmarkResultPublisher(
      'test',
      channel_utils.as_channel([standard_artifacts.ModelEvaluation()]),
      run=1,
      num_runs=2,
      additional_context={
          'test_int': 1,
          'test_str': 'str',
          'test_float': 0.1
      })

  want = '{"test_float": 0.1, "test_int": 1, "test_str": "str"}'
  got = publisher.exec_properties['additional_context']

  self.assertEqual(want, got)
def testEvaluation(self, exec_properties):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      constants.EXAMPLES_KEY: [examples],
      constants.MODEL_KEY: [model],
      constants.SCHEMA_KEY: [schema],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      constants.EVALUATION_KEY: [eval_output],
      constants.BLESSING_KEY: [blessing_output],
  }

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertFalse(
      tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))
def __init__(self,
             examples: types.Channel = None,
             model: types.Channel = None,
             feature_slicing_spec: Optional[Union[
                 evaluator_pb2.FeatureSlicingSpec, Dict[Text, Any]]] = None,
             fairness_indicator_thresholds: Optional[List[Union[
                 float, data_types.RuntimeParameter]]] = None,
             output: Optional[types.Channel] = None,
             model_exports: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct an Evaluator component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`, usually
      produced by an ExampleGen component. _required_
    model: A Channel of type `standard_artifacts.Model`, usually produced by
      a Trainer component.
    feature_slicing_spec:
      [evaluator_pb2.FeatureSlicingSpec](https://github.com/tensorflow/tfx/blob/master/tfx/proto/evaluator.proto)
      instance that describes how Evaluator should slice the data. If any
      field is provided as a RuntimeParameter, feature_slicing_spec should be
      constructed as a dict with the same field names as the
      FeatureSlicingSpec proto message.
    fairness_indicator_thresholds: Optional list of float (or
      RuntimeParameter) threshold values for use with TFMA fairness
      indicators. Experimental functionality: this interface and
      functionality may change at any time. TODO(b/142653905): add a link to
      additional documentation for TFMA fairness indicators here.
    output: Channel of `ModelEvalPath` to store the evaluation results.
    model_exports: Backwards compatibility alias for the `model` argument.
    instance_name: Optional name assigned to this specific instance of
      Evaluator. Required only if multiple Evaluator components are declared
      in the same pipeline.

  Either `model_exports` or `model` must be present in the input arguments.
  """
  model_exports = model_exports or model
  output = output or types.Channel(
      type=standard_artifacts.ModelEvaluation,
      artifacts=[standard_artifacts.ModelEvaluation()])
  spec = EvaluatorSpec(
      examples=examples,
      model_exports=model_exports,
      feature_slicing_spec=(feature_slicing_spec or
                            evaluator_pb2.FeatureSlicingSpec()),
      fairness_indicator_thresholds=fairness_indicator_thresholds,
      output=output)
  super(Evaluator, self).__init__(spec=spec, instance_name=instance_name)
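# Usage sketch (an assumption, not taken from the source): constructing the
# Evaluator defined above with a slicing spec and fairness thresholds.
# `example_gen` and `trainer` stand in for upstream pipeline components, and
# the slicing column and threshold values are illustrative only.
from tfx.proto import evaluator_pb2

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
        evaluator_pb2.SingleSlicingSpec(
            column_for_slicing=['trip_start_hour']),
    ]),
    fairness_indicator_thresholds=[0.25, 0.5, 0.75])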
def testDo(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  train_examples = standard_artifacts.Examples(split='train')
  eval_examples = standard_artifacts.Examples(split='eval')
  eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
  model_exports = standard_artifacts.Model()
  model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
  input_dict = {
      'examples': [train_examples, eval_examples],
      'model_exports': [model_exports],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  output_dict = {'output': [eval_output]}

  # Create exec properties.
  exec_properties = {
      'feature_slicing_spec':
          json_format.MessageToJson(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ]))
  }

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config')))
  self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'plots')))
def __init__(self,
             examples: types.Channel = None,
             model: types.Channel = None,
             feature_slicing_spec: Optional[
                 evaluator_pb2.FeatureSlicingSpec] = None,
             output: Optional[types.Channel] = None,
             model_exports: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct an Evaluator component.

  Args:
    examples: A Channel of 'ExamplesPath' type, usually produced by an
      ExampleGen component. _required_
    model: A Channel of 'ModelExportPath' type, usually produced by a Trainer
      component.
    feature_slicing_spec:
      [evaluator_pb2.FeatureSlicingSpec](https://github.com/tensorflow/tfx/blob/master/tfx/proto/evaluator.proto)
      instance that describes how Evaluator should slice the data.
    output: Channel of `ModelEvalPath` to store the evaluation results.
    model_exports: Backwards compatibility alias for the `model` argument,
      which will be deprecated in the future.
    instance_name: Optional name assigned to this specific instance of
      Evaluator. Required only if multiple Evaluator components are declared
      in the same pipeline.

  Either `model_exports` or `model` must be present in the input arguments.
  """
  model_exports = model_exports or model
  output = output or types.Channel(
      type=standard_artifacts.ModelEvaluation,
      artifacts=[standard_artifacts.ModelEvaluation()])
  spec = EvaluatorSpec(
      examples=examples,
      model_exports=model_exports,
      feature_slicing_spec=(feature_slicing_spec or
                            evaluator_pb2.FeatureSlicingSpec()),
      output=output)
  super(Evaluator, self).__init__(spec=spec, instance_name=instance_name)
def __init__(
    self,
    examples: types.Channel = None,
    model: types.Channel = None,
    baseline_model: Optional[types.Channel] = None,
    # TODO(b/148618405): deprecate feature_slicing_spec.
    feature_slicing_spec: Optional[Union[evaluator_pb2.FeatureSlicingSpec,
                                         Dict[Text, Any]]] = None,
    fairness_indicator_thresholds: Optional[List[Union[
        float, data_types.RuntimeParameter]]] = None,
    output: Optional[types.Channel] = None,
    model_exports: Optional[types.Channel] = None,
    instance_name: Optional[Text] = None,
    eval_config: Optional[tfma.EvalConfig] = None,
    blessing: Optional[types.Channel] = None,
    schema: Optional[types.Channel] = None,
    enable_cache: Optional[bool] = None):
  """Construct an Evaluator component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`, usually
      produced by an ExampleGen component. _required_
    model: A Channel of type `standard_artifacts.Model`, usually produced by
      a Trainer component.
    baseline_model: An optional Channel of type `standard_artifacts.Model`,
      used as the baseline model for model diff and model validation.
    feature_slicing_spec: Deprecated, please use eval_config instead. Only
      supports estimators.
      [evaluator_pb2.FeatureSlicingSpec](https://github.com/tensorflow/tfx/blob/master/tfx/proto/evaluator.proto)
      instance that describes how Evaluator should slice the data. If any
      field is provided as a RuntimeParameter, feature_slicing_spec should be
      constructed as a dict with the same field names as the
      FeatureSlicingSpec proto message.
    fairness_indicator_thresholds: Optional list of float (or
      RuntimeParameter) threshold values for use with TFMA fairness
      indicators. Experimental functionality: this interface and
      functionality may change at any time. TODO(b/142653905): add a link to
      additional documentation for TFMA fairness indicators here.
    output: Channel of `ModelEvalPath` to store the evaluation results.
    model_exports: Backwards compatibility alias for the `model` argument.
    instance_name: Optional name assigned to this specific instance of
      Evaluator. Required only if multiple Evaluator components are declared
      in the same pipeline.
    eval_config: Instance of tfma.EvalConfig containing configuration
      settings for running the evaluation. This config has options for both
      estimator and Keras.
    blessing: Output channel of 'ModelBlessingPath' that contains the
      blessing result.
    schema: A `Schema` channel to use for TFXIO.
    enable_cache: Optional boolean to indicate whether caching is enabled for
      the Evaluator component. If not specified, defaults to the value
      specified for the pipeline's enable_cache parameter.

  Either `model_exports` or `model` must be present in the input arguments.
  """
  if eval_config is not None and feature_slicing_spec is not None:
    raise ValueError(
        "At most one of 'eval_config' or 'feature_slicing_spec' can be "
        'supplied.')
  if eval_config is None and feature_slicing_spec is None:
    feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
    absl.logging.info('Neither eval_config nor feature_slicing_spec is '
                      'passed, the model is treated as estimator.')
  if model_exports:
    absl.logging.warning(
        'The "model_exports" argument to the Evaluator component has '
        'been renamed to "model" and is deprecated. Please update your '
        'usage as support for this argument will be removed soon.')
    model = model_exports
  if feature_slicing_spec:
    absl.logging.warning('feature_slicing_spec is deprecated, please use '
                         'eval_config instead.')
  blessing = blessing or types.Channel(
      type=standard_artifacts.ModelBlessing,
      artifacts=[standard_artifacts.ModelBlessing()])
  evaluation = output or types.Channel(
      type=standard_artifacts.ModelEvaluation,
      artifacts=[standard_artifacts.ModelEvaluation()])
  spec = EvaluatorSpec(
      examples=examples,
      model=model,
      baseline_model=baseline_model,
      feature_slicing_spec=feature_slicing_spec,
      fairness_indicator_thresholds=fairness_indicator_thresholds,
      evaluation=evaluation,
      eval_config=eval_config,
      blessing=blessing,
      schema=schema)
  super(Evaluator, self).__init__(
      spec=spec, instance_name=instance_name, enable_cache=enable_cache)
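# Usage sketch (an assumption, not taken from the source): constructing the
# Evaluator defined above with a tfma.EvalConfig instead of the deprecated
# feature_slicing_spec, plus a baseline model for validation. `example_gen`,
# `trainer`, and `model_resolver` stand in for upstream components, and the
# label key, metric, and threshold values are illustrative only.
import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(
                class_name='BinaryAccuracy',
                threshold=tfma.MetricThreshold(
                    value_threshold=tfma.GenericValueThreshold(
                        lower_bound={'value': 0.6})))
        ])
    ])

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config)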
def _make_input_dict(self, uri: Text = '') -> Dict[Text, List[Artifact]]:
  evaluation = standard_artifacts.ModelEvaluation()
  evaluation.uri = uri
  return {'evaluation': [evaluation]}
def testDo(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  train_examples = standard_artifacts.Examples(split='train')
  eval_examples = standard_artifacts.Examples(split='eval')
  eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
  model_exports = standard_artifacts.Model()
  model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
  input_dict = {
      'examples': [train_examples, eval_examples],
      'model_exports': [model_exports],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  output_dict = {'output': [eval_output]}

  # Create exec properties.
  exec_properties = {
      'feature_slicing_spec':
          json_format.MessageToJson(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ]))
  }

  try:
    # Need to import the following module so that the fairness indicator
    # post-export metric is registered. This may raise an ImportError if the
    # currently-installed version of TFMA does not support fairness
    # indicators.
    import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
    exec_properties['fairness_indicator_thresholds'] = [
        0.1, 0.3, 0.5, 0.7, 0.9
    ]
  except ImportError:
    absl.logging.warning(
        'Not testing fairness indicators because a compatible TFMA version '
        'is not installed.')

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      # TODO(b/141490237): Update to only check eval_config.json after a TFMA
      # release with the corresponding change.
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config')) or
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))