def slice_spec_from_stats( # pylint: disable=invalid-name statistics: statistics_pb2.DatasetFeatureStatisticsList, categorical_uniques_threshold: int = 100, max_cross_size: int = 2 ) -> List[slicer.SingleSliceSpec]: """Generates slicing spec from statistics. Args: statistics: Data statistics. categorical_uniques_threshold: Maximum number of unique values beyond which we don't slice on that categorical feature. max_cross_size: Maximum size feature crosses to consider. Returns: List of slice specs. """ columns_to_consider = [] for feature in statistics.datasets[0].features: if len(feature.path.step) != 1: continue stats_type = feature.WhichOneof('stats') if stats_type == 'string_stats': # TODO(pachristopher): Consider slicing on top-K values for features # with high cardinality. if 0 < feature.string_stats.unique <= categorical_uniques_threshold: columns_to_consider.append(feature) result = [] for i in range(1, max_cross_size+1): for cross in itertools.combinations(columns_to_consider, i): result.append(slicer.SingleSliceSpec( columns=[feature.path.step[0] for feature in cross])) result.append(slicer.SingleSliceSpec()) return result
def render_plot(
    result: view_types.EvalResult,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    output_name: Optional[Text] = None,
    class_id: Optional[int] = None,
    top_k: Optional[int] = None,
    k: Optional[int] = None,
    label: Optional[Text] = None,
) -> Optional[visualization.PlotViewer]:  # pytype: disable=invalid-annotation
  """Renders the plot view as widget.

  Args:
    result: A tfma.EvalResult.
    slicing_spec: The tfma.SlicingSpec to identify the slice. Show overall if
      unset.
    output_name: A string representing the output name.
    class_id: A number representing the class id if multi class.
    top_k: The k used to compute prediction in the top k position.
    k: The k used to compute prediction at the kth position.
    label: A partial label used to match a set of plots in the results.

  Returns:
    A PlotViewer object if in Jupyter notebook; None if in Colab.
  """
  if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
    slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
  slice_spec_to_use = slicing_spec if slicing_spec else slicer.SingleSliceSpec()
  data, cfg = util.get_plot_data_and_config(result.plots, slice_spec_to_use,
                                            output_name, class_id, top_k, k,
                                            label)
  return visualization.render_plot(data, cfg)
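# A minimal usage sketch (not part of the original module): render the plot
# view for a single slice identified by a SlicingSpec proto. The 'gender'
# feature key is an illustrative assumption only.
def _example_render_plot(result):
  return render_plot(
      result, slicing_spec=config_pb2.SlicingSpec(feature_keys=['gender']))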
def slice_spec_from_stats( # pylint: disable=invalid-name statistics: statistics_pb2.DatasetFeatureStatisticsList, categorical_uniques_threshold: int = 100, max_cross_size: int = 2) -> List[slicer.SingleSliceSpec]: """Generates slicing spec from statistics. Args: statistics: Data statistics. categorical_uniques_threshold: Maximum number of unique values beyond which we don't slice on that categorical feature. max_cross_size: Maximum size feature crosses to consider. Returns: List of slice specs. """ slicable_column_names = [] for feature in _get_slicable_categorical_features( statistics, categorical_uniques_threshold): slicable_column_names.append(feature.path.step[0]) for feature in _get_slicable_numeric_features(statistics): # We would bucketize the feature based on the quantiles boundaries. slicable_column_names.append(TRANSFORMED_FEATURE_PREFIX + feature.path.step[0]) result = [] for i in range(1, max_cross_size + 1): for cross in itertools.combinations(slicable_column_names, i): result.append( slicer.SingleSliceSpec( columns=[feature_name for feature_name in cross])) result.append(slicer.SingleSliceSpec()) return result
def testSliceOnMetaFeature(self): # We want to make sure that slicing on the newly added feature works, so # pulling in slice here. with beam.Pipeline() as pipeline: fpls = create_fpls() metrics = ( pipeline | 'CreateTestInput' >> beam.Create(fpls) | 'WrapFpls' >> beam.Map(wrap_fpl) | 'ExtractInterestsNum' >> meta_feature_extractor.ExtractMetaFeature(get_num_interests) | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys([ slicer.SingleSliceSpec(), slicer.SingleSliceSpec(columns=['num_interests']) ]) | 'FanoutSlices' >> slicer.FanoutSlices()) def check_result(got): try: self.assertEqual(4, len(got), 'got: %s' % got) expected_slice_keys = [ (), (), (('num_interests', 1), ), (('num_interests', 2), ), ] self.assertEqual(sorted(slice_key for slice_key, _ in got), sorted(expected_slice_keys)) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(metrics, check_result)
def SliceKeyExtractor(
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    eval_config: Optional[config.EvalConfig] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
  """Creates an extractor for extracting slice keys.

  The incoming Extracts must contain features stored under tfma.FEATURES_KEY
  and optionally under tfma.TRANSFORMED_FEATURES.

  The extractor's PTransform yields a copy of the Extracts input with an
  additional extract pointing at the list of SliceKeyType values keyed by
  tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized version
  of the slice keys will be added under the key
  tfma.MATERIALIZED_SLICE_KEYS_KEY.

  Args:
    slice_spec: Deprecated (use EvalConfig).
    eval_config: Optional EvalConfig containing slicing_specs specifying the
      slices to slice the data into. If slicing_specs are empty, defaults to
      the overall slice.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.
  """
  if slice_spec and eval_config:
    raise ValueError('slice_spec is deprecated, only use eval_config')
  if eval_config:
    slice_spec = [
        slicer.SingleSliceSpec(spec=spec) for spec in eval_config.slicing_specs
    ]
  if not slice_spec:
    slice_spec = [slicer.SingleSliceSpec()]
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=ExtractSliceKeys(slice_spec, eval_config, materialize))
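# A minimal usage sketch (not part of the original module): the slicing_specs
# on an EvalConfig drive the extractor. The 'gender' feature key is an
# illustrative assumption only.
def _example_slice_key_extractor():
  eval_config = config.EvalConfig(slicing_specs=[
      config.SlicingSpec(),  # Overall slice.
      config.SlicingSpec(feature_keys=['gender']),
  ])
  # Equivalent to passing
  # slice_spec=[SingleSliceSpec(), SingleSliceSpec(columns=['gender'])].
  return SliceKeyExtractor(eval_config=eval_config, materialize=False)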
def testMaterializedSliceKeys(self): with beam.Pipeline() as pipeline: fpls = create_fpls() slice_keys_extracts = ( pipeline | 'CreateTestInput' >> beam.Create(fpls) | 'WrapFpls' >> beam.Map(wrap_fpl) | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys( [ slicer.SingleSliceSpec(), slicer.SingleSliceSpec(columns=['gender']) ], materialize=True)) def check_result(got): try: self.assertEqual(2, len(got), 'got: %s' % got) expected_results = sorted([ types.MaterializedColumn( name=constants.SLICE_KEYS_KEY, value=[b'Overall', b'gender:f']), types.MaterializedColumn( name=constants.SLICE_KEYS_KEY, value=[b'Overall', b'gender:m']) ]) got_results = [] for item in got: self.assertIn(constants.SLICE_KEYS_KEY, item) got_results.append(item[constants.SLICE_KEYS_KEY]) self.assertEqual(sorted(got_results), sorted(expected_results)) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(slice_keys_extracts, check_result)
def testLegacySliceKeys(self): with beam.Pipeline() as pipeline: fpls = create_fpls() slice_keys_extracts = ( pipeline | 'CreateTestInput' >> beam.Create(fpls) | 'WrapFpls' >> beam.Map(wrap_fpl) | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([ slicer.SingleSliceSpec(), slicer.SingleSliceSpec(columns=['gender']) ])) def check_result(got): try: self.assertLen(got, 2) expected_results = sorted([[(), (('gender', 'f'),)], [(), (('gender', 'm'),)]]) got_results = [] for item in got: self.assertIn(constants.SLICE_KEY_TYPES_KEY, item) got_results.append(sorted(item[constants.SLICE_KEY_TYPES_KEY])) self.assertCountEqual(got_results, expected_results) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(slice_keys_extracts, check_result)
def test_slice_spec_from_stats_and_schema_int_categorical(self): stats = text_format.Parse( """ datasets { features: { path { step: 'feature1' } type: INT string_stats: { unique: 10 } } features: { path { step: 'feature2' } type: INT num_stats: { min: 1 max: 10 } } } """, statistics_pb2.DatasetFeatureStatisticsList()) transformed_feature2 = ( auto_slice_key_extractor.TRANSFORMED_FEATURE_PREFIX + 'feature2') expected_slice_spec = [ slicer.SingleSliceSpec(columns=['feature1']), slicer.SingleSliceSpec(columns=[transformed_feature2]), slicer.SingleSliceSpec(columns=['feature1', transformed_feature2]), slicer.SingleSliceSpec() ] actual_slice_spec = auto_slice_key_extractor.slice_spec_from_stats(stats) self.assertEqual(actual_slice_spec, expected_slice_spec)
def testSliceOneSlice(self): with beam.Pipeline() as pipeline: fpls = create_fpls() metrics = ( pipeline | 'CreateTestInput' >> beam.Create(fpls, reshuffle=False) | 'WrapFpls' >> beam.Map(wrap_fpl) | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([ slicer.SingleSliceSpec(), slicer.SingleSliceSpec(columns=['gender']) ]) | 'FanoutSlices' >> slicer.FanoutSlices()) def check_result(got): try: self.assertLen(got, 4) expected_result = [ ((), wrap_fpl(fpls[0])), ((), wrap_fpl(fpls[1])), ((('gender', 'f'), ), wrap_fpl(fpls[0])), ((('gender', 'm'), ), wrap_fpl(fpls[1])), ] self.assertCountEqual(got, expected_result) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(metrics, check_result)
def render_time_series(
    results: view_types.EvalResults,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    display_full_path: bool = False
) -> Optional[visualization.TimeSeriesViewer]:  # pytype: disable=invalid-annotation
  """Renders the time series view as widget.

  Args:
    results: A tfma.EvalResults.
    slicing_spec: A tfma.SlicingSpec determining the slice to show time series
      on. Show overall if not set.
    display_full_path: Whether to display the full path to the model / data in
      the visualization or just show the file name.

  Returns:
    A TimeSeriesViewer object if in Jupyter notebook; None if in Colab.
  """
  if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
    slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
  slice_spec_to_use = slicing_spec if slicing_spec else slicer.SingleSliceSpec()
  data = util.get_time_series(results, slice_spec_to_use, display_full_path)
  cfg = {'isModelCentric': results.get_mode() == constants.MODEL_CENTRIC_MODE}
  return visualization.render_time_series(data, cfg)
def test_slice_spec_from_stats_and_schema(self): stats = text_format.Parse( """ datasets { features: { path { step: 'feature1' } type: STRING string_stats: { unique: 10 } } features: { path { step: 'feature2' } type: STRING string_stats: { unique: 200 } } features: { path { step: 'feature3' } type: INT string_stats: { unique: 10 } } features: { path { step: 'feature4' } type: INT string_stats: { unique: 200 } } features: { path { step: 'feature5' } type: INT num_stats: { } } features: { path { step: 'feature6' } type: FLOAT num_stats: { } } } """, statistics_pb2.DatasetFeatureStatisticsList()) expected_slice_spec = [ slicer.SingleSliceSpec(columns=['feature1']), slicer.SingleSliceSpec(columns=['feature3']), slicer.SingleSliceSpec(columns=['feature1', 'feature3']), slicer.SingleSliceSpec() ] actual_slice_spec = auto_slice_key_extractor.slice_spec_from_stats( stats) self.assertEqual(actual_slice_spec, expected_slice_spec)
def testBuildAnalysisTableWithSlices(self): model_location = self._exportEvalSavedModel( linear_classifier.simple_linear_classifier) eval_shared_model = model_eval_lib.default_eval_shared_model( eval_saved_model_path=model_location) example1 = self._makeExample(age=3.0, language='english', label=1.0, slice_key='first_slice') slice_spec = [ slicer.SingleSliceSpec(columns=['age']), slicer.SingleSliceSpec(features=[('age', 3)]), slicer.SingleSliceSpec(columns=['age'], features=[('language', 'english')]) ] with beam.Pipeline() as pipeline: result = ( pipeline | 'CreateInput' >> beam.Create([example1.SerializeToString()]) | 'BuildTable' >> contrib.BuildAnalysisTable( eval_shared_model, slice_spec)) def check_result(got): self.assertEqual(1, len(got), 'got: %s' % got) extracts = got[0] # Values of type MaterializedColumn are emitted to signal to # downstream sink components to output the data to file. materialized_dict = dict( (k, v) for k, v in extracts.items() if isinstance(v, types.MaterializedColumn)) self._assertMaterializedColumns( materialized_dict, { constants.SLICE_KEYS_KEY: types.MaterializedColumn( name=constants.SLICE_KEYS_KEY, value=[ b'age:3.0', b'age:3', b'age_X_language:3.0_X_english' ]) }) self._assertMaterializedColumnsExist(materialized_dict, [ 'predictions__logits', 'predictions__probabilities', 'predictions__classes', 'predictions__logistic', 'predictions__class_ids' ]) util.assert_that(result[constants.ANALYSIS_KEY], check_result)
def testSerializeDeserializeLegacyEvalConfig(self): output_path = self._getTempDir() old_config = LegacyConfig( model_location='/path/to/model', data_location='/path/to/data', slice_spec=[ slicer.SingleSliceSpec(columns=['country'], features=[('age', 5), ('gender', 'f')]), slicer.SingleSliceSpec(columns=['interest'], features=[('age', 6), ('gender', 'm')]) ], example_count_metric_key=None, example_weight_metric_key='key', compute_confidence_intervals=False, k_anonymization_count=1) final_dict = {} final_dict['tfma_version'] = tfma_version.VERSION_STRING final_dict['eval_config'] = old_config with tf.io.TFRecordWriter(os.path.join(output_path, 'eval_config')) as w: w.write(pickle.dumps(final_dict)) got_eval_config = model_eval_lib.load_eval_config(output_path) options = config.Options() options.compute_confidence_intervals.value = ( old_config.compute_confidence_intervals) options.k_anonymization_count.value = old_config.k_anonymization_count eval_config = config.EvalConfig( input_data_specs=[ config.InputDataSpec(location=old_config.data_location) ], model_specs=[config.ModelSpec(location=old_config.model_location)], output_data_specs=[ config.OutputDataSpec(default_location=output_path) ], slicing_specs=[ config.SlicingSpec(feature_keys=['country'], feature_values={ 'age': '5', 'gender': 'f' }), config.SlicingSpec(feature_keys=['interest'], feature_values={ 'age': '6', 'gender': 'm' }) ], options=options) self.assertEqual(eval_config, got_eval_config)
def slice_spec_from_stats( # pylint: disable=invalid-name statistics: statistics_pb2.DatasetFeatureStatisticsList, categorical_uniques_threshold: int = 100, max_cross_size: int = 2, allowlist_features: Optional[Set[Text]] = None, denylist_features: Optional[Set[Text]] = None) -> List[ slicer.SingleSliceSpec]: """Generates slicing spec from statistics. Args: statistics: Data statistics. categorical_uniques_threshold: Maximum number of unique values beyond which we don't slice on that categorical feature. max_cross_size: Maximum size feature crosses to consider. allowlist_features: Set of features to be used for slicing. denylist_features: Set of features to ignore for slicing. Returns: List of slice specs. """ features_to_consider = [] for feature in statistics.datasets[0].features: # TODO(pachristopher): Consider structured features once TFMA supports # slicing on structured features. if (len(feature.path.step) != 1 or (allowlist_features and feature.path.step[0] not in allowlist_features) or (denylist_features and feature.path.step[0] in denylist_features)): continue features_to_consider.append(feature) slicable_column_names = [] for feature in _get_slicable_categorical_features( features_to_consider, categorical_uniques_threshold): slicable_column_names.append(feature.path.step[0]) for feature in _get_slicable_numeric_features(features_to_consider): # We would bucketize the feature based on the quantiles boundaries. slicable_column_names.append(TRANSFORMED_FEATURE_PREFIX + feature.path.step[0]) result = [] for i in range(1, max_cross_size + 1): for cross in itertools.combinations(slicable_column_names, i): result.append( slicer.SingleSliceSpec( columns=[feature_name for feature_name in cross])) result.append(slicer.SingleSliceSpec()) return result
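# A hedged sketch (not from the original source) of how the allow/deny lists
# narrow the slicing candidates before feature crosses are generated; the
# 'label' and 'user_id' feature names are illustrative assumptions.
def _example_slice_spec_from_stats(statistics):
  return slice_spec_from_stats(
      statistics,
      categorical_uniques_threshold=50,
      max_cross_size=2,
      # Skip the label and high-cardinality identifiers when slicing.
      denylist_features={'label', 'user_id'})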
def render_slicing_metrics(
    result: view_types.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    weighted_example_column: Optional[Text] = None,
    event_handlers: Optional[Callable[[Dict[Text, Union[Text, float]]],
                                      None]] = None,
) -> Optional[visualization.SlicingMetricsViewer]:  # pytype: disable=invalid-annotation
  """Renders the slicing metrics view as widget.

  Args:
    result: A tfma.EvalResult.
    slicing_column: The column to slice on.
    slicing_spec: The tfma.SlicingSpec to filter results. If neither column nor
      spec is set, show overall.
    weighted_example_column: Override for the weighted example column. This can
      be used when different weights are applied in different parts of the
      model (e.g. multi-head).
    event_handlers: The event handlers.

  Returns:
    A SlicingMetricsViewer object if in Jupyter notebook; None if in Colab.
  """
  if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
    slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
  data = util.get_slicing_metrics(result.slicing_metrics, slicing_column,
                                  slicing_spec)
  cfg = util.get_slicing_config(result.config, weighted_example_column)
  return visualization.render_slicing_metrics(
      data, cfg, event_handlers=event_handlers)
def render_slicing_attributions(
    result: view_types.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    metric_name: Optional[Text] = None,
    weighted_example_column: Optional[Text] = None,
    event_handlers: Optional[Callable[[Dict[Text, Union[Text, float]]],
                                      None]] = None,
) -> Optional[visualization.SlicingMetricsViewer]:  # pytype: disable=invalid-annotation
  """Renders the slicing attributions view as widget.

  Args:
    result: A tfma.EvalResult.
    slicing_column: The column to slice on.
    slicing_spec: The tfma.SlicingSpec to filter results. If neither column nor
      spec is set, show overall.
    metric_name: Name of attributions metric to show attributions for. Optional
      if only one metric used.
    weighted_example_column: Override for the weighted example column. This can
      be used when different weights are applied in different parts of the
      model (e.g. multi-head).
    event_handlers: The event handlers.

  Returns:
    A SlicingMetricsViewer object if in Jupyter notebook; None if in Colab.
  """
  if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
    slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
  data = util.get_slicing_metrics(result.attributions, slicing_column,
                                  slicing_spec)
  # Attributions have one additional level of indirection for the metric_name.
  # Filter this out using the metric_name provided.
  for d in data:
    updated_data = {}
    for output_name, per_output_items in d['metrics'].items():  # pytype: disable=attribute-error
      updated_data[output_name] = {}
      for sub_key, per_sub_key_items in per_output_items.items():
        updated_data[output_name][sub_key] = {}
        if metric_name:
          if metric_name not in per_sub_key_items:
            raise ValueError('metric_name={} not found in {}'.format(
                metric_name, per_sub_key_items.keys()))
          updated_data[output_name][sub_key] = per_sub_key_items[metric_name]
        elif len(per_sub_key_items) == 1:
          updated_data[output_name][sub_key] = list(
              per_sub_key_items.values())[0]
        else:
          raise ValueError(
              'metric_name must be one of the following: {}'.format(
                  per_sub_key_items.keys()))
    d['metrics'] = updated_data
  cfg = util.get_slicing_config(result.config, weighted_example_column)
  return visualization.render_slicing_metrics(
      data, cfg, event_handlers=event_handlers)
def assertSliceResult(self, name, features_dict, columns, features, expected): spec = slicer.SingleSliceSpec(columns=columns, features=features) msg = 'Test case %s: slice on columns %s, features %s' % (name, columns, features) six.assertCountEqual( self, expected, slicer.get_slices_for_features_dict(features_dict, [spec]), msg)
def SliceKeyExtractor(slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
                      materialize: Optional[bool] = True
                     ) -> extractor.Extractor:
  """Creates an extractor for extracting slice keys.

  The incoming Extracts must contain a FeaturesPredictionsLabels extract keyed
  by tfma.FEATURES_PREDICTIONS_LABELS_KEY. Typically this will be obtained by
  calling the PredictExtractor.

  The extractor's PTransform yields a copy of the Extracts input with an
  additional extract pointing at the list of SliceKeyType values keyed by
  tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized version
  of the slice keys will be added under the key
  tfma.MATERIALIZED_SLICE_KEYS_KEY.

  Args:
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.
  """
  if not slice_spec:
    slice_spec = [slicer.SingleSliceSpec()]
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=ExtractSliceKeys(slice_spec, materialize))
def testConvertEvalResultToUIInputWithNoDataFound(self): eval_result = self._makeEvalResult(slices=((('slice', '1'), ), )) with self.assertRaises(ValueError): widget_view.convert_slicing_metrics_to_ui_input( eval_result.slicing_metrics, slicing_spec=slicer.SingleSliceSpec(columns=['unknown']), )
def _check_threshold(key: metric_types.MetricKey,
                     slicing_spec: Optional[config.SlicingSpec],
                     sliced_key: slicer.SliceKeyType,
                     threshold: _ThresholdType,
                     baseline_model_name: Optional[Text],
                     metrics: Dict[metric_types.MetricKey, Any],
                     metric: Any) -> bool:
  """Verify a metric given its metric key and metric value.

  Args:
    key: Metric key of the metric being checked.
    slicing_spec: If set, the threshold only applies to slices matching this
      spec.
    sliced_key: Slice key of the sliced metrics being checked.
    threshold: Value or change threshold to verify against.
    baseline_model_name: Name of the baseline model (for change thresholds).
    metrics: Dict of metric values keyed by metric key, used to look up the
      baseline value for change thresholds.
    metric: The metric value (for change thresholds, the diff from baseline).

  Returns:
    True if the metric satisfies the threshold.
  """
  if (slicing_spec is not None and
      not slicer.SingleSliceSpec(spec=slicing_spec).is_slice_applicable(
          sliced_key)):
    # The threshold does not apply to this slice, so it trivially passes.
    return True
  if isinstance(threshold, config.GenericValueThreshold):
    lower_bound, upper_bound = -np.inf, np.inf
    if threshold.HasField('lower_bound'):
      lower_bound = threshold.lower_bound.value
    if threshold.HasField('upper_bound'):
      upper_bound = threshold.upper_bound.value
    return metric > lower_bound and metric < upper_bound
  elif isinstance(threshold, config.GenericChangeThreshold):
    diff = metric
    ratio = diff / metrics[key.make_baseline_key(baseline_model_name)]
    if threshold.direction == config.MetricDirection.LOWER_IS_BETTER:
      absolute, relative = np.inf, np.inf
    elif threshold.direction == config.MetricDirection.HIGHER_IS_BETTER:
      absolute, relative = -np.inf, -np.inf
    else:
      raise ValueError('"UNKNOWN" direction for change threshold.')
    if threshold.HasField('absolute'):
      absolute = threshold.absolute.value
    if threshold.HasField('relative'):
      relative = threshold.relative.value
    if threshold.direction == config.MetricDirection.LOWER_IS_BETTER:
      return diff < absolute and ratio < relative
    elif threshold.direction == config.MetricDirection.HIGHER_IS_BETTER:
      return diff > absolute and ratio > relative
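# A small worked example (illustration only, mirroring the AUC validation test
# that follows): a GenericValueThreshold with lower_bound=0.9 rejects a metric
# value of 0.8, because the check requires lower_bound < metric < upper_bound,
# with the bounds defaulting to -inf/+inf when unset.
def _example_value_threshold_check():
  threshold = config.GenericValueThreshold(lower_bound={'value': 0.9})
  metric = 0.8
  lower_bound = (
      threshold.lower_bound.value
      if threshold.HasField('lower_bound') else -np.inf)
  upper_bound = (
      threshold.upper_bound.value
      if threshold.HasField('upper_bound') else np.inf)
  return metric > lower_bound and metric < upper_bound  # False for 0.8.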
def testSliceKeys(self, model_names, extracts, slice_column, expected_slices): eval_config = config.EvalConfig( model_specs=[config.ModelSpec(name=name) for name in model_names]) with beam.Pipeline() as pipeline: slice_keys_extracts = ( pipeline | 'CreateTestInput' >> beam.Create(extracts) | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys( [slicer.SingleSliceSpec(columns=[slice_column])], eval_config=eval_config)) def check_result(got): try: self.assertLen(got, 2) got_results = [] for item in got: self.assertIn(constants.SLICE_KEY_TYPES_KEY, item) got_results.append( sorted(item[constants.SLICE_KEY_TYPES_KEY])) self.assertCountEqual(got_results, expected_slices) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(slice_keys_extracts, check_result)
def testValidateMetricsMetricTDistributionValueAndThreshold( self, slicing_specs, slice_key): threshold = config.MetricThreshold( value_threshold=config.GenericValueThreshold( lower_bound={'value': 0.9})) eval_config = config.EvalConfig( model_specs=[ config.ModelSpec(), ], slicing_specs=slicing_specs, metrics_specs=[ config.MetricsSpec(metrics=[ config.MetricConfig( class_name='AUC', threshold=threshold if slicing_specs is None else None, per_slice_thresholds=[ config.PerSliceMetricThreshold( slicing_specs=slicing_specs, threshold=threshold) ]), ], model_names=['']), ], ) sliced_metrics = (slice_key, { metric_types.MetricKey(name='auc'): types.ValueWithTDistribution(sample_mean=0.91, unsampled_value=0.8) }) result = metrics_validator.validate_metrics(sliced_metrics, eval_config) self.assertFalse(result.validation_ok) expected = text_format.Parse( """ metric_validations_per_slice { failures { metric_key { name: "auc" } metric_value { double_value { value: 0.8 } } } }""", validation_result_pb2.ValidationResult()) expected.metric_validations_per_slice[0].failures[ 0].metric_threshold.CopyFrom(threshold) expected.metric_validations_per_slice[0].slice_key.CopyFrom( slicer.serialize_slice_key(slice_key)) for spec in slicing_specs or [None]: if (spec is None or slicer.SingleSliceSpec( spec=spec).is_slice_applicable(slice_key)): slicing_details = expected.validation_details.slicing_details.add( ) if spec is not None: slicing_details.slicing_spec.CopyFrom(spec) else: slicing_details.slicing_spec.CopyFrom(config.SlicingSpec()) slicing_details.num_matching_slices = 1 self.assertEqual(result, expected)
def testSliceDefaultSlice(self): with beam.Pipeline() as pipeline: fpls = create_fpls() metrics = (pipeline | 'CreateTestInput' >> beam.Create(fpls) | 'WrapFpls' >> beam.Map(wrap_fpl) | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys( [slicer.SingleSliceSpec()]) | 'FanoutSlices' >> slicer.FanoutSlices()) def check_result(got): try: self.assertLen(got, 2) expected_result = [ ((), wrap_fpl(fpls[0])), ((), wrap_fpl(fpls[1])), ] self.assertEqual(len(got), len(expected_result)) self.assertTrue(got[0] == expected_result[0] and got[1] == expected_result[1] or got[1] == expected_result[0] and got[0] == expected_result[1]) except AssertionError as err: raise util.BeamAssertException(err) util.assert_that(metrics, check_result)
def testIsSliceApplicable(self): test_cases = [ ('applicable', ['column1'], [('column3', 'value3'), ('column4', 'value4')], (('column1', 'value1'), ('column3', 'value3'), ('column4', 'value4')), True), ('wrongcolumns', ['column1', 'column2'], [('column3', 'value3'), ('column4', 'value4')], (('column1', 'value1'), ('column3', 'value3'), ('column4', 'value4')), False), ('wrongfeatures', ['column1'], [('column3', 'value3')], (('column1', 'value1'), ('column3', 'value3'), ('column4', 'value4')), False), ('nocolumns', [], [('column3', 'value3')], (('column1', 'value1'), ('column3', 'value3'), ('column4', 'value4')), False), ('nofeatures', ['column1'], [], (('column1', 'value1'), ), True), ('empty slice key', ['column1'], [('column2', 'value1')], (), False), ('overall', [], [], (), True) ] # pyformat: disable for (name, columns, features, slice_key, result) in test_cases: slice_spec = slicer.SingleSliceSpec(columns=columns, features=features) self.assertEqual(slice_spec.is_slice_applicable(slice_key), result, msg=name)
def get_slicing_metrics(
    results: List[Tuple[slicer.SliceKeyType, Dict[Text, Any]]],
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
) -> List[Dict[Text, Union[Dict[Text, Any], Text]]]:
  """Util function that extracts slicing metrics from the results.

  If neither slicing_column nor slicing_spec is provided, get Overall. If
  slicing_column is set, use it to filter metrics from results. Otherwise, use
  slicing_spec for filtering.

  Args:
    results: A list of records. Each record is a tuple of (slice_name,
      {metric_name, metric_value}).
    slicing_column: The column to filter the results with.
    slicing_spec: The slicer.SingleSliceSpec to filter the results with.

  Returns:
    A list of {slice, metrics}

  Raises:
    ValueError: The provided slicing_column does not exist in results or more
      than one set of overall result is found.
  """
  if slicing_column:
    data = find_all_slices(results,
                           slicer.SingleSliceSpec(columns=[slicing_column]))
  elif not slicing_spec:
    data = find_all_slices(results, slicer.SingleSliceSpec())
  else:
    data = find_all_slices(results, slicing_spec)
  slice_count = len(data)
  if not slice_count:
    if not slicing_spec:
      if not slicing_column:
        slicing_column = slicer.OVERALL_SLICE_NAME
      raise ValueError('No slices found for %s' % slicing_column)
    else:
      raise ValueError('No slices found for %s' % slicing_spec)
  elif not slicing_column and not slicing_spec and slice_count > 1:
    raise ValueError('More than one slice found for %s' %
                     slicer.OVERALL_SLICE_NAME)
  else:
    return data
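# A minimal sketch (not from the original source) of the expected input shape:
# each record pairs a slice key with its metrics dict. The feature name and
# metric values are illustrative assumptions.
def _example_get_slicing_metrics():
  results = [
      ((), {'accuracy': 0.80}),                   # Overall slice.
      ((('gender', 'f'),), {'accuracy': 0.75}),   # gender:f slice.
      ((('gender', 'm'),), {'accuracy': 0.85}),   # gender:m slice.
  ]
  # Filters down to the two 'gender' slices.
  return get_slicing_metrics(results, slicing_column='gender')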
def convert_slicing_metrics_to_ui_input(
    slicing_metrics: List[Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                                view_types.MetricsByOutputName]],
    slicing_column: Optional[str] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
    output_name: str = '',
    multi_class_key: str = '') -> Optional[List[Dict[str, Any]]]:
  """Converts slicing metrics to the Fairness Indicators UI input format.

  Args:
    slicing_metrics: tfma.EvalResult.slicing_metrics.
    slicing_column: The slicing column to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column
      and slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for
      multi-class models).

  Returns:
    A list of dicts for each slice, where each dict contains keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError: If no related eval result is found or if both slicing_column
      and slicing_spec are set.
  """
  if slicing_column and slicing_spec:
    raise ValueError(
        'Only one of the "slicing_column" and "slicing_spec" parameters '
        'can be set.')
  if slicing_column:
    slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

  data = []
  for (slice_key, metric_value) in slicing_metrics:
    if (metric_value is not None and output_name in metric_value and
        multi_class_key in metric_value[output_name]):
      metrics = metric_value[output_name][multi_class_key]
      # Add evaluation data for cross slice comparison.
      if slicer.is_cross_slice_key(slice_key):
        _add_cross_slice_key_data(slice_key, metrics, data)
      # Add evaluation data for regular slices.
      elif (slicing_spec is None or not slice_key or
            slicing_spec.is_slice_applicable(slice_key)):
        data.append({
            'sliceValue': stringify_slice_key_value(slice_key),
            'slice': slicer.stringify_slice_key(slice_key),
            'metrics': metrics
        })
  if not data:
    raise ValueError(
        'No eval result found for output_name:"%s" and '
        'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".' %
        (output_name, multi_class_key, slicing_column, slicing_spec))
  return data
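# A hedged sketch (illustration only) of the slicing_metrics layout this
# function expects: slice key -> output name -> multi-class key -> metrics.
# The empty-string keys match the function defaults; the feature name and
# metric contents are made up for illustration.
def _example_convert_slicing_metrics():
  slicing_metrics = [
      ((('gender', 'f'),), {'': {'': {'accuracy': {'doubleValue': 0.75}}}}),
      ((('gender', 'm'),), {'': {'': {'accuracy': {'doubleValue': 0.85}}}}),
  ]
  return convert_slicing_metrics_to_ui_input(
      slicing_metrics, slicing_column='gender')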
def testNonUTF8ValueRaisesValueError(self): column_name = 'column_name' invalid_value = b'\x8a' spec = slicer.SingleSliceSpec(columns=[column_name]) features_dict = self._makeFeaturesDict({ column_name: [invalid_value], }) with self.assertRaisesRegex(ValueError, column_name): list(slicer.get_slices_for_features_dicts([features_dict], None, [spec]))
def testGetSlicesForFeaturesDictMultipleSingleSliceSpecs(self): features_dict = self._makeFeaturesDict({ 'gender': ['f'], 'age': [5], 'interest': ['cars'] }) spec_overall = slicer.SingleSliceSpec() spec_age = slicer.SingleSliceSpec(columns=['age']) spec_age4 = slicer.SingleSliceSpec(features=[('age', 4)]) spec_age5_gender = slicer.SingleSliceSpec(columns=['gender'], features=[('age', 5)]) slice_spec = [spec_overall, spec_age, spec_age4, spec_age5_gender] expected = [(), (('age', 5), ), (('age', 5), ('gender', 'f'))] self.assertItemsEqual( expected, slicer.get_slices_for_features_dict(features_dict, slice_spec))
def convert_eval_result_to_ui_input(
    eval_result: model_eval_lib.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
    output_name: Text = '',
    multi_class_key: Text = '') -> Optional[List[Dict[Text, Any]]]:
  """Converts an eval result to the Fairness Indicators UI input format.

  Args:
    eval_result: A tfma.EvalResult.
    slicing_column: The slicing column to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column
      and slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for
      multi-class models).

  Returns:
    A list of dicts for each slice, where each dict contains keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError: If no related eval result is found or if both slicing_column
      and slicing_spec are set.
  """
  if slicing_column and slicing_spec:
    raise ValueError(
        'Only one of the "slicing_column" and "slicing_spec" parameters '
        'can be set.')
  if slicing_column:
    slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

  data = []
  for (slice_key, metric_value) in eval_result.slicing_metrics:
    slice_key_ok = (
        slicing_spec is None or not slice_key or
        slicing_spec.is_slice_applicable(slice_key))
    metric_ok = (
        output_name in metric_value and
        multi_class_key in metric_value[output_name])
    if slice_key_ok and metric_ok:
      data.append({
          'sliceValue': stringify_slice_key_value(slice_key),
          'slice': slicer.stringify_slice_key(slice_key),
          'metrics': metric_value[output_name][multi_class_key]
      })
  if not data:
    raise ValueError(
        'No eval result found for output_name:"%s" and '
        'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".' %
        (output_name, multi_class_key, slicing_column, slicing_spec))
  return data
def is_slice_applicable(
    sliced_combiner_output: Tuple[slicer.SliceKeyType,
                                  metric_types.MetricsDict],
    slicing_specs: Union[config.SlicingSpec,
                         Iterable[config.SlicingSpec]]) -> bool:
  """Returns True if the sliced output's slice key matches any slicing spec."""
  slice_key, _ = sliced_combiner_output
  for slicing_spec in slicing_specs:
    if slicer.SingleSliceSpec(
        spec=slicing_spec).is_slice_applicable(slice_key):
      return True
  return False
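# A minimal sketch (not from the original source): a combiner output for the
# gender:f slice matches a SlicingSpec keyed on 'gender' but not one keyed on
# 'age'. The metrics dict is irrelevant to the check and left empty.
def _example_is_slice_applicable():
  sliced_output = ((('gender', 'f'),), {})
  assert is_slice_applicable(
      sliced_output, [config.SlicingSpec(feature_keys=['gender'])])
  assert not is_slice_applicable(
      sliced_output, [config.SlicingSpec(feature_keys=['age'])])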