def to_proto(self) -> metrics_for_slice_pb2.MetricValue:
  """Converts matrices into ConfusionMatrixAtThresholds proto.

  If precision or recall are undefined then 1.0 and 0.0 will be used.

  Returns:
    A MetricValue proto containing a ConfusionMatrixAtThresholds proto.
  """
  result = metrics_for_slice_pb2.MetricValue()
  matrices = result.confusion_matrix_at_thresholds.matrices
  for i, threshold in enumerate(self.thresholds):
    tp, fp, tn, fn = self.tp[i], self.fp[i], self.tn[i], self.fn[i]
    predicted_positives = tp + fp
    actual_positives = tp + fn
    # When the denominator is zero, precision defaults to 1.0 and recall
    # defaults to 0.0.
    precision = tp / predicted_positives if predicted_positives > 0 else 1.0
    recall = tp / actual_positives if actual_positives > 0 else 0.0
    matrices.add(
        threshold=round(threshold, 6),
        true_positives=tp,
        false_positives=fp,
        true_negatives=tn,
        false_negatives=fn,
        precision=precision,
        recall=recall)
  return result
def convert_slice_metrics(
    slice_key: slicer.SliceKeyType,
    slice_metrics: Dict[Any, Any],
    add_metrics_callbacks: List[types.AddMetricsCallbackType],
    metrics_for_slice: metrics_for_slice_pb2.MetricsForSlice) -> None:
  """Converts slice_metrics into the given metrics_for_slice proto.

  Args:
    slice_key: Slice key associated with the metrics.
    slice_metrics: Mapping from metric key to metric value for this slice.
    add_metrics_callbacks: A list of metric callbacks. Callbacks that define
      populate_stats_and_pop are given the chance to convert their metrics
      into the structured output before the generic conversion runs.
    metrics_for_slice: Output MetricsForSlice proto to populate.
  """
  slice_metrics_copy = slice_metrics.copy()
  # Prevent further references to this, so we don't accidentally mutate it.
  del slice_metrics

  # Convert the metrics from add_metrics_callbacks to the structured output if
  # defined.
  if add_metrics_callbacks and (not any(
      isinstance(k, metric_types.MetricKey)
      for k in slice_metrics_copy.keys())):
    for add_metrics_callback in add_metrics_callbacks:
      if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
        add_metrics_callback.populate_stats_and_pop(
            slice_key, slice_metrics_copy, metrics_for_slice.metrics)
  for key in sorted(slice_metrics_copy.keys()):
    value = slice_metrics_copy[key]
    metric_value = metrics_for_slice_pb2.MetricValue()
    if isinstance(value, metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
      metric_value.confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(value, types.ValueWithTDistribution):
      # Convert to a bounded value. 95% confidence level is computed here.
      # Will populate t distribution value instead after migration.
      sample_mean, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      metric_value.bounded_value.value.value = sample_mean
      metric_value.bounded_value.lower_bound.value = lower_bound
      metric_value.bounded_value.upper_bound.value = upper_bound
      metric_value.bounded_value.methodology = (
          metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
    elif isinstance(value, six.binary_type):
      # Convert textual types to string metrics.
      metric_value.bytes_value = value
    elif isinstance(value, six.text_type):
      # Unicode strings must be encoded before assignment to the bytes field
      # (assigning str to a bytes field fails under Python 3). This matches
      # the handling in the other converters in this file.
      metric_value.bytes_value = value.encode('utf8')
    elif isinstance(value, np.ndarray):
      # Convert NumPy arrays to ArrayValue.
      metric_value.array_value.CopyFrom(_convert_to_array_value(value))
    else:
      # We try to convert to float values.
      try:
        metric_value.double_value.value = float(value)
      except (TypeError, ValueError) as e:
        metric_value.unknown_type.value = str(value)
        # str(e) rather than e.message: Exception.message was removed in
        # Python 3 and raised AttributeError here.
        metric_value.unknown_type.error = str(e)
    if isinstance(key, metric_types.MetricKey):
      key_and_value = metrics_for_slice.metric_keys_and_values.add()
      key_and_value.key.CopyFrom(key.to_proto())
      key_and_value.value.CopyFrom(metric_value)
    else:
      metrics_for_slice.metrics[key].CopyFrom(metric_value)
def convert_slice_attributions_to_proto(
    attributions: Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                        Dict[Any, Dict[Text, Any]]]
) -> metrics_for_slice_pb2.AttributionsForSlice:
  """Converts the given slice attributions into serialized AttributionsForSlice.

  Args:
    attributions: The slice attributions.

  Returns:
    The AttributionsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
  result = metrics_for_slice_pb2.AttributionsForSlice()
  slice_key, slice_attributions = attributions

  if slicer.is_cross_slice_key(slice_key):
    result.cross_slice_key.CopyFrom(
        slicer.serialize_cross_slice_key(slice_key))
  else:
    result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

  slice_attributions = slice_attributions.copy()

  for key in sorted(slice_attributions.keys()):
    key_and_value = result.attributions_keys_and_values.add()
    key_and_value.key.CopyFrom(key.to_proto())
    for feature, value in slice_attributions[key].items():
      attribution_value = metrics_for_slice_pb2.MetricValue()
      if isinstance(value, six.binary_type):
        # Convert textual types to string metrics.
        attribution_value.bytes_value = value
      elif isinstance(value, six.text_type):
        # Convert textual types to string metrics.
        attribution_value.bytes_value = value.encode('utf8')
      elif isinstance(value, np.ndarray) and value.size != 1:
        # Convert NumPy arrays to ArrayValue.
        attribution_value.array_value.CopyFrom(
            _convert_to_array_value(value))
      else:
        # We try to convert to float values.
        try:
          attribution_value.double_value.value = float(value)
        except (TypeError, ValueError) as e:
          attribution_value.unknown_type.value = str(value)
          # str(e) rather than e.message: Exception.message was removed in
          # Python 3 and raised AttributeError here.
          attribution_value.unknown_type.error = str(e)
      key_and_value.values[feature].CopyFrom(attribution_value)
  return result
def to_proto(self) -> metrics_for_slice_pb2.MetricValue:
  """Converts the stored matrices into a MetricValue proto.

  Returns:
    A MetricValue proto containing a MultiClassConfusionMatrixAtThresholds
    proto with one matrix per threshold, entries sorted by key.
  """
  result = metrics_for_slice_pb2.MetricValue()
  matrices_proto = result.multi_class_confusion_matrix_at_thresholds
  for threshold in sorted(self):
    # Convert -epsilon and 1.0+epsilon back to 0.0 and 1.0.
    if threshold == -_EPSILON:
      clamped_threshold = 0.0
    elif threshold == 1.0 + _EPSILON:
      clamped_threshold = 1.0
    else:
      clamped_threshold = threshold
    matrix_proto = matrices_proto.matrices.add(threshold=clamped_threshold)
    entries = self[threshold]
    for entry_key in sorted(entries):
      matrix_proto.entries.add(
          actual_class_id=entry_key.actual_class_id,
          predicted_class_id=entry_key.predicted_class_id,
          num_weighted_examples=entries[entry_key])
  return result
def convert_metric_value_to_proto(
    value: types.MetricValueType) -> metrics_for_slice_pb2.MetricValue:
  """Converts a MetricValueType into its proto format.

  Args:
    value: The metric value. Structured values are delegated to their own
      to_proto; bytes/str become bytes_value, NumPy arrays become
      array_value, and anything else is coerced to double_value when
      possible, falling back to unknown_type.

  Returns:
    The MetricValue proto.
  """
  if isinstance(value, types.StructuredMetricValue):
    return value.to_proto()
  result = metrics_for_slice_pb2.MetricValue()
  if isinstance(value, six.binary_type):
    # Convert textual types to string metrics.
    result.bytes_value = value
  elif isinstance(value, six.text_type):
    # Convert textual types to string metrics.
    result.bytes_value = value.encode('utf8')
  elif isinstance(value, np.ndarray):
    # Convert NumPy arrays to ArrayValue.
    result.array_value.CopyFrom(_convert_to_array_value(value))
  else:
    # We try to convert to float values.
    try:
      result.double_value.value = float(value)
    except (TypeError, ValueError) as e:
      result.unknown_type.value = str(value)
      # str(e) rather than e.message: Exception.message was removed in
      # Python 3 and raised AttributeError here.
      result.unknown_type.error = str(e)
  return result
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
  """Converts the given slice metrics into serialized proto MetricsForSlice.

  Args:
    metrics: The slice metrics.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
  result = metrics_for_slice_pb2.MetricsForSlice()
  slice_key, slice_metrics = metrics

  result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

  slice_metrics = slice_metrics.copy()

  if metric_keys.ERROR_METRIC in slice_metrics:
    logging.warning('Error for slice: %s with error message: %s ', slice_key,
                    slice_metrics[metric_keys.ERROR_METRIC])
    result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
        metric_keys.ERROR_METRIC]
    return result

  # Convert the metrics from add_metrics_callbacks to the structured output if
  # defined.
  if add_metrics_callbacks and (not any(
      isinstance(k, metric_types.MetricKey) for k in slice_metrics.keys())):
    for add_metrics_callback in add_metrics_callbacks:
      if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
        add_metrics_callback.populate_stats_and_pop(slice_key, slice_metrics,
                                                    result.metrics)
  for key in sorted(slice_metrics.keys()):
    value = slice_metrics[key]
    metric_value = metrics_for_slice_pb2.MetricValue()
    if isinstance(value, metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
      metric_value.confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(
        value, metrics_for_slice_pb2.MultiClassConfusionMatrixAtThresholds):
      metric_value.multi_class_confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(value, types.ValueWithTDistribution):
      # Currently we populate both bounded_value and confidence_interval.
      # Avoid populating bounded_value once the UI handles confidence_interval.
      # Convert to a bounded value. 95% confidence level is computed here.
      _, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      metric_value.bounded_value.value.value = value.unsampled_value
      metric_value.bounded_value.lower_bound.value = lower_bound
      metric_value.bounded_value.upper_bound.value = upper_bound
      metric_value.bounded_value.methodology = (
          metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
      # Populate confidence_interval
      metric_value.confidence_interval.lower_bound.value = lower_bound
      metric_value.confidence_interval.upper_bound.value = upper_bound
      t_dist_value = metrics_for_slice_pb2.TDistributionValue()
      t_dist_value.sample_mean.value = value.sample_mean
      t_dist_value.sample_standard_deviation.value = (
          value.sample_standard_deviation)
      t_dist_value.sample_degrees_of_freedom.value = (
          value.sample_degrees_of_freedom)
      # Once the UI handles confidence interval, we will avoid setting this and
      # instead use the double_value.
      t_dist_value.unsampled_value.value = value.unsampled_value
      metric_value.confidence_interval.t_distribution_value.CopyFrom(
          t_dist_value)
    elif isinstance(value, six.binary_type):
      # Convert textual types to string metrics.
      metric_value.bytes_value = value
    elif isinstance(value, six.text_type):
      # Convert textual types to string metrics.
      metric_value.bytes_value = value.encode('utf8')
    elif isinstance(value, np.ndarray):
      # Convert NumPy arrays to ArrayValue.
      metric_value.array_value.CopyFrom(_convert_to_array_value(value))
    else:
      # We try to convert to float values.
      try:
        metric_value.double_value.value = float(value)
      except (TypeError, ValueError) as e:
        metric_value.unknown_type.value = str(value)
        # str(e) rather than e.message: Exception.message was removed in
        # Python 3 and raised AttributeError here.
        metric_value.unknown_type.error = str(e)
    if isinstance(key, metric_types.MetricKey):
      key_and_value = result.metric_keys_and_values.add()
      key_and_value.key.CopyFrom(key.to_proto())
      key_and_value.value.CopyFrom(metric_value)
    else:
      result.metrics[key].CopyFrom(metric_value)
  return result
def testLoadMetricsAsDataframe_DoubleValueOnly(self):
  """Checks load_metrics_as_dataframe on a record with only double_value metrics."""
  # Build a MetricsForSlice with one slice key (age=38.0, sex=Female) and two
  # double-valued metrics.
  metrics_for_slice = text_format.Parse(
      """ slice_key { single_slice_keys { column: "age" float_value: 38.0 } single_slice_keys { column: "sex" bytes_value: "Female" } } metric_keys_and_values { key { name: "mean_absolute_error" example_weighted { } } value { double_value { value: 0.1 } } } metric_keys_and_values { key { name: "mean_squared_logarithmic_error" example_weighted { } } value { double_value { value: 0.02 } } } """,
      metrics_for_slice_pb2.MetricsForSlice())
  # Serialize the proto to a TFRecord file that the loader reads back.
  path = os.path.join(absltest.get_default_test_tmpdir(), 'metrics.tfrecord')
  with tf.io.TFRecordWriter(path) as writer:
    writer.write(metrics_for_slice.SerializeToString())
  df = experimental.load_metrics_as_dataframe(path)
  # One row per metric key; both rows share the same slice string.
  expected = pd.DataFrame({
      'slice': [
          'age = 38.0; sex = b\'Female\'', 'age = 38.0; sex = b\'Female\''
      ],
      'name': ['mean_absolute_error', 'mean_squared_logarithmic_error'],
      'model_name': ['', ''],
      'output_name': ['', ''],
      'example_weighted': [False, False],
      'is_diff': [False, False],
      'display_value': [str(0.1), str(0.02)],
      'metric_value': [
          metrics_for_slice_pb2.MetricValue(double_value={'value': 0.1}),
          metrics_for_slice_pb2.MetricValue(double_value={'value': 0.02})
      ],
  })
  pd.testing.assert_frame_equal(expected, df)

  # Include empty column.
  # With include_empty_columns=True, all-empty columns (sub_key,
  # aggregation_type, confidence_interval) appear as None-filled columns.
  df = experimental.load_metrics_as_dataframe(
      path, include_empty_columns=True)
  expected = pd.DataFrame({
      'slice': [
          'age = 38.0; sex = b\'Female\'', 'age = 38.0; sex = b\'Female\''
      ],
      'name': ['mean_absolute_error', 'mean_squared_logarithmic_error'],
      'model_name': ['', ''],
      'output_name': ['', ''],
      'sub_key': [None, None],
      'aggregation_type': [None, None],
      'example_weighted': [False, False],
      'is_diff': [False, False],
      'display_value': [str(0.1), str(0.02)],
      'metric_value': [
          metrics_for_slice_pb2.MetricValue(double_value={'value': 0.1}),
          metrics_for_slice_pb2.MetricValue(double_value={'value': 0.02})
      ],
      'confidence_interval': [None, None],
  })
  pd.testing.assert_frame_equal(expected, df)
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                   metric_types.MetricsDict],
    add_metrics_callbacks: Optional[List[types.AddMetricsCallbackType]]
) -> metrics_for_slice_pb2.MetricsForSlice:
  """Converts the given slice metrics into serialized proto MetricsForSlice.

  Args:
    metrics: The slice metrics.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
  result = metrics_for_slice_pb2.MetricsForSlice()
  slice_key, slice_metrics = metrics

  # A slice key may be either a regular slice or a cross-slice comparison;
  # each has its own serialized form on the proto.
  if slicer.is_cross_slice_key(slice_key):
    result.cross_slice_key.CopyFrom(slicer.serialize_cross_slice_key(slice_key))
  else:
    result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

  # Copy so that callbacks below can pop entries without mutating the input.
  slice_metrics = slice_metrics.copy()

  # A slice carrying an error metric is serialized as just that error message.
  if metric_keys.ERROR_METRIC in slice_metrics:
    logging.warning('Error for slice: %s with error message: %s ', slice_key,
                    slice_metrics[metric_keys.ERROR_METRIC])
    result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
        metric_keys.ERROR_METRIC]
    return result

  # Convert the metrics from add_metrics_callbacks to the structured output if
  # defined.
  if add_metrics_callbacks and (not any(
      isinstance(k, metric_types.MetricKey) for k in slice_metrics.keys())):
    for add_metrics_callback in add_metrics_callbacks:
      if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
        add_metrics_callback.populate_stats_and_pop(slice_key, slice_metrics,
                                                    result.metrics)
  for key in sorted(slice_metrics.keys()):
    value = slice_metrics[key]
    if isinstance(value, types.ValueWithTDistribution):
      # Bootstrapped value: emit the unsampled value plus a 95% confidence
      # interval derived from the t-distribution statistics.
      unsampled_value = value.unsampled_value
      _, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      confidence_interval = metrics_for_slice_pb2.ConfidenceInterval(
          lower_bound=convert_metric_value_to_proto(lower_bound),
          upper_bound=convert_metric_value_to_proto(upper_bound),
          standard_error=convert_metric_value_to_proto(
              value.sample_standard_deviation),
          degrees_of_freedom={'value': value.sample_degrees_of_freedom})
      metric_value = convert_metric_value_to_proto(unsampled_value)
      if isinstance(key, metric_types.MetricKey):
        result.metric_keys_and_values.add(
            key=key.to_proto(),
            value=metric_value,
            confidence_interval=confidence_interval)
      else:
        # For v1 we continue to populate bounded_value for backwards
        # compatibility. If metric can be stored to double_value metrics,
        # replace it with a bounded_value.
        # TODO(b/171992041): remove the string-typed metric key branch once v1
        # code is removed.
        if metric_value.WhichOneof('type') == 'double_value':
          # setting bounded_value clears double_value in the same oneof scope.
          metric_value.bounded_value.value.value = unsampled_value
          metric_value.bounded_value.lower_bound.value = lower_bound
          metric_value.bounded_value.upper_bound.value = upper_bound
          metric_value.bounded_value.methodology = (
              metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
        result.metrics[key].CopyFrom(metric_value)
    elif isinstance(value, metrics_for_slice_pb2.BoundedValue):
      # A pre-computed BoundedValue is re-expressed as a double_value metric
      # with an explicit confidence interval.
      metric_value = metrics_for_slice_pb2.MetricValue(
          double_value=wrappers_pb2.DoubleValue(value=value.value.value))
      confidence_interval = metrics_for_slice_pb2.ConfidenceInterval(
          lower_bound=metrics_for_slice_pb2.MetricValue(
              double_value=wrappers_pb2.DoubleValue(
                  value=value.lower_bound.value)),
          upper_bound=metrics_for_slice_pb2.MetricValue(
              double_value=wrappers_pb2.DoubleValue(
                  value=value.upper_bound.value)))
      result.metric_keys_and_values.add(
          key=key.to_proto(),
          value=metric_value,
          confidence_interval=confidence_interval)
    else:
      # All other values go through the generic converter.
      metric_value = convert_metric_value_to_proto(value)
      if isinstance(key, metric_types.MetricKey):
        result.metric_keys_and_values.add(key=key.to_proto(),
                                          value=metric_value)
      else:
        # TODO(b/171992041): remove the string-typed metric key branch once v1
        # code is removed.
        result.metrics[key].CopyFrom(metric_value)
  return result