def testCalculateConfidenceInterval(self):
  """Checks calculate_confidence_interval on a regular and a degenerate input.

  The first case pins the returned (mean, lower bound, upper bound) triple
  for a well-formed ValueWithTDistribution. The second case feeds all -1
  values and expects the mean to be passed through while both bounds are NaN.
  """
  mean, lb, ub = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(10, 2, 9, 10))
  self.assertEqual(mean, 10)
  # The bounds are computed floating-point values, so compare them with a
  # tolerance (7 decimal places by default) instead of bit-for-bit equality,
  # which can break across platforms or numeric library versions.
  self.assertAlmostEqual(lb, 8.5692861880948552)
  self.assertAlmostEqual(ub, 11.430713811905145)

  mean, lb, ub = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(-1, -1, -1, -1))
  self.assertEqual(mean, -1)
  self.assertTrue(math.isnan(lb))
  self.assertTrue(math.isnan(ub))
def testCalculateConfidenceInterval(self):
  """Verifies the confidence interval for a valid and an invalid input.

  A well-formed ValueWithTDistribution must yield the expected
  (mean, lower bound, upper bound) triple up to numerical tolerance; an
  all -1 input must keep the mean and report NaN for both bounds.
  """
  regular_interval = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(10, 2, 9, 10))
  expected_interval = (10, 5.4756856744035902196, 14.524314325596410669)
  np.testing.assert_almost_equal(regular_interval, expected_interval)

  degenerate_interval = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(-1, -1, -1, -1))
  mean, lb, ub = degenerate_interval
  self.assertEqual(mean, -1)
  # Neither bound can be computed for the degenerate input.
  for bound in (lb, ub):
    self.assertTrue(math.isnan(bound))
def convert_slice_metrics(
    slice_key: slicer.SliceKeyType, slice_metrics: Dict[Any, Any],
    add_metrics_callbacks: List[types.AddMetricsCallbackType],
    metrics_for_slice: metrics_for_slice_pb2.MetricsForSlice) -> None:
  """Converts slice_metrics into the given metrics_for_slice proto.

  Args:
    slice_key: The slice the metrics belong to. It is only forwarded to the
      callbacks' `populate_stats_and_pop` hook.
    slice_metrics: Mapping from metric key to metric value. A shallow copy is
      taken up front, so the caller's dict itself is never modified.
    add_metrics_callbacks: Metric callbacks. Callbacks exposing
      `populate_stats_and_pop` are invoked, but only when none of the keys is
      a `metric_types.MetricKey`.
    metrics_for_slice: Output proto, populated in place.
  """
  slice_metrics_copy = slice_metrics.copy()
  # Prevent further references to this, so we don't accidentally mutate it.
  del slice_metrics

  # Convert the metrics from add_metrics_callbacks to the structured output if
  # defined.
  if add_metrics_callbacks and not any(
      isinstance(k, metric_types.MetricKey) for k in slice_metrics_copy):
    for add_metrics_callback in add_metrics_callbacks:
      if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
        add_metrics_callback.populate_stats_and_pop(
            slice_key, slice_metrics_copy, metrics_for_slice.metrics)

  # Iterate in sorted key order so the output order is deterministic.
  for key in sorted(slice_metrics_copy):
    value = slice_metrics_copy[key]
    metric_value = metrics_for_slice_pb2.MetricValue()
    if isinstance(value, metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
      metric_value.confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(value, types.ValueWithTDistribution):
      # Convert to a bounded value. 95% confidence level is computed here.
      # Will populate t distribution value instead after migration.
      sample_mean, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      metric_value.bounded_value.value.value = sample_mean
      metric_value.bounded_value.lower_bound.value = lower_bound
      metric_value.bounded_value.upper_bound.value = upper_bound
      metric_value.bounded_value.methodology = (
          metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
    elif isinstance(value, six.binary_type):
      # Byte strings can be stored as-is.
      metric_value.bytes_value = value
    elif isinstance(value, six.text_type):
      # A proto bytes field rejects text strings on Python 3, so encode first.
      metric_value.bytes_value = value.encode('utf8')
    elif isinstance(value, np.ndarray):
      # Convert NumPy arrays to ArrayValue.
      metric_value.array_value.CopyFrom(_convert_to_array_value(value))
    else:
      # We try to convert to float values.
      try:
        metric_value.double_value.value = float(value)
      except (TypeError, ValueError) as e:
        metric_value.unknown_type.value = str(value)
        # Exceptions have no `.message` attribute on Python 3; str(e) yields
        # the same text without raising AttributeError.
        metric_value.unknown_type.error = str(e)

    if isinstance(key, metric_types.MetricKey):
      # Structured keys go into the repeated key/value field.
      key_and_value = metrics_for_slice.metric_keys_and_values.add()
      key_and_value.key.CopyFrom(key.to_proto())
      key_and_value.value.CopyFrom(metric_value)
    else:
      # Any other key is used directly as the key of the metrics map.
      metrics_for_slice.metrics[key].CopyFrom(metric_value)
def convert_slice_metrics(
    slice_key: slicer.SliceKeyType, slice_metrics: Dict[Text, Any],
    post_export_metrics: List[types.AddMetricsCallbackType],
    metrics_for_slice: metrics_for_slice_pb2.MetricsForSlice) -> None:
  """Converts slice_metrics into the given metrics_for_slice proto.

  Args:
    slice_key: The slice the metrics belong to. It is only forwarded to the
      callbacks' `populate_stats_and_pop` hook.
    slice_metrics: Mapping from metric name to metric value. A shallow copy is
      taken up front, so the caller's dict itself is never modified.
    post_export_metrics: Metric callbacks. Those exposing
      `populate_stats_and_pop` are invoked before the remaining metrics are
      converted. None or an empty list means there are no callbacks.
    metrics_for_slice: Output proto, populated in place.
  """
  slice_metrics_copy = slice_metrics.copy()
  # Prevent further references to this, so we don't accidentally mutate it.
  del slice_metrics

  # Convert the metrics from post_export_metrics to the structured output if
  # defined.
  for post_export_metric in post_export_metrics or ():
    if hasattr(post_export_metric, 'populate_stats_and_pop'):
      post_export_metric.populate_stats_and_pop(
          slice_key, slice_metrics_copy, metrics_for_slice.metrics)

  for name, value in slice_metrics_copy.items():
    # Every branch below writes to this map entry, so look it up once.
    metric_value = metrics_for_slice.metrics[name]
    if isinstance(value, types.ValueWithTDistribution):
      # Convert to a bounded value. 95% confidence level is computed here.
      # Will populate t distribution value instead after migration.
      sample_mean, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      metric_value.bounded_value.value.value = sample_mean
      metric_value.bounded_value.lower_bound.value = lower_bound
      metric_value.bounded_value.upper_bound.value = upper_bound
      metric_value.bounded_value.methodology = (
          metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
    elif isinstance(value, six.binary_type):
      # Byte strings can be stored as-is.
      metric_value.bytes_value = value
    elif isinstance(value, six.text_type):
      # A proto bytes field rejects text strings on Python 3, so encode first.
      metric_value.bytes_value = value.encode('utf8')
    elif isinstance(value, np.ndarray):
      # Convert NumPy arrays to ArrayValue.
      metric_value.array_value.CopyFrom(_convert_to_array_value(value))
    else:
      # We try to convert to float values.
      try:
        metric_value.double_value.value = float(value)
      except (TypeError, ValueError) as e:
        metric_value.unknown_type.value = str(value)
        # Exceptions have no `.message` attribute on Python 3; str(e) yields
        # the same text without raising AttributeError.
        metric_value.unknown_type.error = str(e)
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
  """Converts the given slice metrics into a MetricsForSlice proto.

  If the metrics contain metric_keys.ERROR_METRIC, a warning is logged and the
  returned proto carries only the slice key and that error as a debug message;
  no other metric is converted.

  Args:
    metrics: The slice metrics, as a (slice key, metrics dict) tuple. The dict
      is shallow-copied, so the caller's dict itself is never modified.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate(). Callbacks exposing
      `populate_stats_and_pop` are invoked, but only when none of the metric
      keys is a `metric_types.MetricKey`.

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
  result = metrics_for_slice_pb2.MetricsForSlice()
  slice_key, slice_metrics = metrics

  result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

  # Work on a copy so the callbacks below cannot mutate the caller's dict.
  slice_metrics = slice_metrics.copy()

  if metric_keys.ERROR_METRIC in slice_metrics:
    logging.warning('Error for slice: %s with error message: %s ', slice_key,
                    slice_metrics[metric_keys.ERROR_METRIC])
    result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
        metric_keys.ERROR_METRIC]
    return result

  # Convert the metrics from add_metrics_callbacks to the structured output if
  # defined.
  if add_metrics_callbacks and not any(
      isinstance(k, metric_types.MetricKey) for k in slice_metrics):
    for add_metrics_callback in add_metrics_callbacks:
      if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
        add_metrics_callback.populate_stats_and_pop(
            slice_key, slice_metrics, result.metrics)

  # Iterate in sorted key order so the output order is deterministic.
  for key in sorted(slice_metrics):
    value = slice_metrics[key]
    metric_value = metrics_for_slice_pb2.MetricValue()
    if isinstance(value, metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
      metric_value.confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(
        value, metrics_for_slice_pb2.MultiClassConfusionMatrixAtThresholds):
      metric_value.multi_class_confusion_matrix_at_thresholds.CopyFrom(value)
    elif isinstance(value, types.ValueWithTDistribution):
      # Currently we populate both bounded_value and confidence_interval.
      # Avoid populating bounded_value once the UI handles confidence_interval.
      # Convert to a bounded value. 95% confidence level is computed here.
      _, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(value))
      metric_value.bounded_value.value.value = value.unsampled_value
      metric_value.bounded_value.lower_bound.value = lower_bound
      metric_value.bounded_value.upper_bound.value = upper_bound
      metric_value.bounded_value.methodology = (
          metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
      # Populate confidence_interval
      metric_value.confidence_interval.lower_bound.value = lower_bound
      metric_value.confidence_interval.upper_bound.value = upper_bound
      t_dist_value = metrics_for_slice_pb2.TDistributionValue()
      t_dist_value.sample_mean.value = value.sample_mean
      t_dist_value.sample_standard_deviation.value = (
          value.sample_standard_deviation)
      t_dist_value.sample_degrees_of_freedom.value = (
          value.sample_degrees_of_freedom)
      # Once the UI handles confidence interval, we will avoid setting this and
      # instead use the double_value.
      t_dist_value.unsampled_value.value = value.unsampled_value
      metric_value.confidence_interval.t_distribution_value.CopyFrom(
          t_dist_value)
    elif isinstance(value, six.binary_type):
      # Convert textual types to string metrics.
      metric_value.bytes_value = value
    elif isinstance(value, six.text_type):
      # Convert textual types to string metrics.
      metric_value.bytes_value = value.encode('utf8')
    elif isinstance(value, np.ndarray):
      # Convert NumPy arrays to ArrayValue.
      metric_value.array_value.CopyFrom(_convert_to_array_value(value))
    else:
      # We try to convert to float values.
      try:
        metric_value.double_value.value = float(value)
      except (TypeError, ValueError) as e:
        metric_value.unknown_type.value = str(value)
        # Exceptions have no `.message` attribute on Python 3; str(e) yields
        # the same text without raising AttributeError.
        metric_value.unknown_type.error = str(e)

    if isinstance(key, metric_types.MetricKey):
      # Structured keys go into the repeated key/value field.
      key_and_value = result.metric_keys_and_values.add()
      key_and_value.key.CopyFrom(key.to_proto())
      key_and_value.value.CopyFrom(metric_value)
    else:
      # Any other key is used directly as the key of the metrics map.
      result.metrics[key].CopyFrom(metric_value)

  return result