def testCalculateConfidenceInterval(self):
  """Checks CI values for a valid t-distribution and NaN bounds otherwise."""
  # Well-formed distribution (mean=10, std=2, df=9): exact interval expected.
  np.testing.assert_almost_equal(
      math_util.calculate_confidence_interval(
          types.ValueWithTDistribution(10, 2, 9, 10)),
      (10, 5.4756856744035902196, 14.524314325596410669))
  # Degenerate distribution: the midpoint passes through unchanged while
  # both interval bounds come back as NaN.
  midpoint, lower, upper = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(-1, -1, -1, -1))
  self.assertEqual(midpoint, -1)
  self.assertTrue(math.isnan(lower))
  self.assertTrue(math.isnan(upper))
def testCalculateConfidenceIntervalConfusionMatrices(self):
  """Checks elementwise CI computation over confusion-matrix values."""
  midpoint, lower, upper = math_util.calculate_confidence_interval(
      types.ValueWithTDistribution(
          sample_mean=binary_confusion_matrices.Matrices(
              thresholds=[0.5], tp=[0.0], tn=[2.0], fp=[1.0], fn=[1.0]),
          sample_standard_deviation=binary_confusion_matrices.Matrices(
              thresholds=[0.5],
              tp=[0.0],
              tn=[2.051956704170308],
              fp=[1.025978352085154],
              fn=[1.2139539573337679]),
          sample_degrees_of_freedom=19,
          unsampled_value=binary_confusion_matrices.Matrices(
              thresholds=[0.5], tp=[0.0], tn=[2.0], fp=[1.0], fn=[1.0])))
  # The midpoint is the unsampled value and compares exactly.
  self.assertEqual(
      binary_confusion_matrices.Matrices(
          thresholds=[0.5], tp=[0.0], tn=[2.0], fp=[1.0], fn=[1.0]),
      midpoint)
  want_lower = binary_confusion_matrices.Matrices(
      thresholds=[0.5],
      tp=[0.0],
      tn=[-2.2947947404327547],
      fp=[-1.1473973702163773],
      fn=[-1.5408348336436783])
  want_upper = binary_confusion_matrices.Matrices(
      thresholds=[0.5],
      tp=[0.0],
      tn=[6.294794740432755],
      fp=[3.1473973702163773],
      fn=[3.5408348336436783])
  # The bounds are floating point, so compare field by field almost-equal.
  for got, want in ((lower, want_lower), (upper, want_upper)):
    self.assertEqual(want.thresholds, got.thresholds)
    for field in ('tp', 'fp', 'tn', 'fn'):
      np.testing.assert_almost_equal(getattr(got, field), getattr(want, field))
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyOrCrossSliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
  """Serializes one slice's metrics into a MetricsForSlice proto.

  Args:
    metrics: A (slice key, metrics dict) pair for a single slice.
    add_metrics_callbacks: Metric callbacks; must match the list that was
      passed to tfma.Evaluate().

  Returns:
    The populated MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
  output = metrics_for_slice_pb2.MetricsForSlice()
  slice_key, slice_metrics = metrics

  # Serialize the slice key (regular or cross-slice) into the proto.
  if slicer.is_cross_slice_key(slice_key):
    output.cross_slice_key.CopyFrom(slicer.serialize_cross_slice_key(slice_key))
  else:
    output.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

  # Work on a copy since callbacks below may pop entries.
  slice_metrics = slice_metrics.copy()

  # A slice that failed carries only a debug message; emit it and stop.
  if metric_keys.ERROR_METRIC in slice_metrics:
    logging.warning('Error for slice: %s with error message: %s ', slice_key,
                    slice_metrics[metric_keys.ERROR_METRIC])
    output.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
        metric_keys.ERROR_METRIC]
    return output

  # Convert the metrics from add_metrics_callbacks to the structured output
  # if defined. Only applies when none of the keys are structured MetricKeys.
  has_metric_keys = any(
      isinstance(k, metric_types.MetricKey) for k in slice_metrics)
  if add_metrics_callbacks and not has_metric_keys:
    for callback in add_metrics_callbacks:
      if hasattr(callback, 'populate_stats_and_pop'):
        callback.populate_stats_and_pop(slice_key, slice_metrics,
                                        output.metrics)

  for metric_key in sorted(slice_metrics):
    metric_val = slice_metrics[metric_key]
    if isinstance(metric_val, types.ValueWithTDistribution):
      # Bootstrapped metric: report the unsampled point estimate together
      # with its confidence interval.
      unsampled_value = metric_val.unsampled_value
      _, lower_bound, upper_bound = (
          math_util.calculate_confidence_interval(metric_val))
      ci_proto = metrics_for_slice_pb2.ConfidenceInterval(
          lower_bound=convert_metric_value_to_proto(lower_bound),
          upper_bound=convert_metric_value_to_proto(upper_bound),
          standard_error=convert_metric_value_to_proto(
              metric_val.sample_standard_deviation),
          degrees_of_freedom={'value': metric_val.sample_degrees_of_freedom})
      value_proto = convert_metric_value_to_proto(unsampled_value)
      # If metric can be stored to double_value metrics, replace it with a
      # bounded_value for backwards compatibility.
      # TODO(b/188575688): remove this logic to stop populating bounded_value
      if value_proto.WhichOneof('type') == 'double_value':
        # Setting bounded_value clears double_value in the same oneof scope.
        value_proto.bounded_value.value.value = unsampled_value
        value_proto.bounded_value.lower_bound.value = lower_bound
        value_proto.bounded_value.upper_bound.value = upper_bound
        value_proto.bounded_value.methodology = (
            metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
    else:
      value_proto = convert_metric_value_to_proto(metric_val)
      ci_proto = None

    # Structured MetricKeys go into metric_keys_and_values; legacy string
    # keys go into the metrics map.
    if isinstance(metric_key, metric_types.MetricKey):
      output.metric_keys_and_values.add(
          key=metric_key.to_proto(),
          value=value_proto,
          confidence_interval=ci_proto)
    else:
      output.metrics[metric_key].CopyFrom(value_proto)
  return output