Example #1
    def testCalculateConfidenceInterval(self):
        # 95% two-sided t-interval with df=9: 10 +/- t * 2.
        np.testing.assert_almost_equal(
            math_util.calculate_confidence_interval(
                types.ValueWithTDistribution(10, 2, 9, 10)),
            (10, 5.4756856744035902196, 14.524314325596410669))
        # Invalid (negative) degrees of freedom yield NaN bounds.
        mid, lb, ub = math_util.calculate_confidence_interval(
            types.ValueWithTDistribution(-1, -1, -1, -1))
        self.assertEqual(mid, -1)
        self.assertTrue(math.isnan(lb))
        self.assertTrue(math.isnan(ub))
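
The interval checked above is a standard two-sided 95% t-interval. A minimal sketch of that computation, assuming scipy is available; t_confidence_interval is a hypothetical stand-in for math_util.calculate_confidence_interval, and the actual implementation may differ (in both tests the sample mean and unsampled value coincide, so either centering convention reproduces the expected numbers):

from scipy import stats

def t_confidence_interval(mean, std, dof, alpha=0.05):
    # Two-sided (1 - alpha) critical value, e.g.
    # stats.t.ppf(0.975, 9) == 2.2621571627409915.
    t_crit = stats.t.ppf(1.0 - alpha / 2.0, dof)
    return mean, mean - t_crit * std, mean + t_crit * std

# Reproduces the first assertion: t_confidence_interval(10, 2, 9)
# returns approximately (10, 5.4756856744, 14.5243143256).
# With dof=-1, stats.t.ppf returns NaN, matching the isnan checks.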
Example #2
    def testCalculateConfidenceIntervalConfusionMatrices(self):
        mid, lb, ub = math_util.calculate_confidence_interval(
            types.ValueWithTDistribution(
                sample_mean=binary_confusion_matrices.Matrices(
                    thresholds=[0.5], tp=[0.0], tn=[2.0], fp=[1.0], fn=[1.0]),
                sample_standard_deviation=binary_confusion_matrices.Matrices(
                    thresholds=[0.5],
                    tp=[0.0],
                    tn=[2.051956704170308],
                    fp=[1.025978352085154],
                    fn=[1.2139539573337679]),
                sample_degrees_of_freedom=19,
                unsampled_value=binary_confusion_matrices.Matrices(
                    thresholds=[0.5], tp=[0.0], tn=[2.0], fp=[1.0], fn=[1.0])))

        expected_mid = binary_confusion_matrices.Matrices(thresholds=[0.5],
                                                          tp=[0.0],
                                                          tn=[2.0],
                                                          fp=[1.0],
                                                          fn=[1.0])
        self.assertEqual(expected_mid, mid)

        expected_lb = binary_confusion_matrices.Matrices(
            thresholds=[0.5],
            tp=[0.0],
            tn=[-2.2947947404327547],
            fp=[-1.1473973702163773],
            fn=[-1.5408348336436783])
        self.assertEqual(expected_lb.thresholds, lb.thresholds)
        np.testing.assert_almost_equal(lb.tp, expected_lb.tp)
        np.testing.assert_almost_equal(lb.fp, expected_lb.fp)
        np.testing.assert_almost_equal(lb.tn, expected_lb.tn)
        np.testing.assert_almost_equal(lb.fn, expected_lb.fn)

        expected_ub = binary_confusion_matrices.Matrices(
            thresholds=[0.5],
            tp=[0.0],
            tn=[6.294794740432755],
            fp=[3.1473973702163773],
            fn=[3.5408348336436783])
        self.assertEqual(expected_ub.thresholds, ub.thresholds)
        np.testing.assert_almost_equal(ub.tp, expected_ub.tp)
        np.testing.assert_almost_equal(ub.fp, expected_ub.fp)
        np.testing.assert_almost_equal(ub.tn, expected_ub.tn)
        np.testing.assert_almost_equal(ub.fn, expected_ub.fn)
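
The expected bounds here follow from the same t-interval formula applied elementwise to each confusion-matrix field. A quick worked check for the tn entry, assuming scipy (df=19, so the 95% critical value is about 2.093):

from scipy import stats

t_crit = stats.t.ppf(0.975, 19)  # ~2.0930240544082634
tn_std = 2.051956704170308       # sample_standard_deviation for tn

# Bounds around the unsampled tn value of 2.0:
print(2.0 - t_crit * tn_std)  # ~-2.2947947404327547, the expected_lb.tn
print(2.0 + t_crit * tn_std)  # ~6.294794740432755, the expected_ub.tn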
Example #3
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyOrCrossSliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
    """Converts the given slice metrics into serialized proto MetricsForSlice.

  Args:
    metrics: The slice metrics.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
    result = metrics_for_slice_pb2.MetricsForSlice()
    slice_key, slice_metrics = metrics

    if slicer.is_cross_slice_key(slice_key):
        result.cross_slice_key.CopyFrom(
            slicer.serialize_cross_slice_key(slice_key))
    else:
        result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

    slice_metrics = slice_metrics.copy()

    if metric_keys.ERROR_METRIC in slice_metrics:
        logging.warning('Error for slice: %s with error message: %s ',
                        slice_key, slice_metrics[metric_keys.ERROR_METRIC])
        result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
            metric_keys.ERROR_METRIC]
        return result

    # Convert the metrics from add_metrics_callbacks to the structured output if
    # defined.
    if add_metrics_callbacks and (not any(
            isinstance(k, metric_types.MetricKey)
            for k in slice_metrics.keys())):
        for add_metrics_callback in add_metrics_callbacks:
            if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
                add_metrics_callback.populate_stats_and_pop(
                    slice_key, slice_metrics, result.metrics)
    for key in sorted(slice_metrics.keys()):
        value = slice_metrics[key]
        if isinstance(value, types.ValueWithTDistribution):
            unsampled_value = value.unsampled_value
            _, lower_bound, upper_bound = (
                math_util.calculate_confidence_interval(value))
            confidence_interval = metrics_for_slice_pb2.ConfidenceInterval(
                lower_bound=convert_metric_value_to_proto(lower_bound),
                upper_bound=convert_metric_value_to_proto(upper_bound),
                standard_error=convert_metric_value_to_proto(
                    value.sample_standard_deviation),
                degrees_of_freedom={'value': value.sample_degrees_of_freedom})
            metric_value = convert_metric_value_to_proto(unsampled_value)

            # If the metric can be stored as a double_value, replace it with a
            # bounded_value for backwards compatibility.
            # TODO(b/188575688): remove this logic to stop populating bounded_value
            if metric_value.WhichOneof('type') == 'double_value':
                # setting bounded_value clears double_value in the same oneof scope.
                metric_value.bounded_value.value.value = unsampled_value
                metric_value.bounded_value.lower_bound.value = lower_bound
                metric_value.bounded_value.upper_bound.value = upper_bound
                metric_value.bounded_value.methodology = (
                    metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
        else:
            metric_value = convert_metric_value_to_proto(value)
            confidence_interval = None

        if isinstance(key, metric_types.MetricKey):
            result.metric_keys_and_values.add(
                key=key.to_proto(),
                value=metric_value,
                confidence_interval=confidence_interval)
        else:
            result.metrics[key].CopyFrom(metric_value)

    return result
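
A hypothetical usage sketch for the function above. The shapes of the slice key and metrics dict are assumptions inferred from the type hints, not taken from the source:

# Illustrative only: a single-column slice and two metrics, one of which
# carries a t-distribution for confidence-interval reporting.
slice_key = (('age', 30),)
slice_metrics = {
    metric_types.MetricKey(name='accuracy'): 0.85,
    metric_types.MetricKey(name='auc'): types.ValueWithTDistribution(
        sample_mean=0.7, sample_standard_deviation=0.1,
        sample_degrees_of_freedom=19, unsampled_value=0.72),
}
proto = convert_slice_metrics_to_proto(
    (slice_key, slice_metrics), add_metrics_callbacks=[])
# proto.metric_keys_and_values now holds both metrics; the auc entry also
# carries a populated confidence_interval.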