    def testRebinWithSparseData(self):
        histogram = [
            calibration_histogram.Bucket(4, 5.0, .25, 5.0),  # pred = .05
            calibration_histogram.Bucket(61, 60.0, 36.0, 60.0),  # pred = .6
            calibration_histogram.Bucket(70, 69.0, 47.61, 69.0),  # pred = .69
            calibration_histogram.Bucket(100, 99.0, 98.01, 99.0)  # pred = .99
        ]
        # [0, 0.1, ..., 0.9, 1.0]
        thresholds = [i * 1.0 / 10 for i in range(0, 11)]
        got = calibration_histogram.rebin(thresholds, histogram, 100)

        expected = [
            calibration_histogram.Bucket(0, 5.0, 0.25, 5.0),
            calibration_histogram.Bucket(1, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(2, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(3, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(4, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(5, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(6, 129.0, 83.61, 129.0),
            calibration_histogram.Bucket(7, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(8, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(9, 99.0, 98.01, 99.0),
            calibration_histogram.Bucket(10, 0.0, 0.0, 0.0),
        ]
        self.assertLen(got, len(expected))
        for i in range(len(got)):
            self.assertSequenceAlmostEqual(got[i], expected[i])

    def testRebin(self):
        # [Bucket(0, -1, -0.01), Bucket(1, 0, 0.0), ...,
        #  Bucket(100, 99, 0.99), Bucket(101, 101, 1.01)] (example weights of 1.0 omitted)
        histogram = [calibration_histogram.Bucket(0, -1, -.01, 1.0)]
        for i in range(100):
            histogram.append(
                calibration_histogram.Bucket(i + 1, i, i * .01, 1.0))
        histogram.append(calibration_histogram.Bucket(101, 101, 1.01, 1.0))
        # [-1e-7, 0.0, 0.1, ..., 0.9, 1.0, 1.0+1e-7]
        thresholds = [-1e-7] + [i * 1.0 / 10 for i in range(11)] + [1.0 + 1e-7]
        got = calibration_histogram.rebin(thresholds, histogram, 100)

        # Weighted labels for rebinned bucket i (i = 1..10) are
        # 10 * (10 * (i - 1)) + (0 + 1 + ... + 9) = 100 * (i - 1) + 45.
        expected = [
            calibration_histogram.Bucket(0, -1, -0.01, 1.0),
            calibration_histogram.Bucket(1, 45.0, 0.45, 10.0),
            calibration_histogram.Bucket(2, 145.0, 1.45, 10.0),
            calibration_histogram.Bucket(3, 245.0, 2.45, 10.0),
            calibration_histogram.Bucket(4, 345.0, 3.45, 10.0),
            calibration_histogram.Bucket(5, 445.0, 4.45, 10.0),
            calibration_histogram.Bucket(6, 545.0, 5.45, 10.0),
            calibration_histogram.Bucket(7, 645.0, 6.45, 10.0),
            calibration_histogram.Bucket(8, 745.0, 7.45, 10.0),
            calibration_histogram.Bucket(9, 845.0, 8.45, 10.0),
            calibration_histogram.Bucket(10, 945.0, 9.45, 10.0),
            calibration_histogram.Bucket(11, 0.0, 0.0, 0.0),
            calibration_histogram.Bucket(12, 101.0, 1.01, 1.0),
        ]
        self.assertLen(got, len(expected))
        for i in range(len(got)):
            self.assertSequenceAlmostEqual(got[i], expected[i])
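
The two tests above pin down the contract of calibration_histogram.rebin closely enough to sketch a simplified stand-in. The sketch below is illustrative only and is not the TFMA implementation: it assumes the Bucket fields (bucket_id, weighted_labels, weighted_predictions, weighted_examples), a [0, 1] prediction range, and that each input bucket is routed by the prediction implied by its bucket_id, with bucket 0 and bucket num_buckets + 1 acting as the underflow/overflow buckets. The real rebin also defaults num_buckets and accepts left/right keyword arguments (see Example #5 below); the sketch takes the bucket count explicitly, as the tests do.

import bisect
from typing import List, NamedTuple


class Bucket(NamedTuple):
    # Field layout assumed to mirror calibration_histogram.Bucket.
    bucket_id: int
    weighted_labels: float
    weighted_predictions: float
    weighted_examples: float


def rebin_sketch(thresholds: List[float], histogram: List[Bucket],
                 num_buckets: int) -> List[Bucket]:
    """Merges histogram buckets into one output bucket per threshold.

    Input bucket i (1 <= i <= num_buckets) is taken to represent predictions
    starting at (i - 1) / num_buckets; bucket 0 is underflow and bucket
    num_buckets + 1 is overflow.  Each input bucket is added to the last
    output bucket whose threshold is <= its representative prediction.
    """
    out = [Bucket(i, 0.0, 0.0, 0.0) for i in range(len(thresholds))]
    for b in histogram:
        if b.bucket_id == 0:
            pred = float('-inf')  # underflow bucket
        elif b.bucket_id > num_buckets:
            pred = float('inf')  # overflow bucket
        else:
            pred = (b.bucket_id - 1) / num_buckets
        i = bisect.bisect_right(thresholds, pred) - 1
        i = max(0, min(len(thresholds) - 1, i))
        out[i] = Bucket(i, out[i].weighted_labels + b.weighted_labels,
                        out[i].weighted_predictions + b.weighted_predictions,
                        out[i].weighted_examples + b.weighted_examples)
    return out

Feeding the testRebin inputs through this sketch reproduces the expected buckets listed above, including the empty bucket 11 and the overflow mass ending up in bucket 12.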
Example #3
 def result(
     metrics: Dict[metric_types.MetricKey, Any]
 ) -> Dict[metric_types.MetricKey, Matrices]:
     """Returns binary confusion matrices."""
     # Calibration histogram uses intervals of the form [start, end) where the
     # prediction >= start. The confusion matrices want intervals of the form
     # (start, end] where the prediction > start. Add a small epsilon so that >=
     # checks don't match. This correction shouldn't be needed in practice but
     # allows for correctness in small tests.
     if len(thresholds) == 1:
         # When there is only one threshold, we need to make adjustments so that
         # we have proper boundaries around the threshold for <, >= comparisons.
         if thresholds[0] < 0:
             # This case is used when all prediction values are considered matches
             # (e.g. when calculating top_k for precision/recall).
             rebin_thresholds = [thresholds[0], thresholds[0] + _EPSILON]
         else:
             # This case is used for a single threshold within [0, 1] (e.g. 0.5).
             rebin_thresholds = [
                 -_EPSILON, thresholds[0] + _EPSILON, 1.0 + _EPSILON
             ]
     else:
         rebin_thresholds = ([thresholds[0]] +
                             [t + _EPSILON for t in thresholds[1:]])
     histogram = calibration_histogram.rebin(rebin_thresholds,
                                             metrics[histogram_key])
     matrices = _to_binary_confusion_matrices(thresholds, histogram)
     if len(thresholds) == 1:
         # Reset back to 1 bucket
         matrices = Matrices(thresholds,
                             tp=[matrices.tp[1]],
                             fp=[matrices.fp[1]],
                             tn=[matrices.tn[1]],
                             fn=[matrices.fn[1]])
     return {key: matrices}
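
To make the single-threshold branches above concrete, here is what rebin_thresholds works out to for two typical inputs. The epsilon value and the negative threshold below are stand-ins for illustration; the real module defines its own _EPSILON and chooses the top_k threshold itself.

_EPSILON = 1e-7  # stand-in value; the real module defines its own constant

# A single threshold inside [0, 1], e.g. 0.5: bracket it on both sides so the
# histogram's >= intervals behave like the > comparison the matrices want.
thresholds = [0.5]
rebin_thresholds = [-_EPSILON, thresholds[0] + _EPSILON, 1.0 + _EPSILON]
# Three rebinned buckets; only the middle one (predictions in (0.5, 1.0]) is
# kept when the result is reset back to a single bucket via matrices.tp[1] etc.

# A single negative threshold (every prediction counts as a match, e.g. top_k):
thresholds = [-1e-6]  # hypothetical negative value
rebin_thresholds = [thresholds[0], thresholds[0] + _EPSILON]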
Example #4
 def result(
     metrics: Dict[metric_types.MetricKey, Any]
 ) -> Dict[metric_types.MetricKey, Matrices]:
     """Returns binary confusion matrices."""
     if len(thresholds) == 1 and thresholds[0] < 0:
         # This case is used when all positive prediction values are considered
         # matches (e.g. when calculating top_k for precision/recall where the
         # non-top_k values are expected to have been set to float('-inf')).
         histogram = metrics[histogram_key]
     else:
         # Calibration histogram uses intervals of the form [start, end) where the
         # prediction >= start. The confusion matrices want intervals of the form
         # (start, end] where the prediction > start. Add a small epsilon so that
         # >= checks don't match. This correction shouldn't be needed in practice
         # but allows for correctness in small tests.
         rebin_thresholds = [
             t + _EPSILON if t != 0 else t for t in thresholds
         ]
         if thresholds[0] >= 0:
             # Add -epsilon bucket to account for differences in histogram vs
             # confusion matrix intervals mentioned above. If the epsilon bucket is
             # missing the false negatives and false positives will be 0 for the
             # first threshold.
             rebin_thresholds = [-_EPSILON] + rebin_thresholds
         if thresholds[-1] < 1.0:
             # If the last threshold < 1.0, then add a fence post at 1.0 + epsilon,
             # otherwise true negatives and true positives will be overcounted.
             rebin_thresholds = rebin_thresholds + [1.0 + _EPSILON]
         histogram = calibration_histogram.rebin(rebin_thresholds,
                                                 metrics[histogram_key])
     matrices = _to_binary_confusion_matrices(thresholds, histogram)
     return {key: matrices}
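
_to_binary_confusion_matrices is not shown in these examples, but the comments describe the standard cumulative-count technique: every bucket at or above a boundary is treated as predicted positive. Below is a minimal, self-contained sketch of that technique on per-bucket label/example masses; it illustrates the idea and is not the TFMA helper itself.

from typing import List, Tuple


def cumulative_confusion_counts(
        bucket_labels: List[float],
        bucket_examples: List[float]
) -> Tuple[List[float], List[float], List[float], List[float]]:
    """tp/fp/fn/tn at each bucket's lower edge.

    bucket_labels[i] is the positive (label) mass in bucket i and
    bucket_examples[i] is its total example mass; buckets at or above the
    boundary count as predicted positive.
    """
    total_labels = sum(bucket_labels)
    total_examples = sum(bucket_examples)
    tp, fp, fn, tn = [], [], [], []
    labels_above = total_labels
    examples_above = total_examples
    for labels, examples in zip(bucket_labels, bucket_examples):
        tp.append(labels_above)
        fp.append(examples_above - labels_above)
        fn.append(total_labels - labels_above)
        tn.append((total_examples - examples_above) -
                  (total_labels - labels_above))
        labels_above -= labels
        examples_above -= examples
    return tp, fp, fn, tn


# Example: two buckets, one mostly negative and one mostly positive.
print(cumulative_confusion_counts([1.0, 8.0], [10.0, 9.0]))
# ([9.0, 8.0], [10.0, 1.0], [0.0, 1.0], [0.0, 9.0])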
Example #5
 def result(
     metrics: Dict[metric_types.MetricKey, Any]
 ) -> Dict[metric_types.MetricKey, Any]:
   thresholds = [
       left + i * (right - left) / num_buckets for i in range(num_buckets + 1)
   ]
   thresholds = [float('-inf')] + thresholds
   histogram = calibration_histogram.rebin(
       thresholds, metrics[histogram_key], left=left, right=right)
   return {key: _to_proto(thresholds, histogram)}
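
For a sense of what the threshold list above looks like, here is the same computation with small illustrative values (left, right, and num_buckets come from the enclosing closure, which is not shown in this excerpt):

left, right, num_buckets = 0.0, 1.0, 4  # illustrative values only
thresholds = [
    left + i * (right - left) / num_buckets for i in range(num_buckets + 1)
]
thresholds = [float('-inf')] + thresholds
# -> [-inf, 0.0, 0.25, 0.5, 0.75, 1.0]; the leading -inf entry gives any mass
#    below `left` (the histogram's underflow bucket) its own rebinned bucket.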
Example #6
  def result(
      metrics: Dict[metric_types.MetricKey, Any]
  ) -> Dict[metric_types.MetricKey, Matrices]:
    """Returns binary confusion matrices."""
    # Calibration histogram uses intervals of the form [start, end) where the
    # prediction >= start. The confusion matrices want intervals of the form
    # (start, end] where the prediction > start. Add a small epsilon so that >=
    # checks don't match. This correction shouldn't be needed in practice but
    # allows for correctness in small tests.
    if len(thresholds) == 1:
      # When there is only one threshold, we need to make adjustments so that
      # we have proper boundaries around the threshold for <, >= comparisons.
      if thresholds[0] < 0:
        # This case is used when all prediction values are considered matches
        # (e.g. when calculating top_k for precision/recall).
        rebin_thresholds = [thresholds[0], thresholds[0] + _EPSILON]
      else:
        # This case is used for a single threshold within [0, 1] (e.g. 0.5).
        rebin_thresholds = [-_EPSILON, thresholds[0] + _EPSILON, 1.0 + _EPSILON]
    else:
      rebin_thresholds = [t + _EPSILON if t != 0 else t for t in thresholds]
      if thresholds[0] >= 0:
        # Add -epsilon bucket to account for differences in histogram vs
        # confusion matrix intervals mentioned above. If the epsilon bucket is
        # missing the false negatives and false positives will be 0 for the
        # first threshold.
        rebin_thresholds = [-_EPSILON] + rebin_thresholds
      if thresholds[-1] < 1.0:
        # If the last threshold < 1.0, then add a fence post at 1.0 + epsilon,
        # otherwise true negatives and true positives will be overcounted.
        rebin_thresholds = rebin_thresholds + [1.0 + _EPSILON]

    histogram = calibration_histogram.rebin(rebin_thresholds,
                                            metrics[histogram_key])
    matrices = _to_binary_confusion_matrices(thresholds, histogram)
    # Check if we need to remove the -epsilon bucket (or reset back to 1 bucket).
    start_index = 1 if thresholds[0] >= 0 or len(thresholds) == 1 else 0
    matrices = Matrices(
        thresholds,
        tp=matrices.tp[start_index:start_index + len(thresholds)],
        fp=matrices.fp[start_index:start_index + len(thresholds)],
        tn=matrices.tn[start_index:start_index + len(thresholds)],
        fn=matrices.fn[start_index:start_index + len(thresholds)])

    return {key: matrices}
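
Tracing the multi-threshold branch of this last example with a hypothetical thresholds list shows how the epsilon fence posts and the final slice fit together (the epsilon value stands in for the module's _EPSILON):

_EPSILON = 1e-7  # stand-in value
thresholds = [0.3, 0.7]  # hypothetical configuration

rebin_thresholds = [t + _EPSILON if t != 0 else t for t in thresholds]
# -> [0.3 + eps, 0.7 + eps]
rebin_thresholds = [-_EPSILON] + rebin_thresholds        # thresholds[0] >= 0
rebin_thresholds = rebin_thresholds + [1.0 + _EPSILON]   # thresholds[-1] < 1.0
# -> [-eps, 0.3 + eps, 0.7 + eps, 1.0 + eps]

# The leading -epsilon fence post is dropped again after the matrices are
# built, so the kept entries line up with the original thresholds:
start_index = 1  # because thresholds[0] >= 0
# matrices.tp[start_index:start_index + len(thresholds)] keeps entries 1..2.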