  def testSerializePlots(self):
    slice_key = _make_slice_key('fruit', 'apple')
    tfma_plots = {
        _full_key(metric_keys.CALIBRATION_PLOT_MATRICES):
            np.array([
                [0.0, 0.0, 0.0],
                [0.3, 1.0, 1.0],
                [0.7, 0.0, 1.0],
                [0.0, 0.0, 0.0],
            ]),
        _full_key(metric_keys.CALIBRATION_PLOT_BOUNDARIES):
            np.array([0.0, 0.5, 1.0]),
    }
    expected_plot_data = """
   slice_key {
     single_slice_keys {
       column: 'fruit'
       bytes_value: 'apple'
     }
   }
   plot_data {
     calibration_histogram_buckets {
       buckets {
         lower_threshold_inclusive: -inf
         upper_threshold_exclusive: 0.0
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
       buckets {
         lower_threshold_inclusive: 0.0
         upper_threshold_exclusive: 0.5
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 1.0 }
         total_weighted_refined_prediction { value: 0.3 }
       }
       buckets {
         lower_threshold_inclusive: 0.5
         upper_threshold_exclusive: 1.0
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.7 }
       }
       buckets {
         lower_threshold_inclusive: 1.0
         upper_threshold_exclusive: inf
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
     }
   }
 """
    calibration_plot = (
        post_export_metrics.calibration_plot_and_prediction_histogram())
    serialized = metrics_and_plots_evaluator._serialize_plots(
        (slice_key, tfma_plots), [calibration_plot])
    self.assertProtoEquals(
        expected_plot_data,
        metrics_for_slice_pb2.PlotsForSlice.FromString(serialized))
  def testCalibrationPlotSerialization(self):
    # Calibration plot for a model that produced
    # {prediction: 0.3, true_label: positive},
    # {prediction: 0.7, true_label: negative}.
    #
    # These plot values were generated by hand. For this test to make sense,
    # they must match the kind of output that TFMA actually produces.
   tfma_plots = {
       metric_keys.CALIBRATION_PLOT_MATRICES:
           np.array([
               [0.0, 0.0, 0.0],
               [0.3, 1.0, 1.0],
               [0.7, 0.0, 1.0],
               [0.0, 0.0, 0.0],
           ]),
       metric_keys.CALIBRATION_PLOT_BOUNDARIES:
           np.array([0.0, 0.5, 1.0]),
   }
   expected_plot_data = """
     calibration_histogram_buckets {
       buckets {
         lower_threshold_inclusive: -inf
         upper_threshold_exclusive: 0.0
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
       buckets {
         lower_threshold_inclusive: 0.0
         upper_threshold_exclusive: 0.5
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 1.0 }
         total_weighted_refined_prediction { value: 0.3 }
       }
       buckets {
         lower_threshold_inclusive: 0.5
         upper_threshold_exclusive: 1.0
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.7 }
       }
       buckets {
         lower_threshold_inclusive: 1.0
         upper_threshold_exclusive: inf
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
     }
   """
   plot_data = metrics_for_slice_pb2.PlotData()
   calibration_plot = (
       post_export_metrics.calibration_plot_and_prediction_histogram())
   calibration_plot.populate_plots_and_pop(tfma_plots, plot_data)
   self.assertProtoEquals(expected_plot_data, plot_data)
   self.assertFalse(metric_keys.CALIBRATION_PLOT_MATRICES in tfma_plots)
   self.assertFalse(metric_keys.CALIBRATION_PLOT_BOUNDARIES in tfma_plots)
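The matrices above pack one row per calibration bucket, with columns ordered as (weighted prediction sum, weighted label sum, weighted example count), and the boundaries array supplies the thresholds between consecutive buckets, with implicit -inf/+inf edges on the ends. Below is a minimal standalone sketch of that mapping, written against the hand-built test data rather than TFMA internals:

import numpy as np

def matrix_to_buckets(matrix, boundaries):
  """Pairs each matrix row with its [lower, upper) threshold range.

  Illustrative only: the column order is assumed to be
  [prediction_sum, label_sum, example_weight_sum], matching the test above.
  """
  lowers = np.concatenate([[-np.inf], boundaries])
  uppers = np.concatenate([boundaries, [np.inf]])
  buckets = []
  for (pred_sum, label_sum, weight_sum), lower, upper in zip(
      matrix, lowers, uppers):
    buckets.append({
        'lower_threshold_inclusive': lower,
        'upper_threshold_exclusive': upper,
        'num_weighted_examples': weight_sum,
        'total_weighted_label': label_sum,
        'total_weighted_refined_prediction': pred_sum,
    })
  return buckets

matrix = np.array([
    [0.0, 0.0, 0.0],
    [0.3, 1.0, 1.0],
    [0.7, 0.0, 1.0],
    [0.0, 0.0, 0.0],
])
boundaries = np.array([0.0, 0.5, 1.0])
for bucket in matrix_to_buckets(matrix, boundaries):
  print(bucket)  # Matches the four buckets in expected_plot_data above.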
  def testSerializePlots_emptyPlot(self):
    slice_key = _make_slice_key('fruit', 'apple')
    tfma_plots = {metric_keys.ERROR_METRIC: 'error_message'}

    calibration_plot = (
        post_export_metrics.calibration_plot_and_prediction_histogram())
    actual_plot = metrics_and_plots_serialization._serialize_plots(
        (slice_key, tfma_plots), [calibration_plot])
    expected_plot = metrics_for_slice_pb2.PlotsForSlice()
    expected_plot.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))
    expected_plot.plots[
        metric_keys.ERROR_METRIC].debug_message = 'error_message'
    self.assertProtoEquals(
        expected_plot,
        metrics_for_slice_pb2.PlotsForSlice.FromString(actual_plot))
  def testCalibrationPlotAndPredictionHistogramLinearClassifier(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        linear_classifier.simple_linear_classifier(None, temp_eval_export_dir))

    examples = [
        self._makeExample(age=3.0, language='english', label=1.0),
        self._makeExample(age=3.0, language='chinese', label=0.0),
        self._makeExample(age=4.0, language='english', label=1.0),
        self._makeExample(age=5.0, language='chinese', label=0.0)
    ]

    def check_result(got):  # pylint: disable=invalid-name
      try:
        self.assertEqual(1, len(got), 'got: %s' % got)
        (slice_key, value) = got[0]
        self.assertEqual((), slice_key)
        self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
        # We just check that the bucket sums look sane, since we don't know
        # the exact predictions of the model.
        #
        # Note that the correctness of the bucketing is tested in the other
        # two tests with the fixed prediction estimator. This test is more
        # for ensuring that this metric is compatible with the canned
        # Estimators, for which the prediction Tensor returned for a batch
        # of examples will be an N x 1 Tensor, rather than just an
        # N-element vector.
        buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
        bucket_sums = np.sum(buckets, axis=0)
        self.assertAlmostEqual(bucket_sums[1], 2.0)  # label sum
        self.assertAlmostEqual(bucket_sums[2], 4.0)  # weight sum
      except AssertionError as err:
        raise util.BeamAssertException(err)

    self._runTestWithCustomCheck(
        examples,
        eval_export_dir,
        [post_export_metrics.calibration_plot_and_prediction_histogram()],
        custom_plots_check=check_result)
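Because the exact predictions of the canned linear classifier are not known ahead of time, the check above only verifies the column sums of the bucket matrix: two of the four examples carry label 1.0 (so the label column sums to 2.0), and with no example weight key each example contributes weight 1.0 (so the weight column sums to 4.0). A small sketch of that arithmetic; the individual rows here are invented and only the column totals mirror the test:

import numpy as np

# Hypothetical bucket rows in the CALIBRATION_PLOT_MATRICES layout
# [prediction_sum, label_sum, example_weight_sum]; the values are made up.
buckets = np.array([
    [0.1, 1.0, 1.0],
    [0.4, 0.0, 1.0],
    [0.6, 0.0, 1.0],
    [0.9, 1.0, 1.0],
])
bucket_sums = np.sum(buckets, axis=0)
assert np.isclose(bucket_sums[1], 2.0)  # label sum: two positive labels
assert np.isclose(bucket_sums[2], 4.0)  # weight sum: four unweighted examples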
  def testWriteMetricsAndPlots(self):
    metrics_file = os.path.join(self._getTempDir(), 'metrics')
    plots_file = os.path.join(self._getTempDir(), 'plots')
    temp_eval_export_dir = os.path.join(self._getTempDir(), 'eval_export_dir')

    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))
    eval_config = config.EvalConfig(
        model_specs=[config.ModelSpec()],
        options=config.Options(
            disabled_outputs={'values': ['eval_config.json']}))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=eval_export_dir,
        add_metrics_callbacks=[
            post_export_metrics.example_count(),
            post_export_metrics.calibration_plot_and_prediction_histogram(
                num_buckets=2)
        ])
    extractors = [
        predict_extractor.PredictExtractor(eval_shared_model),
        slice_key_extractor.SliceKeyExtractor()
    ]
    evaluators = [
        metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(eval_shared_model)
    ]
    output_paths = {
        constants.METRICS_KEY: metrics_file,
        constants.PLOTS_KEY: plots_file
    }
    writers = [
        metrics_plots_and_validations_writer.MetricsPlotsAndValidationsWriter(
            output_paths, eval_shared_model.add_metrics_callbacks)
    ]

    with beam.Pipeline() as pipeline:
      example1 = self._makeExample(prediction=0.0, label=1.0)
      example2 = self._makeExample(prediction=1.0, label=1.0)

      # pylint: disable=no-value-for-parameter
      _ = (
          pipeline
          | 'Create' >> beam.Create([
              example1.SerializeToString(),
              example2.SerializeToString(),
          ])
          | 'ExtractEvaluateAndWriteResults' >>
          model_eval_lib.ExtractEvaluateAndWriteResults(
              eval_config=eval_config,
              eval_shared_model=eval_shared_model,
              extractors=extractors,
              evaluators=evaluators,
              writers=writers))
      # pylint: enable=no-value-for-parameter

    expected_metrics_for_slice = text_format.Parse(
        """
        slice_key {}
        metrics {
          key: "average_loss"
          value {
            double_value {
              value: 0.5
            }
          }
        }
        metrics {
          key: "post_export_metrics/example_count"
          value {
            double_value {
              value: 2.0
            }
          }
        }
        """, metrics_for_slice_pb2.MetricsForSlice())

    metric_records = []
    for record in tf.compat.v1.python_io.tf_record_iterator(metrics_file):
      metric_records.append(
          metrics_for_slice_pb2.MetricsForSlice.FromString(record))
    self.assertEqual(1, len(metric_records), 'metrics: %s' % metric_records)
    self.assertProtoEquals(expected_metrics_for_slice, metric_records[0])

    expected_plots_for_slice = text_format.Parse(
        """
      slice_key {}
      plots {
        key: "post_export_metrics"
        value {
          calibration_histogram_buckets {
            buckets {
              lower_threshold_inclusive: -inf
              num_weighted_examples {}
              total_weighted_label {}
              total_weighted_refined_prediction {}
            }
            buckets {
              upper_threshold_exclusive: 0.5
              num_weighted_examples {
                value: 1.0
              }
              total_weighted_label {
                value: 1.0
              }
              total_weighted_refined_prediction {}
            }
            buckets {
              lower_threshold_inclusive: 0.5
              upper_threshold_exclusive: 1.0
              num_weighted_examples {
              }
              total_weighted_label {}
              total_weighted_refined_prediction {}
            }
            buckets {
              lower_threshold_inclusive: 1.0
              upper_threshold_exclusive: inf
              num_weighted_examples {
                value: 1.0
              }
              total_weighted_label {
                value: 1.0
              }
              total_weighted_refined_prediction {
                value: 1.0
              }
            }
         }
        }
      }
    """, metrics_for_slice_pb2.PlotsForSlice())

    plot_records = []
    for record in tf.compat.v1.python_io.tf_record_iterator(plots_file):
      plot_records.append(
          metrics_for_slice_pb2.PlotsForSlice.FromString(record))
    self.assertEqual(1, len(plot_records), 'plots: %s' % plot_records)
    self.assertProtoEquals(expected_plots_for_slice, plot_records[0])
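With num_buckets=2 the plot uses boundaries [0.0, 0.5, 1.0] plus implicit underflow and overflow buckets, so prediction 0.0 lands in [0.0, 0.5) and prediction 1.0 lands in [1.0, inf), which is what the expected plots proto above shows. A hedged sketch of that lower-inclusive/upper-exclusive assignment using np.digitize (an illustration, not TFMA's internal bucketing code):

import numpy as np

# Boundary values for num_buckets=2; bucket 0 is the (-inf, 0.0) underflow
# bucket and the last bucket is the [1.0, inf) overflow bucket.
boundaries = np.array([0.0, 0.5, 1.0])
predictions = np.array([0.0, 1.0])

# np.digitize returns the index i with boundaries[i-1] <= x < boundaries[i],
# i.e. lower-inclusive / upper-exclusive bucketing.
print(np.digitize(predictions, boundaries))  # [1 3]: [0.0, 0.5) and overflow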
  def testConvertSlicePlotsToProtoLegacyStringKeys(self):
    slice_key = _make_slice_key('fruit', 'apple')
    tfma_plots = {
        metric_keys.CALIBRATION_PLOT_MATRICES:
            np.array([
                [0.0, 0.0, 0.0],
                [0.3, 1.0, 1.0],
                [0.7, 0.0, 1.0],
                [0.0, 0.0, 0.0],
            ]),
        metric_keys.CALIBRATION_PLOT_BOUNDARIES:
            np.array([0.0, 0.5, 1.0]),
    }
    expected_plot_data = """
     slice_key {
       single_slice_keys {
         column: 'fruit'
         bytes_value: 'apple'
       }
     }
     plots {
       key: "post_export_metrics"
       value {
         calibration_histogram_buckets {
           buckets {
             lower_threshold_inclusive: -inf
             upper_threshold_exclusive: 0.0
             num_weighted_examples { value: 0.0 }
             total_weighted_label { value: 0.0 }
             total_weighted_refined_prediction { value: 0.0 }
           }
           buckets {
             lower_threshold_inclusive: 0.0
             upper_threshold_exclusive: 0.5
             num_weighted_examples { value: 1.0 }
             total_weighted_label { value: 1.0 }
             total_weighted_refined_prediction { value: 0.3 }
           }
           buckets {
             lower_threshold_inclusive: 0.5
             upper_threshold_exclusive: 1.0
             num_weighted_examples { value: 1.0 }
             total_weighted_label { value: 0.0 }
             total_weighted_refined_prediction { value: 0.7 }
           }
           buckets {
             lower_threshold_inclusive: 1.0
             upper_threshold_exclusive: inf
             num_weighted_examples { value: 0.0 }
             total_weighted_label { value: 0.0 }
             total_weighted_refined_prediction { value: 0.0 }
           }
         }
       }
     }
   """
    calibration_plot = (
        post_export_metrics.calibration_plot_and_prediction_histogram())
    got = metrics_plots_and_validations_writer.convert_slice_plots_to_proto(
        (slice_key, tfma_plots), [calibration_plot])
    self.assertProtoEquals(expected_plot_data, got)
  def testCalibrationPlotAndPredictionHistogramWeighted(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator_extra_fields
        .simple_fixed_prediction_estimator_extra_fields(None,
                                                        temp_eval_export_dir))
    examples = [
        # For each example, we set label to prediction + 1.
        self._makeExample(
            prediction=-10.0,
            label=-9.0,
            fixed_float=1.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=-9.0,
            label=-8.0,
            fixed_float=2.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=0.0000,
            label=1.0000,
            fixed_float=0.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=0.00100,
            label=1.00100,
            fixed_float=1.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=0.00101,
            label=1.00101,
            fixed_float=2.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=0.00102,
            label=1.00102,
            fixed_float=3.0,
            fixed_string='',
            fixed_int=0),
        self._makeExample(
            prediction=10.0,
            label=11.0,
            fixed_float=7.0,
            fixed_string='',
            fixed_int=0),
    ]

    def check_result(got):  # pylint: disable=invalid-name
      try:
        self.assertEqual(1, len(got), 'got: %s' % got)
        (slice_key, value) = got[0]
        self.assertEqual((), slice_key)
        self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
        buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
        self.assertSequenceAlmostEqual(buckets[0], [-28.0, -25.0, 3.0])
        self.assertSequenceAlmostEqual(buckets[1], [0.0, 0.0, 0.0])
        self.assertSequenceAlmostEqual(buckets[11], [0.00608, 6.00608, 6.0])
        self.assertSequenceAlmostEqual(buckets[10001], [70.0, 77.0, 7.0])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    self._runTestWithCustomCheck(
        examples,
        eval_export_dir, [
            post_export_metrics.calibration_plot_and_prediction_histogram(
                example_weight_key='fixed_float')
        ],
        custom_plots_check=check_result)
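The weighted assertions above follow directly from the example definitions. For instance, the two negative-prediction examples land in the (-inf, 0.0) underflow bucket with weights 1.0 and 2.0, so the bucket row is [-28.0, -25.0, 3.0] in the [prediction_sum, label_sum, weight_sum] layout. A short standalone check of that arithmetic:

import numpy as np

# (prediction, label, example_weight) for the two examples that fall into
# the (-inf, 0.0) underflow bucket in the weighted test above.
underflow = [(-10.0, -9.0, 1.0), (-9.0, -8.0, 2.0)]

pred_sum = sum(p * w for p, _, w in underflow)   # -10*1 + -9*2 = -28.0
label_sum = sum(l * w for _, l, w in underflow)  # -9*1  + -8*2 = -25.0
weight_sum = sum(w for _, _, w in underflow)     # 1 + 2        =   3.0

np.testing.assert_allclose([pred_sum, label_sum, weight_sum],
                           [-28.0, -25.0, 3.0])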
  def testCalibrationPlotAndPredictionHistogramUnweighted(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))
    examples = [
        # For each example, we set label to prediction + 1.
        # These two go in bucket 0: (-inf, 0)
        self._makeExample(prediction=-10.0, label=-9.0),
        self._makeExample(prediction=-9.0, label=-8.0),
        # This goes in bucket 1: [0.00000, 0.00010)
        self._makeExample(prediction=0.00000, label=1.00000),
        # These three go in bucket 11: [0.00100, 0.00110)
        self._makeExample(prediction=0.00100, label=1.00100),
        self._makeExample(prediction=0.00101, label=1.00101),
        self._makeExample(prediction=0.00102, label=1.00102),
        # These two go in bucket 10000: [0.99990, 1.00000)
        self._makeExample(prediction=0.99998, label=1.99998),
        self._makeExample(prediction=0.99999, label=1.99999),
        # These four go in bucket 10001: [1.0000, +inf)
        self._makeExample(prediction=1.0, label=2.0),
        self._makeExample(prediction=8.0, label=9.0),
        self._makeExample(prediction=9.0, label=10.0),
        self._makeExample(prediction=10.0, label=11.0),
    ]

    calibration_plot = (
        post_export_metrics.calibration_plot_and_prediction_histogram())

    def check_result(got):  # pylint: disable=invalid-name
      try:
        self.assertEqual(1, len(got), 'got: %s' % got)
        (slice_key, value) = got[0]
        self.assertEqual((), slice_key)
        self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
        buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
        self.assertSequenceAlmostEqual(buckets[0], [-19.0, -17.0, 2.0])
        self.assertSequenceAlmostEqual(buckets[1], [0.0, 1.0, 1.0])
        self.assertSequenceAlmostEqual(buckets[11], [0.00303, 3.00303, 3.0])
        self.assertSequenceAlmostEqual(buckets[10000], [1.99997, 3.99997, 2.0])
        self.assertSequenceAlmostEqual(buckets[10001], [28.0, 32.0, 4.0])
        self.assertIn(metric_keys.CALIBRATION_PLOT_BOUNDARIES, value)
        boundaries = value[metric_keys.CALIBRATION_PLOT_BOUNDARIES]
        self.assertAlmostEqual(0.0, boundaries[0])
        self.assertAlmostEqual(0.001, boundaries[10])
        self.assertAlmostEqual(0.005, boundaries[50])
        self.assertAlmostEqual(0.010, boundaries[100])
        self.assertAlmostEqual(0.100, boundaries[1000])
        self.assertAlmostEqual(0.800, boundaries[8000])
        self.assertAlmostEqual(1.000, boundaries[10000])
        plot_data = metrics_for_slice_pb2.PlotData()
        calibration_plot.populate_plots_and_pop(value, plot_data)
        self.assertProtoEquals(
            """lower_threshold_inclusive:1.0
            upper_threshold_exclusive: inf
            num_weighted_examples {
              value: 4.0
            }
            total_weighted_label {
              value: 32.0
            }
            total_weighted_refined_prediction {
              value: 28.0
            }""", plot_data.calibration_histogram_buckets.buckets[10001])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    self._runTestWithCustomCheck(
        examples,
        eval_export_dir, [calibration_plot],
        custom_plots_check=check_result)
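The unweighted test relies on the calibration plot's default bucketing, which, per the boundary assertions above, places 10000 equal-width buckets across [0.0, 1.0] (boundaries[i] == i / 10000) plus underflow and overflow buckets 0 and 10001. A small sketch reproducing the boundary values and the bucket indices checked above; the np.digitize call illustrates the lower-inclusive/upper-exclusive convention and is not TFMA's own code:

import numpy as np

# 10001 boundary values 0.0000, 0.0001, ..., 1.0000, matching the assertions
# on CALIBRATION_PLOT_BOUNDARIES above.
boundaries = np.linspace(0.0, 1.0, 10001)
assert np.isclose(boundaries[10], 0.001)
assert np.isclose(boundaries[1000], 0.100)
assert np.isclose(boundaries[10000], 1.000)

# Bucket indices for a few of the predictions used in the test.
predictions = np.array([-10.0, 0.0, 0.00101, 0.99998, 1.0])
print(np.digitize(predictions, boundaries))  # [0 1 11 10000 10001]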