def testCalibrationPlotAndPredictionHistogramWeighted(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fixed_prediction_estimator_extra_fields.
                              simple_fixed_prediction_estimator_extra_fields(
                                  None, temp_eval_export_dir))
        examples = [
            # For each example, we set label to prediction + 1.
            self._makeExample(prediction=-10.0,
                              label=-9.0,
                              fixed_float=1.0,
                              fixed_string=''),
            self._makeExample(prediction=-9.0,
                              label=-8.0,
                              fixed_float=2.0,
                              fixed_string=''),
            self._makeExample(prediction=0.0000,
                              label=1.0000,
                              fixed_float=0.0,
                              fixed_string=''),
            self._makeExample(prediction=0.00100,
                              label=1.00100,
                              fixed_float=1.0,
                              fixed_string=''),
            self._makeExample(prediction=0.00101,
                              label=1.00101,
                              fixed_float=2.0,
                              fixed_string=''),
            self._makeExample(prediction=0.00102,
                              label=1.00102,
                              fixed_float=3.0,
                              fixed_string=''),
            self._makeExample(prediction=10.0,
                              label=11.0,
                              fixed_float=7.0,
                              fixed_string=''),
        ]

        def check_result(got):  # pylint: disable=invalid-name
            try:
                self.assertEqual(1, len(got), 'got: %s' % got)
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
                buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
                self.assertSequenceAlmostEqual(buckets[0], [-28.0, -25.0, 3.0])
                self.assertSequenceAlmostEqual(buckets[1], [0.0, 0.0, 0.0])
                self.assertSequenceAlmostEqual(buckets[11],
                                               [0.00608, 6.00608, 6.0])
                self.assertSequenceAlmostEqual(buckets[10001],
                                               [70.0, 77.0, 7.0])
            except AssertionError as err:
                raise util.BeamAssertException(err)

        self._runTestWithCustomCheck(
            examples,
            eval_export_dir, [
                post_export_metrics.calibration_plot_and_prediction_histogram(
                    example_weight_key='fixed_float')
            ],
            custom_plots_check=check_result)
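For reference, each row of the CALIBRATION_PLOT_MATRICES value asserted above is [weighted prediction sum, weighted label sum, example weight sum] for one histogram bucket, so the expected numbers can be reproduced by hand from the examples. A minimal sketch in plain Python, independent of TFMA (the bucket-index formula is inferred from the boundary checks in the unweighted test further down, not taken from the TFMA source):

import numpy as np

# (prediction, label, fixed_float weight) triples from the examples above.
rows = [(-10.0, -9.0, 1.0), (-9.0, -8.0, 2.0), (0.0, 1.0, 0.0),
        (0.00100, 1.00100, 1.0), (0.00101, 1.00101, 2.0),
        (0.00102, 1.00102, 3.0), (10.0, 11.0, 7.0)]

def weighted_bucket_totals(rows):
    totals = {}
    for pred, label, weight in rows:
        if pred < 0.0:
            index = 0                      # underflow bucket: (-inf, 0)
        elif pred >= 1.0:
            index = 10001                  # overflow bucket: [1.0, +inf)
        else:
            index = int(pred * 10000) + 1  # 10000 buckets of width 0.0001 on [0, 1)
        acc = totals.setdefault(index, np.zeros(3))
        acc += weight * np.array([pred, label, 1.0])
    return totals

totals = weighted_bucket_totals(rows)
print(totals[0])      # ~ [-28., -25., 3.]          (matches buckets[0])
print(totals[11])     # ~ [0.00608, 6.00608, 6.]    (matches buckets[11])
print(totals[10001])  # ~ [70., 77., 7.]            (matches buckets[10001])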
Example #2
 def testSerializePlots(self):
     slice_key = _make_slice_key('fruit', 'apple')
     tfma_plots = {
         metric_keys.CALIBRATION_PLOT_MATRICES:
         np.array([
             [0.0, 0.0, 0.0],
             [0.3, 1.0, 1.0],
             [0.7, 0.0, 1.0],
             [0.0, 0.0, 0.0],
         ]),
         metric_keys.CALIBRATION_PLOT_BOUNDARIES:
         np.array([0.0, 0.5, 1.0]),
     }
     expected_plot_data = """
   slice_key {
     single_slice_keys {
       column: 'fruit'
       bytes_value: 'apple'
     }
   }
   plot_data {
     calibration_histogram_buckets {
       buckets {
         lower_threshold_inclusive: -inf
         upper_threshold_exclusive: 0.0
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
       buckets {
         lower_threshold_inclusive: 0.0
         upper_threshold_exclusive: 0.5
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 1.0 }
         total_weighted_refined_prediction { value: 0.3 }
       }
       buckets {
         lower_threshold_inclusive: 0.5
         upper_threshold_exclusive: 1.0
         num_weighted_examples { value: 1.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.7 }
       }
       buckets {
         lower_threshold_inclusive: 1.0
         upper_threshold_exclusive: inf
         num_weighted_examples { value: 0.0 }
         total_weighted_label { value: 0.0 }
         total_weighted_refined_prediction { value: 0.0 }
       }
     }
   }
 """
     calibration_plot = (
         post_export_metrics.calibration_plot_and_prediction_histogram())
     serialized = serialization._serialize_plots((slice_key, tfma_plots),
                                                 [calibration_plot])
     self.assertProtoEquals(
         expected_plot_data,
         metrics_for_slice_pb2.PlotsForSlice.FromString(serialized))
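The serialized bytes can be parsed back and inspected field by field; a small sketch of that round trip, reusing serialized and metrics_for_slice_pb2 from the test above and touching only fields that appear in the expected text proto:

plots = metrics_for_slice_pb2.PlotsForSlice.FromString(serialized)
print(plots.slice_key.single_slice_keys[0].column)      # 'fruit'
bucket = plots.plot_data.calibration_histogram_buckets.buckets[1]
print(bucket.upper_threshold_exclusive)                 # 0.5
print(bucket.total_weighted_refined_prediction.value)   # 0.3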
Example #3
    def testCalibrationPlotAndPredictionHistogramUnweighted(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))
        examples = [
            # For each example, we set label to prediction + 1.
            # These two go in bucket 0: (-inf, 0)
            self._makeExample(prediction=-10.0, label=-9.0),
            self._makeExample(prediction=-9.0, label=-8.0),
            # This goes in bucket 1: [0.00000, 0.00010)
            self._makeExample(prediction=0.00000, label=1.00000),
            # These three go in bucket 11: [0.00100, 0.00110)
            self._makeExample(prediction=0.00100, label=1.00100),
            self._makeExample(prediction=0.00101, label=1.00101),
            self._makeExample(prediction=0.00102, label=1.00102),
            # These two go in bucket 10000: [0.99990, 1.00000)
            self._makeExample(prediction=0.99998, label=1.99998),
            self._makeExample(prediction=0.99999, label=1.99999),
            # These four go in bucket 10001: [1.0000, +inf)
            self._makeExample(prediction=1.0, label=2.0),
            self._makeExample(prediction=8.0, label=9.0),
            self._makeExample(prediction=9.0, label=10.0),
            self._makeExample(prediction=10.0, label=11.0),
        ]

        def check_result(got):  # pylint: disable=invalid-name
            try:
                self.assertEqual(1, len(got), 'got: %s' % got)
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
                buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
                self.assertSequenceAlmostEqual(buckets[0], [-19.0, -17.0, 2.0])
                self.assertSequenceAlmostEqual(buckets[1], [0.0, 1.0, 1.0])
                self.assertSequenceAlmostEqual(buckets[11],
                                               [0.00303, 3.00303, 3.0])
                self.assertSequenceAlmostEqual(buckets[10000],
                                               [1.99997, 3.99997, 2.0])
                self.assertSequenceAlmostEqual(buckets[10001],
                                               [28.0, 32.0, 4.0])
                self.assertIn(metric_keys.CALIBRATION_PLOT_BOUNDARIES, value)
                boundaries = value[metric_keys.CALIBRATION_PLOT_BOUNDARIES]
                self.assertAlmostEqual(0.0, boundaries[0])
                self.assertAlmostEqual(0.001, boundaries[10])
                self.assertAlmostEqual(0.005, boundaries[50])
                self.assertAlmostEqual(0.010, boundaries[100])
                self.assertAlmostEqual(0.100, boundaries[1000])
                self.assertAlmostEqual(0.800, boundaries[8000])
                self.assertAlmostEqual(1.000, boundaries[10000])
            except AssertionError as err:
                raise util.BeamAssertException(err)

        self._runTestWithCustomCheck(
            examples,
            eval_export_dir,
            [post_export_metrics.calibration_plot_and_prediction_histogram()],
            custom_plots_check=check_result)
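The boundary assertions above pin down the histogram layout: boundaries[i] == i / 10000 for i in 0..10000, i.e. 10000 equal-width buckets on [0, 1) plus an underflow bucket 0 and an overflow bucket 10001 (so the matrices have 10002 rows). A small sketch of that layout, inferred from the checks above rather than taken from the TFMA implementation:

import numpy as np

boundaries = np.linspace(0.0, 1.0, 10001)  # 0.0, 0.0001, ..., 1.0
assert abs(boundaries[10] - 0.001) < 1e-9
assert abs(boundaries[8000] - 0.800) < 1e-9

def bucket_index(prediction):
    # Bucket 0 is (-inf, 0); bucket k for 1 <= k <= 10000 is
    # [boundaries[k - 1], boundaries[k]); bucket 10001 is [1.0, +inf).
    if prediction < 0.0:
        return 0
    if prediction >= 1.0:
        return 10001
    return int(prediction * 10000) + 1

assert bucket_index(-10.0) == 0       # the two negative predictions
assert bucket_index(0.00101) == 11    # the three ~0.001 predictions
assert bucket_index(0.99999) == 10000
assert bucket_index(10.0) == 10001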
Example #4
 def testCalibrationPlotSerialization(self):
     # Calibration plot data for a model evaluated on two examples:
     #   {prediction: 0.3, true_label: positive},
     #   {prediction: 0.7, true_label: negative}
     #
     # These plots were generated by hand. For this test to make sense,
     # it must actually match the kind of output TFMA produces.
     tfma_plots = {
         metric_keys.CALIBRATION_PLOT_MATRICES:
         np.array([
             [0.0, 0.0, 0.0],
             [0.3, 1.0, 1.0],
             [0.7, 0.0, 1.0],
             [0.0, 0.0, 0.0],
         ]),
         metric_keys.CALIBRATION_PLOT_BOUNDARIES:
         np.array([0.0, 0.5, 1.0]),
     }
     expected_plot_data = """
   calibration_histogram_buckets {
     buckets {
       lower_threshold_inclusive: -inf
       upper_threshold_exclusive: 0.0
       num_weighted_examples { value: 0.0 }
       total_weighted_label { value: 0.0 }
       total_weighted_refined_prediction { value: 0.0 }
     }
     buckets {
       lower_threshold_inclusive: 0.0
       upper_threshold_exclusive: 0.5
       num_weighted_examples { value: 1.0 }
       total_weighted_label { value: 1.0 }
       total_weighted_refined_prediction { value: 0.3 }
     }
     buckets {
       lower_threshold_inclusive: 0.5
       upper_threshold_exclusive: 1.0
       num_weighted_examples { value: 1.0 }
       total_weighted_label { value: 0.0 }
       total_weighted_refined_prediction { value: 0.7 }
     }
     buckets {
       lower_threshold_inclusive: 1.0
       upper_threshold_exclusive: inf
       num_weighted_examples { value: 0.0 }
       total_weighted_label { value: 0.0 }
       total_weighted_refined_prediction { value: 0.0 }
     }
   }
 """
     plot_data = metrics_for_slice_pb2.PlotData()
     calibration_plot = (
         post_export_metrics.calibration_plot_and_prediction_histogram())
     calibration_plot.populate_plots_and_pop(tfma_plots, plot_data)
     self.assertProtoEquals(expected_plot_data, plot_data)
     self.assertFalse(metric_keys.CALIBRATION_PLOT_MATRICES in tfma_plots)
     self.assertFalse(metric_keys.CALIBRATION_PLOT_BOUNDARIES in tfma_plots)
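Each row of CALIBRATION_PLOT_MATRICES corresponds to one proto bucket: the thresholds come from padding the boundaries with -inf and +inf, and the row itself holds [total_weighted_refined_prediction, total_weighted_label, num_weighted_examples]. A rough sketch of that mapping (an illustration only, not the actual populate_plots_and_pop implementation):

import numpy as np

matrices = np.array([
    [0.0, 0.0, 0.0],
    [0.3, 1.0, 1.0],
    [0.7, 0.0, 1.0],
    [0.0, 0.0, 0.0],
])
boundaries = np.array([0.0, 0.5, 1.0])

lowers = np.concatenate([[float('-inf')], boundaries])
uppers = np.concatenate([boundaries, [float('inf')]])
for (pred_sum, label_sum, weight), low, high in zip(matrices, lowers, uppers):
    print('[%s, %s): examples=%s label=%s prediction=%s' %
          (low, high, weight, label_sum, pred_sum))
# [-inf, 0.0): examples=0.0 label=0.0 prediction=0.0
# [0.0, 0.5): examples=1.0 label=1.0 prediction=0.3
# [0.5, 1.0): examples=1.0 label=0.0 prediction=0.7
# [1.0, inf): examples=0.0 label=0.0 prediction=0.0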
Example #5
def main():
  tf.logging.set_verbosity(tf.logging.INFO)
  args = parse_arguments()

  tfma_result = run_tfma(input_csv=args.input_csv,
                         tfma_run_dir=args.tfma_run_dir,
                         eval_model_base_dir=args.eval_model_dir,
                         slice_spec=ALL_SPECS,
                         working_dir=args.tfma_run_dir,
                         mode=args.mode, project=args.project,
                         setup_file=args.setup_file,
                         add_metrics_callbacks=[
                            post_export_metrics.calibration_plot_and_prediction_histogram(),
                            post_export_metrics.auc_plots()]
                         )
Example #6
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    args = parse_arguments()

    tfma_result = run_tfma(
        input_csv=args.input_csv,
        tfma_run_dir=args.tfma_run_dir,
        eval_model_base_dir=args.eval_model_dir,
        slice_spec=ALL_SPECS,
        working_dir=args.tfma_run_dir,
        mode=args.mode,
        project=args.project,
        setup_file=args.setup_file,
        add_metrics_callbacks=[
            post_export_metrics.calibration_plot_and_prediction_histogram(),
            post_export_metrics.auc_plots()
        ])
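ALL_SPECS is defined elsewhere in these examples; a plausible, purely illustrative definition, built from the same SingleSliceSpec calls that appear in Example #8 below, might look like:

# Hypothetical slice specs; the real ALL_SPECS lives outside this snippet,
# and slicer is imported the same way as in Example #8 below.
ALL_SPECS = [
    slicer.SingleSliceSpec(),                             # overall (unsliced) metrics
    slicer.SingleSliceSpec(columns=['trip_start_hour']),  # one slice per trip_start_hour
]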
Example #7
    def testCalibrationPlotAndPredictionHistogramLinearClassifier(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir))

        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=3.0, language='chinese', label=0.0),
            self._makeExample(age=4.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=0.0)
        ]

        def check_result(got):  # pylint: disable=invalid-name
            try:
                self.assertEqual(1, len(got), 'got: %s' % got)
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertIn(metric_keys.CALIBRATION_PLOT_MATRICES, value)
                # We just check that the bucket sums look sane, since we don't know
                # the exact predictions of the model.
                #
                # Note that the correctness of the bucketing is tested in the other
                # two tests with the fixed prediction estimator. This test is more
                # for ensuring that this metric is compatible with the canned
                # Estimators, for which the prediction Tensor returned for a batch
                # of examples will be an N x 1 Tensor, rather than just an
                # N-element vector.
                buckets = value[metric_keys.CALIBRATION_PLOT_MATRICES]
                bucket_sums = np.sum(buckets, axis=0)
                self.assertAlmostEqual(bucket_sums[1], 2.0)  # label sum
                self.assertAlmostEqual(bucket_sums[2], 4.0)  # weight sum
            except AssertionError as err:
                raise util.BeamAssertException(err)

        self._runTestWithCustomCheck(
            examples,
            eval_export_dir,
            [post_export_metrics.calibration_plot_and_prediction_histogram()],
            custom_plots_check=check_result)
Example #8
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
  """Runs a batch job to evaluate the eval_model against the given input.

  Args:
    eval_result_dir: A directory where the evaluation result should be written
      to.
    schema_file: A file containing a text-serialized Schema that describes the
      eval data.
    input_csv: A path to a csv file which should be the input for evaluation.
      This can only be set if big_query_table is None.
    big_query_table: A BigQuery table name specified as DATASET.TABLE which
      should be the input for evaluation. This can only be set if input_csv is
      None.
    eval_model_dir: A directory where the eval model is located.
    max_eval_rows: Number of rows to query from BigQuery.
    pipeline_args: additional DataflowRunner or DirectRunner args passed to the
      beam pipeline.

  Raises:
    ValueError: if input_csv and big_query_table are not specified correctly.
  """

  if input_csv == big_query_table and input_csv is None:
    raise ValueError(
        'one of --input_csv or --big_query_table should be provided.')

  slice_spec = [
      slicer.SingleSliceSpec(),
      slicer.SingleSliceSpec(columns=['trip_start_hour'])
  ]

  schema = taxi.read_schema(schema_file)

  with beam.Pipeline(argv=pipeline_args) as pipeline:
    if input_csv:
      csv_coder = taxi.make_csv_coder(schema)
      raw_data = (
          pipeline
          | 'ReadFromText' >> beam.io.ReadFromText(
              input_csv, skip_header_lines=1)
          | 'ParseCSV' >> beam.Map(csv_coder.decode))
    else:
      assert big_query_table
      query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
      raw_feature_spec = taxi.get_raw_feature_spec(schema)
      raw_data = (
          pipeline
          | 'ReadBigQuery' >> beam.io.Read(
              beam.io.BigQuerySource(query=query, use_standard_sql=True))
          | 'CleanData' >>
          beam.Map(lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

    # Examples must be in clean tf-example format.
    coder = taxi.make_proto_coder(schema)

    _ = (
        raw_data
        | 'ToSerializedTFExample' >> beam.Map(coder.encode)
        | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
            eval_saved_model_path=eval_model_dir,
            slice_spec=slice_spec,
            add_metrics_callbacks=[
                post_export_metrics.calibration_plot_and_prediction_histogram(),
                post_export_metrics.auc_plots()
            ],
            output_path=eval_result_dir))
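A hypothetical invocation of process_tfma; every path below is a placeholder, not a value from the original example:

process_tfma(
    eval_result_dir='/tmp/taxi_tfma_output',
    schema_file='/tmp/schema.pbtxt',
    input_csv='/tmp/eval_data.csv',            # or big_query_table='dataset.table'
    eval_model_dir='/tmp/eval_saved_model',
    max_eval_rows=None,
    pipeline_args=['--runner=DirectRunner'])   # Beam runner flag; placeholder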