Example #1
    def testEvaluateNoSlicingAddPostExportAndCustomMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            metrics, plots = (pipeline
                              | beam.Create([
                                  example1.SerializeToString(),
                                  example2.SerializeToString(),
                                  example3.SerializeToString(),
                                  example4.SerializeToString()
                              ])
                              | evaluate.Evaluate(
                                  eval_saved_model_path=eval_export_dir,
                                  add_metrics_callbacks=[
                                      _addExampleCountMetricCallback,
                                      post_export_metrics.example_count(),
                                      post_export_metrics.example_weight(
                                          example_weight_key='age')
                                  ]))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0,
                            metric_keys.EXAMPLE_COUNT: 4.0,
                            metric_keys.EXAMPLE_WEIGHT: 15.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')
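Example #1 mixes the built-in post-export metrics with a hand-written callback, `_addExampleCountMetricCallback`, whose body is not shown. As a minimal sketch only, assuming the TF 1.x callback contract (the callback receives the features, predictions, and labels dicts and returns a dict mapping metric names to (value_op, update_op) pairs) and assuming predictions_dict holds at least one dense batched tensor, such a callback might look like this:

import tensorflow as tf


def _addExampleCountMetricCallback(features_dict, predictions_dict,
                                   labels_dict):
    # Hypothetical sketch, not the test's actual helper: a streaming counter
    # built from a local variable plus an update op, matching the
    # (value_op, update_op) contract used by tf.metrics.
    del features_dict, labels_dict  # Only the batch size is needed.
    count = tf.Variable(
        0.0,
        trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES],
        name='added_example_count')
    any_prediction = list(predictions_dict.values())[0]  # assumes >= 1 entry
    batch_size = tf.cast(tf.shape(any_prediction)[0], tf.float32)
    update_op = tf.assign_add(count, batch_size)
    return {'added_example_count': (count.read_value(), update_op)}

With the four examples above, the value op would report 4.0, matching the expected 'added_example_count' in the test.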
Example #2
    def testPostExportMetricsLinearClassifier(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=3.0, language='chinese', label=0.0),
            self._makeExample(age=4.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=0.0)
        ]
        metrics_to_check = [
            (metric_keys.EXAMPLE_COUNT, post_export_metrics.example_count(),
             4.0),
            (metric_keys.EXAMPLE_WEIGHT,
             post_export_metrics.example_weight('age'), 15.0),
        ]
        self._runTest(examples, eval_export_dir, metrics_to_check)
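`_runTest` belongs to the test base class and is not shown; note that Example #3 below calls a variant that takes the callback list and the expected-values dict as separate arguments. For the (key, callback, expected value) triple form used here, a hypothetical sketch that mirrors the pipeline in Example #1 could be:

    def _runTest(self, examples, eval_export_dir, metrics_to_check):
        # Hypothetical sketch of the base-class helper: run Evaluate over the
        # serialized examples and check each (metric key, callback, expected
        # value) triple on the overall (empty) slice.
        expected_values_dict = {key: value
                                for key, _, value in metrics_to_check}
        callbacks = [callback for _, callback, _ in metrics_to_check]
        with beam.Pipeline() as pipeline:
            metrics, _ = (pipeline
                          | beam.Create(
                              [e.SerializeToString() for e in examples])
                          | evaluate.Evaluate(
                              eval_saved_model_path=eval_export_dir,
                              add_metrics_callbacks=callbacks))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict=expected_values_dict)
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)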
Example #3
    def testPostExportMetricsDNNRegressor(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = dnn_regressor.simple_dnn_regressor(
            None, temp_eval_export_dir)
        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=3.0, language='chinese', label=0.0),
            self._makeExample(age=4.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=0.0)
        ]
        expected_values_dict = {
            metric_keys.EXAMPLE_COUNT: 4.0,
            metric_keys.EXAMPLE_WEIGHT: 15.0,
        }
        self._runTest(examples, eval_export_dir, [
            post_export_metrics.example_count(),
            post_export_metrics.example_weight('age')
        ], expected_values_dict)
Example #4
def EvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_saved_model_path,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    example_weight_key=None,
    add_metrics_callbacks=None,
    desired_batch_size=None,
):
  """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
               eval_saved_model_path=model_location,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               example_weight_key=example_weight_key,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. a string containing a CSV row, a serialized tf.Example,
      etc).
    eval_saved_model_path: Path to EvalSavedModel. This directory should contain
      the saved_model.pb file.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    example_weight_key: The key of the example weight column. If None, weight
      will be 1 for each example.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph. The names of the metrics added by the callbacks
      should not conflict with existing metrics or with metrics added by other
      callbacks. Each callback is passed the features, predictions and labels
      dictionaries from the model graph and should return a dict mapping
      metric names to (value_op, update_op) tuples, in the style of
      tf.metrics.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.

  Returns:
    PDone.
  """

  if add_metrics_callbacks is None:
    add_metrics_callbacks = []
  else:
    # Copy the list so that appending the standard callbacks below does not
    # mutate the caller's argument.
    add_metrics_callbacks = list(add_metrics_callbacks)

  # Always compute the example count. When no example weight key is given, the
  # example count also serves as the example weight metric for display.
  # pytype: disable=module-attr
  example_count_callback = post_export_metrics.example_count()
  example_weight_metric_key = metric_keys.EXAMPLE_COUNT
  add_metrics_callbacks.append(example_count_callback)
  if example_weight_key:
    example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT
    example_weight_callback = post_export_metrics.example_weight(
        example_weight_key)
    add_metrics_callbacks.append(example_weight_callback)
  # pytype: enable=module-attr

  metrics, plots = examples | 'Evaluate' >> evaluate.Evaluate(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks,
      slice_spec=slice_spec,
      desired_batch_size=desired_batch_size)

  data_location = '<user provided PCollection>'
  if display_only_data_location is not None:
    data_location = display_only_data_location

  eval_config = api_types.EvalConfig(
      model_location=eval_saved_model_path,
      data_location=data_location,
      slice_spec=slice_spec,
      example_weight_metric_key=example_weight_metric_key)

  _ = ((metrics, plots)
       | 'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
           post_export_metrics=add_metrics_callbacks)
       |
       'WriteMetricsPlotsAndConfig' >> serialization.WriteMetricsPlotsAndConfig(
           output_path=output_path, eval_config=eval_config))

  return beam.pvalue.PDone(examples.pipeline)
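For completeness, here is a filled-in version of the docstring's usage sketch. The paths are hypothetical placeholders, the default DirectRunner is assumed, and `tfma` is assumed to re-export this transform, `load_eval_result`, and `view.render_slicing_metrics` as the docstring does:

import apache_beam as beam
import tensorflow_model_analysis as tfma

data_location = '/tmp/examples.tfrecord'  # hypothetical path
model_location = '/tmp/eval_saved_model'  # hypothetical path
output_path = '/tmp/tfma_output'          # hypothetical path

with beam.Pipeline() as p:
  _ = (p
       | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
       | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
           eval_saved_model_path=model_location,
           output_path=output_path,
           display_only_data_location=data_location,
           example_weight_key='age'))

result = tfma.load_eval_result(output_path=output_path)
tfma.view.render_slicing_metrics(result)  # in a notebook, per the docstring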