Example #1
  def Do(self, input_dict,
         output_dict,
         exec_properties):
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for evaluating the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data.

    Returns:
      None
    """
    if 'model_exports' not in input_dict:
      raise ValueError('\'model_exports\' is missing in input dict.')
    if 'examples' not in input_dict:
      raise ValueError('\'examples\' is missing in input dict.')
    if 'output' not in output_dict:
      raise ValueError('\'output\' is missing in output dict.')

    self._log_startup(input_dict, output_dict, exec_properties)

    # Extract input artifacts
    model_exports_uri = types.get_single_uri(input_dict['model_exports'])

    feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'],
                      feature_slicing_spec)
    slice_spec = self._get_slice_spec_from_feature_slicing_spec(
        feature_slicing_spec)

    output_uri = types.get_single_uri(output_dict['output'])

    eval_model_path = path_utils.eval_model_path(model_exports_uri)

    tf.logging.info('Using {} for model eval.'.format(eval_model_path))
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_path)

    tf.logging.info('Evaluating model.')
    with beam.Pipeline(argv=self._get_beam_pipeline_args()) as pipeline:
      # pylint: disable=expression-not-assigned
      (pipeline
       | 'ReadData' >> beam.io.ReadFromTFRecord(
           file_pattern=io_utils.all_files_pattern(
               types.get_split_uri(input_dict['examples'], 'eval')))
       |
       'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           slice_spec=slice_spec,
           output_path=output_uri))
    tf.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))
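
The 'feature_slicing_spec' execution property consumed above is a
JSON-serialized evaluator_pb2.FeatureSlicingSpec. A minimal sketch of building
one (the 'trip_start_hour' slice column is an illustrative assumption):

from google.protobuf import json_format
from tfx.proto import evaluator_pb2

# One overall slice plus one slice by 'trip_start_hour' (illustrative column).
spec = evaluator_pb2.FeatureSlicingSpec(specs=[
    evaluator_pb2.SingleSlicingSpec(),
    evaluator_pb2.SingleSlicingSpec(column_for_slicing=['trip_start_hour']),
])
exec_properties = {'feature_slicing_spec': json_format.MessageToJson(spec)}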
Example #2
    def benchmarkAggregateCombineManualActuation(self):
        """Benchmark the aggregate combine stage "manually".

        Runs _AggregateCombineFn "manually" outside a Beam pipeline. Records the
        wall time taken.
        """

        # Run InputsToExtracts manually.
        records = []
        for x in self._dataset.read_raw_dataset(deserialize=False,
                                                limit=MAX_NUM_EXAMPLES):
            records.append({tfma.constants.INPUT_KEY: x})

        fn = tfma.extractors.predict_extractor._TFMAPredictionDoFn(  # pylint: disable=protected-access
            eval_shared_models={"": tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path())},
            eval_config=None)
        fn.setup()

        # Predict
        predict_batch_size = 1000
        predict_result = []
        for batch in benchmark_utils.batched_iterator(records,
                                                      predict_batch_size):
            predict_result.extend(fn.process(batch))

        # AggregateCombineFn
        #
        # We simulate accumulating records into multiple different accumulators,
        # each with inputs_per_accumulator records, and then merging the resulting
        # accumulators together at one go.

        # Number of elements to feed into a single accumulator.
        # (This means we will have len(records) / inputs_per_accumulator
        # accumulators to merge).
        inputs_per_accumulator = 1000

        combiner = tfma.evaluators.aggregate._AggregateCombineFn(  # pylint: disable=protected-access
            eval_shared_model=tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path()))
        accumulators = []

        start = time.time()
        for batch in benchmark_utils.batched_iterator(predict_result,
                                                      inputs_per_accumulator):
            accumulator = combiner.create_accumulator()
            for elem in batch:
                accumulator = combiner.add_input(accumulator, elem)
            accumulators.append(accumulator)
        final_accumulator = combiner.merge_accumulators(accumulators)
        final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Extract output to sanity check example count. This is not timed.
        extract_fn = tfma.evaluators.aggregate._ExtractOutputDoFn(  # pylint: disable=protected-access
            eval_shared_model=tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path()))
        extract_fn.setup()
        interpreted_output = list(extract_fn.process(((), final_output)))
        if len(interpreted_output) != 1:
            raise ValueError("expecting exactly 1 interpreted output, got %d" %
                             (len(interpreted_output)))
        got_example_count = interpreted_output[0][1].get(
            "post_export_metrics/example_count")
        if got_example_count != self._dataset.num_examples(
                limit=MAX_NUM_EXAMPLES):
            raise ValueError(
                "example count mismatch: expecting %d got %d" %
                (self._dataset.num_examples(limit=MAX_NUM_EXAMPLES),
                 got_example_count))

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "inputs_per_accumulator": inputs_per_accumulator,
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })
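
For reference, the "manual actuation" above drives the standard Beam CombineFn
lifecycle (create_accumulator -> add_input -> merge_accumulators ->
extract_output). A minimal self-contained sketch of that contract:

import apache_beam as beam

class SumFn(beam.CombineFn):
    def create_accumulator(self):
        return 0

    def add_input(self, accumulator, element):
        # add_input returns the (possibly new) accumulator; callers must keep it.
        return accumulator + element

    def merge_accumulators(self, accumulators):
        return sum(accumulators)

    def extract_output(self, accumulator):
        return accumulator

fn = SumFn()
accumulators = []
for batch in ([1, 2], [3, 4]):
    acc = fn.create_accumulator()
    for elem in batch:
        acc = fn.add_input(acc, elem)
    accumulators.append(acc)
assert fn.extract_output(fn.merge_accumulators(accumulators)) == 10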
Example #3
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for evaluating the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data.

    Returns:
      None
    """
    if 'model_exports' not in input_dict:
      raise ValueError('\'model_exports\' is missing in input dict.')
    if 'examples' not in input_dict:
      raise ValueError('\'examples\' is missing in input dict.')
    if 'output' not in output_dict:
      raise ValueError('\'output\' is missing in output dict.')

    self._log_startup(input_dict, output_dict, exec_properties)

    # Extract input artifacts
    model_exports_uri = artifact_utils.get_single_uri(
        input_dict['model_exports'])

    feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'],
                      feature_slicing_spec)
    slice_spec = self._get_slice_spec_from_feature_slicing_spec(
        feature_slicing_spec)

    output_uri = artifact_utils.get_single_uri(output_dict['output'])

    eval_model_path = path_utils.eval_model_path(model_exports_uri)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      add_metrics_callbacks = [
          tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
              thresholds=fairness_indicator_thresholds),
      ]

    absl.logging.info('Using {} for model eval.'.format(eval_model_path))
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_path,
        add_metrics_callbacks=add_metrics_callbacks)

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
      # pylint: disable=expression-not-assigned
      (pipeline
       | 'ReadData' >> beam.io.ReadFromTFRecord(
           file_pattern=io_utils.all_files_pattern(
               artifact_utils.get_split_uri(input_dict['examples'], 'eval')))
       |
       'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           slice_spec=slice_spec,
           output_path=output_uri))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))
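
The 'fairness_indicator_thresholds' execution property consumed above is a list
of decision thresholds, producing one set of fairness metrics per threshold. A
hedged sketch (the threshold values are illustrative assumptions):

import tensorflow_model_analysis as tfma
# The import registers the fairness indicators post-export metric.
import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=unused-import

add_metrics_callbacks = [
    tfma.post_export_metrics.fairness_indicators(thresholds=[0.25, 0.5, 0.75]),
]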
Example #4
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      eval_result_dir: A directory where the evaluation result should be written
        to.
      schema_file: A file containing a text-serialized Schema that describes the
        eval data.
      input_csv: A path to a csv file which should be the input for evaluation.
        This can only be set if big_query_table is None.
      big_query_table: A BigQuery table name specified as DATASET.TABLE which
        should be the input for evaluation. This can only be set if input_csv is
        None.
      eval_model_dir: A directory where the eval model is located.
      max_eval_rows: Number of rows to query from BigQuery.
      pipeline_args: additional DataflowRunner or DirectRunner args passed to the
        beam pipeline.

    Raises:
      ValueError: if input_csv and big_query_table are not specified correctly.
    """

    if input_csv == big_query_table and input_csv is None:
        raise ValueError(
            'one of --input_csv or --big_query_table should be provided.')

    slice_spec = [
        tfma.slicer.slicer.SingleSliceSpec(),
        tfma.slicer.slicer.SingleSliceSpec(columns=['trip_start_hour'])
    ]

    schema = taxi.read_schema(schema_file)

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=[
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(
            ),
            tfma.post_export_metrics.auc_plots()
        ])

    with beam.Pipeline(argv=pipeline_args) as pipeline:
        if input_csv:
            csv_coder = taxi.make_csv_coder(schema, input_csv)
            raw_data = (pipeline
                        | 'ReadFromText' >> beam.io.ReadFromText(
                            input_csv, skip_header_lines=1)
                        | 'ParseCSV' >> beam.Map(csv_coder.decode))
        else:
            assert big_query_table
            query = taxi.make_sql(big_query_table,
                                  max_eval_rows,
                                  for_eval=True)
            raw_feature_spec = taxi.get_raw_feature_spec(schema)
            raw_data = (
                pipeline
                | 'ReadBigQuery' >> beam.io.Read(
                    beam.io.BigQuerySource(query=query, use_standard_sql=True))
                | 'CleanData' >> beam.Map(
                    lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

        # Examples must be in clean tf-example format.
        coder = taxi.make_proto_coder(schema)

        _ = (raw_data
             | 'ToSerializedTFExample' >> beam.Map(coder.encode)
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 slice_spec=slice_spec,
                 output_path=eval_result_dir))
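
A hypothetical invocation of process_tfma above, evaluating a CSV input on the
local DirectRunner (all paths are placeholders):

process_tfma(
    eval_result_dir='/tmp/eval_result',      # placeholder
    schema_file='/tmp/schema.pbtxt',         # placeholder
    input_csv='/tmp/eval_data.csv',          # big_query_table stays None
    eval_model_dir='/tmp/eval_saved_model',  # placeholder
    pipeline_args=['--runner=DirectRunner'])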
Example #5
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs a batch job to evaluate the eval_model against the given input.

        Args:
          input_dict: Input dict from input key to a list of Artifacts.
            - model_exports: exported model.
            - examples: examples for evaluating the model.
          output_dict: Output dict from output key to a list of Artifacts.
            - output: model evaluation results.
          exec_properties: A dict of execution properties.
            - eval_config: JSON string of tfma.EvalConfig.
            - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
              instance, providing the way to slice the data. Deprecated, use
              eval_config.slicing_specs instead.

        Returns:
          None
        """
        if constants.EXAMPLES_KEY not in input_dict:
            raise ValueError('EXAMPLES_KEY is missing from input dict.')
        if constants.MODEL_KEY not in input_dict:
            raise ValueError('MODEL_KEY is missing from input dict.')
        if constants.EVALUATION_KEY not in output_dict:
            raise ValueError('EVALUATION_KEY is missing from output dict.')
        if len(input_dict[constants.MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one candidate model, there are {}.'.format(
                    len(input_dict[constants.MODEL_KEY])))
        if constants.BASELINE_MODEL_KEY in input_dict and len(
                input_dict[constants.BASELINE_MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one baseline model, there are {}.'.format(
                    len(input_dict[constants.BASELINE_MODEL_KEY])))

        self._log_startup(input_dict, output_dict, exec_properties)

        # Add fairness indicator metric callback if necessary.
        fairness_indicator_thresholds = exec_properties.get(
            'fairness_indicator_thresholds', None)
        add_metrics_callbacks = None
        if fairness_indicator_thresholds:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            add_metrics_callbacks = [
                tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                    thresholds=fairness_indicator_thresholds),
            ]

        output_uri = artifact_utils.get_single_uri(
            output_dict[constants.EVALUATION_KEY])

        run_validation = False
        models = []
        if 'eval_config' in exec_properties and exec_properties['eval_config']:
            slice_spec = None
            has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
            eval_config = tfma.EvalConfig()
            json_format.Parse(exec_properties['eval_config'], eval_config)
            eval_config = tfma.update_eval_config_with_defaults(
                eval_config,
                maybe_add_baseline=has_baseline,
                maybe_remove_baseline=not has_baseline)
            tfma.verify_eval_config(eval_config)
            # Do not validate the model when no thresholds are configured. This
            # is to avoid accidentally blessing models when users forget to set
            # thresholds.
            run_validation = bool(
                tfma.metrics.metric_thresholds_from_metrics_specs(
                    eval_config.metrics_specs))
            if len(eval_config.model_specs) > 2:
                raise ValueError(
                    'Cannot support more than two models. There are {} models '
                    'in this eval_config.'.format(len(eval_config.model_specs)))
            # Extract model artifacts.
            for model_spec in eval_config.model_specs:
                if model_spec.is_baseline:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.BASELINE_MODEL_KEY])
                else:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.MODEL_KEY])
                if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
                    model_path = path_utils.eval_model_path(model_uri)
                else:
                    model_path = path_utils.serving_model_path(model_uri)
                absl.logging.info('Using {} as {} model.'.format(
                    model_path, model_spec.name))
                models.append(
                    tfma.default_eval_shared_model(
                        model_name=model_spec.name,
                        eval_saved_model_path=model_path,
                        add_metrics_callbacks=add_metrics_callbacks,
                        eval_config=eval_config))
        else:
            eval_config = None
            assert ('feature_slicing_spec' in exec_properties
                    and exec_properties['feature_slicing_spec']
                    ), 'both eval_config and feature_slicing_spec are unset.'
            feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
            json_format.Parse(exec_properties['feature_slicing_spec'],
                              feature_slicing_spec)
            slice_spec = self._get_slice_spec_from_feature_slicing_spec(
                feature_slicing_spec)
            model_uri = artifact_utils.get_single_uri(
                input_dict[constants.MODEL_KEY])
            model_path = path_utils.eval_model_path(model_uri)
            absl.logging.info('Using {} for model eval.'.format(model_path))
            models.append(
                tfma.default_eval_shared_model(
                    eval_saved_model_path=model_path,
                    add_metrics_callbacks=add_metrics_callbacks))

        file_pattern = io_utils.all_files_pattern(
            artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                         'eval'))
        eval_shared_model = models[0] if len(models) == 1 else models
        schema = None
        if constants.SCHEMA_KEY in input_dict:
            schema = io_utils.SchemaReader().read(
                io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(
                        input_dict[constants.SCHEMA_KEY])))

        absl.logging.info('Evaluating model.')
        with self._make_beam_pipeline() as pipeline:
            # pylint: disable=expression-not-assigned
            if _USE_TFXIO:
                tensor_adapter_config = None
                if tfma.is_batched_input(eval_shared_model, eval_config):
                    tfxio = tf_example_record.TFExampleRecord(
                        file_pattern=file_pattern,
                        schema=schema,
                        raw_record_column_name=tfma.BATCHED_INPUT_KEY)
                    if schema is not None:
                        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                            arrow_schema=tfxio.ArrowSchema(),
                            tensor_representations=tfxio.TensorRepresentations(
                            ))
                    data = pipeline | 'ReadFromTFRecordToArrow' >> tfxio.BeamSource(
                    )
                else:
                    data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                        file_pattern=file_pattern)
                (data
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_shared_model=eval_shared_model,
                     eval_config=eval_config,
                     output_path=output_uri,
                     slice_spec=slice_spec,
                     tensor_adapter_config=tensor_adapter_config))
            else:
                data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                    file_pattern=file_pattern)
                (data
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_shared_model=eval_shared_model,
                     eval_config=eval_config,
                     output_path=output_uri,
                     slice_spec=slice_spec))
        absl.logging.info(
            'Evaluation complete. Results written to {}.'.format(output_uri))

        if not run_validation:
            # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
            absl.logging.info(
                'No threshold configured, will not validate model.')
            return
        # Set up blessing artifact
        blessing = artifact_utils.get_single_instance(
            output_dict[constants.BLESSING_KEY])
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
            artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
            input_dict[constants.MODEL_KEY][0].id)
        if input_dict.get(constants.BASELINE_MODEL_KEY):
            baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
            blessing.set_string_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
                baseline_model.uri)
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
                baseline_model.id)
        if 'current_component_id' in exec_properties:
            blessing.set_string_custom_property(
                'component_id', exec_properties['current_component_id'])
        # Check validation result and write BLESSED file accordingly.
        absl.logging.info('Checking validation results.')
        validation_result = tfma.load_validation_result(output_uri)
        if validation_result.validation_ok:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.BLESSED_VALUE)
        else:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME),
                '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.NOT_BLESSED_VALUE)
        absl.logging.info('Blessing result {} written to {}.'.format(
            validation_result.validation_ok, blessing.uri))
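
A minimal sketch of the 'eval_config' execution property used above: a
JSON-serialized tfma.EvalConfig whose slicing_specs replace the deprecated
feature_slicing_spec, and whose metric threshold enables the validation and
blessing path (the label key, metric name, and bound are illustrative
assumptions):

import tensorflow_model_analysis as tfma
from google.protobuf import json_format

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='tips')],  # illustrative label
    slicing_specs=[
        tfma.SlicingSpec(),  # overall slice
        tfma.SlicingSpec(feature_keys=['trip_start_hour']),
    ],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(
                class_name='BinaryAccuracy',
                threshold=tfma.MetricThreshold(
                    value_threshold=tfma.GenericValueThreshold(
                        lower_bound={'value': 0.6})))  # illustrative bound
        ])
    ])
exec_properties = {'eval_config': json_format.MessageToJson(eval_config)}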
Example #6
def process_tfma(
    schema_file,
    big_query_table=None,
    eval_model_dir=None,
    max_eval_rows=None,
    pipeline_args=None,
    publish_to_bq=False,
    project=None,
    metrics_table=None,
    metrics_dataset=None):
  """Runs a batch job to evaluate the eval_model against the given input.

  Args:
    schema_file: A file containing a text-serialized Schema that describes the
      eval data.
    big_query_table: A BigQuery table name specified as DATASET.TABLE which
      should be the input for evaluation.
    eval_model_dir: A directory where the eval model is located.
    max_eval_rows: Number of rows to query from BigQuery.
    pipeline_args: additional DataflowRunner or DirectRunner args passed to
      the beam pipeline.
    publish_to_bq: Whether to publish the job's metrics to BigQuery.
    project: GCP project in which the published metrics are stored.
    metrics_dataset: BigQuery dataset for the published metrics.
    metrics_table: BigQuery table for the published metrics; also used as the
      metrics namespace.

  Raises:
    ValueError: if big_query_table is not specified.
  """

  if big_query_table is None:
    raise ValueError('--big_query_table should be provided.')

  slice_spec = [
      tfma.slicer.SingleSliceSpec(),
      tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])
  ]
  metrics_namespace = metrics_table

  schema = taxi.read_schema(schema_file)

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=eval_model_dir,
      add_metrics_callbacks=[
          tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
          tfma.post_export_metrics.auc_plots()
      ])

  metrics_monitor = None
  if publish_to_bq:
    metrics_monitor = MetricsReader(
        publish_to_bq=publish_to_bq,
        project_name=project,
        bq_table=metrics_table,
        bq_dataset=metrics_dataset,
        namespace=metrics_namespace,
        filters=MetricsFilter().with_namespace(metrics_namespace))

  pipeline = beam.Pipeline(argv=pipeline_args)

  query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
  raw_feature_spec = taxi.get_raw_feature_spec(schema)
  raw_data = (
      pipeline
      | 'ReadBigQuery' >> ReadFromBigQuery(
          query=query, project=project, use_standard_sql=True)
      | 'Measure time: Start' >> beam.ParDo(MeasureTime(metrics_namespace))
      | 'CleanData' >>
      beam.Map(lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

  # Examples must be in clean tf-example format.
  coder = taxi.make_proto_coder(schema)
  # Prepare arguments for Extract, Evaluate and Write steps
  extractors = tfma.default_extractors(
      eval_shared_model=eval_shared_model,
      slice_spec=slice_spec,
      desired_batch_size=None,
      materialize=False)

  evaluators = tfma.default_evaluators(
      eval_shared_model=eval_shared_model,
      desired_batch_size=None,
      num_bootstrap_samples=1)
  _ = (
      raw_data
      | 'ToSerializedTFExample' >> beam.Map(coder.encode)
      | 'Extract Results' >> tfma.InputsToExtracts()
      | 'Extract and evaluate' >> tfma.ExtractAndEvaluate(
          extractors=extractors, evaluators=evaluators)
      | 'Map Evaluations to PCollection' >> MapEvalToPCollection()
      | 'Measure time: End' >> beam.ParDo(MeasureTime(metrics_namespace)))
  result = pipeline.run()
  result.wait_until_finish()
  if metrics_monitor:
    metrics_monitor.publish_metrics(result)
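
Hypothetical pipeline_args for running the job above on Dataflow instead of
the local DirectRunner (project, bucket, and region are placeholders):

pipeline_args = [
    '--runner=DataflowRunner',
    '--project=my-gcp-project',            # placeholder
    '--temp_location=gs://my-bucket/tmp',  # placeholder
    '--region=us-central1',                # placeholder
]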
Example #7
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
  """Runs a batch job to evaluate the eval_model against the given input.

  Args:
    eval_result_dir: A directory where the evaluation result should be written
      to.
    schema_file: A file containing a text-serialized Schema that describes the
      eval data.
    input_csv: A path to a csv file which should be the input for evaluation.
      This can only be set if big_query_table is None.
    big_query_table: A BigQuery table name specified as DATASET.TABLE which
      should be the input for evaluation. This can only be set if input_csv is
      None.
    eval_model_dir: A directory where the eval model is located.
    max_eval_rows: Number of rows to query from BigQuery.
    pipeline_args: additional DataflowRunner or DirectRunner args passed to the
      beam pipeline.

  Raises:
    ValueError: if input_csv and big_query_table are not specified correctly.
  """

  if input_csv == big_query_table and input_csv is None:
    raise ValueError(
        'one of --input_csv or --big_query_table should be provided.')

  slice_spec = [
      tfma.slicer.SingleSliceSpec(),
      tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])
  ]

  schema = taxi.read_schema(schema_file)

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=eval_model_dir,
      add_metrics_callbacks=[
          tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
          tfma.post_export_metrics.auc_plots()
      ])

  with beam.Pipeline(argv=pipeline_args) as pipeline:
    if input_csv:
      csv_coder = taxi.make_csv_coder(schema)
      raw_data = (
          pipeline
          | 'ReadFromText' >> beam.io.ReadFromText(
              input_csv, skip_header_lines=1)
          | 'ParseCSV' >> beam.Map(csv_coder.decode))
    else:
      assert big_query_table
      query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
      raw_feature_spec = taxi.get_raw_feature_spec(schema)
      raw_data = (
          pipeline
          | 'ReadBigQuery' >> beam.io.Read(
              beam.io.BigQuerySource(query=query, use_standard_sql=True))
          | 'CleanData' >>
          beam.Map(lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

    # Examples must be in clean tf-example format.
    coder = taxi.make_proto_coder(schema)

    _ = (
        raw_data
        | 'ToSerializedTFExample' >> beam.Map(coder.encode)
        |
        'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
            eval_shared_model=eval_shared_model,
            slice_spec=slice_spec,
            output_path=eval_result_dir))
Example #8
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for evaluating the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.
        - example_splits: JSON-serialized list of names of splits on which the
          metrics are computed. Default behavior (when example_splits is set to
          None) is using the 'eval' split.

    Returns:
      None
    """
    if constants.EXAMPLES_KEY not in input_dict:
      raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if constants.MODEL_KEY not in input_dict:
      raise ValueError('MODEL_KEY is missing from input dict.')
    if constants.EVALUATION_KEY not in output_dict:
      raise ValueError('EVALUATION_KEY is missing from output dict.')
    if len(input_dict[constants.MODEL_KEY]) > 1:
      raise ValueError('There can be only one candidate model, there are %d.' %
                       (len(input_dict[constants.MODEL_KEY])))
    if constants.BASELINE_MODEL_KEY in input_dict and len(
        input_dict[constants.BASELINE_MODEL_KEY]) > 1:
      raise ValueError('There can be only one baseline model, there are %d.' %
                       (len(input_dict[constants.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
      add_metrics_callbacks = [
          tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
              thresholds=fairness_indicator_thresholds),
      ]

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    run_validation = False
    models = []
    if 'eval_config' in exec_properties and exec_properties['eval_config']:
      slice_spec = None
      has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
      eval_config = tfma.EvalConfig()
      json_format.Parse(exec_properties['eval_config'], eval_config)
      eval_config = tfma.update_eval_config_with_defaults(
          eval_config,
          maybe_add_baseline=has_baseline,
          maybe_remove_baseline=not has_baseline)
      tfma.verify_eval_config(eval_config)
      # Do not validate the model when no thresholds are configured. This is to
      # avoid accidentally blessing models when users forget to set thresholds.
      run_validation = bool(tfma.metrics.metric_thresholds_from_metrics_specs(
          eval_config.metrics_specs))
      if len(eval_config.model_specs) > 2:
        raise ValueError(
            'Cannot support more than two models. There are %d models in this '
            'eval_config.' % len(eval_config.model_specs))
      # Extract model artifacts.
      for model_spec in eval_config.model_specs:
        if model_spec.is_baseline:
          model_uri = artifact_utils.get_single_uri(
              input_dict[constants.BASELINE_MODEL_KEY])
        else:
          model_uri = artifact_utils.get_single_uri(
              input_dict[constants.MODEL_KEY])
        if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
          model_path = path_utils.eval_model_path(model_uri)
        else:
          model_path = path_utils.serving_model_path(model_uri)
        logging.info('Using %s as %s model.', model_path, model_spec.name)
        models.append(tfma.default_eval_shared_model(
            model_name=model_spec.name,
            eval_saved_model_path=model_path,
            add_metrics_callbacks=add_metrics_callbacks,
            eval_config=eval_config))
    else:
      eval_config = None
      assert ('feature_slicing_spec' in exec_properties and
              exec_properties['feature_slicing_spec']
             ), 'both eval_config and feature_slicing_spec are unset.'
      feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
      json_format.Parse(exec_properties['feature_slicing_spec'],
                        feature_slicing_spec)
      slice_spec = self._get_slice_spec_from_feature_slicing_spec(
          feature_slicing_spec)
      model_uri = artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY])
      model_path = path_utils.eval_model_path(model_uri)
      logging.info('Using %s for model eval.', model_path)
      models.append(tfma.default_eval_shared_model(
          eval_saved_model_path=model_path,
          add_metrics_callbacks=add_metrics_callbacks))

    eval_shared_model = models[0] if len(models) == 1 else models
    schema = None
    if constants.SCHEMA_KEY in input_dict:
      schema = io_utils.SchemaReader().read(
          io_utils.get_only_uri_in_dir(
              artifact_utils.get_single_uri(input_dict[constants.SCHEMA_KEY])))

    # Load and deserialize example splits from execution properties.
    example_splits = json_utils.loads(
        exec_properties.get(constants.EXAMPLE_SPLITS_KEY, 'null'))
    if not example_splits:
      example_splits = ['eval']
      logging.info("The 'example_splits' parameter is not set, using 'eval' "
                   'split.')

    logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
      examples_list = []
      tensor_adapter_config = None
      # pylint: disable=expression-not-assigned
      if _USE_TFXIO and tfma.is_batched_input(eval_shared_model, eval_config):
        tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
            examples=[
                artifact_utils.get_single_instance(
                    input_dict[constants.EXAMPLES_KEY])
            ],
            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
            schema=schema,
            raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
        # TODO(b/161935932): refactor after TFXIO supports multiple patterns.
        for split in example_splits:
          file_pattern = io_utils.all_files_pattern(
              artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                           split))
          tfxio = tfxio_factory(file_pattern)
          data = (
              pipeline
              | 'ReadFromTFRecordToArrow[%s]' % split >> tfxio.BeamSource())
          examples_list.append(data)
        if schema is not None:
          # Use last tfxio as TensorRepresentations and ArrowSchema are fixed.
          tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
              arrow_schema=tfxio.ArrowSchema(),
              tensor_representations=tfxio.TensorRepresentations())
      else:
        for split in example_splits:
          file_pattern = io_utils.all_files_pattern(
              artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                           split))
          data = (
              pipeline
              | 'ReadFromTFRecord[%s]' % split >>
              beam.io.ReadFromTFRecord(file_pattern=file_pattern))
          examples_list.append(data)

      (examples_list | 'FlattenExamples' >> beam.Flatten()
       |
       'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           eval_config=eval_config,
           output_path=output_uri,
           slice_spec=slice_spec,
           tensor_adapter_config=tensor_adapter_config))
    logging.info('Evaluation complete. Results written to %s.', output_uri)

    if not run_validation:
      # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
      logging.info('No threshold configured, will not validate model.')
      return
    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[constants.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[constants.MODEL_KEY][0].id)
    if input_dict.get(constants.BASELINE_MODEL_KEY):
      baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
      blessing.set_string_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
          baseline_model.uri)
      blessing.set_int_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY, baseline_model.id)
    if 'current_component_id' in exec_properties:
      blessing.set_string_custom_property(
          'component_id', exec_properties['current_component_id'])
    # Check validation result and write BLESSED file accordingly.
    logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(output_uri)
    if validation_result.validation_ok:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.BLESSED_VALUE)
    else:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.NOT_BLESSED_VALUE)
    logging.info('Blessing result %s written to %s.',
                 validation_result.validation_ok, blessing.uri)
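
The 'example_splits' execution property read above is a JSON-serialized list
of split names; the default 'null' falls back to the 'eval' split. An
illustrative value (the split names are assumptions about how the upstream
ExampleGen was configured):

import json

exec_properties = {'example_splits': json.dumps(['eval', 'test'])}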