Exemplos de MetricsAndPlotsEvaluator em Python, exemplos de tensorflow_model_analysis.evaluators.metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator em Python

Exemplo n.º 1

0

Exibir arquivo

def default_evaluators(  # pylint: disable=invalid-name
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None) -> List[
        evaluator.Evaluator]:
    """Returns the default evaluators for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if there are metrics to be computed in-graph using the model.
    eval_config: Eval config.
    compute_confidence_intervals: Deprecated (use eval_config).
    k_anonymization_count: Deprecated (use eval_config).
    desired_batch_size: Optional batch size for batching in combiner.
    serialize: Deprecated.
    random_seed_for_testing: Provide for deterministic tests only.
  """
    disabled_outputs = []
    if eval_config and eval_config.options:
        disabled_outputs = eval_config.options.disabled_outputs
    if (constants.METRICS_KEY in disabled_outputs
            and constants.PLOTS_KEY in disabled_outputs):
        return []
    if (eval_shared_model and not isinstance(eval_shared_model, dict) and
        ((not eval_shared_model.model_loader.tags
          or eval_constants.EVAL_TAG in eval_shared_model.model_loader.tags)
         and (not eval_config or not eval_config.metrics_specs))):
        # Backwards compatibility for previous add_metrics_callbacks implementation.
        if eval_config is not None:
            if eval_config.options.HasField('compute_confidence_intervals'):
                compute_confidence_intervals = (
                    eval_config.options.compute_confidence_intervals.value)
            if eval_config.options.HasField('k_anonymization_count'):
                k_anonymization_count = eval_config.options.k_anonymization_count.value
        return [
            metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
                eval_shared_model,
                compute_confidence_intervals=compute_confidence_intervals,
                k_anonymization_count=k_anonymization_count,
                desired_batch_size=desired_batch_size,
                serialize=serialize,
                random_seed_for_testing=random_seed_for_testing)
        ]
    else:
        return [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config, eval_shared_model=eval_shared_model)
        ]

Exemplo n.º 2

0

Exibir arquivo

Arquivo: model_eval_lib.py Projeto: hate-deadline/model-analysis

def default_evaluators(  # pylint: disable=invalid-name
        eval_shared_model: Optional[types.EvalSharedModel] = None,
        eval_shared_models: Optional[List[types.EvalSharedModel]] = None,
        eval_config: config.EvalConfig = None,
        desired_batch_size: Optional[int] = None,
        compute_confidence_intervals: Optional[bool] = False,
        k_anonymization_count: int = 1,
        serialize: bool = False) -> List[evaluator.Evaluator]:
    """Returns the default evaluators for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation). Required
      if any of the metrics are derived or computed using the model.
    eval_shared_models: Optional shared models (multi-model evaluation).
      Required if any of the metrics are derived or computed using the model.
    eval_config: Eval config.
    desired_batch_size: Deprecated (use eval_config).
    compute_confidence_intervals: Deprecated (use eval_config).
    k_anonymization_count: Deprecated (use eval_config).
    serialize: Deprecated.
  """
    # TODO(b/141016373): Add support for multiple models.
    if eval_shared_model is not None:
        eval_shared_models = [eval_shared_model]
    disabled_outputs = []
    if eval_config and eval_config.output_data_specs:
        disabled_outputs = eval_config.output_data_specs[0].disabled_outputs
    if (constants.METRICS_KEY in disabled_outputs
            and constants.PLOTS_KEY in disabled_outputs):
        return []
    if ((not eval_shared_models[0].model_loader.tags
         or eval_constants.EVAL_TAG in eval_shared_models[0].model_loader.tags)
            and (not eval_config or not eval_config.metrics_specs)):
        # Backwards compatibility for previous EvalSavedModel implementation.
        if eval_config is not None:
            if eval_config.options.HasField('desired_batch_size'):
                desired_batch_size = eval_config.options.desired_batch_size.value
            if eval_config.options.HasField('compute_confidence_intervals'):
                compute_confidence_intervals = (
                    eval_config.options.compute_confidence_intervals.value)
            if eval_config.options.HasField('k_anonymization_count'):
                k_anonymization_count = eval_config.options.k_anonymization_count.value
        return [
            metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
                eval_shared_models[0],
                desired_batch_size,
                compute_confidence_intervals=compute_confidence_intervals,
                k_anonymization_count=k_anonymization_count,
                serialize=serialize)
        ]
    else:
        return [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config, eval_shared_models=eval_shared_models)
        ]

Exemplo n.º 3

0

Exibir arquivo

Arquivo: tfma_v2_benchmark_base.py Projeto: vsatyav007/tfx

    def benchmarkMiniPipelineBatched(self):
        """Benchmark a batched "mini" TFMA - predict, slice and compute metrics.

    Runs a "mini" version of TFMA in a Beam pipeline. Records the wall time
    taken for the whole pipeline.
    """
        self._init_model()
        pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=benchmark_utils.read_schema(
                self._dataset.tf_metadata_schema_path()),
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        raw_data = (pipeline
                    | "Examples" >> beam.Create(
                        self._dataset.read_raw_dataset(deserialize=False,
                                                       limit=MAX_NUM_EXAMPLES))
                    | "BatchExamples" >> tfx_io.BeamSource()
                    | "InputsToExtracts" >> tfma.BatchedInputsToExtracts())

        _ = (raw_data
             | "BatchedInputExtractor" >> batched_input_extractor.
             BatchedInputExtractor(eval_config=self._eval_config).ptransform
             | "V2BatchedPredictExtractor" >>
             batched_predict_extractor_v2.BatchedPredictExtractor(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform
             | "UnbatchExtractor" >>
             unbatch_extractor.UnbatchExtractor().ptransform
             | "SliceKeyExtractor" >>
             tfma.extractors.SliceKeyExtractor().ptransform
             | "V2ComputeMetricsAndPlots" >>
             metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform)

        start = time.time()
        result = pipeline.run()
        result.wait_until_finish()
        end = time.time()
        delta = end - start

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })

Exemplo n.º 4

0

Exibir arquivo

Arquivo: tfma_v2_benchmark_base.py Projeto: vsatyav007/tfx

    def benchmarkMiniPipelineUnbatched(self):
        """Benchmark an unbatched "mini" TFMA - predict, slice and compute metrics.

    Runs a "mini" version of TFMA in a Beam pipeline. Records the wall time
    taken for the whole pipeline.
    """
        self._init_model()
        pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())
        raw_data = (pipeline
                    | "Examples" >> beam.Create(
                        self._dataset.read_raw_dataset(deserialize=False,
                                                       limit=MAX_NUM_EXAMPLES))
                    | "InputsToExtracts" >> tfma.InputsToExtracts())

        _ = (raw_data
             | "InputExtractor" >> input_extractor.InputExtractor(
                 eval_config=self._eval_config).ptransform
             | "V2PredictExtractor" >> predict_extractor_v2.PredictExtractor(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform
             | "SliceKeyExtractor" >>
             tfma.extractors.SliceKeyExtractor().ptransform
             | "V2ComputeMetricsAndPlots" >>
             metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform)

        start = time.time()
        result = pipeline.run()
        result.wait_until_finish()
        end = time.time()
        delta = end - start

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })

Exemplo n.º 5

0

Exibir arquivo

Arquivo: metrics_plots_and_validations_writer_test.py Projeto: genehwung/model-analysis

  def testWriteValidationResults(self):
    model_dir, baseline_dir = self._getExportDir(), self._getBaselineDir()
    eval_shared_model = self._build_keras_model(model_dir, mul=0)
    baseline_eval_shared_model = self._build_keras_model(baseline_dir, mul=1)
    validations_file = os.path.join(self._getTempDir(),
                                    constants.VALIDATIONS_KEY)
    examples = [
        self._makeExample(
            input=0.0,
            label=1.0,
            example_weight=1.0,
            extra_feature='non_model_feature'),
        self._makeExample(
            input=1.0,
            label=0.0,
            example_weight=0.5,
            extra_feature='non_model_feature'),
    ]

    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(
                name='candidate',
                label_key='label',
                example_weight_key='example_weight'),
            config.ModelSpec(
                name='baseline',
                label_key='label',
                example_weight_key='example_weight',
                is_baseline=True)
        ],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=[
            config.MetricsSpec(
                metrics=[
                    config.MetricConfig(
                        class_name='WeightedExampleCount',
                        # 1.5 < 1, NOT OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                upper_bound={'value': 1}))),
                    config.MetricConfig(
                        class_name='ExampleCount',
                        # 2 > 10, NOT OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                lower_bound={'value': 10}))),
                    config.MetricConfig(
                        class_name='MeanLabel',
                        # 0 > 0 and 0 > 0%?: NOT OK.
                        threshold=config.MetricThreshold(
                            change_threshold=config.GenericChangeThreshold(
                                direction=config.MetricDirection
                                .HIGHER_IS_BETTER,
                                relative={'value': 0},
                                absolute={'value': 0}))),
                    config.MetricConfig(
                        # MeanPrediction = (0+0)/(1+0.5) = 0
                        class_name='MeanPrediction',
                        # -.01 < 0 < .01, OK.
                        # Diff% = -.333/.333 = -100% < -99%, OK.
                        # Diff = 0 - .333 = -.333 < 0, OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                upper_bound={'value': .01},
                                lower_bound={'value': -.01}),
                            change_threshold=config.GenericChangeThreshold(
                                direction=config.MetricDirection
                                .LOWER_IS_BETTER,
                                relative={'value': -.99},
                                absolute={'value': 0})))
                ],
                model_names=['candidate', 'baseline']),
        ],
        options=config.Options(
            disabled_outputs={'values': ['eval_config.json']}),
    )
    slice_spec = [
        slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
    ]
    eval_shared_models = {
        'candidate': eval_shared_model,
        'baseline': baseline_eval_shared_model
    }
    extractors = [
        input_extractor.InputExtractor(eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_shared_model=eval_shared_models, eval_config=eval_config),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_models)
    ]
    output_paths = {
        constants.VALIDATIONS_KEY: validations_file,
    }
    writers = [
        metrics_plots_and_validations_writer.MetricsPlotsAndValidationsWriter(
            output_paths, add_metrics_callbacks=[])
    ]

    with beam.Pipeline() as pipeline:

      # pylint: disable=no-value-for-parameter
      _ = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
          | 'ExtractEvaluateAndWriteResults' >>
          model_eval_lib.ExtractEvaluateAndWriteResults(
              eval_config=eval_config,
              eval_shared_model=eval_shared_model,
              extractors=extractors,
              evaluators=evaluators,
              writers=writers))
      # pylint: enable=no-value-for-parameter

    validation_result = model_eval_lib.load_validation_result(
        os.path.dirname(validations_file))

    expected_validations = [
        text_format.Parse(
            """
            metric_key {
              name: "weighted_example_count"
              model_name: "candidate"
            }
            metric_threshold {
              value_threshold {
                upper_bound {
                  value: 1.0
                }
              }
            }
            metric_value {
              double_value {
                value: 1.5
              }
            }
            """, validation_result_pb2.ValidationFailure()),
        text_format.Parse(
            """
            metric_key {
              name: "example_count"
            }
            metric_threshold {
              value_threshold {
                lower_bound {
                  value: 10.0
                }
              }
            }
            metric_value {
              double_value {
                value: 2.0
              }
            }
            """, validation_result_pb2.ValidationFailure()),
        text_format.Parse(
            """
            metric_key {
              name: "mean_label"
              model_name: "candidate"
              is_diff: true
            }
            metric_threshold {
              change_threshold {
                absolute {
                  value: 0.0
                }
                relative {
                  value: 0.0
                }
                direction: HIGHER_IS_BETTER
              }
            }
            metric_value {
              double_value {
                value: 0.0
              }
            }
            """, validation_result_pb2.ValidationFailure()),
    ]
    self.assertFalse(validation_result.validation_ok)
    self.assertLen(validation_result.metric_validations_per_slice, 1)
    self.assertCountEqual(
        expected_validations,
        validation_result.metric_validations_per_slice[0].failures)

Exemplo n.º 6

0

Exibir arquivo

    def testRunModelAnalysisWithQueryBasedMetrics(self):
        input_layer = tf.keras.layers.Input(shape=(1, ), name='age')
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(input_layer, output_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy)

        features = {'age': [[20.0]]}
        labels = [[1]]
        example_weights = [1.0]
        dataset = tf.data.Dataset.from_tensor_slices(
            (features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(1)
        model.fit(dataset, steps_per_epoch=1)

        model_location = os.path.join(self._getTempDir(), 'export_dir')
        model.save(model_location, save_format='tf')

        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=0.0),
            self._makeExample(age=3.0, language='english', label=0.0),
            self._makeExample(age=5.0, language='chinese', label=1.0)
        ]
        data_location = self._writeTFExamplesToTFRecords(examples)
        slicing_specs = [config.SlicingSpec()]
        eval_config = config.EvalConfig(
            input_data_specs=[config.InputDataSpec(location=data_location)],
            model_specs=[
                config.ModelSpec(location=model_location, label_key='label')
            ],
            output_data_specs=[
                config.OutputDataSpec(default_location=self._getTempDir())
            ],
            slicing_specs=slicing_specs,
            metrics_specs=metric_specs.specs_from_metrics(
                [ndcg.NDCG(gain_key='age', name='ndcg')],
                binarize=config.BinarizationOptions(top_k_list=[1]),
                query_key='language'))
        eval_shared_model = model_eval_lib.default_eval_shared_model(
            eval_saved_model_path=model_location,
            tags=[tf.saved_model.SERVING])
        eval_result = model_eval_lib.run_model_analysis(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model],
            evaluators=[
                metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                    eval_config=eval_config,
                    eval_shared_models=[eval_shared_model])
            ])

        self.assertEqual(eval_result.config.model_specs[0].location,
                         model_location)
        self.assertEqual(eval_result.config.input_data_specs[0].location,
                         data_location)
        self.assertLen(eval_result.slicing_metrics, 1)
        got_slice_key, got_metrics = eval_result.slicing_metrics[0]
        self.assertEqual(got_slice_key, ())
        self.assertIn('', got_metrics)  # output_name
        got_metrics = got_metrics['']
        expected_metrics = {
            '': {
                'example_count': True,
                'weighted_example_count': True,
            },
            'topK:1': {
                'ndcg': True,
            },
        }
        for group in expected_metrics:
            self.assertIn(group, got_metrics)
            for k in expected_metrics[group]:
                self.assertIn(k, got_metrics[group])

Exemplo n.º 7

0

Exibir arquivo

  def testWriteValidationResults(self, output_file_format):
    model_dir, baseline_dir = self._getExportDir(), self._getBaselineDir()
    eval_shared_model = self._build_keras_model(model_dir, mul=0)
    baseline_eval_shared_model = self._build_keras_model(baseline_dir, mul=1)
    validations_file = os.path.join(self._getTempDir(),
                                    constants.VALIDATIONS_KEY)
    schema = text_format.Parse(
        """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "input"
              value {
                dense_tensor {
                  column_name: "input"
                  shape { dim { size: 1 } }
                }
              }
            }
          }
        }
        feature {
          name: "input"
          type: FLOAT
        }
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "extra_feature"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    examples = [
        self._makeExample(
            input=0.0,
            label=1.0,
            example_weight=1.0,
            extra_feature='non_model_feature'),
        self._makeExample(
            input=1.0,
            label=0.0,
            example_weight=0.5,
            extra_feature='non_model_feature'),
    ]

    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(
                name='candidate',
                label_key='label',
                example_weight_key='example_weight'),
            config.ModelSpec(
                name='baseline',
                label_key='label',
                example_weight_key='example_weight',
                is_baseline=True)
        ],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=[
            config.MetricsSpec(
                metrics=[
                    config.MetricConfig(
                        class_name='WeightedExampleCount',
                        # 1.5 < 1, NOT OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                upper_bound={'value': 1}))),
                    config.MetricConfig(
                        class_name='ExampleCount',
                        # 2 > 10, NOT OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                lower_bound={'value': 10}))),
                    config.MetricConfig(
                        class_name='MeanLabel',
                        # 0 > 0 and 0 > 0%?: NOT OK.
                        threshold=config.MetricThreshold(
                            change_threshold=config.GenericChangeThreshold(
                                direction=config.MetricDirection
                                .HIGHER_IS_BETTER,
                                relative={'value': 0},
                                absolute={'value': 0}))),
                    config.MetricConfig(
                        # MeanPrediction = (0+0)/(1+0.5) = 0
                        class_name='MeanPrediction',
                        # -.01 < 0 < .01, OK.
                        # Diff% = -.333/.333 = -100% < -99%, OK.
                        # Diff = 0 - .333 = -.333 < 0, OK.
                        threshold=config.MetricThreshold(
                            value_threshold=config.GenericValueThreshold(
                                upper_bound={'value': .01},
                                lower_bound={'value': -.01}),
                            change_threshold=config.GenericChangeThreshold(
                                direction=config.MetricDirection
                                .LOWER_IS_BETTER,
                                relative={'value': -.99},
                                absolute={'value': 0})))
                ],
                model_names=['candidate', 'baseline']),
        ],
        options=config.Options(
            disabled_outputs={'values': ['eval_config.json']}),
    )
    slice_spec = [
        slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
    ]
    eval_shared_models = {
        'candidate': eval_shared_model,
        'baseline': baseline_eval_shared_model
    }
    extractors = [
        batched_input_extractor.BatchedInputExtractor(eval_config),
        batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_shared_model=eval_shared_models,
            eval_config=eval_config,
            tensor_adapter_config=tensor_adapter_config),
        unbatch_extractor.UnbatchExtractor(),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_models)
    ]
    output_paths = {
        constants.VALIDATIONS_KEY: validations_file,
    }
    writers = [
        metrics_plots_and_validations_writer.MetricsPlotsAndValidationsWriter(
            output_paths,
            add_metrics_callbacks=[],
            output_file_format=output_file_format)
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      _ = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
          | 'BatchExamples' >> tfx_io.BeamSource()
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | 'ExtractEvaluate' >> model_eval_lib.ExtractAndEvaluate(
              extractors=extractors, evaluators=evaluators)
          | 'WriteResults' >> model_eval_lib.WriteResults(writers=writers))
      # pylint: enable=no-value-for-parameter

    validation_result = (
        metrics_plots_and_validations_writer
        .load_and_deserialize_validation_result(
            os.path.dirname(validations_file)))

    expected_validations = [
        text_format.Parse(
            """
            metric_key {
              name: "weighted_example_count"
              model_name: "candidate"
            }
            metric_threshold {
              value_threshold {
                upper_bound {
                  value: 1.0
                }
              }
            }
            metric_value {
              double_value {
                value: 1.5
              }
            }
            """, validation_result_pb2.ValidationFailure()),
        text_format.Parse(
            """
            metric_key {
              name: "example_count"
              model_name: "candidate"
            }
            metric_threshold {
              value_threshold {
                lower_bound {
                  value: 10.0
                }
              }
            }
            metric_value {
              double_value {
                value: 2.0
              }
            }
            """, validation_result_pb2.ValidationFailure()),
        text_format.Parse(
            """
            metric_key {
              name: "mean_label"
              model_name: "candidate"
              is_diff: true
            }
            metric_threshold {
              change_threshold {
                absolute {
                  value: 0.0
                }
                relative {
                  value: 0.0
                }
                direction: HIGHER_IS_BETTER
              }
            }
            metric_value {
              double_value {
                value: 0.0
              }
            }
            """, validation_result_pb2.ValidationFailure()),
    ]
    self.assertFalse(validation_result.validation_ok)
    self.assertLen(validation_result.metric_validations_per_slice, 1)
    self.assertCountEqual(
        expected_validations,
        validation_result.metric_validations_per_slice[0].failures)

Exemplo n.º 8

0

Exibir arquivo

def default_evaluators(  # pylint: disable=invalid-name
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None) -> List[evaluator.Evaluator]:
  """Returns the default evaluators for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if there are metrics to be computed in-graph using the model.
    eval_config: Eval config.
    compute_confidence_intervals: Deprecated (use eval_config).
    k_anonymization_count: Deprecated (use eval_config).
    desired_batch_size: Optional batch size for batching in combiner.
    serialize: Deprecated.
    random_seed_for_testing: Provide for deterministic tests only.
  """
  disabled_outputs = []
  if eval_config:
    eval_config = config.update_eval_config_with_defaults(eval_config)
    disabled_outputs = eval_config.options.disabled_outputs.values
    if model_util.get_model_types(eval_config) == set([constants.TF_LITE]):
      # no in-graph metrics present when tflite is used.
      if eval_shared_model:
        if isinstance(eval_shared_model, dict):
          eval_shared_model = {
              k: v._replace(include_default_metrics=False)
              for k, v in eval_shared_model.items()
          }
        else:
          eval_shared_model = eval_shared_model._replace(
              include_default_metrics=False)
  if (constants.METRICS_KEY in disabled_outputs and
      constants.PLOTS_KEY in disabled_outputs):
    return []
  if _is_legacy_eval(eval_shared_model, eval_config):
    # Backwards compatibility for previous add_metrics_callbacks implementation.
    if eval_config is not None:
      if eval_config.options.HasField('compute_confidence_intervals'):
        compute_confidence_intervals = (
            eval_config.options.compute_confidence_intervals.value)
      if eval_config.options.HasField('k_anonymization_count'):
        k_anonymization_count = eval_config.options.k_anonymization_count.value
    return [
        metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
            eval_shared_model,
            compute_confidence_intervals=compute_confidence_intervals,
            k_anonymization_count=k_anonymization_count,
            desired_batch_size=desired_batch_size,
            serialize=serialize,
            random_seed_for_testing=random_seed_for_testing)
    ]
  else:
    return [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
    ]

Exemplo n.º 9

0

Exibir arquivo