Example #1
    def testVerifyEvaluatorRaisesValueError(self):
        extractors = [
            extractor.Extractor(stage_name='ExtractorThatExists',
                                ptransform=None)
        ]
        evaluator.verify_evaluator(
            evaluator.Evaluator(stage_name='EvaluatorWithoutError',
                                run_after='ExtractorThatExists',
                                ptransform=None), extractors)

        with self.assertRaises(ValueError):
            evaluator.verify_evaluator(
                evaluator.Evaluator(stage_name='EvaluatorWithError',
                                    run_after='ExtractorThatDoesNotExist',
                                    ptransform=None), extractors)
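
The ValueError above comes from the run_after check. A minimal sketch of that check (a hypothetical reimplementation for illustration, not the library's actual code) might look like:

# Hypothetical sketch of the run_after validation; the real
# evaluator.verify_evaluator may perform additional checks (e.g. allowing
# the LAST_EXTRACTOR_STAGE_NAME sentinel).
def verify_evaluator_sketch(evaluator_to_verify, extractors):
    extractor_stage_names = {e.stage_name for e in extractors}
    if (evaluator_to_verify.run_after is not None and
            evaluator_to_verify.run_after not in extractor_stage_names):
        raise ValueError(
            'evaluator %s has run_after=%s, which is not one of the extractor '
            'stages %s' % (evaluator_to_verify.stage_name,
                           evaluator_to_verify.run_after,
                           sorted(extractor_stage_names)))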
Example #2
def QueryBasedMetricsEvaluator(  # pylint: disable=invalid-name
    query_id: Text,
    prediction_key: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    query_id: Key of query ID column in the features dictionary.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    combine_fns: List of query based metrics combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for computing query-based metrics. The output will be stored under
    'metrics' and 'plots' keys.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateQueryBasedMetrics',
                               run_after=run_after,
                               ptransform=EvaluateQueryBasedMetrics(
                                   query_id=query_id,
                                   prediction_key=prediction_key,
                                   combine_fns=combine_fns,
                                   metrics_key=metrics_key))
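
The factory simply packages a stage name, a run_after dependency, and a PTransform into an evaluator.Evaluator tuple. A quick illustrative call (the query_id value and the empty combine_fns list are placeholders, no pipeline is run here):

# Illustrative only: placeholder arguments.
query_evaluator = QueryBasedMetricsEvaluator(
    query_id='query_id',    # assumed feature key holding the query ID
    prediction_key='',      # empty: the model outputs a predictions Tensor
    combine_fns=[])         # real query-based metric CombineFns go here
print(query_evaluator.stage_name)  # 'EvaluateQueryBasedMetrics'
print(query_evaluator.run_after)   # the slice key extractor's stage name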
def AnalysisTableEvaluator(  # pylint: disable=invalid-name
    key=constants.ANALYSIS_KEY,
    run_after=extractor.LAST_EXTRACTOR_STAGE_NAME,
    include=None,
    exclude=None):
  """Creates an Evaluator for returning Extracts data for analysis.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    include: Keys of extracts to include in output. Keys starting with '_' are
      automatically filtered out at write time.
    exclude: Keys of extracts to exclude from output.

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.

  Raises:
    ValueError: If both include and exclude are used.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateExtracts',
      run_after=run_after,
      ptransform=EvaluateExtracts(key=key, include=include, exclude=exclude))
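
A minimal usage sketch (the extract key names 'features' and 'labels' below are illustrative assumptions):

# Keep only selected extracts in the 'analysis' output; since passing both
# include and exclude raises ValueError, set at most one of them.
analysis_evaluator = AnalysisTableEvaluator(include=['features', 'labels'])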
Example #4
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_shared_model,
    desired_batch_size=None,
    metrics_key=constants.METRICS_KEY,
    plots_key=constants.PLOTS_KEY,
    run_after=slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    num_bootstrap_samples=1,
):
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=EvaluateMetricsAndPlots(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          metrics_key=metrics_key,
          plots_key=plots_key,
          num_bootstrap_samples=num_bootstrap_samples))
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_models: List[types.EvalSharedModel],
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_models: Shared model instances.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateMetricsAndPlots',
                               run_after=run_after,
                               ptransform=_EvaluateMetricsAndPlots(
                                   eval_config=eval_config,
                                   eval_shared_models=eval_shared_models,
                                   metrics_key=metrics_key,
                                   plots_key=plots_key))
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    if eval_shared_models:
        eval_shared_models = {m.model_name: m for m in eval_shared_models}

    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateMetricsAndPlots',
                               run_after=run_after,
                               ptransform=_EvaluateMetricsAndPlots(
                                   eval_config=eval_config,
                                   eval_shared_models=eval_shared_models,
                                   metrics_key=metrics_key,
                                   plots_key=plots_key))
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
        eval_shared_model,
        desired_batch_size=None,
        metrics_key=constants.METRICS_KEY,
        plots_key=constants.PLOTS_KEY,
        run_after=slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME):
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateMetricsAndPlots',
                               run_after=run_after,
                               ptransform=EvaluateMetricsAndPlots(
                                   eval_shared_model=eval_shared_model,
                                   desired_batch_size=desired_batch_size,
                                   metrics_key=metrics_key,
                                   plots_key=plots_key))
def MetricsPlotsAndValidationsEvaluator(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    attributions_key: Text = constants.ATTRIBUTIONS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    schema: Optional[schema_pb2.Schema] = None,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    attributions_key: Name to use for attributions key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    schema: A schema to use for customizing metrics and plots.
    random_seed_for_testing: Seed to use for unit testing.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. The model's signature will be invoked
      with those tensors (matched by names). If None, an attempt will be made to
      create an adapter based on the model's input signature; otherwise, the
      model will be invoked with raw examples (assuming a signature of a single
      1-D string tensor).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if eval_shared_models:
    eval_shared_models = {m.model_name: m for m in eval_shared_models}

  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsPlotsAndValidations(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key,
          attributions_key=attributions_key,
          schema=schema,
          random_seed_for_testing=random_seed_for_testing,
          tensor_adapter_config=tensor_adapter_config))
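
One plausible way to plug this evaluator into an evaluation run, assuming tfma.run_model_analysis accepts a custom evaluators list as in recent TFMA releases (all paths and the EvalConfig contents below are placeholders):

# Sketch only: replace paths and EvalConfig contents with real values.
import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec()])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/saved_model', eval_config=eval_config)

eval_result = tfma.run_model_analysis(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model,
    data_location='/path/to/examples.tfrecord',
    output_path='/path/to/output',
    evaluators=[
        MetricsPlotsAndValidationsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
    ])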
Example #9
def AnalysisTableEvaluator(  # pylint: disable=invalid-name
        key=constants.ANALYSIS_KEY,
        run_after=extractor.LAST_EXTRACTOR_STAGE_NAME):
    """Creates an Evaluator for returning Extracts data for analysis.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateExtracts',
                               run_after=run_after,
                               ptransform=EvaluateExtracts(key=key))
Example #10
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
        eval_shared_model: types.EvalSharedModel,
        desired_batch_size: Optional[int] = None,
        metrics_key: Text = constants.METRICS_KEY,
        plots_key: Text = constants.PLOTS_KEY,
        run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
        compute_confidence_intervals: Optional[bool] = False,
        min_slice_size: int = 1,
        serialize=False,
        random_seed_for_testing: Optional[int] = None) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    compute_confidence_intervals: Whether or not to compute confidence
      intervals.
    min_slice_size: If the number of examples in a specific slice is less
      than min_slice_size, then an error will be returned for that slice.
      This is useful for preserving privacy by not displaying aggregated
      data for slices with a small number of examples.
    serialize: If true, serialize the metrics to protos as part of the
      evaluation as well.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(
        stage_name='EvaluateMetricsAndPlots',
        run_after=run_after,
        ptransform=EvaluateMetricsAndPlots(
            eval_shared_model=eval_shared_model,
            desired_batch_size=desired_batch_size,
            metrics_key=metrics_key,
            plots_key=plots_key,
            compute_confidence_intervals=compute_confidence_intervals,
            min_slice_size=min_slice_size,
            serialize=serialize,
            random_seed_for_testing=random_seed_for_testing))
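
A short sketch of turning on confidence intervals and the privacy floor (the eval_shared_model placeholder is assumed to be built elsewhere, e.g. via tfma.default_eval_shared_model):

# Illustrative call: bootstrap confidence intervals plus a minimum slice size.
ci_evaluator = MetricsAndPlotsEvaluator(
    eval_shared_model=eval_shared_model,  # placeholder shared model
    compute_confidence_intervals=True,
    min_slice_size=50)  # slices with fewer than 50 examples report an error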
Example #11
def MetricsPlotsAndValidationsEvaluator(  # pylint: disable=invalid-name
        eval_config: config_pb2.EvalConfig,
        eval_shared_model: Optional[
            types.MaybeMultipleEvalSharedModels] = None,
        metrics_key: str = constants.METRICS_KEY,
        plots_key: str = constants.PLOTS_KEY,
        attributions_key: str = constants.ATTRIBUTIONS_KEY,
        run_after: str = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
        schema: Optional[schema_pb2.Schema] = None,
        random_seed_for_testing: Optional[int] = None) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    attributions_key: Name to use for attributions key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    schema: A schema to use for customizing metrics and plots.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    if eval_shared_models:
        eval_shared_models = {m.model_name: m for m in eval_shared_models}

    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(
        stage_name='EvaluateMetricsAndPlots',
        run_after=run_after,
        ptransform=_EvaluateMetricsPlotsAndValidations(
            eval_config=eval_config,
            eval_shared_models=eval_shared_models,
            metrics_key=metrics_key,
            plots_key=plots_key,
            attributions_key=attributions_key,
            schema=schema,
            random_seed_for_testing=random_seed_for_testing))
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
        eval_shared_model: types.EvalSharedModel,
        desired_batch_size: Optional[int] = None,
        metrics_key: Text = constants.METRICS_KEY,
        plots_key: Text = constants.PLOTS_KEY,
        run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
        num_bootstrap_samples: Optional[int] = 1,
        k_anonymization_count: int = 1,
        serialize=False) -> evaluator.Evaluator:
    """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.
    k_anonymization_count: If the number of examples in a specific slice is less
      than k_anonymization_count, then an error will be returned for that slice.
      This is useful for preserving privacy by not displaying aggregated
      data for slices with a small number of examples.
    serialize: If true, serialize the metrics to protos as part of the
      evaluation as well.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateMetricsAndPlots',
                               run_after=run_after,
                               ptransform=EvaluateMetricsAndPlots(
                                   eval_shared_model=eval_shared_model,
                                   desired_batch_size=desired_batch_size,
                                   metrics_key=metrics_key,
                                   plots_key=plots_key,
                                   num_bootstrap_samples=num_bootstrap_samples,
                                   k_anonymization_count=k_anonymization_count,
                                   serialize=serialize))
Example #13
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if there are metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = eval_shared_model
  if eval_shared_models:
    if not isinstance(eval_shared_model, dict):
      eval_shared_models = {'': eval_shared_model}
    # To maintain consistency between settings where single models are used,
    # always use '' as the model name regardless of whether a name is passed.
    if len(eval_shared_models) == 1:
      eval_shared_models = {'': list(eval_shared_models.values())[0]}

  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsAndPlots(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key))
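
An illustrative multi-model call; 'candidate' and 'baseline' are assumed model names, and eval_config plus the shared models are placeholders built elsewhere:

# Sketch only: with more than one entry the given model names are kept;
# a single entry is renamed to '' as the comment in the body explains.
multi_model_evaluator = MetricsAndPlotsEvaluator(
    eval_config=eval_config,  # placeholder tfma.EvalConfig
    eval_shared_model={
        'candidate': candidate_shared_model,
        'baseline': baseline_shared_model,
    })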
Example #14
def AnalysisTableEvaluator(  # pylint: disable=invalid-name
    key: str = constants.ANALYSIS_KEY,
    run_after: str = extractor.LAST_EXTRACTOR_STAGE_NAME,
    include: Optional[Union[Iterable[str], Dict[str, Any]]] = None,
    exclude: Optional[Union[Iterable[str],
                            Dict[str, Any]]] = None) -> evaluator.Evaluator:
    """Creates an Evaluator for returning Extracts data for analysis.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    include: List or map of keys to include in output. Keys starting with '_'
      are automatically filtered out at write time. If a map of keys is passed
      then the keys and sub-keys that exist in the map will be included in the
      output. An empty dict behaves as a wildcard matching all keys or the value
      itself. Since matching on feature values is not currently supported, an
      empty dict must be used to represent the leaf nodes.
      For example: {'key1': {'key1-subkey': {}}, 'key2': {}}.
    exclude: List or map of keys to exclude from output. If a map of keys is
      passed then the keys and sub-keys that exist in the map will be excluded
      from the output. An empty dict behaves as a wildcard matching all keys or
      the value itself. Since matching on feature values is not currently
      supported, an empty dict must be used to represent the leaf nodes.
      For example: {'key1': {'key1-subkey': {}}, 'key2': {}}.

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.

  Raises:
    ValueError: If both include and exclude are used.
  """
    # pylint: disable=no-value-for-parameter
    return evaluator.Evaluator(stage_name='EvaluateExtracts',
                               run_after=run_after,
                               ptransform=EvaluateExtracts(key=key,
                                                           include=include,
                                                           exclude=exclude))