def testVerifyEvaluatorRaisesValueError(self):
  extractors = [
      extractor.Extractor(stage_name='ExtractorThatExists', ptransform=None)
  ]
  evaluator.verify_evaluator(
      evaluator.Evaluator(
          stage_name='EvaluatorWithoutError',
          run_after='ExtractorThatExists',
          ptransform=None), extractors)
  with self.assertRaises(ValueError):
    evaluator.verify_evaluator(
        evaluator.Evaluator(
            stage_name='EvaluatorWithError',
            run_after='ExtractorThatDoesNotExist',
            ptransform=None), extractors)


def QueryBasedMetricsEvaluator(  # pylint: disable=invalid-name
    query_id: Text,
    prediction_key: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating query-based metrics.

  Args:
    query_id: Key of query ID column in the features dictionary.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    combine_fns: List of query-based metrics combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for computing query-based metrics. The output will be stored
      under the 'metrics' key.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateQueryBasedMetrics',
      run_after=run_after,
      ptransform=EvaluateQueryBasedMetrics(
          query_id=query_id,
          prediction_key=prediction_key,
          combine_fns=combine_fns,
          metrics_key=metrics_key))


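# A minimal usage sketch (added for illustration, not from the source above):
# wiring QueryBasedMetricsEvaluator into tfma.run_model_analysis. The
# make_ndcg_combine_fn factory is hypothetical; substitute whatever query-based
# beam.CombineFn your TFMA version provides. The tfma.* entry points are
# standard but their signatures vary across releases, and the paths are
# placeholders.
import tensorflow_model_analysis as tfma

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model')
query_evaluator = QueryBasedMetricsEvaluator(
    query_id='query_id',  # feature column holding the query ID
    prediction_key='',  # Estimator returns a bare predictions Tensor
    combine_fns=[make_ndcg_combine_fn()])  # hypothetical combine_fn factory
eval_result = tfma.run_model_analysis(
    eval_shared_model=eval_shared_model,
    data_location='/path/to/examples.tfrecord',
    evaluators=[query_evaluator])

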
def AnalysisTableEvaluator(  # pylint: disable=invalid-name
    key=constants.ANALYSIS_KEY,
    run_after=extractor.LAST_EXTRACTOR_STAGE_NAME,
    include=None,
    exclude=None):
  """Creates an Evaluator for returning Extracts data for analysis.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    include: Keys of extracts to include in output. Keys starting with '_' are
      automatically filtered out at write time.
    exclude: Keys of extracts to exclude from output.

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.

  Raises:
    ValueError: If both include and exclude are used.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateExtracts',
      run_after=run_after,
      ptransform=EvaluateExtracts(key=key, include=include, exclude=exclude))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_shared_model,
    desired_batch_size=None,
    metrics_key=constants.METRICS_KEY,
    plots_key=constants.PLOTS_KEY,
    run_after=slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    num_bootstrap_samples=1,
):
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=EvaluateMetricsAndPlots(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          metrics_key=metrics_key,
          plots_key=plots_key,
          num_bootstrap_samples=num_bootstrap_samples))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_models: List[types.EvalSharedModel],
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_models: Shared model instances.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsAndPlots(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if eval_shared_models:
    eval_shared_models = {m.model_name: m for m in eval_shared_models}
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsAndPlots(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_shared_model,
    desired_batch_size=None,
    metrics_key=constants.METRICS_KEY,
    plots_key=constants.PLOTS_KEY,
    run_after=slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME):
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=EvaluateMetricsAndPlots(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          metrics_key=metrics_key,
          plots_key=plots_key))


def MetricsPlotsAndValidationsEvaluator(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    attributions_key: Text = constants.ATTRIBUTIONS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    schema: Optional[schema_pb2.Schema] = None,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    attributions_key: Name to use for attributions key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    schema: A schema to use for customizing metrics and plots.
    random_seed_for_testing: Seed to use for unit testing.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. The model's signature will be invoked
      with those tensors (matched by names). If None, an attempt will be made
      to create an adapter based on the model's input signature; otherwise the
      model will be invoked with raw examples (assuming a signature of a single
      1-D string tensor).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if eval_shared_models:
    eval_shared_models = {m.model_name: m for m in eval_shared_models}
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsPlotsAndValidations(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key,
          attributions_key=attributions_key,
          schema=schema,
          random_seed_for_testing=random_seed_for_testing,
          tensor_adapter_config=tensor_adapter_config))


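# A hedged end-to-end sketch (not part of the source): constructing the
# evaluator explicitly and handing it to tfma.run_model_analysis. The
# EvalConfig fields and keyword names follow recent TFMA releases but may
# differ in yours; label_key and the '/path/to/...' values are placeholders.
import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[tfma.SlicingSpec()])  # empty spec = overall (unsliced)
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/saved_model', eval_config=eval_config)
eval_result = tfma.run_model_analysis(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model,
    data_location='/path/to/examples.tfrecord',
    evaluators=[
        MetricsPlotsAndValidationsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
    ])

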
def AnalysisTableEvaluator(  # pylint: disable=invalid-name
    key=constants.ANALYSIS_KEY,
    run_after=extractor.LAST_EXTRACTOR_STAGE_NAME):
  """Creates an Evaluator for returning Extracts data for analysis.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateExtracts',
      run_after=run_after,
      ptransform=EvaluateExtracts(key=key))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    serialize=False,
    random_seed_for_testing: Optional[int] = None) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    compute_confidence_intervals: Whether or not to compute confidence
      intervals.
    min_slice_size: If the number of examples in a specific slice is less than
      min_slice_size, then an error will be returned for that slice. This is
      useful to ensure privacy by not displaying aggregated data for slices
      with a small number of examples.
    serialize: If true, serialize the metrics to protos as part of the
      evaluation as well.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=EvaluateMetricsAndPlots(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          metrics_key=metrics_key,
          plots_key=plots_key,
          compute_confidence_intervals=compute_confidence_intervals,
          min_slice_size=min_slice_size,
          serialize=serialize,
          random_seed_for_testing=random_seed_for_testing))


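# A short sketch (an assumption, not from the source): enabling bootstrap
# confidence intervals and a privacy floor on slice size with the evaluator
# above. tfma.default_eval_shared_model is the standard helper; the path is a
# placeholder.
import tensorflow_model_analysis as tfma

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model')
evaluator_with_ci = MetricsAndPlotsEvaluator(
    eval_shared_model=eval_shared_model,
    compute_confidence_intervals=True,  # bootstrap CIs for each metric
    min_slice_size=50)  # error out slices with fewer than 50 examples

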
def MetricsPlotsAndValidationsEvaluator(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: str = constants.METRICS_KEY,
    plots_key: str = constants.PLOTS_KEY,
    attributions_key: str = constants.ATTRIBUTIONS_KEY,
    run_after: str = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    schema: Optional[schema_pb2.Schema] = None,
    random_seed_for_testing: Optional[int] = None) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    attributions_key: Name to use for attributions key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    schema: A schema to use for customizing metrics and plots.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if eval_shared_models:
    eval_shared_models = {m.model_name: m for m in eval_shared_models}
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsPlotsAndValidations(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key,
          attributions_key=attributions_key,
          schema=schema,
          random_seed_for_testing=random_seed_for_testing))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    num_bootstrap_samples: Optional[int] = 1,
    k_anonymization_count: int = 1,
    serialize=False) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.
    k_anonymization_count: If the number of examples in a specific slice is
      less than k_anonymization_count, then an error will be returned for that
      slice. This is useful to ensure privacy by not displaying aggregated
      data for slices with a small number of examples.
    serialize: If true, serialize the metrics to protos as part of the
      evaluation as well.

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=EvaluateMetricsAndPlots(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          metrics_key=metrics_key,
          plots_key=plots_key,
          num_bootstrap_samples=num_bootstrap_samples,
          k_anonymization_count=k_anonymization_count,
          serialize=serialize))


def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if there are metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output will be stored under
    'metrics' and 'plots' keys.
  """
  eval_shared_models = eval_shared_model
  if eval_shared_models:
    if not isinstance(eval_shared_model, dict):
      eval_shared_models = {'': eval_shared_model}
    # To maintain consistency between settings where single models are used,
    # always use '' as the model name regardless of whether a name is passed.
    if len(eval_shared_models) == 1:
      eval_shared_models = {'': list(eval_shared_models.values())[0]}
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=_EvaluateMetricsAndPlots(
          eval_config=eval_config,
          eval_shared_models=eval_shared_models,
          metrics_key=metrics_key,
          plots_key=plots_key))


def AnalysisTableEvaluator(  # pylint: disable=invalid-name
    key: str = constants.ANALYSIS_KEY,
    run_after: str = extractor.LAST_EXTRACTOR_STAGE_NAME,
    include: Optional[Union[Iterable[str], Dict[str, Any]]] = None,
    exclude: Optional[Union[Iterable[str],
                            Dict[str, Any]]] = None) -> evaluator.Evaluator:
  """Creates an Evaluator for returning Extracts data for analysis.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    key: Name to use for key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    include: List or map of keys to include in output. Keys starting with '_'
      are automatically filtered out at write time. If a map of keys is passed
      then the keys and sub-keys that exist in the map will be included in the
      output. An empty dict behaves as a wildcard matching all keys or the
      value itself. Since matching on feature values is not currently
      supported, an empty dict must be used to represent the leaf nodes. For
      example: {'key1': {'key1-subkey': {}}, 'key2': {}}.
    exclude: List or map of keys to exclude from output. If a map of keys is
      passed then the keys and sub-keys that exist in the map will be excluded
      from the output. An empty dict behaves as a wildcard matching all keys
      or the value itself. Since matching on feature values is not currently
      supported, an empty dict must be used to represent the leaf nodes. For
      example: {'key1': {'key1-subkey': {}}, 'key2': {}}.

  Returns:
    Evaluator for collecting analysis data. The output is stored under the key
    'analysis'.

  Raises:
    ValueError: If both include and exclude are used.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateExtracts',
      run_after=run_after,
      ptransform=EvaluateExtracts(key=key, include=include, exclude=exclude))


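# A short sketch (assumption): how the include/exclude maps above are spelled
# in practice. Empty dicts mark leaf nodes; the extract keys shown ('features',
# 'labels') are illustrative, not required.
import tensorflow_model_analysis as tfma

# Keep only two features plus the whole labels extract.
analysis_evaluator = AnalysisTableEvaluator(
    include={
        'features': {'age': {}, 'gender': {}},
        'labels': {},
    })
# Or keep everything except raw inputs (also the default behavior when both
# include and exclude are None).
analysis_evaluator = AnalysisTableEvaluator(exclude=[tfma.INPUT_KEY])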