def PredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
  """Builds the extractor stage that runs serving models to get predictions.

  The wrapped PTransform loads and runs the serving saved_model(s) on every
  extract. Each output is a copy of the incoming extracts with the predictions
  added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
  tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
  tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
  models the predictions are stored in a dict keyed by model name.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation).
    desired_batch_size: Optional batch size.

  Returns:
    Extractor for extracting predictions.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}

  # pylint: disable=no-value-for-parameter
  predict_ptransform = _ExtractPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      desired_batch_size=desired_batch_size)
  return extractor.Extractor(
      stage_name=PREDICT_EXTRACTOR_STAGE_NAME, ptransform=predict_ptransform)
def TFLitePredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]],
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
  """Builds the extractor stage that runs tflite models to get predictions.

  The wrapped PTransform loads and interprets the tflite flatbuffer on every
  extract. Each output is a copy of the incoming extracts with the predictions
  added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
  tfma.FEATURES_KEY. With multiple models the predictions are stored in a dict
  keyed by model name.

  NOTE(review): the annotation describes the multi-model form as a dict keyed
  by model name, while this function iterates the normalized result as a
  sequence of models - confirm what verify_and_update_eval_shared_models
  accepts.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or dict of shared
      models keyed by model name (multi-model evaluation).
    desired_batch_size: Optional batch size.

  Returns:
    Extractor for extracting predictions.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}

  # pylint: disable=no-value-for-parameter
  tflite_ptransform = _ExtractTFLitePredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      desired_batch_size=desired_batch_size)
  return extractor.Extractor(
      stage_name=TFLITE_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=tflite_ptransform)
def TransformedFeaturesExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the extractor stage that computes transformed features.

  The wrapped PTransform loads the saved_model(s) and invokes their
  preprocessing functions on every extract. Each output is a copy of the
  incoming extracts with the preprocessing output stored under
  tfma.TRANSFORMED_FEATURES_KEY.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation).
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. If None, the tensors are matched
      (best effort) against the inputs expected by the signature function.

  Returns:
    Extractor for extracting preprocessed features.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}

  # pylint: disable=no-value-for-parameter
  transform_ptransform = _ExtractTransformedFeatures(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=_TRANSFORMED_FEATURES_EXTRACTOR_STAGE_NAME,
      ptransform=transform_ptransform)
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the extractor stage that runs predictions over a batch.

  The wrapped PTransform loads and runs the serving saved_model(s) on every
  extract. Each output is a copy of the incoming extracts with the predictions
  added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
  tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
  tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
  models the predictions are stored in a dict keyed by model name.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation).
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. If None, the raw examples are fed to
      the model.

  Returns:
    Extractor for extracting predictions.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}

  # pylint: disable=no-value-for-parameter
  batched_ptransform = _ExtractBatchedPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=batched_ptransform)
def MetricsAndPlotsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME
) -> evaluator.Evaluator:
  """Builds the evaluator stage that computes metrics and plots.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for evaluating metrics and plots. The output is stored under
    the given metrics_key and plots_key.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # Index the models by name when there are any; otherwise forward the falsy
  # result unchanged so the PTransform sees that no models were supplied.
  models_by_name = ({model.model_name: model for model in shared_models}
                    if shared_models else shared_models)

  # pylint: disable=no-value-for-parameter
  metrics_ptransform = _EvaluateMetricsAndPlots(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      metrics_key=metrics_key,
      plots_key=plots_key)
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=metrics_ptransform)
def PredictionsExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the extractor stage that produces predictions over a batch.

  The extractor operates in one of two modes:

  1) One or more EvalSharedModels are provided

     The PTransform loads and runs the serving saved_model(s) on every extract.
     Each output is a copy of the incoming extracts with the predictions added
     under tfma.PREDICTIONS_KEY. Model inputs are looked up under
     tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
     tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
     models the predictions are stored in a dict keyed by model name.

  2) No EvalSharedModels are provided

     The PTransform uses the config's ModelSpec.prediction_key(s) to look up
     prediction values that are stored as features under tfma.FEATURES_KEY in
     the extracts. Those values are then added to the extracts under
     tfma.PREDICTIONS_KEY.

  A prediction_key in the ModelSpecs therefore serves two use cases:
    (a) as a key into the dict of predictions output (mode 1)
    (b) as the key for a pre-computed prediction stored as a feature (mode 2)

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation) or None (predictions obtained from
      features).
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. The model's signature will be invoked
      with those tensors (matched by names). If None, an attempt will be made
      to create an adapter based on the model's input signature; otherwise the
      model will be invoked with raw examples (assuming a signature of a single
      1-D string tensor).

  Returns:
    Extractor for extracting predictions.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # Index the models by name when there are any; otherwise forward the falsy
  # result unchanged (mode 2 above).
  models_by_name = ({model.model_name: model for model in shared_models}
                    if shared_models else shared_models)

  # pylint: disable=no-value-for-parameter
  predictions_ptransform = _ExtractPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=_PREDICTIONS_EXTRACTOR_STAGE_NAME,
      ptransform=predictions_ptransform)
def _model_types(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels]
) -> Optional[Set[Text]]:
  """Collects the distinct model types of the given EvalSharedModels.

  Args:
    eval_shared_model: Shared model, list of shared models, or None.

  Returns:
    The set of `model_type` values of the normalized shared models, or None
    when normalization yields no models.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if shared_models:
    return {model.model_type for model in shared_models}
  return None
def _update_eval_config_with_defaults(
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels]
) -> config.EvalConfig:
  """Applies default values to the eval config.

  Args:
    eval_config: Eval config to update.
    eval_shared_model: Shared model, list of shared models, or None. Only the
      number of normalized models is used here.

  Returns:
    The result of config.update_eval_config_with_defaults, called with
    maybe_add_baseline truthy only when exactly two shared models are present.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  if shared_models:
    maybe_add_baseline = len(shared_models) == 2
  else:
    # Forward the falsy normalization result itself (rather than False), which
    # is what the short-circuiting `and` expression would produce.
    maybe_add_baseline = shared_models
  return config.update_eval_config_with_defaults(
      eval_config, maybe_add_baseline=maybe_add_baseline)
def MetricsPlotsAndValidationsEvaluator(  # pylint: disable=invalid-name
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    attributions_key: Text = constants.ATTRIBUTIONS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
    schema: Optional[schema_pb2.Schema] = None,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None
) -> evaluator.Evaluator:
  """Builds the evaluator stage for metrics, plots and validations.

  Args:
    eval_config: Eval config.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    attributions_key: Name to use for attributions key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).
    schema: A schema to use for customizing metrics and plots.
    random_seed_for_testing: Seed to use for unit testing.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. The model's signature will be invoked
      with those tensors (matched by names). If None, an attempt will be made
      to create an adapter based on the model's input signature; otherwise the
      model will be invoked with raw examples (assuming a signature of a single
      1-D string tensor).

  Returns:
    Evaluator for evaluating metrics and plots. The output is stored under
    the given metrics_key and plots_key.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # Index the models by name when there are any; otherwise forward the falsy
  # result unchanged so the PTransform sees that no models were supplied.
  models_by_name = ({model.model_name: model for model in shared_models}
                    if shared_models else shared_models)

  # pylint: disable=no-value-for-parameter
  evaluation_ptransform = _EvaluateMetricsPlotsAndValidations(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      metrics_key=metrics_key,
      plots_key=plots_key,
      attributions_key=attributions_key,
      schema=schema,
      random_seed_for_testing=random_seed_for_testing,
      tensor_adapter_config=tensor_adapter_config)
  return evaluator.Evaluator(
      stage_name='EvaluateMetricsAndPlots',
      run_after=run_after,
      ptransform=evaluation_ptransform)
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the batched-predictions extractor stage.

  Normalizes the given shared model(s), indexes them by model name and wraps
  an _ExtractBatchedPredictions PTransform in an Extractor whose stage name is
  BATCHED_PREDICT_EXTRACTOR_STAGE_NAME.

  Args:
    eval_config: Eval config, forwarded to the PTransform.
    eval_shared_model: Shared model or multiple shared models.
    tensor_adapter_config: Optional tensor adapter config, forwarded to the
      PTransform.

  Returns:
    Extractor wrapping the batched predictions PTransform.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}
  batched_ptransform = _ExtractBatchedPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=batched_ptransform)
def PredictExtractor(
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True,
    eval_config: Optional[config.EvalConfig] = None
) -> extractor.Extractor:
  """Builds the Extractor stage for TFMAPredict.

  The wrapped PTransform loads and runs the eval_saved_model on every example.
  Each output is a copy of the input Extracts with one additional extract of
  type FeaturesPredictionsLabels stored under
  tfma.FEATURES_PREDICTIONS_LABELS_KEY. When eval_config is not None, the
  features, predictions, and labels are instead stored separately under
  tfma.FEATURES_KEY, tfma.PREDICTIONS_KEY, and tfma.LABELS_KEY respectively.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation).
    desired_batch_size: Optional batch size for batching in Aggregate.
    materialize: True to call the FeatureExtractor to add MaterializedColumn
      entries for the features, predictions, and labels.
    eval_config: Eval config.

  Returns:
    Extractor for extracting features, predictions, labels, and other tensors
    during predict.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}

  # pylint: disable=no-value-for-parameter
  predict_ptransform = _TFMAPredict(
      eval_shared_models=models_by_name,
      desired_batch_size=desired_batch_size,
      materialize=materialize,
      eval_config=eval_config)
  return extractor.Extractor(
      stage_name=PREDICT_EXTRACTOR_STAGE_NAME, ptransform=predict_ptransform)
def _make_sklearn_predict_extractor(
    eval_shared_model: tfma.EvalSharedModel,
) -> extractor.Extractor:
  """Builds the extractor stage that runs a scikit-learn model for predictions.

  The wrapped PTransform loads and runs the serving pickle on every extract.
  Each output is a copy of the incoming extracts with the predictions added
  under tfma.PREDICTIONS_KEY. Model inputs are looked up under
  tfma.FEATURES_KEY.

  Args:
    eval_shared_model: Shared model (single-model evaluation).

  Returns:
    Extractor for extracting predictions.
  """
  shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform expects the models indexed by their model name.
  models_by_name = {model.model_name: model for model in shared_models}
  sklearn_ptransform = _ExtractPredictions(  # pylint: disable=no-value-for-parameter
      eval_shared_models=models_by_name)
  return extractor.Extractor(
      stage_name=_PREDICT_EXTRACTOR_STAGE_NAME, ptransform=sklearn_ptransform)
def run_model_analysis(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config.EvalConfig] = None,
    data_location: Text = '',
    file_format: Text = 'tfrecords',
    output_path: Optional[Text] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None
) -> Union[EvalResult, EvalResults]:
  """Runs TensorFlow model analysis.

  It runs a Beam pipeline to compute the slicing metrics exported in TensorFlow
  Eval SavedModel and returns the results.

  This is a simplified API for users who want to quickly get something running
  locally. Users who wish to create their own Beam pipelines can use the
  Evaluate PTransform instead.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if needed by
      default extractors, evaluators, or writers.
    eval_config: Eval config. If None, a config is built from the shared
      model(s) and the deprecated slice_spec / write_config /
      compute_confidence_intervals / k_anonymization_count arguments.
    data_location: The location of the data files.
    file_format: The file format of the data, can be either 'text' or
      'tfrecords' for now. By default, 'tfrecords' will be used.
    output_path: The directory to output metrics and results to. If None, we use
      a temporary directory.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    pipeline_options: Optional arguments to run the Pipeline, for instance
      whether to run directly.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    An EvalResult that can be used with the TFMA visualization functions when
    the eval config has at most one model spec; otherwise an EvalResults
    holding one result per model spec.

  Raises:
    ValueError: If the file_format is unknown to us.
  """
  _assert_tensorflow_version()

  if output_path is None:
    output_path = tempfile.mkdtemp()
  if not tf.io.gfile.exists(output_path):
    tf.io.gfile.makedirs(output_path)

  if eval_config is None:
    model_specs = []
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # eval_shared_model is optional, and other callers of the normalization
    # helper treat its result as possibly None/empty, so guard the iteration
    # instead of failing with a TypeError when no model was supplied.
    for shared_model in eval_shared_models or []:
      example_weight_key = shared_model.example_weight_key
      example_weight_keys = {}
      # A dict-valued weight key belongs in the plural field of the ModelSpec;
      # the singular field is then left empty.
      if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
      model_specs.append(
          config.ModelSpec(
              name=shared_model.model_name,
              example_weight_key=example_weight_key,
              example_weight_keys=example_weight_keys))
    slicing_specs = None
    if slice_spec:
      slicing_specs = [s.to_proto() for s in slice_spec]
    options = config.Options()
    options.compute_confidence_intervals.value = compute_confidence_intervals
    options.k_anonymization_count.value = k_anonymization_count
    if not write_config:
      options.disabled_outputs.values.append(_EVAL_CONFIG_FILE)
    eval_config = config.EvalConfig(
        model_specs=model_specs, slicing_specs=slicing_specs, options=options)
  else:
    eval_config = _update_eval_config_with_defaults(eval_config,
                                                    eval_shared_model)

  with beam.Pipeline(options=pipeline_options) as p:
    if file_format == 'tfrecords':
      data = p | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
          file_pattern=data_location,
          compression_type=beam.io.filesystem.CompressionTypes.AUTO)
    elif file_format == 'text':
      data = p | 'ReadFromText' >> beam.io.textio.ReadFromText(data_location)
    else:
      raise ValueError('unknown file_format: {}'.format(file_format))

    # pylint: disable=no-value-for-parameter
    _ = (
        data
        | 'ExtractEvaluateAndWriteResults' >> ExtractEvaluateAndWriteResults(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            display_only_data_location=data_location,
            display_only_file_format=file_format,
            output_path=output_path,
            extractors=extractors,
            evaluators=evaluators,
            writers=writers,
            desired_batch_size=desired_batch_size,
            random_seed_for_testing=random_seed_for_testing))
    # pylint: enable=no-value-for-parameter

  # Results are loaded only after the pipeline context has exited.
  if len(eval_config.model_specs) <= 1:
    return load_eval_result(output_path)
  else:
    results = []
    for spec in eval_config.model_specs:
      results.append(load_eval_result(output_path, model_name=spec.name))
    return EvalResults(results, constants.MODEL_CENTRIC_MODE)
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config.EvalConfig] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[Text] = None,
    display_only_data_location: Optional[Text] = None,
    display_only_file_format: Optional[Text] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location, eval_config=eval_config)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if needed by
      default extractors, evaluators, or writers and for display purposes of the
      model path.
    eval_config: Eval config. If None, a config is built from the shared
      model(s) and the deprecated slice_spec / write_config /
      compute_confidence_intervals / k_anonymization_count arguments.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    display_only_file_format: Optional format of the examples. This is used only
      for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.

  Returns:
    PDone.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)

  if eval_config is None:
    model_specs = []
    # eval_shared_model is optional; guard the iteration the same way the
    # model-location loop below does, instead of failing with a TypeError when
    # no model was supplied.
    for shared_model in eval_shared_models or []:
      example_weight_key = shared_model.example_weight_key
      example_weight_keys = {}
      # A dict-valued weight key belongs in the plural field of the ModelSpec;
      # the singular field is then left empty.
      if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
      model_specs.append(
          config.ModelSpec(
              name=shared_model.model_name,
              example_weight_key=example_weight_key,
              example_weight_keys=example_weight_keys))
    slicing_specs = None
    if slice_spec:
      slicing_specs = [s.to_proto() for s in slice_spec]
    options = config.Options()
    options.compute_confidence_intervals.value = compute_confidence_intervals
    options.k_anonymization_count.value = k_anonymization_count
    if not write_config:
      options.disabled_outputs.values.append(_EVAL_CONFIG_FILE)
    eval_config = config.EvalConfig(
        model_specs=model_specs, slicing_specs=slicing_specs, options=options)
  else:
    eval_config = _update_eval_config_with_defaults(eval_config,
                                                    eval_shared_model)

  config.verify_eval_config(eval_config)

  if not extractors:
    extractors = default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        materialize=False,
        desired_batch_size=desired_batch_size)

  if not evaluators:
    evaluators = default_evaluators(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        random_seed_for_testing=random_seed_for_testing)

  # Each evaluator is checked against the configured extractors before the
  # pipeline is assembled.
  for v in evaluators:
    evaluator.verify_evaluator(v, extractors)

  if not writers:
    writers = default_writers(
        output_path=output_path, eval_shared_model=eval_shared_model)

  # pylint: disable=no-value-for-parameter
  _ = (
      examples
      | 'InputsToExtracts' >> InputsToExtracts()
      | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
          extractors=extractors, evaluators=evaluators)
      | 'WriteResults' >> WriteResults(writers=writers))

  if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs.values:
    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
      data_location = display_only_data_location
    file_format = '<unknown>'
    if display_only_file_format is not None:
      file_format = display_only_file_format
    model_locations = {}
    # With no shared models a single placeholder entry keyed by '' is written.
    for v in (eval_shared_models or [None]):
      k = '' if v is None else v.model_name
      model_locations[k] = ('<unknown>' if v is None or v.model_path is None
                            else v.model_path)
    _ = (
        examples.pipeline
        | WriteEvalConfig(eval_config, output_path, data_location, file_format,
                          model_locations))
  # pylint: enable=no-value-for-parameter

  return beam.pvalue.PDone(examples.pipeline)
def default_extractors(  # pylint: disable=invalid-name
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: config.EvalConfig = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True) -> List[extractor.Extractor]:
  """Returns the default extractors for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation). Required unless the predictions are
      provided alongside of the features (i.e. model-agnostic evaluations).
    eval_config: Eval config.
    slice_spec: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    materialize: True to have extractors create materialized output.

  Returns:
    The list of extractors selected for the given models and config; the last
    entry is always a SliceKeyExtractor.

  Raises:
    NotImplementedError: If eval_config contains mixed serving and eval models.
  """
  if eval_config is not None:
    eval_config = _update_eval_config_with_defaults(eval_config,
                                                    eval_shared_model)
    # When a config is given, its slicing specs replace any slice_spec argument.
    slice_spec = [
        slicer.SingleSliceSpec(spec=slicing_spec)
        for slicing_spec in eval_config.slicing_specs
    ]

  if _is_legacy_eval(eval_shared_model, eval_config):
    # Backwards compatibility for previous add_metrics_callbacks implementation.
    return [
        predict_extractor.PredictExtractor(
            eval_shared_model, desired_batch_size, materialize=materialize),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]

  if not eval_shared_model:
    # No model: only the inputs and the slice keys are extracted.
    return [
        input_extractor.InputExtractor(eval_config=eval_config),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]

  model_types = _model_types(eval_shared_model)
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)

  if not model_types.issubset(constants.VALID_MODEL_TYPES):
    raise NotImplementedError(
        'model type must be one of: {}. evalconfig={}'.format(
            str(constants.VALID_MODEL_TYPES), eval_config))

  if model_types == {constants.TF_LITE}:
    return [
        input_extractor.InputExtractor(eval_config=eval_config),
        tflite_predict_extractor.TFLitePredictExtractor(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            desired_batch_size=desired_batch_size),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]

  if constants.TF_LITE in model_types:
    raise NotImplementedError(
        'support for mixing tf_lite and non-tf_lite models is not '
        'implemented: eval_config={}'.format(eval_config))

  # Estimator-only evaluation where every model carries the eval tag.
  if (eval_config and model_types == {constants.TF_ESTIMATOR} and
      all(eval_constants.EVAL_TAG in m.model_loader.tags
          for m in eval_shared_models)):
    return [
        predict_extractor.PredictExtractor(
            eval_shared_model,
            desired_batch_size,
            materialize=materialize,
            eval_config=eval_config),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]

  # Reaching this point with any eval-tagged model alongside an estimator type
  # means eval and non-eval models are mixed.
  if (eval_config and constants.TF_ESTIMATOR in model_types and
      any(eval_constants.EVAL_TAG in m.model_loader.tags
          for m in eval_shared_models)):
    raise NotImplementedError(
        'support for mixing eval and non-eval estimator models is not '
        'implemented: eval_config={}'.format(eval_config))

  return [
      input_extractor.InputExtractor(eval_config=eval_config),
      predict_extractor_v2.PredictExtractor(
          eval_config=eval_config,
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size),
      slice_key_extractor.SliceKeyExtractor(
          slice_spec, materialize=materialize)
  ]