Exemplo n.º 1
0
def SliceKeyExtractor(
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    eval_config: Optional[config.EvalConfig] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
  """Builds the extractor that attaches slice keys to each extract.

  Incoming Extracts are expected to carry features under tfma.FEATURES_KEY
  and, optionally, under tfma.TRANSFORMED_FEATURES.

  The wrapped PTransform emits a copy of each incoming extract with the list
  of SliceKeyType values added under tfma.SLICE_KEY_TYPES_KEY. When materialize
  is True, a materialized form of the slice keys is also stored under
  tfma.MATERIALZED_SLICE_KEYS_KEY.

  Args:
    slice_spec: Deprecated (use EvalConfig). Must not be combined with
      eval_config.
    eval_config: Optional EvalConfig whose slicing_specs describe the slices
      to compute. When no slicing specs result, the overall slice is used.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.

  Raises:
    ValueError: If a non-empty slice_spec is supplied together with an
      eval_config.
  """
  if eval_config:
    if slice_spec:
      raise ValueError('slice_spec is deprecated, only use eval_config')
    # The config is the source of truth: wrap each slicing spec it declares.
    resolved_specs = [
        slicer.SingleSliceSpec(spec=slicing_spec)
        for slicing_spec in eval_config.slicing_specs
    ]
  else:
    resolved_specs = slice_spec

  # Nothing requested (None or empty): fall back to the single overall slice.
  resolved_specs = resolved_specs or [slicer.SingleSliceSpec()]

  slice_keys_ptransform = ExtractSliceKeys(resolved_specs, eval_config,
                                           materialize)
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=slice_keys_ptransform)
def TransformedFeaturesExtractor(
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the extractor that computes transformed features.

    The wrapped PTransform loads the saved_model(s) and runs their
    preprocessing functions on every extract. Each output is a copy of the
    incoming extract with the preprocessing results stored under
    tfma.TRANSFORMED_FEATURES_KEY.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      tensor_adapter_config: Tensor adapter config describing how tensors are
        obtained from the Arrow RecordBatch. If None, tensors are matched
        (best effort) against the inputs expected by the signature function.

    Returns:
      Extractor for extracting preprocessed features.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # The PTransform looks models up by name, so index them accordingly.
    models_by_name = {model.model_name: model for model in verified_models}

    # pylint: disable=no-value-for-parameter
    transform_ptransform = _ExtractTransformedFeatures(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=_TRANSFORMED_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=transform_ptransform)
def TransformedFeaturesExtractor(
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
) -> extractor.Extractor:
    """Builds the extractor that computes transformed features.

    The wrapped PTransform loads the saved_model(s) and runs their
    preprocessing functions on every extract. Each output is a copy of the
    incoming extract with the preprocessing results stored under
    tfma.TRANSFORMED_FEATURES_KEY.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).

    Returns:
      Extractor for extracting preprocessed features.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # The PTransform looks models up by name, so index them accordingly.
    models_by_name = {model.model_name: model for model in verified_models}

    # pylint: disable=no-value-for-parameter
    transform_ptransform = _ExtractTransformedFeatures(
        eval_config=eval_config, eval_shared_models=models_by_name)
    return extractor.Extractor(
        stage_name=_TRANSFORMED_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=transform_ptransform)
Exemplo n.º 4
0
def PredictExtractor(eval_shared_model: types.EvalSharedModel,
                     desired_batch_size: Optional[int] = None,
                     materialize: Optional[bool] = True) -> extractor.Extractor:
  """Builds the Extractor that wraps TFMAPredict.

  The wrapped PTransform loads the eval_saved_model and runs it on every
  example. Each output is a copy of the incoming Extracts with one more
  extract, of type FeaturesPredictionsLabels, stored under
  tfma.FEATURES_PREDICTIONS_LABELS_KEY.

  Args:
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    materialize: True to call the FeatureExtractor to add MaterializedColumn
      entries for the features, predictions, and labels.

  Returns:
    Extractor for extracting features, predictions, labels, and other tensors
    during predict.
  """
  # pylint: disable=no-value-for-parameter
  predict_ptransform = _TFMAPredict(
      eval_shared_model=eval_shared_model,
      desired_batch_size=desired_batch_size,
      materialize=materialize)
  return extractor.Extractor(
      stage_name=PREDICT_EXTRACTOR_STAGE_NAME, ptransform=predict_ptransform)
Exemplo n.º 5
0
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the extractor that runs predictions over batches.

  The wrapped PTransform loads the serving saved_model(s) and runs them on
  every extract. Each output is a copy of the incoming extract with the
  predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
  under tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
  tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
  models, the predictions are stored in a dict keyed by model name.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation).
    tensor_adapter_config: Tensor adapter config describing how tensors are
      obtained from the Arrow RecordBatch. If None, the raw examples are fed
      to the model.

  Returns:
    Extractor for extracting predictions.
  """
  verified_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform looks models up by name, so index them accordingly.
  models_by_name = {model.model_name: model for model in verified_models}

  # pylint: disable=no-value-for-parameter
  predictions_ptransform = _ExtractBatchedPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=predictions_ptransform)
def AutoSliceKeyExtractor(  # pylint: disable=invalid-name
        statistics: statistics_pb2.DatasetFeatureStatisticsList,
        max_cross_size: int = 2,
        features_to_ignore: Optional[Set[Text]] = None,
        materialize: bool = True) -> extractor.Extractor:
    """Builds the extractor that derives slice keys automatically.

    Incoming Extracts must hold a FeaturesPredictionsLabels extract under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY; this is typically produced by the
    PredictExtractor.

    The wrapped PTransform emits a copy of each incoming extract with the
    list of SliceKeyType values added under tfma.SLICE_KEY_TYPES_KEY. When
    materialize is True, a materialized form of the slice keys is also stored
    under tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      statistics: Data statistics.
      max_cross_size: Maximum size feature crosses to consider.
      features_to_ignore: Set of features to ignore for slicing.
      materialize: True to add MaterializedColumn entries for the slice keys.

    Returns:
      Extractor for slice keys.
    """
    # The slice specs are derived from the statistics up front; the statistics
    # are also handed to the PTransform itself.
    derived_specs = slice_spec_from_stats(
        statistics,
        max_cross_size=max_cross_size,
        features_to_ignore=features_to_ignore)
    auto_slice_ptransform = _AutoExtractSliceKeys(derived_specs, statistics,
                                                  materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=auto_slice_ptransform)
def AutoSliceKeyExtractor(
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    materialize: Optional[bool] = True
) -> extractor.Extractor:
  """Builds the extractor that derives slice keys automatically.

  Incoming Extracts must hold a FeaturesPredictionsLabels extract under
  tfma.FEATURES_PREDICTIONS_LABELS_KEY; this is typically produced by the
  PredictExtractor.

  The wrapped PTransform emits a copy of each incoming extract with the list
  of SliceKeyType values added under tfma.SLICE_KEY_TYPES_KEY. When materialize
  is True, a materialized form of the slice keys is also stored under
  tfma.MATERIALZED_SLICE_KEYS_KEY.

  Args:
    statistics: Data statistics.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.
  """
  derived_specs = slice_spec_from_stats(statistics)
  # Reuses the regular slice key PTransform with the derived specs.
  slice_keys_ptransform = slice_key_extractor._ExtractSliceKeys(  # pylint: disable=protected-access
      derived_specs, materialize)
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=slice_keys_ptransform)
def ModelAgnosticExtractor(
    model_agnostic_config: agnostic_predict.ModelAgnosticConfig,
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
  """Builds the Extractor that wraps ModelAgnosticEval.

  The wrapped PTransform creates ModelAgnosticEval and runs it on every
  example. Each output is a copy of the incoming Extracts with one more
  extract, of type FeaturesPredictionsLabels, stored under
  tfma.FEATURES_PREDICTIONS_LABELS_KEY.

  Args:
    model_agnostic_config: The config used to generate the Features,
      Predictions, and Labels dict. This can be done through explicit labeling
      of keys in the input tf.Example.
    desired_batch_size: Optional batch size for batching in Predict.

  Returns:
    Extractor for extracting features, predictions, and labels during predict.

  Raises:
    ValueError: Supplied ModelAgnosticConfig is invalid.
  """
  agnostic_ptransform = ModelAgnosticExtract(
      model_agnostic_config=model_agnostic_config,
      desired_batch_size=desired_batch_size)
  return extractor.Extractor(
      stage_name='ModelAgnosticExtractor', ptransform=agnostic_ptransform)
def PredictExtractor(
        eval_config: config.EvalConfig,
        eval_shared_model: types.MaybeMultipleEvalSharedModels,
        desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor that runs predictions.

    The wrapped PTransform loads the serving saved_model(s) and runs them on
    every extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
    under tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
    tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
    models, the predictions are stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      desired_batch_size: Optional batch size.

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # The PTransform looks models up by name, so index them accordingly.
    models_by_name = {model.model_name: model for model in verified_models}

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
Exemplo n.º 10
0
def InputExtractor(eval_config: config.EvalConfig) -> extractor.Extractor:
  """Builds the extractor for features, labels, and example weights.

  The wrapped PTransform parses the tf.train.Example protos found under
  tfma.INPUT_KEY in the incoming extracts. The parsed features, labels, and
  example weights are added to the extracts under tfma.FEATURES_KEY,
  tfma.LABELS_KEY, and tfma.EXAMPLE_WEIGHTS_KEY. If the eval_config contains a
  prediction_key and the parsed example holds a matching key, predictions are
  extracted as well and stored under tfma.PREDICTIONS_KEY. Extracts that
  already exist are merged with the parsed values; on duplicate keys this
  extractor's values win.

  A prediction_key in an eval_config serves two use cases:
    (1) as a key into the dict of predictions output by the predict extractor
    (2) as the key for a pre-computed prediction stored as a feature.
  The InputExtractor handles case (2). The cases are meant to be exclusive:
  with approach (2) no predict extractor would be configured, and with (1) no
  key matching the predictions would be stored in the features. If a feature
  key nevertheless matches the prediction output key, both paths may execute.
  The value stored here is then replaced by the predict extractor, though it
  is still popped from the features.

  Args:
    eval_config: Eval config.

  Returns:
    Extractor for extracting features, labels, and example weights inputs.
  """
  # pylint: disable=no-value-for-parameter
  inputs_ptransform = _ExtractInputs(eval_config=eval_config)
  return extractor.Extractor(
      stage_name=INPUT_EXTRACTOR_STAGE_NAME, ptransform=inputs_ptransform)
def TFLitePredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]],
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
  """Builds the extractor that runs predictions on tflite models.

  The wrapped PTransform loads the tflite flatbuffer and interprets it on
  every extract. Each output is a copy of the incoming extract with the
  predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
  under tfma.FEATURES_KEY. With multiple models, the predictions are stored in
  a dict keyed by model name.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or dict of shared
      models keyed by model name (multi-model evaluation).
    desired_batch_size: Optional batch size.

  Returns:
    Extractor for extracting predictions.
  """
  verified_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform looks models up by name, so index them accordingly.
  models_by_name = {model.model_name: model for model in verified_models}

  # pylint: disable=no-value-for-parameter
  tflite_ptransform = _ExtractTFLitePredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      desired_batch_size=desired_batch_size)
  return extractor.Extractor(
      stage_name=TFLITE_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=tflite_ptransform)
Exemplo n.º 12
0
def SliceKeyExtractor(
        slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
        materialize: Optional[bool] = True) -> extractor.Extractor:
    """Creates an extractor for extracting slice keys.

    The incoming Extracts must contain a FeaturesPredictionsLabels extract
    keyed by tfma.FEATURES_PREDICTIONS_LABELS_KEY. Typically this will be
    obtained by calling the PredictExtractor.

    The extractor's PTransform yields a copy of the Extracts input with an
    additional extract pointing at the list of SliceKeyType values keyed by
    tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized
    version of the slice keys will be added under the key
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      slice_spec: Optional list of SingleSliceSpec specifying the slices to
        slice the data into. If None or empty, defaults to the overall slice.
      materialize: True to add MaterializedColumn entries for the slice keys.

    Returns:
      Extractor for slice keys.
    """
    # Treat an empty list the same as None. An empty list would otherwise be
    # forwarded as-is, leaving the PTransform with no slice specs at all
    # instead of the documented overall-slice default.
    if not slice_spec:
        slice_spec = [slicer.SingleSliceSpec()]
    return extractor.Extractor(stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
                               ptransform=_ExtractSliceKeys(
                                   slice_spec, materialize))
def TFJSPredictExtractor(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel, Dict[str,
                                                         types.EvalSharedModel]]
) -> extractor.Extractor:
  """Builds the extractor that runs predictions on tfjs models.

  The wrapped PTransform loads the tfjs model and interprets it on every
  extract. Each output is a copy of the incoming extract with the predictions
  added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
  tfma.FEATURES_KEY. With multiple models, the predictions are stored in a
  dict keyed by model name.

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or dict of shared
      models keyed by model name (multi-model evaluation).

  Returns:
    Extractor for extracting predictions.
  """
  verified_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # The PTransform looks models up by name, so index them accordingly.
  models_by_name = {model.model_name: model for model in verified_models}

  # pylint: disable=no-value-for-parameter
  tfjs_ptransform = _ExtractTFJSPredictions(
      eval_config=eval_config, eval_shared_models=models_by_name)
  return extractor.Extractor(
      stage_name=_TFJS_PREDICT_EXTRACTOR_STAGE_NAME,
      ptransform=tfjs_ptransform)
Exemplo n.º 14
0
def FeatureExtractor(additional_extracts=None,
                     excludes=None,
                     extract_source=constants.FEATURES_PREDICTIONS_LABELS_KEY):
    """Builds the Extractor that wraps the _ExtractFeatures PTransform.

    All arguments are forwarded unchanged to _ExtractFeatures; their exact
    semantics are defined there.

    Args:
      additional_extracts: Forwarded as `additional_extracts`.
      excludes: Forwarded as `excludes`.
      extract_source: Forwarded as `source`. Defaults to
        constants.FEATURES_PREDICTIONS_LABELS_KEY.

    Returns:
      Extractor registered under FEATURE_EXTRACTOR_STAGE_NAME.
    """
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(
        additional_extracts=additional_extracts,
        excludes=excludes,
        source=extract_source)
    return extractor.Extractor(
        stage_name=FEATURE_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
def PredictionsExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
  """Builds the extractor that runs predictions over batches.

  The extractor operates in one of two modes:

  1) One or more EvalSharedModels are provided

  The wrapped PTransform loads the serving saved_model(s) and runs them on
  every extract. Each output is a copy of the incoming extract with the
  predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
  under tfma.FEATURES_KEY (keras only) or under tfma.INPUT_KEY (when
  tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
  models, the predictions are stored in a dict keyed by model name.

  2) No EvalSharedModels are provided

  The wrapped PTransform uses the config's ModelSpec.prediction_key(s) to look
  up prediction values stored as features under tfma.FEATURES_KEY in the
  extracts, and adds them to the extracts under tfma.PREDICTIONS_KEY.

  A prediction_key in the ModelSpecs therefore serves two use cases:
    (a) as a key into the dict of predictions output (option 1)
    (b) as the key for a pre-computed prediction stored as a feature (option 2)

  Args:
    eval_config: Eval config.
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation) or None (predictions obtained from
      features).
    tensor_adapter_config: Tensor adapter config describing how tensors are
      obtained from the Arrow RecordBatch. The model's signature is invoked
      with those tensors (matched by names). If None, an attempt is made to
      create an adapter from the model's input signature; otherwise the model
      is invoked with raw examples (assuming a signature of a single 1-D
      string tensor).

  Returns:
    Extractor for extracting predictions.
  """
  verified_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  # Index the models by name when there are any; an empty/None result is
  # passed through unchanged.
  models_by_name = (
      {model.model_name: model for model in verified_models}
      if verified_models else verified_models)

  # pylint: disable=no-value-for-parameter
  predictions_ptransform = _ExtractPredictions(
      eval_config=eval_config,
      eval_shared_models=models_by_name,
      tensor_adapter_config=tensor_adapter_config)
  return extractor.Extractor(
      stage_name=_PREDICTIONS_EXTRACTOR_STAGE_NAME,
      ptransform=predictions_ptransform)
Exemplo n.º 16
0
def FeatureExtractor(
        additional_extracts: Optional[List[Text]] = None,
        excludes: Optional[List[bytes]] = None,
        extract_source: Text = constants.FEATURES_PREDICTIONS_LABELS_KEY,
        extract_dest: Text = constants.MATERIALIZE_COLUMNS):
    """Builds the Extractor that wraps the _ExtractFeatures PTransform.

    All arguments are forwarded unchanged to _ExtractFeatures; their exact
    semantics are defined there.

    Args:
      additional_extracts: Forwarded as `additional_extracts`.
      excludes: Forwarded as `excludes`.
      extract_source: Forwarded as `source`. Defaults to
        constants.FEATURES_PREDICTIONS_LABELS_KEY.
      extract_dest: Forwarded as `dest`. Defaults to
        constants.MATERIALIZE_COLUMNS.

    Returns:
      Extractor registered under FEATURE_EXTRACTOR_STAGE_NAME.
    """
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(
        additional_extracts=additional_extracts,
        excludes=excludes,
        source=extract_source,
        dest=extract_dest)
    return extractor.Extractor(
        stage_name=FEATURE_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
Exemplo n.º 17
0
def UnbatchExtractor() -> extractor.Extractor:
    """Builds the extractor that splits batched extracts into examples.

    The extractor drops the Arrow RecordBatch from the batched extract and
    emits per-example extracts holding the remaining keys. The remaining keys
    of the input extract are assumed to contain lists of objects (one per
    example).

    Returns:
      Extractor for unbatching batched extracts.
    """
    # pylint: disable=no-value-for-parameter
    unbatch_ptransform = _UnbatchInputs()
    return extractor.Extractor(
        stage_name=UNBATCH_EXTRACTOR_STAGE_NAME,
        ptransform=unbatch_ptransform)
Exemplo n.º 18
0
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the Extractor that wraps _ExtractBatchedPredictions.

    The shared model(s) are first passed through
    model_util.verify_and_update_eval_shared_models and then indexed by
    model name before being handed to the PTransform.

    Args:
      eval_config: Eval config, forwarded to the PTransform.
      eval_shared_model: A single shared model or multiple shared models.
      tensor_adapter_config: Optional tensor adapter config, forwarded to the
        PTransform unchanged.

    Returns:
      Extractor registered under BATCHED_PREDICT_EXTRACTOR_STAGE_NAME.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {model.model_name: model for model in verified_models}

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractBatchedPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
Exemplo n.º 19
0
    def testVerifyEvaluatorRaisesValueError(self):
        # The only stage the evaluators below are allowed to run after.
        known_extractors = [
            extractor.Extractor(
                stage_name='ExtractorThatExists', ptransform=None),
        ]

        # An evaluator that runs after an existing stage verifies cleanly.
        valid_evaluator = evaluator.Evaluator(
            stage_name='EvaluatorWithoutError',
            run_after='ExtractorThatExists',
            ptransform=None)
        evaluator.verify_evaluator(valid_evaluator, known_extractors)

        # An evaluator that runs after an unknown stage must be rejected.
        with self.assertRaises(ValueError):
            invalid_evaluator = evaluator.Evaluator(
                stage_name='EvaluatorWithError',
                run_after='ExtractorThatDoesNotExist',
                ptransform=None)
            evaluator.verify_evaluator(invalid_evaluator, known_extractors)
def SqlSliceKeyExtractor(
        eval_config: config_pb2.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for SQL-based slice keys.

    The extractor extracts slice keys in a batch, based on the SQL statement
    in the eval config.

    Args:
      eval_config: EvalConfig containing slicing_specs specifying the slices
        to slice the data into.

    Returns:
      Extractor for extracting slice keys in batch.
    """
    # pylint: disable=no-value-for-parameter
    sql_slice_ptransform = _ExtractSqlSliceKey(eval_config)
    return extractor.Extractor(
        stage_name=_SQL_SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=sql_slice_ptransform)
Exemplo n.º 21
0
def LabelsExtractor(eval_config: config_pb2.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for labels.

    The wrapped PTransform uses the config's ModelSpec.label_key(s) to look
    up label values stored as features under tfma.FEATURES_KEY (and
    optionally tfma.TRANSFORMED_FEATURES_KEY) in the extracts. The values
    found are added to the extracts under tfma.LABELS_KEY.

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting labels.
    """
    # pylint: disable=no-value-for-parameter
    labels_ptransform = _ExtractLabels(eval_config=eval_config)
    return extractor.Extractor(
        stage_name=_LABELS_EXTRACTOR_STAGE_NAME,
        ptransform=labels_ptransform)
Exemplo n.º 22
0
def PredictExtractor(
        eval_shared_model: Union[types.EvalSharedModel,
                                 Dict[Text, types.EvalSharedModel]],
        desired_batch_size: Optional[int] = None,
        materialize: Optional[bool] = True,
        eval_config: Optional[config.EvalConfig] = None
) -> extractor.Extractor:
    """Builds the Extractor that wraps TFMAPredict.

    The wrapped PTransform loads the eval_saved_model and runs it on every
    example. Each output is a copy of the incoming Extracts with one more
    extract, of type FeaturesPredictionsLabels, stored under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY. If eval_config is not None, the
    features, predictions, and labels are instead stored separately under
    tfma.FEATURES_KEY, tfma.PREDICTIONS_KEY, and tfma.LABELS_KEY.

    Args:
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).
      desired_batch_size: Optional batch size for batching in Aggregate.
      materialize: True to call the FeatureExtractor to add
        MaterializedColumn entries for the features, predictions, and labels.
      eval_config: Eval config.

    Returns:
      Extractor for extracting features, predictions, labels, and other
      tensors during predict.
    """
    # Normalize the input to a dict keyed by model name.
    if isinstance(eval_shared_model, dict):
        models_by_name = eval_shared_model
    else:
        models_by_name = {'': eval_shared_model}

    # To maintain consistency between settings where single models are used,
    # always use '' as the model name regardless of whether a name is passed.
    if len(models_by_name) == 1:
        (only_model,) = models_by_name.values()
        models_by_name = {'': only_model}

    # pylint: disable=no-value-for-parameter
    predict_ptransform = _TFMAPredict(
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size,
        materialize=materialize,
        eval_config=eval_config)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
def FeaturesExtractor(eval_config: config.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for features.

    The wrapped PTransform extracts features from the Arrow RecordBatch
    stored under tfma.ARROW_RECORD_BATCH_KEY in the incoming extract and adds
    them to the output extract under tfma.FEATURES_KEY. Extracts that already
    exist are merged with the parsed values; on duplicate keys this
    extractor's values win.

    Args:
      eval_config: Eval config. Accepted but not used by this function.

    Returns:
      Extractor for extracting features.
    """
    del eval_config  # Unused.
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures()
    return extractor.Extractor(
        stage_name=_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
Exemplo n.º 24
0
def FeaturesExtractor(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    tensor_representations: Optional[Mapping[
        Text, schema_pb2.TensorRepresentation]] = None) -> extractor.Extractor:
    """Builds the extractor for features.

    The extractor's behavior depends on which keys the incoming extracts
    contain:

      1) Extracts contain tfma.ARROW_RECORD_BATCH_KEY

      The features stored in the RecordBatch are extracted and added to the
      output extract under tfma.FEATURES_KEY, and the raw serialized inputs
      are added under tfma.INPUT_KEY. Extracts that already exist are merged
      with the RecordBatch values; on duplicate keys the RecordBatch values
      win. The tfma.ARROW_RECORD_BATCH_KEY key is removed from the output
      extracts.

      2) Extracts contain tfma.FEATURES_KEY (but not
         tfma.ARROW_RECORD_BATCH_KEY)

      The operation is a no-op and the incoming extracts are passed to the
      output as is.

      3) Extracts contain neither tfma.FEATURES_KEY nor
         tfma.ARROW_RECORD_BATCH_KEY

      An exception is raised.

    Args:
      eval_config: Eval config. Accepted but not used by this function.
      tensor_representations: Optional tensor representations to use when
        parsing the data. If none are passed, or no representation is found
        for a given feature name, a default representation is used where
        possible; otherwise an exception is raised.

    Returns:
      Extractor for extracting features.
    """
    del eval_config  # Unused.
    # The PTransform always receives a mapping; a missing or empty argument
    # becomes an empty dict.
    representations = tensor_representations or {}
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(representations)
    return extractor.Extractor(
        stage_name=_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
Exemplo n.º 25
0
def ExampleWeightsExtractor(
        eval_config: config.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for example weights.

    The wrapped PTransform uses the config's ModelSpec.example_weight_key(s)
    to look up example weight values stored as features under
    tfma.FEATURES_KEY (and optionally tfma.TRANSFORMED_FEATURES_KEY) in the
    extracts. The values found are added to the extracts under
    tfma.EXAMPLE_WEIGHTS_KEY.

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting example weights.
    """
    # pylint: disable=no-value-for-parameter
    weights_ptransform = _ExtractExampleWeights(eval_config=eval_config)
    return extractor.Extractor(
        stage_name=_EXAMPLE_WEIGHTS_EXTRACTOR_STAGE_NAME,
        ptransform=weights_ptransform)
def AutoSliceKeyExtractor(  # pylint: disable=invalid-name
    statistics: Union[beam.pvalue.PCollection,
                      statistics_pb2.DatasetFeatureStatisticsList],
    categorical_uniques_threshold: int = 100,
    max_cross_size: int = 2,
    allowlist_features: Optional[Set[Text]] = None,
    denylist_features: Optional[Set[Text]] = None,
    materialize: bool = True) -> extractor.Extractor:
  """Creates an extractor for automatically extracting slice keys.

  The incoming Extracts must contain a FeaturesPredictionsLabels extract keyed
  by tfma.FEATURES_PREDICTIONS_LABELS_KEY. Typically this will be obtained by
  calling the PredictExtractor.

  The extractor's PTransform yields a copy of the Extracts input with an
  additional extract pointing at the list of SliceKeyType values keyed by
  tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized version
  of the slice keys will be added under the key tfma.MATERIALZED_SLICE_KEYS_KEY.

  Args:
    statistics: PCollection of data statistics proto or actual data statistics
      proto. Note that when passed a PCollection, it would be materialized and
      passed as a side input.
    categorical_uniques_threshold: Maximum number of unique values beyond which
      we don't slice on that categorical feature.
    max_cross_size: Maximum size feature crosses to consider.
    allowlist_features: Set of features to be used for slicing. Mutually
      exclusive with denylist_features.
    denylist_features: Set of features to ignore for slicing. Mutually
      exclusive with allowlist_features.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.

  Raises:
    ValueError: If both allowlist_features and denylist_features are non-empty.
  """
  # Validate explicitly rather than with `assert`: assertions are stripped when
  # Python runs with -O, which would let a contradictory configuration through.
  if allowlist_features and denylist_features:
    raise ValueError(
        'allowlist_features and denylist_features are mutually exclusive; '
        'only one of them may be set.')

  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=_AutoExtractSliceKeys(statistics,
                                       categorical_uniques_threshold,
                                       max_cross_size, allowlist_features,
                                       denylist_features, materialize))
Exemplo n.º 27
0
def _make_sklearn_predict_extractor(
    eval_shared_model: tfma.EvalSharedModel, ) -> extractor.Extractor:
    """Builds the extractor stage that predicts with a scikit-learn model.

    The wrapped PTransform loads the serving pickle and runs it against every
    extract, yielding a copy of the incoming extracts with the predictions
    added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
    tfma.FEATURES_KEY.

    Args:
      eval_shared_model: Shared model (single-model evaluation).

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # The PTransform takes the verified models as a dict keyed by model name.
    models_by_name = {model.model_name: model for model in verified_models}
    return extractor.Extractor(
        stage_name=_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=_ExtractPredictions(  # pylint: disable=no-value-for-parameter
            eval_shared_models=models_by_name))
Exemplo n.º 28
0
def PredictExtractor(
        eval_shared_model: types.MaybeMultipleEvalSharedModels,
        desired_batch_size: Optional[int] = None,
        materialize: Optional[bool] = True,
        eval_config: Optional[config.EvalConfig] = None
) -> extractor.Extractor:
    """Builds the extractor stage that wraps TFMAPredict.

    The wrapped PTransform loads the eval_saved_model and runs it against
    every example, yielding a copy of the Extracts input. When eval_config is
    None the result carries one extra extract of type
    FeaturesPredictionsLabels keyed by tfma.FEATURES_PREDICTIONS_LABELS_KEY.
    When eval_config is given, the features, predictions, and labels are
    instead stored separately under tfma.FEATURES_KEY, tfma.PREDICTIONS_KEY,
    and tfma.LABELS_KEY respectively.

    Args:
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      desired_batch_size: Optional batch size for batching in Aggregate.
      materialize: True to call the FeatureExtractor to add
        MaterializedColumn entries for the features, predictions, and labels.
      eval_config: Eval config.

    Returns:
      Extractor for extracting features, predictions, labels, and other
      tensors during predict.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # The PTransform takes the verified models as a dict keyed by model name.
    models_by_name = {model.model_name: model for model in verified_models}

    # pylint: disable=no-value-for-parameter
    predict_ptransform = _TFMAPredict(
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size,
        materialize=materialize,
        eval_config=eval_config)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
Exemplo n.º 29
0
def PredictExtractor(
        eval_shared_model: types.EvalSharedModel,
        desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor stage that performs predictions.

    The wrapped PTransform loads the serving saved_model and runs it against
    every extract, yielding a copy of the incoming extracts with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
    under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras).

    Args:
      eval_shared_model: Shared model parameters.
      desired_batch_size: Optional batch size for batching.

    Returns:
      Extractor for extracting predictions.
    """
    # pylint: disable=no-value-for-parameter
    predict_ptransform = _ExtractPredictions(
        eval_shared_model=eval_shared_model,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
Exemplo n.º 30
0
def PredictExtractor(
        eval_config: config.EvalConfig,
        eval_shared_model: Union[types.EvalSharedModel,
                                 Dict[Text, types.EvalSharedModel]],
        desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor stage that performs predictions.

    The wrapped PTransform loads the serving saved_model(s) and runs them
    against every extract, yielding a copy of the incoming extracts with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked up
    under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
    models the predictions are stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).
      desired_batch_size: Optional batch size.

    Returns:
      Extractor for extracting predictions.
    """
    # Normalise the argument so the PTransform always receives a dict.
    if isinstance(eval_shared_model, dict):
        models_by_name = eval_shared_model
    else:
        models_by_name = {'': eval_shared_model}

    # A single model is always keyed by '' no matter what name was supplied,
    # so single-model setups behave the same however they were passed in.
    if len(models_by_name) == 1:
        (only_model,) = models_by_name.values()
        models_by_name = {'': only_model}

    # pylint: disable=no-value-for-parameter
    predict_ptransform = _ExtractPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)