def EvaluateExtracts(  # pylint: disable=invalid-name
    extracts,
    key=constants.ANALYSIS_KEY,
    include=None,
    exclude=None):
  """Creates Evaluation output for extracts.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    extracts: PCollection of Extracts.
    key: Name to use for key in Evaluation output.
    include: Keys of extracts to include in output. Keys starting with '_' are
      automatically filtered out at write time.
    exclude: Keys of extracts to exclude from output.

  Returns:
    Evaluation containing PCollection of Extracts.
  """
  if include is None and exclude is None:
    exclude = [constants.INPUT_KEY]
  filtered = extracts
  if include or exclude:
    filtered = extracts | extractor.Filter(include=include, exclude=exclude)
  return {key: filtered}
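# Hedged usage sketch (not part of the original module): shows the default
# behavior of EvaluateExtracts, which excludes the tfma.INPUT_KEY extract when
# neither include nor exclude is given. Assumes the module's existing imports
# (apache_beam as beam, constants); the extract contents are illustrative.
def _example_evaluate_extracts_default_exclude():
  with beam.Pipeline() as pipeline:
    extracts = pipeline | 'CreateExtracts' >> beam.Create([
        {constants.INPUT_KEY: b'serialized example', 'features': {'age': 29}},
    ])
    # With include/exclude left as None, only the 'features' extract survives.
    evaluation = EvaluateExtracts(extracts)
    _ = evaluation[constants.ANALYSIS_KEY]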
def testIncludeFilterWithDict(self):
  with beam.Pipeline() as pipeline:
    got = (
        pipeline
        | 'Create' >> beam.Create([{
            'a': 1,
            'b': {
                'b2': 2
            },
            'c': {
                'c2': {
                    'c21': 3,
                    'c22': 4
                }
            },
            'd': {
                'd2': 4
            }
        }])
        | 'Filter' >> extractor.Filter(include={
            'b': {},
            'c': {
                'c2': {
                    'c21': {}
                }
            }
        }))

    def check_result(got):
      try:
        self.assertEqual(got, [{'b': {'b2': 2}, 'c': {'c2': {'c21': 3}}}])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(got, check_result)
def testFilterRaisesValueError(self):
  with self.assertRaises(ValueError):
    with beam.Pipeline() as pipeline:
      _ = (
          pipeline
          | 'Create' >> beam.Create([])
          | 'Filter' >> extractor.Filter(include=['a'], exclude=['b']))
def EvaluateMetricsAndPlots(  # pylint: disable=invalid-name
    extracts,
    eval_shared_model,
    desired_batch_size=None,
    metrics_key=constants.METRICS_KEY,
    plots_key=constants.PLOTS_KEY,
    num_bootstrap_samples=1):
  """Evaluates metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURES_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.

  Returns:
    Evaluation containing serialized protos keyed by 'metrics' and 'plots'.
  """
  # pylint: disable=no-value-for-parameter
  metrics, plots = (
      extracts
      | 'Filter' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY
      ])
      | 'ComputeMetricsAndPlots' >> ComputeMetricsAndPlots(
          eval_shared_model,
          desired_batch_size,
          num_bootstrap_samples=num_bootstrap_samples))
  metrics, plots = (
      (metrics, plots)
      | 'SerializeMetricsAndPlots' >> SerializeMetricsAndPlots(
          post_export_metrics=eval_shared_model.add_metrics_callbacks))
  # pylint: enable=no-value-for-parameter
  return {metrics_key: metrics, plots_key: plots}
def testExcludeFilter(self):
  with beam.Pipeline() as pipeline:
    got = (
        pipeline
        | 'Create' >> beam.Create([{
            'a': 1,
            'b': 2,
            'c': 3,
            'd': 4
        }])
        | 'Filter' >> extractor.Filter(exclude=['b', 'd']))

    def check_result(got):
      try:
        self.assertEqual(got, [{'a': 1, 'c': 3}])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(got, check_result)
def EvaluateQueryBasedMetrics(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    prediction_key: Text,
    query_id: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
) -> evaluator.Evaluation:
  """Evaluates query-based metrics.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURES_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    query_id: Key of query ID column in the features dictionary.
    combine_fns: List of query-based metric combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.

  Returns:
    Evaluation containing metrics dictionaries keyed by 'metrics'.
  """
  # pylint: disable=no-value-for-parameter
  metrics = (
      extracts
      | 'Filter' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY
      ])
      | 'ComputeQueryBasedMetrics' >> ComputeQueryBasedMetrics(
          query_id=query_id,
          combine_fns=combine_fns,
          prediction_key=prediction_key))
  # pylint: enable=no-value-for-parameter
  return {metrics_key: metrics}
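# Hedged usage sketch (not part of the original module): wiring
# EvaluateQueryBasedMetrics into a pipeline. The combine_fn and the
# 'query_id' feature name are hypothetical; in practice you would pass the
# CombineFns produced by TFMA's query-based metric helpers.
def _example_evaluate_query_based_metrics(extracts, my_query_combine_fn):
  evaluation = EvaluateQueryBasedMetrics(
      extracts=extracts,
      # Empty string: the Estimator returns a predictions Tensor, not a dict.
      prediction_key='',
      query_id='query_id',  # hypothetical query ID feature column
      combine_fns=[my_query_combine_fn])
  return evaluation[constants.METRICS_KEY]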
def EvaluateExtracts(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    key: str = constants.ANALYSIS_KEY,
    include: Optional[Union[Iterable[str], Dict[str, Any]]] = None,
    exclude: Optional[Union[Iterable[str], Dict[str, Any]]] = None
) -> evaluator.Evaluation:
  """Creates Evaluation output for extracts.

  If both include and exclude are None then tfma.INPUT_KEY extracts will be
  excluded by default.

  Args:
    extracts: PCollection of Extracts.
    key: Name to use for key in Evaluation output.
    include: List or map of keys to include in output. Keys starting with '_'
      are automatically filtered out at write time. If a map of keys is passed
      then the keys and sub-keys that exist in the map will be included in the
      output. An empty dict behaves as a wildcard matching all keys or the
      value itself. Since matching on feature values is not currently
      supported, an empty dict must be used to represent the leaf nodes. For
      example: {'key1': {'key1-subkey': {}}, 'key2': {}}.
    exclude: List or map of keys to exclude from output. If a map of keys is
      passed then the keys and sub-keys that exist in the map will be excluded
      from the output. An empty dict behaves as a wildcard matching all keys or
      the value itself. Since matching on feature values is not currently
      supported, an empty dict must be used to represent the leaf nodes. For
      example: {'key1': {'key1-subkey': {}}, 'key2': {}}.

  Returns:
    Evaluation containing PCollection of Extracts.
  """
  if include is None and exclude is None:
    exclude = [constants.INPUT_KEY]
  filtered = extracts
  if include or exclude:
    filtered = extracts | extractor.Filter(include=include, exclude=exclude)
  return {key: filtered}
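# Hedged usage sketch (not part of the original module): the dict form of
# include, mirroring the semantics exercised by testIncludeFilterWithDict
# above. Empty dicts mark leaf nodes (wildcards), so this keeps everything
# under 'b' but only the 'c21' sub-key under 'c'/'c2'.
def _example_evaluate_extracts_dict_include(extracts):
  return EvaluateExtracts(
      extracts, include={'b': {}, 'c': {'c2': {'c21': {}}}})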
def EvaluateMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    metrics_key: Text = constants.METRICS_KEY,
    plots_key: Text = constants.PLOTS_KEY,
    num_bootstrap_samples: Optional[int] = 1,
    k_anonymization_count: int = 1,
    serialize: bool = False) -> evaluator.Evaluation:
  """Evaluates metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURES_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    metrics_key: Name to use for metrics key in Evaluation output.
    plots_key: Name to use for plots key in Evaluation output.
    num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
      confidence intervals will be computed for metrics. Suggested value is at
      least 20.
    k_anonymization_count: If the number of examples in a specific slice is
      less than k_anonymization_count, then an error will be returned for that
      slice. This helps ensure privacy by not displaying aggregated data for
      slices with a small number of examples.
    serialize: If true, serialize the metrics to protos as part of the
      evaluation as well.

  Returns:
    Evaluation containing metrics and plots dictionaries keyed by 'metrics'
    and 'plots'.
  """
  # pylint: disable=no-value-for-parameter
  (metrics, plots), slices_count = (
      extracts
      | 'Filter' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY
      ])
      | 'ComputeMetricsAndPlots' >> ComputeMetricsAndPlots(
          eval_shared_model,
          desired_batch_size,
          num_bootstrap_samples=num_bootstrap_samples))

  if k_anonymization_count > 1:
    metrics = (
        metrics
        | 'FilterMetricsForSmallSlices' >> _FilterOutSlices(
            slices_count, k_anonymization_count))
    plots = (
        plots
        | 'FilterPlotsForSmallSlices' >> _FilterOutSlices(
            slices_count, k_anonymization_count))

  if serialize:
    metrics, plots = (
        (metrics, plots)
        | 'SerializeMetricsAndPlots' >>
        metrics_and_plots_serialization.SerializeMetricsAndPlots(
            post_export_metrics=eval_shared_model.add_metrics_callbacks))
  # pylint: enable=no-value-for-parameter
  return {metrics_key: metrics, plots_key: plots}
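# Hedged usage sketch (not part of the original module): evaluation with
# bootstrap confidence intervals, small-slice suppression, and serialized
# output. Construction of extracts and eval_shared_model (e.g. via
# tfma.default_eval_shared_model and the default extractors) is assumed.
def _example_evaluate_metrics_and_plots(extracts, eval_shared_model):
  evaluation = EvaluateMetricsAndPlots(
      extracts,
      eval_shared_model,
      num_bootstrap_samples=20,  # >1 enables confidence intervals
      k_anonymization_count=50,  # suppress slices with fewer than 50 examples
      serialize=True)  # also serialize metrics/plots to protos
  return evaluation[constants.METRICS_KEY], evaluation[constants.PLOTS_KEY]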
def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    compute_confidence_intervals: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> Tuple[beam.pvalue.DoOutputsTuple, beam.pvalue.PCollection]:
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURES_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_confidence_intervals: Set to True to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Tuple of Tuple[PCollection of (slice key, metrics), PCollection of
    (slice key, plot metrics)] and PCollection of (slice key, example count).
  """
  # pylint: disable=no-value-for-parameter
  _ = (
      extracts.pipeline
      | counter_util.IncrementMetricsComputationCounters(
          eval_shared_model.add_metrics_callbacks))

  slices = (
      extracts
      # Downstream computation only cares about FPLs, so we prune before
      # fanout. Note that fanout itself will prune the slice keys.
      # TODO(b/130032676, b/111353165): Prune FPLs to contain only the
      # necessary set for the calculation of post_export_metrics if possible.
      | 'PruneExtracts' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY,
          constants.INPUT_KEY,
      ])
      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #   into n logical examples, references to which are replicated once
      #   per applicable slice key).
      | 'FanoutSlices' >> slicer.FanoutSlices())

  slices_count = (
      slices
      | 'ExtractSliceKeys' >> beam.Keys()
      | 'CountPerSliceKey' >> beam.combiners.Count.PerElement())

  aggregated_metrics = (
      slices
      # Metrics are computed per slice key.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      #   plots if applicable.
      | 'ComputePerSliceMetrics' >>
      poisson_bootstrap.ComputeWithConfidenceIntervals(
          aggregate.ComputePerSliceMetrics,
          num_bootstrap_samples=(
              poisson_bootstrap.DEFAULT_NUM_BOOTSTRAP_SAMPLES
              if compute_confidence_intervals else 1),
          random_seed_for_testing=random_seed_for_testing,
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size)
      | 'SeparateMetricsAndPlots' >> beam.ParDo(
          _SeparateMetricsAndPlotsFn()).with_outputs(
              _SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS,
              main=_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS))

  return (aggregated_metrics, slices_count)
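# Hedged usage sketch (not part of the original module): the
# no-value-for-parameter pylint disables in the callers above suggest
# ComputeMetricsAndPlots is wrapped with @beam.ptransform_fn, so it is
# applied with `>>` and the input PCollection is supplied by the pipe.
def _example_compute_metrics_and_plots(extracts, eval_shared_model):
  (metrics, plots), slices_count = (
      extracts
      | 'ComputeMetricsAndPlots' >> ComputeMetricsAndPlots(
          eval_shared_model,
          desired_batch_size=100,  # illustrative batch size
          compute_confidence_intervals=True))
  return metrics, plots, slices_count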