Example #1
def ComputePerSliceMetrics(  # pylint: disable=invalid-name
        slice_result: beam.pvalue.PCollection,
        eval_shared_model: types.EvalSharedModel,
        desired_batch_size: Optional[int] = None,
        compute_with_sampling: Optional[bool] = False,
        random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    slice_result.element_type = beam.typehints.Any

    return (
        slice_result
        | 'CombinePerSlice' >> beam.CombinePerKey(
            _AggregateCombineFn(eval_shared_model=eval_shared_model,
                                desired_batch_size=desired_batch_size,
                                compute_with_sampling=compute_with_sampling,
                                seed_for_testing=random_seed_for_testing))
        | 'InterpretOutput' >> beam.ParDo(
            _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
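The transform above is essentially a CombinePerKey over slice keys followed by a ParDo that interprets the combined accumulators. Below is a minimal, self-contained Beam sketch of that same shape, with a stock mean combiner and a simple Map standing in for TFMA's _AggregateCombineFn and _ExtractOutputDoFn; the slice keys and metric name are made up for illustration.

import apache_beam as beam

with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([('slice_a', 1.0), ('slice_a', 3.0), ('slice_b', 2.0)])
        # Aggregate values per slice key, analogous to 'CombinePerSlice'.
        | 'CombinePerSlice' >> beam.CombinePerKey(beam.combiners.MeanCombineFn())
        # Turn each combined result into a metrics dict, analogous to 'InterpretOutput'.
        | 'InterpretOutput' >> beam.Map(lambda kv: (kv[0], {'mean': kv[1]}))
        | beam.Map(print))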
Example #2
def _ComputePerSlice(  # pylint: disable=invalid-name
        sliced_extracts: beam.pvalue.PCollection,
        computations: List[metric_types.MetricComputation],
        derived_computations: List[metric_types.DerivedMetricComputation],
        compute_with_sampling: Optional[bool] = False,
        random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    sliced_extracts.element_type = beam.typehints.Any

    def convert_and_add_derived_values(
        sliced_results: Tuple[Text, Tuple[Any, ...]],
        derived_computations: List[metric_types.DerivedMetricComputation],
    ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
        """Converts per slice tuple of dicts into single dict and adds derived."""
        result = {}
        for v in sliced_results[1]:
            result.update(v)
        for c in derived_computations:
            result.update(c.result(result))
        # Remove private metrics
        keys = list(result.keys())
        for k in keys:
            if k.name.startswith('_'):
                result.pop(k)
        return (sliced_results[0], result)

    # A fanout of 8 is used here to reduce stragglers that occur during the
    # merger of large datasets such as histogram buckets. This has little effect
    # on the msec profiles, but can impact the wall time and memory usage. If
    # experiencing significantly extended run times due to stragglers, try bumping
    # this to a larger number.
    return (sliced_extracts
            | 'CombinePerSliceKey' >> beam.CombinePerKey(
                _ComputationsCombineFn(
                    computations=computations,
                    compute_with_sampling=compute_with_sampling,
                    random_seed_for_testing=random_seed_for_testing)).
            with_hot_key_fanout(8)
            | 'ConvertAndAddDerivedValues' >> beam.Map(
                convert_and_add_derived_values, derived_computations))
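The with_hot_key_fanout(8) hint above pre-combines hot slice keys (such as the overall slice, which every example contributes to) across several intermediate keys before the final merge. A minimal sketch of how the hint attaches to a per-key combine, with sum standing in for _ComputationsCombineFn:

import apache_beam as beam

with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([('overall', i) for i in range(1000)])
        # with_hot_key_fanout(8) splits a hot key into 8 intermediate keys,
        # pre-combines each shard in parallel, then merges the partial results.
        | 'CombinePerSliceKey' >> beam.CombinePerKey(sum).with_hot_key_fanout(8)
        | beam.Map(print))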
Example #3
def _ComputePerSlice(  # pylint: disable=invalid-name
        sliced_extracts: beam.pvalue.PCollection,
        computations: List[metric_types.MetricComputation],
        derived_computations: List[metric_types.DerivedMetricComputation],
        compute_with_sampling: Optional[bool] = False,
        random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    sliced_extracts.element_type = beam.typehints.Any

    def convert_and_add_derived_values(
        sliced_results: Tuple[Text, Tuple[Any, ...]],
        derived_computations: List[metric_types.DerivedMetricComputation],
    ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
        """Converts per slice tuple of dicts into single dict and adds derived."""
        result = {}
        for v in sliced_results[1]:
            result.update(v)
        for c in derived_computations:
            result.update(c.result(result))
        # Remove private metrics
        keys = list(result.keys())
        for k in keys:
            if k.name.startswith('_'):
                result.pop(k)
        return (sliced_results[0], result)

    return (sliced_extracts
            | 'CombinePerSliceKey' >> beam.CombinePerKey(
                _ComputationsCombineFn(
                    computations=computations,
                    compute_with_sampling=compute_with_sampling,
                    random_seed_for_testing=random_seed_for_testing))
            | 'ConvertAndAddDerivedValues' >> beam.Map(
                convert_and_add_derived_values, derived_computations))
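This second version of _ComputePerSlice is the same transform without the fanout hint; its convert_and_add_derived_values helper merges the per-computation dicts emitted by the combiner, runs the derived computations on the merged dict, and drops private (underscore-prefixed) metrics. A plain-Python sketch of that merge, using string keys in place of metric_types.MetricKey and a hand-written precision formula as a stand-in derived computation:

# Per-slice combiner output: (slice key, tuple of per-computation dicts).
combined = (('slice_a',), ({'_tp': 8, '_fp': 2}, {'example_count': 10}))

result = {}
for partial in combined[1]:  # Merge the per-computation dicts into one.
    result.update(partial)
# Stand-in derived computation built from the merged (private) metrics.
result['precision'] = result['_tp'] / (result['_tp'] + result['_fp'])
for key in list(result):  # Remove private (underscore-prefixed) metrics.
    if key.startswith('_'):
        result.pop(key)
print((combined[0], result))  # (('slice_a',), {'example_count': 10, 'precision': 0.8})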
Example #4
def ComputePerSliceMetrics(  # pylint: disable=invalid-name
        slice_result: beam.pvalue.PCollection,
        eval_shared_model: types.EvalSharedModel,
        desired_batch_size: Optional[int] = None,
        compute_with_sampling: Optional[bool] = False,
        random_seed_for_testing: Optional[int] = None
) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_with_sampling: True to compute with sampling.
    random_seed_for_testing: Seed to use for unit testing.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    slice_result.element_type = beam.typehints.Any

    return (
        slice_result
        # _ModelLoadingIdentityFn loads the EvalSavedModel into memory
        # under a shared handle that can be used by subsequent steps.
        # Combiner lifting and producer-consumer fusion should ensure
        # that these steps run in the same process and memory space.
        # TODO(b/69566045): Remove _ModelLoadingIdentityFn and move model
        # loading to CombineFn.setup after it is available in Beam.
        | 'LoadModel' >> beam.ParDo(
            _ModelLoadingIdentityFn(eval_shared_model=eval_shared_model))
        | 'CombinePerSlice' >> beam.CombinePerKey(
            _AggregateCombineFn(eval_shared_model=eval_shared_model,
                                desired_batch_size=desired_batch_size,
                                compute_with_sampling=compute_with_sampling,
                                seed_for_testing=random_seed_for_testing))
        | 'InterpretOutput' >> beam.ParDo(
            _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
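The 'LoadModel' step above is an identity ParDo whose only job is to load the EvalSavedModel once per worker under a shared handle; the TODO notes that this workaround goes away once CombineFn.setup is available. A hedged sketch of the same idea using plain DoFn.setup; the class name and the stand-in model loading below are illustrative only, not TFMA's implementation.

import apache_beam as beam


class _LoadOnceIdentityFn(beam.DoFn):  # Hypothetical name, not part of TFMA.
    """Passes elements through unchanged while holding a heavy resource."""

    def __init__(self, model_path):
        self._model_path = model_path
        self._model = None

    def setup(self):
        # DoFn.setup runs once per DoFn instance, so the (stand-in) model is
        # loaded here rather than via a separate identity step.
        self._model = object()

    def process(self, element):
        yield element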
Example #5
def ComputePerSliceMetrics(  # pylint: disable=invalid-name
    slice_result: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    num_bootstrap_samples: Optional[int] = 1,
    random_seed_for_testing: Optional[int] = None,
) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics.

  Args:
    slice_result: Incoming PCollection consisting of slice key and extracts.
    eval_shared_model: Shared model parameters for EvalSavedModel.
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Number of replicas to use in calculating uncertainty
      using bootstrapping.  If 1 is provided (default), aggregate metrics will
      be calculated with no uncertainty. If num_bootstrap_samples is > 1,
      multiple samples of each slice will be calculated using the Poisson
      bootstrap method. To calculate standard errors, num_bootstrap_samples
      should be 20 or more in order to provide useful data. More is better, but
      you pay a performance cost.
    random_seed_for_testing: Seed to use for unit testing, because
      nondeterministic tests stink. Each partition will use this value + i.

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    slice_result.element_type = beam.typehints.Any

    if not num_bootstrap_samples:
        num_bootstrap_samples = 1
    # TODO(ckuhn): Cap the number of bootstrap samples at 20.
    if num_bootstrap_samples < 1:
        raise ValueError('num_bootstrap_samples should be > 0, got %d' %
                         num_bootstrap_samples)

    output_results = (
        slice_result
        | 'CombinePerSlice' >> beam.CombinePerKey(
            _AggregateCombineFn(eval_shared_model=eval_shared_model,
                                desired_batch_size=desired_batch_size,
                                compute_with_sampling=False))
        | 'InterpretOutput' >> beam.ParDo(
            _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
    if num_bootstrap_samples > 1:
        multicombine = []
        for i in range(num_bootstrap_samples):
            multicombine.append(
                slice_result
                | 'CombinePerSliceWithSamples%d' % i >> beam.CombinePerKey(
                    _AggregateCombineFn(eval_shared_model=eval_shared_model,
                                        desired_batch_size=desired_batch_size,
                                        compute_with_sampling=True,
                                        seed_for_testing=None
                                        if random_seed_for_testing is None else
                                        random_seed_for_testing + i))
                | 'InterpretSampledOutput%d' % i >> beam.ParDo(
                    _ExtractOutputDoFn(eval_shared_model=eval_shared_model)))
        output_results = (
            multicombine
            | 'FlattenBootstrapPartitions' >> beam.Flatten()
            | 'GroupBySlice' >> beam.GroupByKey()
            | 'MergeBootstrap' >> beam.ParDo(
                _MergeBootstrap(), beam.pvalue.AsIter(output_results)))
    # Separate metrics and plots.
    return (output_results
            | 'SeparateMetricsAndPlots' >> beam.ParDo(
                _SeparateMetricsAndPlotsFn()).with_outputs(
                    _SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS,
                    main=_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS))
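When num_bootstrap_samples > 1, each replica above re-combines the slice with compute_with_sampling=True, i.e. every element is weighted as if it appeared Poisson(1) times, and the replicas are later merged into standard-error estimates. A standalone NumPy sketch of that Poisson bootstrap idea; the metric values and seed are made up.

import numpy as np

values = np.array([0.9, 0.4, 0.7, 0.1, 0.8])  # Made-up per-example metric inputs.
rng = np.random.default_rng(42)  # Plays the role of random_seed_for_testing + i.

replicas = []
for _ in range(20):  # ~20 or more replicas for usable standard errors.
    # Each element is weighted as if it showed up Poisson(1) times.
    weights = rng.poisson(1.0, size=values.shape)
    replicas.append(
        np.average(values, weights=weights) if weights.sum() else np.nan)

print(np.nanmean(replicas), np.nanstd(replicas))  # Point estimate and spread.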
Example #6
def _ComputePerSlice(  # pylint: disable=invalid-name
        sliced_extracts: beam.pvalue.PCollection,
        computations: List[metric_types.MetricComputation],
        derived_computations: List[metric_types.DerivedMetricComputation],
        cross_slice_specs: Optional[Iterable[config.CrossSlicingSpec]] = None,
        compute_with_sampling: Optional[bool] = False,
        num_jackknife_samples: int = 0,
        skip_ci_metric_keys: Set[metric_types.MetricKey] = frozenset(),
        random_seed_for_testing: Optional[int] = None,
        baseline_model_name: Optional[Text] = None) -> beam.pvalue.PCollection:
    """PTransform for computing, aggregating and combining metrics and plots.

  Args:
    sliced_extracts: Incoming PCollection consisting of slice key and extracts.
    computations: List of MetricComputations.
    derived_computations: List of DerivedMetricComputations.
    cross_slice_specs: List of CrossSlicingSpec.
    compute_with_sampling: True to compute with bootstrap sampling. This allows
      _ComputePerSlice to be used to generate unsampled values from the whole
      data set, as well as bootstrap resamples, in which each element is treated
      as if it showed up p ~ Poisson(1) times.
    num_jackknife_samples: number of delete-d jackknife estimates to use in
      computing standard errors on metrics.
    skip_ci_metric_keys: List of metric keys for which to skip confidence
      interval computation.
    random_seed_for_testing: Seed to use for unit testing.
    baseline_model_name: Name for baseline model.

  Returns:
    PCollection of (slice key, dict of metrics).
  """
    # TODO(b/123516222): Remove this workaround per discussions in CL/227944001
    sliced_extracts.element_type = beam.typehints.Any

    def convert_and_add_derived_values(
        sliced_results: Tuple[slicer.SliceKeyType,
                              Tuple[metric_types.MetricsDict, ...]],
        derived_computations: List[metric_types.DerivedMetricComputation],
    ) -> Tuple[slicer.SliceKeyType, metric_types.MetricsDict]:
        """Converts per slice tuple of dicts into single dict and adds derived."""
        result = {}
        for v in sliced_results[1]:
            result.update(v)
        for c in derived_computations:
            result.update(c.result(result))
        # Remove private metrics
        keys = list(result.keys())
        for k in keys:
            if k.name.startswith('_') and not k.name.startswith('__'):
                result.pop(k)
        return sliced_results[0], result

    def add_diff_metrics(
        sliced_metrics: Tuple[Union[slicer.SliceKeyType,
                                    slicer.CrossSliceKeyType],
                              Dict[metric_types.MetricKey, Any]],
        baseline_model_name: Optional[Text],
    ) -> Tuple[slicer.SliceKeyType, Dict[metric_types.MetricKey, Any]]:
        """Add diff metrics if there is a baseline model."""

        result = copy.copy(sliced_metrics[1])

        if baseline_model_name:
            diff_result = {}
            for k, v in result.items():
                if k.model_name != baseline_model_name and k.make_baseline_key(
                        baseline_model_name) in result:
                    # plots will not be diffed.
                    if not isinstance(v, message.Message):
                        diff_result[k.make_diff_key()] = v - result[
                            k.make_baseline_key(baseline_model_name)]
            result.update(diff_result)

        return (sliced_metrics[0], result)

    combiner = _ComputationsCombineFn(
        computations=computations,
        compute_with_sampling=compute_with_sampling,
        random_seed_for_testing=random_seed_for_testing)
    if num_jackknife_samples:
        # We do not use the hot key fanout hint used by the non-jackknife path because
        # the random jackknife partitioning naturally mitigates hot keys.
        sliced_combiner_outputs = (
            sliced_extracts
            | 'JackknifeCombinePerSliceKey' >>
            jackknife.JackknifeCombinePerKey(combiner, num_jackknife_samples))
    else:
        sliced_combiner_outputs = (
            sliced_extracts
            | 'CombinePerSliceKey' >> beam.CombinePerKey(combiner).
            with_hot_key_fanout(_COMBINE_PER_SLICE_KEY_HOT_KEY_FANOUT))

    sliced_derived_values_and_diffs = (
        sliced_combiner_outputs
        | 'ConvertAndAddDerivedValues' >> beam.Map(
            convert_and_add_derived_values, derived_computations)
        | 'AddCrossSliceMetrics' >> _AddCrossSliceMetrics(cross_slice_specs)  # pylint: disable=no-value-for-parameter
        | 'AddDiffMetrics' >> beam.Map(add_diff_metrics, baseline_model_name))

    if num_jackknife_samples:
        return (sliced_derived_values_and_diffs
                | 'MergeJackknifeSamples' >> jackknife.MergeJackknifeSamples(
                    num_jackknife_samples, skip_ci_metric_keys))
    else:
        return sliced_derived_values_and_diffs
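add_diff_metrics above emits, for every non-baseline metric that has a matching baseline key, a diff metric equal to candidate minus baseline (plots, being protos, are skipped). A plain-Python sketch of that pairing, using (model_name, metric_name) tuples in place of metric_types.MetricKey:

baseline = 'baseline'
metrics = {
    ('baseline', 'auc'): 0.81,
    ('candidate', 'auc'): 0.84,
}

diffs = {}
for (model, name), value in metrics.items():
    if model != baseline and (baseline, name) in metrics:
        # Mirrors k.make_diff_key(): the diff is candidate minus baseline.
        diffs[(model, name + '_diff')] = value - metrics[(baseline, name)]
metrics.update(diffs)
print(metrics)  # Includes ('candidate', 'auc_diff'): 0.03 (up to float rounding).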