Пример #1
0
            def check_result(got):
                """Asserts the expected NDCG@1 / NDCG@2 values on the overall slice."""
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())
                    # Query1 (weight=1): (p=0.8, g=0.5) (p=0.2, g=1.0)
                    # Query2 (weight=2): (p=0.9, g=1.0) (p=0.5, g=0.5) (p=0.1, g=0.1)
                    # Query3 (weight=3): (p=0.9, g=1.0)
                    #
                    # DCG@1:  0.5, 1.0, 1.0
                    # NDCG@1: 0.5, 1.0, 1.0
                    # Average NDCG@1: (1 * 0.5 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.92
                    #
                    # DCG@2: (0.5 + 1.0/log(3), (1.0 + 0.5/log(3), (1.0)
                    # NDCG@2: (0.5 + 1.0/log(3)) / (1.0 + 0.5/log(3)),
                    #         (1.0 + 0.5/log(3)) / (1.0 + 0.5/log(3)),
                    #         1.0
                    # Average NDCG@2: (1 * 0.860 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.97
                    expected = {
                        metric_types.MetricKey(
                            name='ndcg',
                            sub_key=metric_types.SubKey(top_k=1)): 0.9166667,
                        metric_types.MetricKey(
                            name='ndcg',
                            sub_key=metric_types.SubKey(top_k=2)): 0.9766198,
                    }
                    self.assertDictElementsAlmostEqual(
                        metrics, expected, places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)
Пример #2
0
def _macro_average_sub_keys(
    sub_key: Optional[metric_types.SubKey],
    class_weights: Dict[int, float]) -> Iterable[metric_types.SubKey]:
  """Returns sub-keys required in order to compute macro average sub-key.

  Args:
    sub_key: SubKey associated with macro_average or weighted_macro_average.
    class_weights: Class weights associated with sub-key.

  Returns:
    One SubKey per class (or per top-k position) to aggregate over.

  Raises:
    ValueError: If invalid sub-key passed or class weights required but not
      passed.
  """
  if sub_key and sub_key.top_k:
    # Macro averaging at top-k uses one sub-key per prediction position.
    return [metric_types.SubKey(k=pos) for pos in range(1, sub_key.top_k + 1)]
  if not sub_key:
    # Averaging over all classes requires explicit class weights to know
    # which classes participate.
    if not class_weights:
      raise ValueError(
          'class_weights are required in order to compute macro average over '
          'all classes: sub_key={}, class_weights={}'.format(
              sub_key, class_weights))
    return [metric_types.SubKey(class_id=c) for c in class_weights]
  raise ValueError('invalid sub_key for performing macro averaging: '
                   'sub_key={}'.format(sub_key))
Пример #3
0
def _create_sub_keys(
        spec: config.MetricsSpec) -> Optional[List[metric_types.SubKey]]:
    """Creates subkeys associated with spec (None when no binarization set)."""
    if not spec.HasField('binarize'):
        return None
    sub_keys = [
        metric_types.SubKey(class_id=v)
        for v in spec.binarize.class_ids.values
    ]
    sub_keys.extend(
        metric_types.SubKey(k=v) for v in spec.binarize.k_list.values)
    sub_keys.extend(
        metric_types.SubKey(top_k=v) for v in spec.binarize.top_k_list.values)
    if spec.aggregate.micro_average:
        # Micro averaging is performed by flattening the labels and predictions
        # and treating them as independent pairs. This is done by default by most
        # metrics whenever binarization is not used. If micro-averaging and
        # binarization are used, then we need to create an empty subkey to ensure
        # the overall aggregate key is still computed. Note that the class_weights
        # should always be passed to all metric calculations to ensure they are
        # taken into account when flattening is required.
        sub_keys.append(None)
    return sub_keys  # pytype: disable=bad-return-type
Пример #4
0
def _create_sub_keys(
    spec: config.MetricsSpec
) -> Dict[Optional[metric_types.AggregationType],
          List[Optional[metric_types.SubKey]]]:
  """Creates sub keys per aggregation type."""
  result = {}
  if spec.HasField('binarize'):
    binarized = [
        metric_types.SubKey(class_id=v)
        for v in spec.binarize.class_ids.values
    ]
    binarized.extend(
        metric_types.SubKey(k=v) for v in spec.binarize.k_list.values)
    binarized.extend(
        metric_types.SubKey(top_k=v) for v in spec.binarize.top_k_list.values)
    if binarized:
      # Binarized keys are not associated with any aggregation type.
      result[None] = binarized
  if spec.HasField('aggregate'):
    aggregated = [
        metric_types.SubKey(top_k=top_k)
        for top_k in spec.aggregate.top_k_list.values
    ] or [None]
    result[_aggregation_type(spec)] = aggregated
  # Default: a single entry with no aggregation type and no sub key.
  return result or {None: [None]}
Пример #5
0
      def check_metrics(got):
        """Validates example counts and per-class mean labels for the overall slice."""
        try:
          self.assertLen(got, 1)
          slice_key, metrics = got[0]
          self.assertEqual(slice_key, ())
          total_weight = 1.0 + 2.0 + 3.0 + 4.0
          expected = {
              metric_types.MetricKey(name='example_count'): 4,
              metric_types.MetricKey(name='weighted_example_count'):
                  total_weight,
              # Weighted mean of the one-hot label indicator for each class.
              metric_types.MetricKey(
                  name='mean_label', sub_key=metric_types.SubKey(class_id=0)):
                  (1 * 1.0 + 0 * 2.0 + 0 * 3.0 + 0 * 4.0) / total_weight,
              metric_types.MetricKey(
                  name='mean_label', sub_key=metric_types.SubKey(class_id=1)):
                  (0 * 1.0 + 1 * 2.0 + 0 * 3.0 + 1 * 4.0) / total_weight,
              metric_types.MetricKey(
                  name='mean_label', sub_key=metric_types.SubKey(class_id=2)):
                  (0 * 1.0 + 0 * 2.0 + 1 * 3.0 + 0 * 4.0) / total_weight,
          }
          self.assertDictElementsAlmostEqual(metrics, expected)

        except AssertionError as err:
          raise util.BeamAssertException(err)
Пример #6
0
def _metric_keys(metrics: Iterable[tf.keras.metrics.Metric], model_name: Text,
                 output_names: Iterable[Text]) -> List[metric_types.MetricKey]:
    """Returns metric keys for given metrics."""

    # We need to use the metric name to determine the associated output because
    # keras does not provide an API (see b/149780822). Keras names its metrics
    # using the following format:
    #   <output_name>_[weighted]_<metric_name>
    def _sub_key(metric):
        # Binarization settings stored on the metric itself take precedence.
        if getattr(metric, 'class_id', None) is not None:
            return metric_types.SubKey(class_id=metric.class_id)
        if getattr(metric, 'top_k', None) is not None:
            return metric_types.SubKey(top_k=metric.top_k)
        return None

    result = []
    for metric in metrics:
        sub_key = _sub_key(metric)
        matched_output = None
        name = metric.name
        for output_name in output_names or []:
            prefix = output_name + '_'
            if name.startswith(prefix):
                matched_output = output_name
                # TODO(b/171559113): Output prefixes used to be added multiple times.
                # Remove this while loop after the last TF version with the issue is
                # no longer supported.
                while name.startswith(prefix):
                    name = name[len(prefix):]
                break
        if matched_output is not None:
            result.append(
                metric_types.MetricKey(name=name,
                                       model_name=model_name,
                                       output_name=matched_output,
                                       sub_key=sub_key))
        else:
            result.append(
                metric_types.MetricKey(name=metric.name,
                                       model_name=model_name,
                                       sub_key=sub_key))
    return result
    def test_partition_slices_with_metric_sub_key(self):
        """Partitioning works when the metric key carries a class_id sub-key."""
        metrics = self._get_metrics()
        # Attach the same class_id=0 sub-key to every stored metric value so
        # lookups by the sub-keyed metric key succeed.
        sub_key_proto = metric_types.SubKey(class_id=0).to_proto()
        for metric in metrics:
            for kv in metric.metric_keys_and_values:
                kv.key.sub_key.MergeFrom(sub_key_proto)
        accuracy_key = metric_types.MetricKey(
            name='accuracy', sub_key=metric_types.SubKey(class_id=0))

        result = auto_slicing_util.partition_slices(
            metrics, metric_key=accuracy_key, comparison_type='LOWER')
        self.assertCountEqual([s.slice_key for s in result[0]],
                              [(('age', '[1.0, 6.0)'), )])
        self.assertCountEqual([s.slice_key for s in result[1]],
                              [(('age', '[6.0, 12.0)'), ),
                               (('age', '[12.0, 18.0)'), ),
                               (('country', 'USA'), ),
                               (('country', 'USA'), ('age', '[12.0, 18.0)'))])

        result = auto_slicing_util.partition_slices(
            metrics, metric_key=accuracy_key, comparison_type='HIGHER')
        self.assertCountEqual([s.slice_key for s in result[0]],
                              [(('age', '[12.0, 18.0)'), ),
                               (('country', 'USA'), ),
                               (('country', 'USA'), ('age', '[12.0, 18.0)'))])
        self.assertCountEqual([s.slice_key for s in result[1]],
                              [(('age', '[1.0, 6.0)'), ),
                               (('age', '[6.0, 12.0)'), )])
Пример #8
0
  def testToComputations(self):
    """Binarized + macro averaged specs yield per-class and aggregate keys."""
    computations = metric_specs.to_computations(
        metric_specs.specs_from_metrics(
            {
                'output_name': [
                    tf.keras.metrics.MeanSquaredError('mse'),
                    calibration.MeanLabel('mean_label')
                ]
            },
            model_names=['model_name'],
            binarize=config.BinarizationOptions(class_ids={'values': [0, 1]}),
            aggregate=config.AggregationOptions(macro_average=True)),
        config.EvalConfig())

    # Underscore-prefixed keys are private intermediates; ignore them.
    keys = [
        k for m in computations for k in m.keys if not k.name.startswith('_')
    ]
    self.assertLen(keys, 8)
    self.assertIn(metric_types.MetricKey(name='example_count'), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='weighted_example_count',
            model_name='model_name',
            output_name='output_name'), keys)
    # Each metric gets one key per binarized class plus a macro averaged key.
    for metric_name in ('mse', 'mean_label'):
      for class_id in (0, 1):
        self.assertIn(
            metric_types.MetricKey(
                name=metric_name,
                model_name='model_name',
                output_name='output_name',
                sub_key=metric_types.SubKey(class_id=class_id)), keys)
      self.assertIn(
          metric_types.MetricKey(
              name=metric_name,
              model_name='model_name',
              output_name='output_name'), keys)
Пример #9
0
 def testSubKeyStr(self):
     """Checks the string rendering of each SubKey variant."""
     for sub_key, expected in (
         (metric_types.SubKey(class_id=1), 'classId:1'),
         (metric_types.SubKey(top_k=2), 'topK:2'),
         (metric_types.SubKey(k=3), 'k:3'),
     ):
         self.assertEqual(str(sub_key), expected)
     with self.assertRaises(
             NotImplementedError,
             msg=
         ('A non-existent SubKey should be represented as None, not as ',
          'SubKey(None, None, None).')):
         str(metric_types.SubKey())
Пример #10
0
 def testPlotKeyFromProto(self):
     """Round-trips plot keys through their proto representation."""
     keys_to_check = [
         metric_types.PlotKey(name=''),
         metric_types.PlotKey(name='',
                              model_name='model_name',
                              output_name='output_name',
                              sub_key=metric_types.SubKey(class_id=1)),
         metric_types.MetricKey(name='',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(top_k=2))
     ]
     for original in keys_to_check:
         round_tripped = metric_types.PlotKey.from_proto(original.to_proto())
         self.assertEqual(original, round_tripped,
                          '{} != {}'.format(original, round_tripped))
Пример #11
0
  def get_metrics_for_all_slices(
      self,
      output_name: Text = '',
      class_id: Optional[int] = None,
      k: Optional[int] = None,
      top_k: Optional[int] = None) -> Dict[Text, MetricsByTextKey]:
    """Get metric names and values for every slice.

    Args:
      output_name: The name of the output (optional, only used for multi-output
        models).
      class_id: Used with multi-class metrics to identify a specific class ID.
      k: Used with multi-class metrics to identify the kth predicted value.
      top_k: Used with multi-class and ranking metrics to identify top-k
        predicted values.

    Returns:
      Dictionary mapping slices to metric names and values.
    """

    # Use explicit None checks: 0 is a valid class_id (and k/top_k value) but
    # is falsy, so truthiness tests would incorrectly skip the sub-key and
    # look up the wrong metrics for class 0.
    if class_id is not None or k is not None or top_k is not None:
      sub_key = str(metric_types.SubKey(class_id, k, top_k))
    else:
      sub_key = ''

    sliced_metrics = {}
    for slicing_metric in self.slicing_metrics:
      slice_name = slicing_metric[0]
      metrics = slicing_metric[1][output_name][sub_key]
      # Shallow-copy so callers cannot mutate the stored metrics.
      sliced_metrics[slice_name] = dict(metrics)
    return sliced_metrics  # pytype: disable=bad-return-type
Пример #12
0
def _create_sub_keys(
        spec: config.MetricsSpec) -> Optional[List[metric_types.SubKey]]:
    """Creates subkeys associated with spec (None when no binarization set)."""
    if not spec.HasField('binarize'):
        return None
    binarize = spec.binarize
    sub_keys = [metric_types.SubKey(class_id=v) for v in binarize.class_ids]
    sub_keys += [metric_types.SubKey(k=v) for v in binarize.k_list]
    sub_keys += [metric_types.SubKey(top_k=v) for v in binarize.top_k_list]
    return sub_keys
            def check_result(got):
                """Checks the binary confusion matrices for the overall slice."""
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())
                    self.assertLen(metrics, 1)
                    matrices_key = metric_types.MetricKey(
                        name='_binary_confusion_matrices_[-inf]',
                        sub_key=metric_types.SubKey(top_k=3))
                    self.assertIn(matrices_key, metrics)
                    expected = binary_confusion_matrices.Matrices(
                        thresholds=[float('-inf')],
                        tp=[2.0],
                        fp=[10.0],
                        tn=[6.0],
                        fn=[2.0],
                        tp_examples=[],
                        tn_examples=[],
                        fp_examples=[],
                        fn_examples=[])
                    self.assertEqual(metrics[matrices_key], expected)

                except AssertionError as err:
                    raise util.BeamAssertException(err)
    def testTFMetricWithClassID(self):
        """Tests binarizing a Keras metric by class_id via sub_key.

        MSE is computed between the class-1 probability and the one-hot label
        indicator for class 1. Example weights are ignored because
        example_weighted=False.
        """
        computation = tf_metric_wrapper.tf_metric_computations(
            [tf.keras.metrics.MeanSquaredError(name='mse')],
            sub_key=metric_types.SubKey(class_id=1),
            example_weighted=False)[0]

        # Per-example class-1 values: label 0 vs pred 0.0, 0 vs 0.5,
        # 1 vs 0.3, 1 vs 0.9.
        example1 = {
            'labels': [2],
            'predictions': [0.5, 0.0, 0.5],
            'example_weights': [0.1]  # ignored, example_weighted=False
        }
        example2 = {
            'labels': [0],
            'predictions': [0.2, 0.5, 0.3],
            'example_weights': [0.2]  # ignored, example_weighted=False
        }
        example3 = {
            'labels': [1],
            'predictions': [0.2, 0.3, 0.5],
            'example_weights': [0.3]  # ignored, example_weighted=False
        }
        example4 = {
            'labels': [1],
            'predictions': [0.0, 0.9, 0.1],
            'example_weights': [0.4]  # ignored, example_weighted=False
        }

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [example1, example2, example3, example4])
                | 'Process' >> beam.Map(metric_util.to_standard_metric_inputs)
                | 'AddSlice' >> beam.Map(lambda x: ((), x))
                | 'Combine' >> beam.CombinePerKey(computation.combiner))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                # Expected MSE: (0^2 + 0.5^2 + 0.7^2 + 0.1^2) / 4 = 0.1875.
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    mse_key = metric_types.MetricKey(
                        name='mse',
                        sub_key=metric_types.SubKey(class_id=1),
                        example_weighted=False)
                    self.assertDictElementsAlmostEqual(got_metrics, {
                        mse_key: 0.1875,
                    })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
            def check_result(got):
                """Verifies the metric keyed by class_id on the overall slice."""
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())
                    # Fall back to the class_id stored in the metric's config
                    # when none was supplied by the test parameters.
                    effective_class_id = (
                        class_id if class_id else metric.get_config()['class_id'])
                    key = metric_types.MetricKey(
                        name=metric.name,
                        sub_key=metric_types.SubKey(class_id=effective_class_id),
                        example_weighted=True)
                    self.assertDictElementsAlmostEqual(metrics,
                                                       {key: expected_value},
                                                       places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)
Пример #16
0
 def testMetricKeyFromProto(self):
     """Round-trips metric keys through their proto representation."""
     keys_to_check = [
         metric_types.MetricKey(name='metric_name'),
         metric_types.MetricKey(name='metric_name',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(class_id=1),
                                is_diff=True),
         metric_types.MetricKey(
             name='metric_name',
             model_name='model_name',
             output_name='output_name',
             sub_key=metric_types.SubKey(top_k=2),
             aggregation_type=metric_types.AggregationType(
                 micro_average=True))
     ]
     for original in keys_to_check:
         round_tripped = metric_types.MetricKey.from_proto(original.to_proto())
         self.assertEqual(original, round_tripped,
                          '{} != {}'.format(original, round_tripped))
Пример #17
0
    def get_attributions_for_slice(
            self,
            slice_name: slicer.SliceKeyType = (),
            metric_name: str = '',
            output_name: str = '',
            class_id: Optional[int] = None,
            k: Optional[int] = None,
            top_k: Optional[int] = None
    ) -> Union[AttributionsByFeatureKey, None]:
        """Get attribution features names and values for a slice.

    Args:
      slice_name: A tuple of the form (column, value), indicating which slice to
        get attributions from. Optional; if excluded, use overall slice.
      metric_name: Name of metric to get attributions for. Optional if only one
        metric used.
      output_name: The name of the output. Optional, only used for multi-output
        models.
      class_id: Used with multi-class models to identify a specific class ID.
      k: Used with multi-class models to identify the kth predicted value.
      top_k: Used with multi-class models to identify top-k attribution values.

    Returns:
      Dictionary containing feature keys and values for the specified slice, or
      None if the slice could not be found.

    Raises:
      ValueError: If metric_name is required (multiple metrics present) but not
        provided.
    """

        # Use explicit None checks: 0 is a valid class_id (and k/top_k value)
        # but is falsy, so truthiness tests would silently drop the sub-key
        # and look up the wrong attributions for class 0.
        if class_id is not None or k is not None or top_k is not None:
            sub_key = str(metric_types.SubKey(class_id, k, top_k))
        else:
            sub_key = ''

        def equals_slice_name(slice_key):
            # The overall (empty) slice matches only an empty slice_name.
            if not slice_key:
                return not slice_name
            return slice_key == slice_name

        for sliced_attributions in self.attributions:
            slice_key = sliced_attributions[0]
            slice_val = sliced_attributions[1]
            if equals_slice_name(slice_key):
                if metric_name:
                    return slice_val[output_name][sub_key][metric_name]
                elif len(slice_val[output_name][sub_key]) == 1:
                    # Only one metric present; no name needed to disambiguate.
                    return list(slice_val[output_name][sub_key].values())[0]
                else:
                    raise ValueError(
                        'metric_name must be one of the following: {}'.format(
                            slice_val[output_name][sub_key].keys()))

        # if slice could not be found, return None
        return None
Пример #18
0
      def check_result(got):
        """Compares the top-k calibration histogram against expected buckets."""
        try:
          self.assertLen(got, 1)
          slice_key, plots = got[0]
          self.assertEqual(slice_key, ())
          self.assertLen(plots, 1)
          key = metric_types.PlotKey(
              name='_calibration_histogram_10000',
              sub_key=metric_types.SubKey(top_k=2),
              example_weighted=True)
          self.assertIn(key, plots)
          histogram = plots[key]
          # Non-top-k entries are mapped to -inf and land in bucket 0.
          expected_buckets = [
              calibration_histogram.Bucket(
                  bucket_id=0,
                  weighted_labels=3.0 + 4.0,
                  weighted_predictions=(2 * 1.0 * float('-inf') +
                                        2 * 2.0 * float('-inf') +
                                        2 * 3.0 * float('-inf') +
                                        2 * 4.0 * float('-inf') + -0.1 * 4.0),
                  weighted_examples=(1.0 * 2.0 + 2.0 * 2.0 + 3.0 * 2.0 +
                                     4.0 * 3.0)),
              calibration_histogram.Bucket(
                  bucket_id=2001,
                  weighted_labels=0.0 + 0.0,
                  weighted_predictions=0.2 + 3 * 0.2,
                  weighted_examples=1.0 + 3.0),
              calibration_histogram.Bucket(
                  bucket_id=5001,
                  weighted_labels=1.0 + 0.0 * 3.0,
                  weighted_predictions=0.5 * 1.0 + 0.5 * 3.0,
                  weighted_examples=1.0 + 3.0),
              calibration_histogram.Bucket(
                  bucket_id=8001,
                  weighted_labels=0.0 * 2.0 + 1.0 * 2.0,
                  weighted_predictions=0.8 * 2.0 + 0.8 * 2.0,
                  weighted_examples=2.0 + 2.0),
              calibration_histogram.Bucket(
                  bucket_id=10001,
                  weighted_labels=0.0 * 4.0,
                  weighted_predictions=1.1 * 4.0,
                  weighted_examples=4.0),
          ]
          self.assertLen(histogram, len(expected_buckets))
          for i, expected_bucket in enumerate(expected_buckets):
            self.assertEqual(histogram[i], expected_bucket)

        except AssertionError as err:
          raise util.BeamAssertException(err)
Пример #19
0
def _verify_and_update_sub_key(model_name: Text, output_name: Text,
                               sub_key: metric_types.SubKey,
                               metric: _TFMetricOrLoss):
  """Verifies the multi-class metric key matches settings used by the metric.

  Returns a sub-key derived from the metric's own class_id/top_k settings when
  present, otherwise the given sub-key unchanged.
  """

  def _raise_mismatch(setting, value):
    # A conflict between the metric's own binarization and the requested
    # sub-key would silently compute the wrong result, so fail loudly.
    raise ValueError(
        '{} tf.keras.metric has {} = {}, but the metric is being added '
        'using sub_key = {}: model_name={}, output_name={}'.format(
            metric.name, setting, value, sub_key, model_name, output_name))

  if hasattr(metric, _CLASS_ID_KEY) and metric.class_id is not None:
    if sub_key and sub_key.class_id != metric.class_id:
      _raise_mismatch('class_id', metric.class_id)
    return metric_types.SubKey(class_id=metric.class_id)
  if hasattr(metric, _TOP_K_KEY) and metric.top_k is not None:
    if sub_key and sub_key.top_k != metric.top_k:
      _raise_mismatch('top_k', metric.top_k)
    return metric_types.SubKey(top_k=metric.top_k)
  return sub_key
Пример #20
0
 def testMetricKeyStrForMetricKeyWithAllFields(self):
     """str() of a fully-populated MetricKey renders every field."""
     key = metric_types.MetricKey(name='metric_name',
                                  model_name='model_name',
                                  output_name='output_name',
                                  sub_key=metric_types.SubKey(class_id=1),
                                  is_diff=True)
     expected = ('name: "metric_name" output_name: "output_name" '
                 'sub_key: { class_id: { value: 1 } } model_name: "model_name" '
                 'is_diff: true')
     self.assertEqual(str(key), expected)
Пример #21
0
      def check_result(got):
        """Asserts the single expected top-k metric value on the overall slice."""
        try:
          self.assertLen(got, 1)
          slice_key, metrics = got[0]
          self.assertEqual(slice_key, ())
          metric_key = metric_types.MetricKey(
              name=metric_name, sub_key=metric_types.SubKey(top_k=top_k))
          self.assertDictElementsAlmostEqual(
              metrics, {metric_key: expected_value}, places=5)

        except AssertionError as err:
          raise util.BeamAssertException(err)
Пример #22
0
            def check_result(got):
                """Checks the binarized (class_id=1) MSE value."""
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())
                    expected = {
                        metric_types.MetricKey(
                            name='mse',
                            sub_key=metric_types.SubKey(class_id=1)): 0.1875,
                    }
                    self.assertDictElementsAlmostEqual(metrics, expected)

                except AssertionError as err:
                    raise util.BeamAssertException(err)
Пример #23
0
      def check_result(got):
        """Compares the k=2 calibration histogram against expected buckets."""
        try:
          self.assertLen(got, 1)
          slice_key, plots = got[0]
          self.assertEqual(slice_key, ())
          self.assertLen(plots, 1)
          key = metric_types.PlotKey(
              name='_calibration_histogram_10000',
              sub_key=metric_types.SubKey(k=2),
              example_weighted=True)
          self.assertIn(key, plots)
          histogram = plots[key]
          expected_buckets = [
              calibration_histogram.Bucket(
                  bucket_id=0,
                  weighted_labels=0.0 * 4.0,
                  weighted_predictions=-0.2 * 4.0,
                  weighted_examples=4.0),
              calibration_histogram.Bucket(
                  bucket_id=1001,
                  weighted_labels=1.0 + 7 * 1.0,
                  weighted_predictions=0.1 + 7 * 0.1,
                  weighted_examples=1.0 + 7.0),
              calibration_histogram.Bucket(
                  bucket_id=4001,
                  weighted_labels=1.0 * 3.0 + 0.0 * 5.0,
                  weighted_predictions=0.4 * 3.0 + 0.4 * 5.0,
                  weighted_examples=3.0 + 5.0),
              calibration_histogram.Bucket(
                  bucket_id=7001,
                  weighted_labels=0.0 * 2.0 + 0.0 * 6.0,
                  weighted_predictions=0.7 * 2.0 + 0.7 * 6.0,
                  weighted_examples=2.0 + 6.0),
              calibration_histogram.Bucket(
                  bucket_id=10001,
                  weighted_labels=0.0 * 8.0,
                  weighted_predictions=1.05 * 8.0,
                  weighted_examples=8.0),
          ]
          self.assertLen(histogram, len(expected_buckets))
          for i, expected_bucket in enumerate(expected_buckets):
            self.assertEqual(histogram[i], expected_bucket)

        except AssertionError as err:
          raise util.BeamAssertException(err)
    def testStandardMetricInputsWithClassIDToNumpy(self):
        """Binarizing by class_id=2 selects that class's label and prediction."""
        example = metric_types.StandardMetricInputs(
            label={'output_name': np.array([2])},
            prediction={'output_name': np.array([0, 0.5, 0.3, 0.9])},
            example_weight={'output_name': np.array([1.0])})
        iterator = metric_util.to_label_prediction_example_weight(
            example,
            output_name='output_name',
            sub_key=metric_types.SubKey(class_id=2))
        label, prediction, example_weight = next(iterator)

        self.assertAllClose(label, np.array([1.0]))
        self.assertAllClose(prediction, np.array([0.3]))
        self.assertAllClose(example_weight, np.array([1.0]))
Пример #25
0
    def testStandardMetricInputsWithTopKToNumpy(self):
        """Top-k binarization yields the top-k labels/predictions together."""
        example = metric_types.StandardMetricInputs(
            {'output_name': np.array([1])},
            {'output_name': np.array([0, 0.5, 0.3, 0.9])},
            {'output_name': np.array([1.0])})
        # to_label_prediction_example_weight returns an iterator of
        # (label, prediction, example_weight) tuples; take the first (and only)
        # entry with next() rather than unpacking the iterator itself, which
        # would raise ValueError (one yielded item, three targets).
        got_label, got_pred, got_example_weight = next(
            metric_util.to_label_prediction_example_weight(
                example,
                output_name='output_name',
                sub_key=metric_types.SubKey(top_k=2)))

        self.assertAllClose(got_label, np.array([0.0, 1.0]))
        self.assertAllClose(got_pred, np.array([0.9, 0.5]))
        self.assertAllClose(got_example_weight, np.array([1.0]))
    def testStandardMetricInputsWithTopKToNumpy(self):
        """Flattened top-k output yields one (label, prediction) pair per rank."""
        example = metric_types.StandardMetricInputs(
            label={'output_name': np.array([1])},
            prediction={'output_name': np.array([0, 0.5, 0.3, 0.9])},
            example_weight={'output_name': np.array([1.0])})
        iterable = metric_util.to_label_prediction_example_weight(
            example,
            output_name='output_name',
            sub_key=metric_types.SubKey(top_k=2))

        expected_pairs = ((0.0, 0.9), (1.0, 0.5))
        for expected_label, expected_prediction in expected_pairs:
            label, prediction, weight = next(iterable)
            self.assertAllClose(label, np.array([expected_label]))
            self.assertAllClose(prediction, np.array([expected_prediction]))
            self.assertAllClose(weight, np.array([1.0]))
Пример #27
0
def _ndcg(gain_key: str,
          top_k_list: Optional[List[int]] = None,
          name: str = NDCG_NAME,
          eval_config: Optional[config_pb2.EvalConfig] = None,
          model_names: Optional[List[str]] = None,
          output_names: Optional[List[str]] = None,
          sub_keys: Optional[List[metric_types.SubKey]] = None,
          example_weighted: bool = False,
          query_key: str = '') -> metric_types.MetricComputations:
  """Returns metric computations for NDCG.

  Args:
    gain_key: Feature key holding the per-example gain values.
    top_k_list: Optional top-k values; a SubKey is added for each value not
      already covered by sub_keys.
    name: Metric name.
    eval_config: Eval config.
    model_names: Optional model names (one computation set per name).
    output_names: Optional output names (one computation set per name).
    sub_keys: Optional sub-keys; every (non-None) entry must have top_k set.
    example_weighted: True if example weights should be applied.
    query_key: Feature key used to group examples into queries (required).

  Raises:
    ValueError: If query_key is missing, or if no top_k values are provided
      via sub_keys/top_k_list, or if a sub-key lacks top_k.
  """
  if not query_key:
    raise ValueError('a query_key is required to use NDCG metric')
  # Filter out placeholder (None) sub-keys. Guard against sub_keys being None
  # itself: previously this comprehension raised TypeError for callers that
  # only supplied top_k_list (the `if sub_keys is None` check below it was
  # unreachable dead code).
  sub_keys = [k for k in sub_keys if k is not None] if sub_keys else []
  if top_k_list:
    # Add a SubKey for each requested top_k not already present.
    for k in top_k_list:
      if not any(sub_key.top_k == k for sub_key in sub_keys):
        sub_keys.append(metric_types.SubKey(top_k=k))
  if not sub_keys or any(sub_key.top_k is None for sub_key in sub_keys):
    raise ValueError(
        'top_k values are required to use NDCG metric: {}'.format(sub_keys))
  computations = []
  # One computation per (model, output); '' is the single-model/output default.
  for model_name in model_names if model_names else ['']:
    for output_name in output_names if output_names else ['']:
      keys = [
          metric_types.MetricKey(
              name,
              model_name=model_name,
              output_name=output_name,
              sub_key=sub_key,
              example_weighted=example_weighted) for sub_key in sub_keys
      ]
      computations.append(
          metric_types.MetricComputation(
              keys=keys,
              preprocessor=metric_types.FeaturePreprocessor(
                  feature_keys=[query_key, gain_key]),
              combiner=_NDCGCombiner(
                  metric_keys=keys,
                  eval_config=eval_config,
                  model_name=model_name,
                  output_name=output_name,
                  example_weighted=example_weighted,
                  query_key=query_key,
                  gain_key=gain_key)))
  return computations
Пример #28
0
    def testStandardMetricInputsWithTopKAndAggregationTypeToNumpy(self):
        """Checks top_k=2 conversion under micro-average aggregation."""
        inputs = metric_types.StandardMetricInputs(
            labels={'output_name': np.array([1])},
            predictions={'output_name': np.array([0, 0.5, 0.3, 0.9])},
            example_weights={'output_name': np.array([1.0])})
        results = metric_util.to_label_prediction_example_weight(
            inputs,
            output_name='output_name',
            sub_key=metric_types.SubKey(top_k=2),
            aggregation_type=metric_types.AggregationType(micro_average=True))

        # Micro-averaging yields per-class entries: class 1 (label 1, pred
        # 0.5) first, then class 3 (label 0, pred 0.9).
        expected = ((1.0, 0.5), (0.0, 0.9))
        for want_label, want_pred in expected:
            got_label, got_pred, got_weight = next(results)
            self.assertAllClose(got_label, np.array([want_label]))
            self.assertAllClose(got_pred, np.array([want_pred]))
            self.assertAllClose(got_weight, np.array([1.0]))
Пример #29
0
    def testMacroAverage(self):
        """Macro average over three classes with uniform class weights."""
        metric_name = 'test'
        class_ids = [0, 1, 2]
        sub_keys = [metric_types.SubKey(class_id=i) for i in class_ids]
        sub_key_values = [0.1, 0.2, 0.3]
        computations = aggregation.macro_average(
            metric_name,
            sub_keys,
            eval_config=config_pb2.EvalConfig(),
            class_weights={i: 1.0 for i in class_ids})
        metric = computations[0]

        # Per-class input metrics keyed by (name, sub_key).
        sub_metrics = {
            metric_types.MetricKey(name=metric_name, sub_key=sub_key): value
            for sub_key, value in zip(sub_keys, sub_key_values)
        }

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create([((), sub_metrics)])
                      | 'ComputeMetric' >>
                      beam.Map(lambda x: (x[0], metric.result(x[1]))))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())
                    # Uniform weights reduce the macro average to a simple
                    # mean of the per-class values.
                    expected_value = sum(sub_key_values) / 3.0
                    self.assertDictElementsAlmostEqual(
                        metrics, {metric.keys[0]: expected_value}, places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Пример #30
0
            def check_result(got):
                """Asserts one overall slice whose metrics match expected_values."""
                try:
                    self.assertLen(got, 1)
                    slice_key, metrics = got[0]
                    self.assertEqual(slice_key, ())

                    def to_key(metric_name):
                        # Names of the form 'metric@k' carry a top_k sub-key.
                        if '@' in metric_name:
                            top_k = int(metric_name.split('@')[1])
                            return metric_types.MetricKey(
                                name=metric_name,
                                sub_key=metric_types.SubKey(top_k=top_k))
                        return metric_types.MetricKey(name=metric_name)

                    expected = {
                        to_key(metric_name): value
                        for metric_name, value in expected_values.items()
                    }
                    self.assertDictElementsAlmostEqual(metrics, expected)

                except AssertionError as err:
                    raise util.BeamAssertException(err)