Example #1
 def testGetByKeysWithPrefix(self):
     self.assertEqual({
         'all_classes': ['a', 'b'],
         'probabilities': [1]
     },
                      util.get_by_keys(
                          {
                              'predictions': {
                                  'output/all_classes': ['a', 'b'],
                                  'output/probabilities': [1],
                              },
                          }, ['predictions', 'output']))
     self.assertEqual({
         'all_classes': ['a', 'b'],
         'probabilities': [1]
     },
                      util.get_by_keys(
                          {
                              'predictions': {
                                  'model': {
                                      'output/all_classes': ['a', 'b'],
                                      'output/probabilities': [1],
                                  },
                              },
                          }, ['predictions', 'model', 'output']))
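The prefix matching exercised above ('output' resolving the 'output/...' keys and returning them with the prefix stripped) can be shown in standalone form. The sketch below is not TFMA's actual util.get_by_keys, only a minimal approximation of the lookup semantics these tests assert, including the optional/default_value behavior tested further below:

def get_by_keys_sketch(data, keys, default_value=None, optional=False):
    """Minimal approximation of util.get_by_keys' lookup semantics."""
    value = data
    for key in keys:
        if isinstance(value, dict) and key in value:
            value = value[key]
            continue
        if isinstance(value, dict):
            # Prefix matching: gather 'key/...' entries, stripping the prefix.
            prefix = key + '/'
            matches = {k[len(prefix):]: v
                       for k, v in value.items() if k.startswith(prefix)}
            if matches:
                value = matches
                continue
        if optional:
            return None
        if default_value is not None:
            return default_value
        raise ValueError('keys {} not found in {}'.format(keys, data))
    if isinstance(value, dict) and not value:
        # An empty dict also counts as "not found".
        if optional:
            return None
        if default_value is not None:
            return default_value
        raise ValueError('keys {} not found in {}'.format(keys, data))
    return value

print(get_by_keys_sketch(
    {'predictions': {'output/all_classes': ['a', 'b'],
                     'output/probabilities': [1]}},
    ['predictions', 'output']))
# {'all_classes': ['a', 'b'], 'probabilities': [1]}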
Example #2
    def testGetByKeysWithMultiLevel(self):
        self.assertEqual([1],
                         util.get_by_keys({'predictions': {
                             'output': [1]
                         }}, ['predictions', 'output']))

        self.assertEqual([1],
                         util.get_by_keys(
                             {'predictions': {
                                 'model': {
                                     'output': [1],
                                 },
                             }}, ['predictions', 'model', 'output']))
Example #3
 def _to_gains_example_weight(
     self,
     element: metric_types.StandardMetricInputs) -> Tuple[np.ndarray, float]:
   """Returns gains and example_weight sorted by prediction."""
   _, predictions, example_weight = next(
       metric_util.to_label_prediction_example_weight(
           element,
           eval_config=self._eval_config,
           model_name=self._model_name,
           output_name=self._output_name,
           flatten=False))  # pytype: disable=wrong-arg-types
   gains = util.get_by_keys(element.features, [self._gain_key])
   if gains.size != predictions.size:
     raise ValueError('expected {} to be the same size as predictions ({} != {}): '
                      'gains={}, metric_keys={}, '
                      'StandardMetricInputs={}'.format(self._gain_key,
                                                       gains.size,
                                                       predictions.size, gains,
                                                       self._metric_keys,
                                                       element))
   gains = gains.reshape(predictions.shape)
   # If no gain is positive, zero out the example weight so the example is
   # ignored.
   if gains.max() <= 0:
     example_weight = 0.0
   return (gains[np.argsort(predictions)[::-1]], float(example_weight))
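The return statement orders the gains by descending prediction score. A quick standalone check of that numpy idiom, with hypothetical values:

import numpy as np

predictions = np.array([0.2, 0.9, 0.5])
gains = np.array([1.0, 3.0, 2.0])
# np.argsort sorts ascending; [::-1] flips to descending prediction order,
# so gains come out ranked best-scored first.
print(gains[np.argsort(predictions)[::-1]])  # [3. 2. 1.]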
Example #4
 def add_input(
     self, accumulator: _MinLabelPositionAccumulator,
     element: metric_types.StandardMetricInputs
 ) -> _MinLabelPositionAccumulator:
     labels, predictions, example_weight = next(
         metric_util.to_label_prediction_example_weight(
             element,
             eval_config=self._eval_config,
             model_name=self._key.model_name,
             output_name=self._key.output_name,
             flatten=False,
             allow_none=True))  # pytype: disable=wrong-arg-types
     if self._label_key:
         labels = util.get_by_keys(element.features, [self._label_key])
     if labels is not None:
         min_label_pos = None
         for i, l in enumerate(labels[np.argsort(predictions)[::-1]]):
             if np.sum(l) > 0:
                 min_label_pos = i + 1  # Use 1-indexed positions
                 break
         if min_label_pos:
             accumulator.total_min_position += min_label_pos * float(
                 example_weight)
             accumulator.total_weighted_examples += float(example_weight)
     return accumulator
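The loop above computes the 1-indexed rank of the first positive label once examples are sorted by descending prediction. The same computation in isolation, with hypothetical inputs:

import numpy as np

labels = np.array([0, 0, 1, 0])
predictions = np.array([0.1, 0.8, 0.6, 0.3])
min_label_pos = None
for i, l in enumerate(labels[np.argsort(predictions)[::-1]]):
    if np.sum(l) > 0:
        min_label_pos = i + 1  # 1-indexed position
        break
print(min_label_pos)  # 2: the positive label has the 2nd highest prediction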
Example #5
 def add_input(self, accumulator: float,
               element: metric_types.StandardMetricInputs) -> float:
   example_weight = element.example_weight or np.array(1.0)
   if isinstance(example_weight, dict) and self._key.model_name:
     value = util.get_by_keys(
         example_weight, [self._key.model_name], optional=True)
     if value is not None:
       example_weight = value
   if isinstance(example_weight, dict) and self._key.output_name:
     example_weight = util.get_by_keys(example_weight, [self._key.output_name],
                                       np.array(1.0))
   if isinstance(example_weight, dict):
     raise ValueError(
         'weighted_example_count cannot be calculated on a dict: {} = {}.\n\n'
         'This is most likely a configuration error (for multi-output models'
         'a separate metric is needed for each output).'.format(
             self._key, example_weight))
   return accumulator + np.sum(example_weight)
Example #6
 def _query(
         self, element: metric_types.StandardMetricInputs
 ) -> Union[float, int, Text]:
     query = util.get_by_keys(element.features, [self._query_key]).flatten()
     if query.size == 0 or not np.all(query == query[0]):
         raise ValueError(
             'missing query value or not all values are the same: value={}, '
             'metric_keys={}, StandardMetricInputs={}'.format(
                 query, self._metric_keys, element))
     return query[0]
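The guard requires every value stored under the query key to be identical within a group, since a query feature should be replicated across all examples of one query. The numpy check on its own, with hypothetical arrays:

import numpy as np

query = np.array([['q1'], ['q1'], ['q1']]).flatten()
assert query.size > 0 and np.all(query == query[0])
print(query[0])  # q1

mixed = np.array(['q1', 'q2'])
print(np.all(mixed == mixed[0]))  # False -> _query would raise ValueError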
Example #7
 def key_by_query_key(extracts: types.Extracts,
                      query_key: Text) -> Tuple[Text, types.Extracts]:
     """Extract the query key from the extract and key by that."""
     value = metric_util.to_scalar(util.get_by_keys(
         extracts, [constants.FEATURES_KEY, query_key], optional=True),
                                   tensor_name=query_key)
     if value is None:
         missing_query_key_counter.inc()
         return ('', extracts)
     return ('{}'.format(value), extracts)
Example #8
 def _gain(self, i: metric_types.StandardMetricInputs) -> float:
   gain = util.get_by_keys(i.features, [self._gain_key])
   if gain.size == 1:
     scalar = metric_util.to_scalar(gain)
     if scalar is not None:
       return scalar
   raise ValueError('expected {} to be scalar, but instead it has size = {}: '
                    'value={}, metric_keys={}, '
                    'StandardMetricInputs={}'.format(self._gain_key, gain.size,
                                                     gain, self._metric_keys,
                                                     i))
Example #9
 def add_input(self, accumulator: Dict[Text, List[float]],
               attributions: Dict[Text, Any]) -> Dict[Text, List[float]]:
   if self._key.model_name:
     attributions = util.get_by_keys(attributions, [self._key.model_name])
   if self._key.output_name:
     attributions = util.get_by_keys(attributions, [self._key.output_name])
   for k, v in attributions.items():
     v = metric_util.to_numpy(v)
     if self._key.sub_key is not None:
       if self._key.sub_key.class_id is not None:
         v = _scores_by_class_id(self._key.sub_key.class_id, v)
       elif self._key.sub_key.k is not None:
         v = _scores_by_top_k(self._key.sub_key.k, v)
         v = np.array(v[self._key.sub_key.k - 1])
       elif self._key.sub_key.top_k is not None:
         v = _scores_by_top_k(self._key.sub_key.top_k, v)
     if k not in accumulator:
       accumulator[k] = [0.0] * v.size
     self._sum(accumulator[k], v)
   return accumulator
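The accumulator keeps one running list of sums per attribution key, sized to the (possibly sub-keyed) score array. Below is a standalone sketch of that accumulation pattern; the _sum helper here is a hypothetical stand-in for the class's own:

import numpy as np

def _sum(running, values):
    # Hypothetical stand-in for the metric's _sum: elementwise accumulation.
    for i, v in enumerate(values.flatten()):
        running[i] += float(v)

accumulator = {}
for attributions in ({'age': np.array([0.1, 0.2])},
                     {'age': np.array([0.3, 0.4])}):
    for k, v in attributions.items():
        if k not in accumulator:
            accumulator[k] = [0.0] * v.size
        _sum(accumulator[k], v)
print(accumulator)  # {'age': [0.4, 0.6000000000000001]}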
Example #10
 def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
   if isinstance(value, dict):
     new_value = util.get_by_keys(value, keys, optional=True)
     if new_value is not None:
       return new_value
   return value
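This helper only descends when it actually receives a dict and the key resolves; anything else passes through unchanged, which is what lets labels and example weights be optionally keyed by model or output name. Illustrative calls (relying on util.get_by_keys returning None for a missing key when optional=True, as the tests further below show):

optionally_get_by_keys({'my_model': [1.0]}, ['my_model'])  # -> [1.0]
optionally_get_by_keys([1.0], ['my_model'])                # -> [1.0] (not a dict)
optionally_get_by_keys({'other': [1.0]}, ['my_model'])     # -> {'other': [1.0]}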
Example #11
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    allow_none: bool = False,
    array_size: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Returns label, prediction, and example weight for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively.

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.
    array_size: Verifies the prediction and labels are of the given size. If
      both array_size and sub_key.top_k is set, then array_size will be ignored
      and the size will be verified based on the top_k setting. The example
      weight will always be size 1.
  """
    def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
        if isinstance(value, dict):
            new_value = util.get_by_keys(value, keys, optional=True)
            if new_value is not None:
                return new_value
        return value

    label = inputs.label
    prediction = inputs.prediction
    example_weight = inputs.example_weight
    if example_weight is None:
        example_weight = np.array(1.0)
    if model_name:
        prediction = util.get_by_keys(prediction, [model_name])
        # Labels and weights can optionally be keyed by model name.
        label = optionally_get_by_keys(label, [model_name])
        example_weight = optionally_get_by_keys(example_weight, [model_name])
    if output_name:
        prediction = util.get_by_keys(prediction, [output_name])
        # Labels and example weights can optionally be keyed by output name.
        label = optionally_get_by_keys(label, [output_name])
        example_weight = optionally_get_by_keys(example_weight, [output_name])
    prediction_key = ''
    if eval_config and eval_config.model_specs:
        for spec in eval_config.model_specs:
            if spec.name == model_name:
                prediction_key = spec.prediction_key
                break
    label, prediction = prepare_labels_and_predictions(label, prediction,
                                                       prediction_key)

    if sub_key is not None:
        if sub_key.class_id is not None:
            label, prediction = select_class_id(sub_key.class_id, label,
                                                prediction)
        elif sub_key.k is not None:
            label, prediction = select_top_k(sub_key.k, label, prediction)
            label = np.array([label[sub_key.k - 1]])
            prediction = np.array([prediction[sub_key.k - 1]])
        elif sub_key.top_k is not None:
            label, prediction = select_top_k(sub_key.top_k, label, prediction)

    example_weight = to_numpy(example_weight)

    if not allow_none:
        for txt, value in zip(('label', 'prediction', 'example_weight'),
                              (label, prediction, example_weight)):
            if value is None:
                raise ValueError(
                    'no value provided for {}: model_name={}, output_name={}, '
                    'sub_key={}, StandardMetricInputs={}\n\n'
                    'This may be caused by a configuration error (i.e. label, '
                    'prediction, and/or example weight keys were not specified) or an '
                    'error in the pipeline.'.format(txt, model_name,
                                                    output_name, sub_key,
                                                    inputs))

    for txt, value in zip(('label', 'prediction', 'example_weight'),
                          (label, prediction, example_weight)):
        if value is None:
            continue
        if txt == 'example_weight':
            size = 1
        elif array_size is None:
            continue
        elif sub_key and sub_key.top_k is not None:
            size = sub_key.top_k
        else:
            size = array_size
        if value.size != size:
            raise ValueError(
                'expected {} to be size = {}, but instead it has size = {}: '
                '{}={}, model_name={}, output_name={}, sub_key={}, '
                'StandardMetricInputs={}\n\nThis is most likely a configuration '
                'error (for multi-class models using binary classification '
                'metrics, a sub_key must be set).'.format(
                    txt, size, value.size, txt, value, model_name, output_name,
                    sub_key, inputs))

    # For consistency, make sure all outputs are arrays (i.e. convert scalars)
    if label is not None and not label.shape:
        label = label.reshape((1, ))
    if prediction is not None and not prediction.shape:
        prediction = prediction.reshape((1, ))
    if example_weight is not None and not example_weight.shape:
        example_weight = example_weight.reshape((1, ))
    return label, prediction, example_weight
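For sub_key.k the function keeps only the k-th ranked entry after calling select_top_k. select_top_k is a TFMA helper; the sketch below assumes it sorts label/prediction by descending prediction, which is what makes the [sub_key.k - 1] indexing above select the k-th largest:

import numpy as np

def select_kth(k, label, prediction):
    order = np.argsort(prediction)[::-1]  # descending prediction
    label, prediction = label[order], prediction[order]
    return np.array([label[k - 1]]), np.array([prediction[k - 1]])

label = np.array([0, 1, 0])
prediction = np.array([0.3, 0.6, 0.1])
print(select_kth(2, label, prediction))  # (array([0]), array([0.3]))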
Example #12
 def testGetByKeysMissingAndNonOptional(self):
     with self.assertRaisesRegexp(ValueError, 'not found'):
         util.get_by_keys({}, ['labels'])
     with self.assertRaisesRegexp(ValueError, 'not found'):
         util.get_by_keys({'labels': {}}, ['labels'])
Example #13
 def testGetByKeysMissingAndOptional(self):
     self.assertIsNone(util.get_by_keys({}, ['labels'], optional=True))
     self.assertIsNone(
         util.get_by_keys({'labels': {}}, ['labels'], optional=True))
Example #14
 def testGetByKeysMissingAndDefault(self):
     self.assertEqual('a',
                      util.get_by_keys({}, ['labels'], default_value='a'))
     self.assertEqual(
         'a', util.get_by_keys({'labels': {}}, ['labels'],
                               default_value='a'))
Example #15
 def testGetByKeys(self):
     self.assertEqual([1], util.get_by_keys({'labels': [1]}, ['labels']))
Example #16
 def testGetByKeysMissingSecondaryKey(self):
     with self.assertRaisesRegexp(ValueError, 'not found'):
         util.get_by_keys({'predictions': {
             'missing': [1]
         }}, ['predictions', 'output'])
Example #17
 def _query(
     self, i: metric_types.StandardMetricInputs
 ) -> Optional[Union[float, int, Text]]:
   return metric_util.to_scalar(
       util.get_by_keys(i.features, [self._query_key]))
Example #18
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    allow_none: bool = False,
) -> Iterable[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Yields label, prediction, and example weights for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively.

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    class_weights: Optional class weights to apply to multi-class / multi-label
      labels and predictions.
    flatten: True to flatten the final label and prediction outputs so that the
      yielded values are always arrays of size 1. For example, multi-class /
      multi-label outputs would be converted into label and prediction pairs
      that could then be processed by a binary classification metric in order to
      compute a micro average over all classes.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.
  """
    def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
        if isinstance(value, dict):
            new_value = util.get_by_keys(value, keys, optional=True)
            if new_value is not None:
                return new_value
        return value

    label = inputs.label
    prediction = inputs.prediction
    example_weight = inputs.example_weight
    if example_weight is None:
        example_weight = np.array(1.0)
    if model_name:
        prediction = util.get_by_keys(prediction, [model_name])
        # Labels and weights can optionally be keyed by model name.
        label = optionally_get_by_keys(label, [model_name])
        example_weight = optionally_get_by_keys(example_weight, [model_name])
    if output_name:
        prediction = util.get_by_keys(prediction, [output_name])
        # Labels and example weights can optionally be keyed by output name.
        label = optionally_get_by_keys(label, [output_name])
        example_weight = optionally_get_by_keys(example_weight, [output_name])
    prediction_key = ''
    if eval_config and eval_config.model_specs:
        for spec in eval_config.model_specs:
            if spec.name == model_name:
                prediction_key = spec.prediction_key
                break
    label, prediction = prepare_labels_and_predictions(label, prediction,
                                                       prediction_key)

    if sub_key is not None:
        if sub_key.class_id is not None:
            label, prediction = select_class_id(sub_key.class_id, label,
                                                prediction)
        elif sub_key.k is not None:
            label, prediction = select_top_k(sub_key.k, label, prediction)
            label = np.array([label[sub_key.k - 1]])
            prediction = np.array([prediction[sub_key.k - 1]])
        elif sub_key.top_k is not None:
            label, prediction = select_top_k(sub_key.top_k, label, prediction)

    example_weight = to_numpy(example_weight)

    if not allow_none:
        for txt, value in zip(('label', 'prediction', 'example_weight'),
                              (label, prediction, example_weight)):
            if value is None:
                raise ValueError(
                    'no value provided for {}: model_name={}, output_name={}, '
                    'sub_key={}, StandardMetricInputs={}\n\n'
                    'This may be caused by a configuration error (i.e. label, '
                    'prediction, and/or example weight keys were not specified) or an '
                    'error in the pipeline.'.format(txt, model_name,
                                                    output_name, sub_key,
                                                    inputs))

    if example_weight is not None and example_weight.size != 1:
        raise ValueError(
            'expected example weight to be size = 1, but instead it has '
            'size = {}: example_weight={}, model_name={}, output_name={}, '
            'sub_key={}, StandardMetricInputs={}\n\nThis is most likely a '
            'configuration error.'.format(example_weight.size, example_weight,
                                          model_name, output_name, sub_key,
                                          inputs))

    # For consistency, make sure all outputs are arrays (i.e. convert scalars)
    if label is not None and not label.shape:
        label = label.reshape((1, ))
    if prediction is not None and not prediction.shape:
        prediction = prediction.reshape((1, ))
    if example_weight is not None and not example_weight.shape:
        example_weight = example_weight.reshape((1, ))

    if class_weights and prediction is not None:
        multiplier = [
            class_weights[i] if i in class_weights else 1.0
            for i in range(prediction.shape[-1])
        ]
        prediction = np.multiply(prediction, multiplier)
        if label is not None:
            if label.shape[-1] == 1:
                label = one_hot(label, prediction)
            label = np.multiply(label, multiplier)

    if (not flatten or (label is None and prediction is None)
            or (label is not None and prediction is not None
                and label.size == 1 and prediction.size == 1)):
        yield label, prediction, example_weight
    elif label is None:
        for p in prediction.flatten():
            yield label, np.array([p]), example_weight
    elif prediction is None:
        for l in label.flatten():
            yield np.array([l]), prediction, example_weight
    elif label.size == prediction.size:
        for l, p in zip(label.flatten(), prediction.flatten()):
            yield np.array([l]), np.array([p]), example_weight
    elif label.shape[-1] == 1:
        label = one_hot(label, prediction)
        for l, p in zip(label.flatten(), prediction.flatten()):
            yield np.array([l]), np.array([p]), example_weight
    else:
        raise ValueError(
            'unable to pair labels with predictions: labels={}, predictions={}, '
            'model_name={}, output_name={}, sub_key={}, StandardMetricInputs={} '
            '\n\nThis is most likely a configuration error.'.format(
                label, prediction, model_name, output_name, sub_key, inputs))
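The flatten branches at the end turn a multi-class label/prediction pair into per-class size-1 pairs (the micro-averaging form), one-hot encoding sparse labels first. A minimal standalone version of that pairing, with a simple stand-in for TFMA's one_hot:

import numpy as np

def one_hot_sketch(label, prediction):
    # Stand-in for TFMA's one_hot: expand a sparse class id into dense 0/1s.
    return np.eye(prediction.size)[int(label[0])]

def flatten_pairs(label, prediction, example_weight):
    if label.size != prediction.size:
        # Sparse label: expand via one-hot (other mismatches not handled here).
        label = one_hot_sketch(label, prediction)
    for l, p in zip(label.flatten(), prediction.flatten()):
        yield np.array([l]), np.array([p]), example_weight

for t in flatten_pairs(np.array([2]), np.array([0.3, 0.6, 0.1]),
                       np.array([1.0])):
    print(t)
# (array([0.]), array([0.3]), array([1.])),
# (array([0.]), array([0.6]), array([1.])),
# (array([1.]), array([0.1]), array([1.]))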
Example #19
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    aggregation_type: Optional[metric_types.AggregationType] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    squeeze: bool = True,
    allow_none: bool = False,
) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
  """Yields label, prediction, and example weights for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used). The sparseness of labels will be inferred from the shapes of the labels
  and predictions (i.e. if the shapes are different then the labels will be
  assumed to be sparse).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively. Labels and predictions will be returned in the same shape
  provided (default behavior) unless (1) flatten is True in which case a series
  of values (one per class ID) will be returned with last dimension of size 1 or
  (2) a sub_key is used in which case the last dimension may be re-shaped to
  match the new number of outputs (1 for class_id or k, top_k for top k with
  aggregation).

  Note that for top_k without aggregation, the non-top_k prediction values will
  be set to float('-inf'), but for top_k with aggregation the values will be
  truncated to only return the top k values.

  Examples:

    # default behavior
    #
    # Binary classification
    Input  : labels=[1] predictions=[0.6]
    Output : (np.array([1]), np.array([0.6]), np.array([1.0]))
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([2]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))
    # Multi-class / multi-label classification w/ dense labels
    Input  : labels=[0, 1, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1, 1]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))

    # flatten=True
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class/multi-label classification w/ dense labels
    Input  : labels=[0, 0, 1], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.class_id=[2]
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.top_k=2 and aggregation_type is None (i.e. binarization of top 2).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 0, 1]), np.array([0.3, 0.6, -inf]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
    Output : (np.array([0, 0, 1]), np.array([0.3, -inf, 0.6]), np.array([1.0]))

    # sub_key.top_k=2 and aggregation_type is not None (i.e. aggregate top 2).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1]), np.array([0.3, 0.6]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
    Output : (np.array([0, 0]), np.array([0.3, 0.6]), np.array([1.0]))

    # sub_key.k=2 (i.e. binarization by choosing 2nd largest predicted value).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[0] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.3]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0] predictions=[0.3]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0]))

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    aggregation_type: Optional aggregation type.
    class_weights: Optional class weights to apply to multi-class / multi-label
      labels and predictions. If used, flatten must also be True.
    flatten: True to flatten the final label and prediction outputs so that the
      yielded values are always arrays of size 1. For example, multi-class /
      multi-label outputs would be converted into label and prediction pairs
      that could then be processed by a binary classification metric in order to
      compute a micro average over all classes.
    squeeze: True to squeeze any outputs that have rank > 1. This transforms
      outputs such as np.array([[1]]) to np.array([1]).
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.

  Yields:
    Tuple of (label, prediction, example_weight).
  """

  def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
    if isinstance(value, dict):
      new_value = util.get_by_keys(value, keys, optional=True)
      if new_value is not None:
        return new_value
    return value

  prediction_key = ''
  label_key = ''
  if eval_config and eval_config.model_specs:
    for spec in eval_config.model_specs:
      # To maintain consistency between settings where single models are used,
      # always use '' as the model name regardless of whether a name is passed.
      spec_name = spec.name if len(eval_config.model_specs) > 1 else ''
      if spec_name == model_name:
        prediction_key = spec.prediction_key
        label_key = spec.label_key
        break

  label = inputs.label
  if label_key:
    # This is to support a custom EvalSavedModel where the labels are a dict
    # but the keys are not output_names.
    label = optionally_get_by_keys(label, [label_key])
  prediction = inputs.prediction
  example_weight = inputs.example_weight
  if example_weight is None:
    example_weight = np.array(1.0, dtype=np.float32)  # tf-ranking needs float32
  if model_name:
    prediction = util.get_by_keys(prediction, [model_name])
    # Labels and weights can optionally be keyed by model name.
    label = optionally_get_by_keys(label, [model_name])
    example_weight = optionally_get_by_keys(example_weight, [model_name])
  if output_name:
    prediction = util.get_by_keys(prediction, [output_name])
    # Labels and example weights can optionally be keyed by output name.
    label = optionally_get_by_keys(label, [output_name])
    example_weight = optionally_get_by_keys(example_weight, [output_name])
  label, prediction = prepare_labels_and_predictions(label, prediction,
                                                     prediction_key)

  if not allow_none:
    for txt, value in zip(('label', 'prediction'), (label, prediction)):
      if value is None:
        raise ValueError(
            'no value provided for {}: model_name={}, output_name={}, '
            'sub_key={}, aggregation_type={}, StandardMetricInputs={}\n\n'
            'This may be caused by a configuration error (i.e. label, '
            'and/or prediction keys were not specified) or an '
            'error in the pipeline.'.format(txt, model_name, output_name,
                                            sub_key, aggregation_type, inputs))

  example_weight = to_numpy(example_weight)

  # Query based metrics group by a query_id which will result in the
  # example_weight being replicated once for each matching example in the group.
  # When this happens convert the example_weight back to a single value.
  if example_weight.size > 1:
    example_weight = example_weight.flatten()
    if not np.all(example_weight == example_weight[0]):
      raise ValueError(
          'if example_weight size > 1, the values must all be the same: '
          'example_weight={} model_name={}, output_name={}, '
          'sub_key={}, aggregation_type={}, StandardMetricInputs={}'
          '\n\nThis is most likely a configuration error.'.format(
              example_weight, model_name, output_name, sub_key,
              aggregation_type, inputs))
    example_weight = np.array(example_weight[0])

  if sub_key is not None:
    if sub_key.class_id is not None:
      label, prediction = select_class_id(sub_key.class_id, label, prediction)
    elif sub_key.k is not None:
      indices = top_k_indices(sub_key.k, prediction)
      if len(prediction.shape) == 1:
        indices = indices[0]  # 1D
      else:
        # 2D, take kth values
        indices = (indices[0][0::sub_key.k], indices[1][0::sub_key.k])
      if label.shape != prediction.shape:
        label = one_hot(label, prediction)
      label = select_indices(label, indices)
      prediction = select_indices(prediction, indices)
    elif sub_key.top_k is not None:
      # Set all non-top-k predictions to -inf. Note that we do not sort.
      indices = top_k_indices(sub_key.top_k, prediction)
      if aggregation_type is None:
        top_k_predictions = np.full(prediction.shape, float('-inf'))
        top_k_predictions[indices] = prediction[indices]
        prediction = top_k_predictions
      else:
        if label.shape != prediction.shape:
          label = one_hot(label, prediction)
        label = select_indices(label, indices)
        prediction = select_indices(prediction, indices)

  # For consistency, make sure all outputs are arrays (i.e. convert scalars)
  if label is not None and not label.shape:
    label = label.reshape((1,))
  if prediction is not None and not prediction.shape:
    prediction = prediction.reshape((1,))
  if example_weight is not None and not example_weight.shape:
    example_weight = example_weight.reshape((1,))

  if class_weights:
    if not flatten:
      raise ValueError(
          'class_weights can only be used when flatten is also used. This is '
          'likely caused by a configuration error (i.e. micro averaging being '
          "applied to metrics that don't support micro averaging): "
          'class_weights={}, flatten={}, StandardMetricInputs={}'.format(
              class_weights, flatten, inputs))
    example_weight = np.array([
        (float(example_weight) * class_weights[i])
        if i in class_weights else 0.0
        for i in range(prediction.shape[-1] or label.shape[-1])
    ])
  elif flatten:
    example_weight = np.array([
        float(example_weight)
        for i in range(prediction.shape[-1] or label.shape[-1])
    ])

  if (not flatten or (label is None and prediction is None) or
      (label is not None and prediction is not None and label.size == 1 and
       prediction.size == 1)):
    if squeeze:
      yield _squeeze(label), _squeeze(prediction), _squeeze(example_weight)
    else:
      yield label, prediction, example_weight
  elif label is None:
    for p, w in zip(prediction.flatten(), example_weight.flatten()):
      yield label, np.array([p]), np.array([w])
  elif prediction is None:
    for l, w in zip(label.flatten(), example_weight.flatten()):
      yield np.array([l]), prediction, np.array([w])
  elif label.size == prediction.size:
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  elif label.shape[-1] == 1:
    label = one_hot(label, prediction)
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  else:
    raise ValueError(
        'unable to pair labels with predictions: labels={}, predictions={}, '
        'model_name={}, output_name={}, sub_key={}, aggregation_type={}, '
        'StandardMetricInputs={}\n\nThis is most likely a configuration '
        'error.'.format(label, prediction, model_name, output_name, sub_key,
                        aggregation_type, inputs))
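The top_k-without-aggregation path keeps the original prediction order and masks everything outside the top k with float('-inf'), exactly as the docstring examples show. A standalone numpy reproduction of that masking (top_k_indices is a TFMA helper; np.argpartition is assumed equivalent for the 1D case):

import numpy as np

def top_k_mask(k, prediction):
    indices = np.argpartition(prediction, -k)[-k:]  # indices of the k largest
    masked = np.full(prediction.shape, float('-inf'))
    masked[indices] = prediction[indices]
    return masked

print(top_k_mask(2, np.array([0.3, 0.6, 0.1])))  # [ 0.3  0.6 -inf]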
Example #20
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    allow_none: bool = False,
) -> Iterable[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Yields label, prediction, and example weights for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used). The sparseness of labels will be inferred from the shapes of the labels
  and predictions (i.e. if the shapes are different then the labels will be
  assumed to be sparse).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively. Labels and predictions will be returned in the same shape
  provided (default behavior) unless (1) flatten is True in which case a series
  of values (one per class ID) will be returned with last dimension of size 1 or
  (2) a sub_key is used in which case the last dimension will be re-shaped to
  match the number of outputs selected (1 or top_k). Examples:

    # default behavior
    #
    # Binary classification
    Input  : labels=[1] predictions=[0.6]
    Output : (np.array([1]), np.array([0.6]), np.array([1.0]))
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([2]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))
    # Multi-class / multi-label classification w/ dense labels
    Input  : labels=[0, 1, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1, 1]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))

    # flatten=True
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class/multi-label classification w/ dense labels
    Input  : labels=[0, 0, 1], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.class_id=[2]
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    class_weights: Optional class weights to apply to multi-class / multi-label
      labels and predictions. If used, flatten must also be True.
    flatten: True to flatten the final label and prediction outputs so that the
      yielded values are always arrays of size 1. For example, multi-class /
      multi-label outputs would be converted into label and prediction pairs
      that could then be processed by a binary classification metric in order to
      compute a micro average over all classes.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.

  Yields:
    Tuple of (label, prediction, example_weight).
  """
    def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
        if isinstance(value, dict):
            new_value = util.get_by_keys(value, keys, optional=True)
            if new_value is not None:
                return new_value
        return value

    label = inputs.label
    prediction = inputs.prediction
    example_weight = inputs.example_weight
    if example_weight is None:
        example_weight = np.array(1.0)
    if model_name:
        prediction = util.get_by_keys(prediction, [model_name])
        # Labels and weights can optionally be keyed by model name.
        label = optionally_get_by_keys(label, [model_name])
        example_weight = optionally_get_by_keys(example_weight, [model_name])
    if output_name:
        prediction = util.get_by_keys(prediction, [output_name])
        # Labels and example weights can optionally be keyed by output name.
        label = optionally_get_by_keys(label, [output_name])
        example_weight = optionally_get_by_keys(example_weight, [output_name])
    prediction_key = ''
    if eval_config and eval_config.model_specs:
        for spec in eval_config.model_specs:
            if spec.name == model_name:
                prediction_key = spec.prediction_key
                break
    label, prediction = prepare_labels_and_predictions(label, prediction,
                                                       prediction_key)

    if not allow_none:
        for txt, value in zip(('label', 'prediction'), (label, prediction)):
            if value is None:
                raise ValueError(
                    'no value provided for {}: model_name={}, output_name={}, '
                    'sub_key={}, StandardMetricInputs={}\n\n'
                    'This may be caused by a configuration error (i.e. label, '
                    'and/or prediction keys were not specified) or an '
                    'error in the pipeline.'.format(txt, model_name,
                                                    output_name, sub_key,
                                                    inputs))

    example_weight = to_numpy(example_weight)

    if example_weight.size != 1:
        raise ValueError(
            'expected example weight to be size = 1, but instead it has '
            'size = {}: example_weight={}, model_name={}, output_name={}, '
            'sub_key={}, StandardMetricInputs={}\n\nThis is most likely a '
            'configuration error.'.format(example_weight.size, example_weight,
                                          model_name, output_name, sub_key,
                                          inputs))

    if sub_key is not None:
        if sub_key.class_id is not None:
            label, prediction = select_class_id(sub_key.class_id, label,
                                                prediction)
        elif sub_key.k is not None:
            label, prediction = select_top_k(sub_key.k, label, prediction)
            label = np.array([label[sub_key.k - 1]])
            prediction = np.array([prediction[sub_key.k - 1]])
        elif sub_key.top_k is not None:
            label, prediction = select_top_k(sub_key.top_k, label, prediction)

    # For consistency, make sure all outputs are arrays (i.e. convert scalars)
    if label is not None and not label.shape:
        label = label.reshape((1, ))
    if prediction is not None and not prediction.shape:
        prediction = prediction.reshape((1, ))
    if example_weight is not None and not example_weight.shape:
        example_weight = example_weight.reshape((1, ))

    if class_weights:
        if not flatten:
            raise ValueError(
                'class_weights can only be used when flatten is also used. This is '
                'likely caused by a configuration error (i.e. micro averaging being '
                "applied to metrics that don't support micro averaging): "
                'class_weights={}, flatten={}, StandardMetricInputs={}'.format(
                    class_weights, flatten, inputs))
        example_weight = np.array([
            (float(example_weight) * class_weights[i])
            if i in class_weights else 1.0
            for i in range(prediction.shape[-1] or label.shape[-1])
        ])
    elif flatten:
        example_weight = np.array([
            float(example_weight)
            for i in range(prediction.shape[-1] or label.shape[-1])
        ])

    if (not flatten or (label is None and prediction is None)
            or (label is not None and prediction is not None
                and label.size == 1 and prediction.size == 1)):
        yield label, prediction, example_weight
    elif label is None:
        for p, w in zip(prediction.flatten(), example_weight.flatten()):
            yield label, np.array([p]), np.array([w])
    elif prediction is None:
        for l, w in zip(label.flatten(), example_weight.flatten()):
            yield np.array([l]), prediction, np.array([w])
    elif label.size == prediction.size:
        for l, p, w in zip(label.flatten(), prediction.flatten(),
                           example_weight.flatten()):
            yield np.array([l]), np.array([p]), np.array([w])
    elif label.shape[-1] == 1:
        label = one_hot(label, prediction)
        for l, p, w in zip(label.flatten(), prediction.flatten(),
                           example_weight.flatten()):
            yield np.array([l]), np.array([p]), np.array([w])
    else:
        raise ValueError(
            'unable to pair labels with predictions: labels={}, predictions={}, '
            'model_name={}, output_name={}, sub_key={}, StandardMetricInputs={} '
            '\n\nThis is most likely a configuration error.'.format(
                label, prediction, model_name, output_name, sub_key, inputs))
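When flatten is in effect, the scalar example weight is replicated once per class, and class_weights rescales each replicated copy. A standalone sketch of the computation as written above, with hypothetical values (note that under Python's conditional-expression precedence the else branch yields a bare 1.0 rather than example_weight * 1.0):

import numpy as np

example_weight = np.array([0.5])
prediction = np.array([0.3, 0.6, 0.1])
class_weights = {0: 2.0, 2: 4.0}

replicated = np.array([
    (float(example_weight) * class_weights[i]) if i in class_weights else 1.0
    for i in range(prediction.shape[-1])
])
print(replicated)  # [1. 1. 2.] (class 1 falls back to a bare 1.0)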