def testGetByKeysWithPrefix(self):
  """Verifies that the final key also matches entries named '<key>/<suffix>'."""

  def prefixed_outputs():
    # Build a fresh dict per lookup so every call receives its own input.
    return {
        'output/all_classes': ['a', 'b'],
        'output/probabilities': [1],
    }

  expected = {'all_classes': ['a', 'b'], 'probabilities': [1]}

  # Prefixed entries located directly beneath the first key.
  one_level = {'predictions': prefixed_outputs()}
  self.assertEqual(expected,
                   util.get_by_keys(one_level, ['predictions', 'output']))

  # Prefixed entries located beneath an additional level of nesting.
  two_levels = {'predictions': {'model': prefixed_outputs()}}
  self.assertEqual(
      expected,
      util.get_by_keys(two_levels, ['predictions', 'model', 'output']))
def testGetByKeysWitMultiLevel(self):
  """Verifies lookups that walk through two and three levels of nested dicts."""
  two_levels = {'predictions': {'output': [1]}}
  found = util.get_by_keys(two_levels, ['predictions', 'output'])
  self.assertEqual([1], found)

  three_levels = {'predictions': {'model': {'output': [1]}}}
  found = util.get_by_keys(three_levels, ['predictions', 'model', 'output'])
  self.assertEqual([1], found)
def _to_gains_example_weight(
    self, element: metric_types.StandardMetricInputs
) -> Tuple[np.ndarray, float]:
  """Returns gains and example_weight sorted by prediction.

  The gains are read from the feature named by `self._gain_key`, reshaped to
  the shape of the predictions and reordered by descending prediction.

  Args:
    element: Standard metric inputs holding the features and predictions.

  Returns:
    Tuple of (gains ordered by descending prediction, example weight). The
    example weight is 0.0 when the largest gain is not positive.

  Raises:
    ValueError: If the gains and predictions differ in size.
  """
  converted = metric_util.to_label_prediction_example_weight(
      element,
      eval_config=self._eval_config,
      model_name=self._model_name,
      output_name=self._output_name,
      flatten=False)  # pytype: disable=wrong-arg-types
  _, predictions, example_weight = next(converted)

  gains = util.get_by_keys(element.features, [self._gain_key])
  if gains.size != predictions.size:
    template = ('expected {} to be same size as predictions {} != {}: '
                'gains={}, metric_keys={}, '
                'StandardMetricInputs={}')
    raise ValueError(
        template.format(self._gain_key, gains.size, predictions.size, gains,
                        self._metric_keys, element))
  gains = gains.reshape(predictions.shape)

  # Ignore non-positive gains.
  if gains.max() <= 0:
    weight = 0.0
  else:
    weight = float(example_weight)

  descending = np.argsort(predictions)[::-1]
  return (gains[descending], weight)
def add_input(
    self, accumulator: _MinLabelPositionAccumulator,
    element: metric_types.StandardMetricInputs
) -> _MinLabelPositionAccumulator:
  """Folds one element into the min label position accumulator.

  The labels are ordered by descending prediction and the 1-indexed position
  of the first entry whose sum is positive is recorded, weighted by the
  example weight. Elements without labels, or without any positive label,
  leave the accumulator untouched.

  Args:
    accumulator: Running totals to update in place.
    element: Standard metric inputs for the element being added.

  Returns:
    The same accumulator instance that was passed in.
  """
  converted = metric_util.to_label_prediction_example_weight(
      element,
      eval_config=self._eval_config,
      model_name=self._key.model_name,
      output_name=self._key.output_name,
      flatten=False,
      allow_none=True)  # pytype: disable=wrong-arg-types
  labels, predictions, example_weight = next(converted)

  # A configured label key takes precedence over the standard labels.
  if self._label_key:
    labels = util.get_by_keys(element.features, [self._label_key])

  if labels is not None:
    ranked_labels = labels[np.argsort(predictions)[::-1]]
    # Use 1-indexed positions
    min_label_pos = next(
        (position for position, ranked in enumerate(ranked_labels, start=1)
         if np.sum(ranked) > 0), None)
    if min_label_pos:
      weight = float(example_weight)
      accumulator.total_min_position += min_label_pos * weight
      accumulator.total_weighted_examples += float(example_weight)
  return accumulator
def add_input(self, accumulator: float,
              element: metric_types.StandardMetricInputs) -> float:
  """Adds the example weight of `element` to the running total.

  When the example weight is a dict it is narrowed first by the model name
  and then by the output name of `self._key` (each only when set).

  Args:
    accumulator: Sum of the example weights added so far.
    element: Inputs whose `example_weight` is added. A weight of None counts
      as 1.0.

  Returns:
    `accumulator` plus the sum of the resolved example weight.

  Raises:
    ValueError: If the example weight is still a dict after the model name
      and output name lookups.
  """
  example_weight = element.example_weight
  # Only fall back to the default when no weight was supplied. A truthiness
  # test (`weight or default`) would silently turn a legitimate weight of 0
  # into 1 and raises for arrays holding more than one element. An empty dict
  # keeps its previous meaning of "no weight provided".
  if example_weight is None or (isinstance(example_weight, dict) and
                                not example_weight):
    example_weight = np.array(1.0)

  if isinstance(example_weight, dict) and self._key.model_name:
    # Keying by model name is optional; keep the dict when it is not present.
    value = util.get_by_keys(
        example_weight, [self._key.model_name], optional=True)
    if value is not None:
      example_weight = value

  if isinstance(example_weight, dict) and self._key.output_name:
    example_weight = util.get_by_keys(example_weight, [self._key.output_name],
                                      np.array(1.0))

  if isinstance(example_weight, dict):
    raise ValueError(
        'weighted_example_count cannot be calculated on a dict: {} = {}.\n\n'
        'This is most likely a configuration error (for multi-output models '
        'a separate metric is needed for each output).'.format(
            self._key, example_weight))
  return accumulator + np.sum(example_weight)
def _query(
    self, element: metric_types.StandardMetricInputs
) -> Union[float, int, Text]:
  """Returns the single query value shared by every entry of the element.

  Args:
    element: Standard metric inputs whose features hold the query key.

  Returns:
    The first entry of the flattened query feature.

  Raises:
    ValueError: If the query feature is empty or its values are not all
      identical.
  """
  query = util.get_by_keys(element.features, [self._query_key]).flatten()
  is_uniform = query.size != 0 and np.all(query == query[0])
  if is_uniform:
    return query[0]
  message = ('missing query value or not all values are the same: value={}, '
             'metric_keys={}, StandardMetricInputs={}')
  raise ValueError(message.format(query, self._metric_keys, element))
def key_by_query_key(extracts: types.Extracts,
                     query_key: Text) -> Tuple[Text, types.Extracts]:
  """Pairs the extracts with the string form of their query key feature.

  Args:
    extracts: Extracts holding the features to read the query key from.
    query_key: Name of the feature to key by.

  Returns:
    Tuple of (key, extracts). The key is '' when no query value is found, in
    which case the missing query key counter is incremented.
  """
  raw_value = util.get_by_keys(
      extracts, [constants.FEATURES_KEY, query_key], optional=True)
  value = metric_util.to_scalar(raw_value, tensor_name=query_key)
  if value is not None:
    return ('{}'.format(value), extracts)
  missing_query_key_counter.inc()
  return ('', extracts)
def _gain(self, i: metric_types.StandardMetricInputs) -> float:
  """Returns the gain feature of `i` as a scalar.

  Args:
    i: Standard metric inputs whose features hold the gain key.

  Returns:
    The scalar gain value.

  Raises:
    ValueError: If the gain feature does not hold exactly one value, or that
      value cannot be converted to a scalar.
  """
  gain = util.get_by_keys(i.features, [self._gain_key])
  scalar = metric_util.to_scalar(gain) if gain.size == 1 else None
  if scalar is None:
    template = ('expected {} to be scalar, but instead it has size = {}: '
                'value={}, metric_keys={}, '
                'StandardMetricInputs={}')
    raise ValueError(
        template.format(self._gain_key, gain.size, gain, self._metric_keys, i))
  return scalar
def add_input(self, accumulator: Dict[Text, List[float]],
              attributions: Dict[Text, Any]) -> Dict[Text, List[float]]:
  """Adds the attribution scores of one example to the per-key totals.

  Args:
    accumulator: Running totals keyed by attribution name; updated in place.
    attributions: Attribution values, optionally nested under the model name
      and/or output name of `self._key`.

  Returns:
    The same accumulator instance that was passed in.
  """
  key = self._key
  # Drill down through whichever of the model / output names are configured.
  for name in (key.model_name, key.output_name):
    if name:
      attributions = util.get_by_keys(attributions, [name])

  sub_key = key.sub_key
  for feature, scores in attributions.items():
    scores = metric_util.to_numpy(scores)
    if sub_key is not None:
      if sub_key.class_id is not None:
        scores = _scores_by_class_id(sub_key.class_id, scores)
      elif sub_key.k is not None:
        # Keep only the k-th entry of the top-k scores.
        scores = _scores_by_top_k(sub_key.k, scores)
        scores = np.array(scores[sub_key.k - 1])
      elif sub_key.top_k is not None:
        scores = _scores_by_top_k(sub_key.top_k, scores)
    if feature not in accumulator:
      accumulator[feature] = [0.0] * scores.size
    self._sum(accumulator[feature], scores)
  return accumulator
def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
  """Returns the entry found under `keys`, or `value` itself as a fallback.

  Args:
    value: Any object; the lookup is only attempted when it is a dict.
    keys: Keys to look up within `value`.

  Returns:
    The looked-up entry when `value` is a dict and the lookup yields a
    non-None result, otherwise `value` unchanged.
  """
  if not isinstance(value, dict):
    return value
  found = util.get_by_keys(value, keys, optional=True)
  return value if found is None else found
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    allow_none: bool = False,
    array_size: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
  """Returns label, prediction, and example weight for use in calculations.

  Model name and output name lookups are applied first, followed by the
  prediction key from the matching model spec and any sub key selection
  (class ID, k, or top k). The results are returned as numpy arrays, with
  scalars converted to arrays of shape (1,).

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.
    array_size: Verifies the prediction and labels are of the given size. If
      both array_size and sub_key.top_k is set, then array_size will be
      ignored and the size will be verified based on the top_k setting. The
      example weight will always be size 1.

  Returns:
    Tuple of (label, prediction, example_weight).

  Raises:
    ValueError: If a value is missing while allow_none is False, or a value
      does not have the expected size.
  """

  def lookup_if_keyed(value: Any, keys: List[Text]) -> Any:
    # Non-dict values, and dicts without the keys, are passed through as-is.
    if not isinstance(value, dict):
      return value
    found = util.get_by_keys(value, keys, optional=True)
    return value if found is None else found

  def as_array(value):
    # For consistency, make sure all outputs are arrays (i.e. convert scalars)
    if value is not None and not value.shape:
      return value.reshape((1,))
    return value

  label = inputs.label
  prediction = inputs.prediction
  example_weight = inputs.example_weight
  if example_weight is None:
    example_weight = np.array(1.0)

  # Predictions must be keyed by the model / output name when one is given;
  # labels and example weights can optionally be keyed the same way.
  for name in (model_name, output_name):
    if not name:
      continue
    prediction = util.get_by_keys(prediction, [name])
    label = lookup_if_keyed(label, [name])
    example_weight = lookup_if_keyed(example_weight, [name])

  prediction_key = ''
  if eval_config and eval_config.model_specs:
    prediction_key = next(
        (spec.prediction_key
         for spec in eval_config.model_specs
         if spec.name == model_name), '')
  label, prediction = prepare_labels_and_predictions(label, prediction,
                                                     prediction_key)

  if sub_key is not None:
    if sub_key.class_id is not None:
      label, prediction = select_class_id(sub_key.class_id, label, prediction)
    elif sub_key.k is not None:
      kth = sub_key.k - 1
      label, prediction = select_top_k(sub_key.k, label, prediction)
      label = np.array([label[kth]])
      prediction = np.array([prediction[kth]])
    elif sub_key.top_k is not None:
      label, prediction = select_top_k(sub_key.top_k, label, prediction)

  example_weight = to_numpy(example_weight)
  named_values = (('label', label), ('prediction', prediction),
                  ('example_weight', example_weight))

  if not allow_none:
    for txt, value in named_values:
      if value is None:
        raise ValueError(
            'no value provided for {}: model_name={}, output_name={}, '
            'sub_key={}, StandardMetricInputs={}\n\n'
            'This may be caused by a configuration error (i.e. label, '
            'prediction, and/or example weight keys were not specified) or an '
            'error in the pipeline.'.format(txt, model_name, output_name,
                                            sub_key, inputs))

  # Size that labels and predictions must have (None means "do not check").
  if array_size is None:
    expected_size = None
  elif sub_key and sub_key.top_k is not None:
    expected_size = sub_key.top_k
  else:
    expected_size = array_size

  for txt, value in named_values:
    if value is None:
      continue
    size = 1 if txt == 'example_weight' else expected_size
    if size is None:
      continue
    if value.size != size:
      raise ValueError(
          'expected {} to be size = {}, but instead it has size = {}: '
          '{}={}, model_name={}, output_name={}, sub_key={}, '
          'StandardMetricInputs={}\n\nThis is most likely a configuration '
          'error (for multi-class models using binary classification '
          'metrics, a sub_key must be set).'.format(
              txt, size, value.size, txt, value, model_name, output_name,
              sub_key, inputs))

  return as_array(label), as_array(prediction), as_array(example_weight)
def testGetByKeysMissingAndNonOptional(self):
  """Verifies that a missing or empty entry raises when it is not optional."""
  # `assertRaisesRegexp` is a deprecated alias that was removed in Python 3.12;
  # `assertRaisesRegex` is the supported spelling.
  for data in ({}, {'labels': {}}):
    with self.assertRaisesRegex(ValueError, 'not found'):
      util.get_by_keys(data, ['labels'])
def testGetByKeysMissingAndOptional(self):
  """Verifies that a missing or empty entry yields None when optional."""
  for data in ({}, {'labels': {}}):
    result = util.get_by_keys(data, ['labels'], optional=True)
    self.assertIsNone(result)
def testGetByKeysMissingAndDefault(self):
  """Verifies that a missing or empty entry yields the supplied default."""
  for data in ({}, {'labels': {}}):
    result = util.get_by_keys(data, ['labels'], default_value='a')
    self.assertEqual('a', result)
def testGetByKeys(self):
  """Verifies a single-level lookup returns the stored value."""
  data = {'labels': [1]}
  result = util.get_by_keys(data, ['labels'])
  self.assertEqual([1], result)
def testGetByKeysMissingSecondaryKey(self):
  """Verifies that a missing second-level key raises a ValueError."""
  data = {'predictions': {'missing': [1]}}
  # `assertRaisesRegexp` is a deprecated alias that was removed in Python 3.12;
  # `assertRaisesRegex` is the supported spelling.
  with self.assertRaisesRegex(ValueError, 'not found'):
    util.get_by_keys(data, ['predictions', 'output'])
def _query(
    self, i: metric_types.StandardMetricInputs
) -> Optional[Union[float, int, Text]]:
  """Returns the query feature of `i` converted with `metric_util.to_scalar`.

  Args:
    i: Standard metric inputs whose features hold the query key.

  Returns:
    The result of converting the looked-up query feature to a scalar.
  """
  raw_query = util.get_by_keys(i.features, [self._query_key])
  return metric_util.to_scalar(raw_query)
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    allow_none: bool = False,
) -> Iterable[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
  """Yields label, prediction, and example weights for use in calculations.

  Model name and output name lookups are applied first, followed by the
  prediction key from the matching model spec, any sub key selection (class
  ID, k, or top k) and the optional class weights. Scalars are converted to
  arrays of shape (1,) before anything is yielded.

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config.
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    class_weights: Optional class weights to apply to multi-class /
      multi-label labels and predictions. Classes without an entry use a
      weight of 1.0.
    flatten: True to flatten the final label and prediction outputs so that
      the yielded values are always arrays of size 1. For example,
      multi-class / multi-label outputs would be converted into label and
      prediction pairs that could then be processed by a binary
      classification metric in order to compute a micro average over all
      classes.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.

  Yields:
    Tuple of (label, prediction, example_weight).

  Raises:
    ValueError: If a value is missing while allow_none is False, the example
      weight is not of size 1, or labels cannot be paired with predictions.
  """

  def lookup_if_keyed(value: Any, keys: List[Text]) -> Any:
    # Non-dict values, and dicts without the keys, are passed through as-is.
    if not isinstance(value, dict):
      return value
    found = util.get_by_keys(value, keys, optional=True)
    return value if found is None else found

  def as_array(value):
    # For consistency, make sure all outputs are arrays (i.e. convert scalars)
    if value is not None and not value.shape:
      return value.reshape((1,))
    return value

  label = inputs.label
  prediction = inputs.prediction
  example_weight = inputs.example_weight
  if example_weight is None:
    example_weight = np.array(1.0)

  # Predictions must be keyed by the model / output name when one is given;
  # labels and example weights can optionally be keyed the same way.
  for name in (model_name, output_name):
    if not name:
      continue
    prediction = util.get_by_keys(prediction, [name])
    label = lookup_if_keyed(label, [name])
    example_weight = lookup_if_keyed(example_weight, [name])

  prediction_key = ''
  if eval_config and eval_config.model_specs:
    prediction_key = next(
        (spec.prediction_key
         for spec in eval_config.model_specs
         if spec.name == model_name), '')
  label, prediction = prepare_labels_and_predictions(label, prediction,
                                                     prediction_key)

  if sub_key is not None:
    if sub_key.class_id is not None:
      label, prediction = select_class_id(sub_key.class_id, label, prediction)
    elif sub_key.k is not None:
      kth = sub_key.k - 1
      label, prediction = select_top_k(sub_key.k, label, prediction)
      label = np.array([label[kth]])
      prediction = np.array([prediction[kth]])
    elif sub_key.top_k is not None:
      label, prediction = select_top_k(sub_key.top_k, label, prediction)

  example_weight = to_numpy(example_weight)

  if not allow_none:
    named_values = (('label', label), ('prediction', prediction),
                    ('example_weight', example_weight))
    for txt, value in named_values:
      if value is None:
        raise ValueError(
            'no value provided for {}: model_name={}, output_name={}, '
            'sub_key={}, StandardMetricInputs={}\n\n'
            'This may be caused by a configuration error (i.e. label, '
            'prediction, and/or example weight keys were not specified) or an '
            'error in the pipeline.'.format(txt, model_name, output_name,
                                            sub_key, inputs))

  if example_weight is not None and example_weight.size != 1:
    raise ValueError(
        'expected example weight to be size = 1, but instead it has '
        'size = {}: example_weight={}, model_name={}, output_name={}, '
        'sub_key={}, StandardMetricInputs={}\n\nThis is most likely a '
        'configuration error.'.format(example_weight.size, example_weight,
                                      model_name, output_name, sub_key,
                                      inputs))

  label = as_array(label)
  prediction = as_array(prediction)
  example_weight = as_array(example_weight)

  if class_weights and prediction is not None:
    multiplier = [
        class_weights.get(class_id, 1.0)
        for class_id in range(prediction.shape[-1])
    ]
    prediction = np.multiply(prediction, multiplier)
    if label is not None:
      if label.shape[-1] == 1:
        label = one_hot(label, prediction)
      label = np.multiply(label, multiplier)

  both_missing = label is None and prediction is None
  both_single = (
      label is not None and prediction is not None and label.size == 1 and
      prediction.size == 1)
  if not flatten or both_missing or both_single:
    yield label, prediction, example_weight
    return

  if label is None:
    for single_prediction in prediction.flatten():
      yield None, np.array([single_prediction]), example_weight
    return

  if prediction is None:
    for single_label in label.flatten():
      yield np.array([single_label]), None, example_weight
    return

  if label.size != prediction.size:
    if label.shape[-1] != 1:
      raise ValueError(
          'unable to pair labels with predictions: labels={}, predictions={}, '
          'model_name={}, output_name={}, sub_key={}, StandardMetricInputs={} '
          '\n\nThis is most likely a configuration error.'.format(
              label, prediction, model_name, output_name, sub_key, inputs))
    # Sizes differ but the label has a single trailing value: expand it to
    # match the predictions before pairing.
    label = one_hot(label, prediction)

  for single_label, single_prediction in zip(label.flatten(),
                                             prediction.flatten()):
    yield (np.array([single_label]), np.array([single_prediction]),
           example_weight)
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    aggregation_type: Optional[metric_types.AggregationType] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    squeeze: bool = True,
    allow_none: bool = False,
) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
  """Yields label, prediction, and example weights for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used). The sparseness of labels will be inferred from the shapes of the
  labels and predictions (i.e. if the shapes are different then the labels will
  be assumed to be sparse).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively. Labels and predictions will be returned in the same shape
  provided (default behavior) unless (1) flatten is True in which case a series
  of values (one per class ID) will be returned with last dimension of size 1
  or (2) a sub_key is used in which case the last dimension may be re-shaped to
  match the new number of outputs (1 for class_id or k, top_k for top k with
  aggregation).

  Note that for top_k without aggregation, the non-top_k prediction values will
  be set to float('-inf'), but for top_k with aggregation the values will be
  truncated to only return the top k values.

  Examples:

    # default behavior
    #
    # Binary classification
    Input  : labels=[1] predictions=[0.6]
    Output : (np.array([1]), np.array([0.6]), np.array([1.0]))
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output: (np.array([2]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))
    # Multi-class / multi-label classification w/ dense labels
    Input  : labels=[0, 1, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1, 1]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))

    # flatten=True
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class/multi-label classification w/ dense labels
    Input  : labels=[0, 0, 1], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.class_id=[2]
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.top_k=2 and aggregation_type is None (i.e. binarization of top 2).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 0, 1]), np.array([0.3, 0.6, -inf]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
    Output : (np.array([0, 0, 1]), np.array([0.3, -inf, 0.6]), np.array([1.0]))

    # sub_key.top_k=2 and aggregation_type is not None (i.e. aggregate top 2).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1]), np.array([0.3, 0.6]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
    Output : (np.array([0, 0]), np.array([0.3, 0.6]), np.array([1.0]))

    # sub_key.k=2 (i.e. binarization by choosing 2nd largest predicted value).
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[0] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.3]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0] predictions=[0.3]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0]))

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    aggregation_type: Optional aggregation type.
    class_weights: Optional class weights to apply to multi-class / multi-label
      labels and predictions. If used, flatten must also be True.
    flatten: True to flatten the final label and prediction outputs so that the
      yielded values are always arrays of size 1. For example, multi-class /
      multi-label outputs would be converted into label and prediction pairs
      that could then be processed by a binary classification metric in order
      to compute a micro average over all classes.
    squeeze: True to squeeze any outputs that have rank > 1. This transforms
      outputs such as np.array([[1]]) to np.array([1]).
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.

  Yields:
    Tuple of (label, prediction, example_weight).

  Raises:
    ValueError: If a label or prediction is missing while allow_none is False,
      the example weight holds differing values, class_weights is used without
      flatten, or labels cannot be paired with predictions.
  """

  def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
    # Non-dict values, and dicts without the keys, are passed through as-is.
    if isinstance(value, dict):
      new_value = util.get_by_keys(value, keys, optional=True)
      if new_value is not None:
        return new_value
    return value

  prediction_key = ''
  label_key = ''
  if eval_config and eval_config.model_specs:
    for spec in eval_config.model_specs:
      # To maintain consistency between settings where single models are used,
      # always use '' as the model name regardless of whether a name is passed.
      spec_name = spec.name if len(eval_config.model_specs) > 1 else ''
      if spec_name == model_name:
        prediction_key = spec.prediction_key
        label_key = spec.label_key
        break

  label = inputs.label
  if label_key:
    # This is to support a custom EvalSavedModel where the labels are a dict
    # but the keys are not output_names.
    label = optionally_get_by_keys(label, [label_key])
  prediction = inputs.prediction
  example_weight = inputs.example_weight
  if example_weight is None:
    example_weight = np.array(1.0, dtype=np.float32)  # tf-ranking needs float32
  if model_name:
    prediction = util.get_by_keys(prediction, [model_name])
    # Labels and weights can optionally be keyed by model name.
    label = optionally_get_by_keys(label, [model_name])
    example_weight = optionally_get_by_keys(example_weight, [model_name])
  if output_name:
    prediction = util.get_by_keys(prediction, [output_name])
    # Labels and example weights can optionally be keyed by output name.
    label = optionally_get_by_keys(label, [output_name])
    example_weight = optionally_get_by_keys(example_weight, [output_name])

  label, prediction = prepare_labels_and_predictions(label, prediction,
                                                     prediction_key)

  if not allow_none:
    for txt, value in zip(('label', 'prediction'), (label, prediction)):
      if value is None:
        raise ValueError(
            'no value provided for {}: model_name={}, output_name={}, '
            'sub_key={}, aggregation_type={}, StandardMetricInputs={}\n\n'
            'This may be caused by a configuration error (i.e. label, '
            'and/or prediction keys were not specified) or an '
            'error in the pipeline.'.format(txt, model_name, output_name,
                                            sub_key, aggregation_type, inputs))

  example_weight = to_numpy(example_weight)

  # Query based metrics group by a query_id which will result in the
  # example_weight being replicated once for each matching example in the
  # group. When this happens convert the example_weight back to a single value.
  if example_weight.size > 1:
    example_weight = example_weight.flatten()
    if not np.all(example_weight == example_weight[0]):
      raise ValueError(
          'if example_weight size > 0, the values must all be the same: '
          'example_weight={} model_name={}, output_name={}, '
          'sub_key={}, aggregation_type={}, StandardMetricInputs={}'
          '\n\nThis is most likely a configuration error.'.format(
              example_weight, model_name, output_name, sub_key,
              aggregation_type, inputs))
    example_weight = np.array(example_weight[0])

  if sub_key is not None:
    if sub_key.class_id is not None:
      label, prediction = select_class_id(sub_key.class_id, label, prediction)
    elif sub_key.k is not None:
      indices = top_k_indices(sub_key.k, prediction)
      if len(prediction.shape) == 1:
        indices = indices[0]  # 1D
      else:
        # 2D, take kth values
        indices = (indices[0][0::sub_key.k], indices[1][0::sub_key.k])
      if label.shape != prediction.shape:
        label = one_hot(label, prediction)
      label = select_indices(label, indices)
      prediction = select_indices(prediction, indices)
    elif sub_key.top_k is not None:
      # Set all non-top-k predictions to -inf. Note that we do not sort.
      indices = top_k_indices(sub_key.top_k, prediction)
      if aggregation_type is None:
        top_k_predictions = np.full(prediction.shape, float('-inf'))
        top_k_predictions[indices] = prediction[indices]
        prediction = top_k_predictions
      else:
        if label.shape != prediction.shape:
          label = one_hot(label, prediction)
        label = select_indices(label, indices)
        prediction = select_indices(prediction, indices)

  # For consistency, make sure all outputs are arrays (i.e. convert scalars)
  if label is not None and not label.shape:
    label = label.reshape((1,))
  if prediction is not None and not prediction.shape:
    prediction = prediction.reshape((1,))
  if example_weight is not None and not example_weight.shape:
    example_weight = example_weight.reshape((1,))

  # Number of per-class weights to produce: the last dimension of the
  # prediction, falling back to the label's. With allow_none either value may
  # be None, so it must not be dereferenced unconditionally; None here means
  # neither value is available and the weight is left untouched.
  if prediction is not None and (prediction.shape[-1] or label is None):
    num_outputs = prediction.shape[-1]
  elif label is not None:
    num_outputs = label.shape[-1]
  else:
    num_outputs = None

  if class_weights:
    if not flatten:
      raise ValueError(
          'class_weights can only be used when flatten is also used. This is '
          'likely caused by a configuration error (i.e. micro averaging being '
          "applied to metrics that don't support micro averaging): "
          'class_weights={}, flatten={}, StandardMetricInputs={}'.format(
              class_weights, flatten, inputs))
    if num_outputs is not None:
      # Classes without an explicit weight contribute nothing.
      example_weight = np.array([
          float(example_weight) * class_weights[i] if i in class_weights else 0.0
          for i in range(num_outputs)
      ])
  elif flatten and num_outputs is not None:
    # Replicate the weight so every flattened output carries its own copy.
    example_weight = np.array(
        [float(example_weight) for _ in range(num_outputs)])

  if (not flatten or (label is None and prediction is None) or
      (label is not None and prediction is not None and label.size == 1 and
       prediction.size == 1)):
    if squeeze:
      yield _squeeze(label), _squeeze(prediction), _squeeze(example_weight)
    else:
      yield label, prediction, example_weight
  elif label is None:
    # Pair each prediction with its weight; iterating the bare tuple of arrays
    # would unpack the arrays themselves instead of their elements.
    for p, w in zip(prediction.flatten(), example_weight.flatten()):
      yield label, np.array([p]), np.array([w])
  elif prediction is None:
    for l, w in zip(label.flatten(), example_weight.flatten()):
      yield np.array([l]), prediction, np.array([w])
  elif label.size == prediction.size:
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  elif label.shape[-1] == 1:
    label = one_hot(label, prediction)
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  else:
    raise ValueError(
        'unable to pair labels with predictions: labels={}, predictions={}, '
        'model_name={}, output_name={}, sub_key={}, aggregation_type={}, '
        'StandardMetricInputs={}\n\nThis is most likely a configuration '
        'error.'.format(label, prediction, model_name, output_name, sub_key,
                        aggregation_type, inputs))
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    class_weights: Optional[Dict[int, float]] = None,
    flatten: bool = True,
    allow_none: bool = False,
) -> Iterable[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
  """Yields label, prediction, and example weights for use in calculations.

  Where applicable this function will perform model and output name lookups as
  well as any required class ID, top K, etc conversions. It will also apply
  prediction keys and label vocabularies given the necessary information is
  provided as part of the EvalConfig (or standard estimator based naming is
  used). The sparseness of labels will be inferred from the shapes of the
  labels and predictions (i.e. if the shapes are different then the labels
  will be assumed to be sparse).

  If successful, the final output of calling this function will be a tuple of
  numpy arrays representing the label, prediction, and example weight
  respectively. Labels and predictions will be returned in the same shape
  provided (default behavior) unless (1) flatten is True in which case a
  series of values (one per class ID) will be returned with last dimension of
  size 1 or (2) a sub_key is used in which case the last dimension will be
  re-shaped to match the number of outputs selected (1 or top_k).

  Examples:

    # default behavior
    #
    # Binary classification
    Input  : labels=[1] predictions=[0.6]
    Output : (np.array([1]), np.array([0.6]), np.array([1.0]))
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([2]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))
    # Multi-class / multi-label classification w/ dense labels
    Input  : labels=[0, 1, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0, 1, 1]), np.array([0.3, 0.6, 0.1]),
              np.array([1.0]))

    # flatten=True
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class/multi-label classification w/ dense labels
    Input  : labels=[0, 0, 1], predictions=[0.3, 0.6, 0.1]
    Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
             (np.array([0]), np.array([0.6]), np.array([1.0])),
             (np.array([1]), np.array([0.1]), np.array([1.0]))

    # sub_key.class_id=[2]
    #
    # Multi-class classification w/ sparse labels
    Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))
    # Multi-class classification w/ dense labels
    Input  : labels=[0, 0, 1] predictions=[0.3, 0.6, 0.1]
    Output : (np.array([1]), np.array([0.1]), np.array([1.0]))

  Args:
    inputs: Standard metric inputs.
    eval_config: Eval config
    model_name: Optional model name (if multi-model evaluation).
    output_name: Optional output name (if multi-output model type).
    sub_key: Optional sub key.
    class_weights: Optional class weights to apply to multi-class /
      multi-label labels and predictions. If used, flatten must also be True.
      Classes that have no entry in class_weights receive a weight of 0.0.
    flatten: True to flatten the final label and prediction outputs so that
      the yielded values are always arrays of size 1. For example,
      multi-class / multi-label outputs would be converted into label and
      prediction pairs that could then be processed by a binary
      classification metric in order to compute a micro average over all
      classes.
    allow_none: True to allow labels or predictions with None values to be
      returned. The example weight will always be non-None.

  Yields:
    Tuple of (label, prediction, example_weight).

  Raises:
    ValueError: If a label or prediction is missing while allow_none is
      False, if the example weight does not have size 1, if class_weights is
      used without flatten, or if the labels cannot be paired with the
      predictions.
  """

  def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
    # Only dicts can be keyed; fall back to the original value when the keys
    # are not present so un-keyed labels / weights pass through untouched.
    if isinstance(value, dict):
      new_value = util.get_by_keys(value, keys, optional=True)
      if new_value is not None:
        return new_value
    return value

  label = inputs.label
  prediction = inputs.prediction
  example_weight = inputs.example_weight
  if example_weight is None:
    example_weight = np.array(1.0)

  if model_name:
    prediction = util.get_by_keys(prediction, [model_name])
    # Labels and weights can optionally be keyed by model name.
    label = optionally_get_by_keys(label, [model_name])
    example_weight = optionally_get_by_keys(example_weight, [model_name])

  if output_name:
    prediction = util.get_by_keys(prediction, [output_name])
    # Labels and example weights can optionally be keyed by output name.
    label = optionally_get_by_keys(label, [output_name])
    example_weight = optionally_get_by_keys(example_weight, [output_name])

  prediction_key = ''
  if eval_config and eval_config.model_specs:
    for spec in eval_config.model_specs:
      if spec.name == model_name:
        prediction_key = spec.prediction_key
        break

  label, prediction = prepare_labels_and_predictions(label, prediction,
                                                     prediction_key)

  if not allow_none:
    for txt, value in zip(('label', 'prediction'), (label, prediction)):
      if value is None:
        raise ValueError(
            'no value provided for {}: model_name={}, output_name={}, '
            'sub_key={}, StandardMetricInputs={}\n\n'
            'This may be caused by a configuration error (i.e. label, '
            'and/or prediction keys were not specified) or an '
            'error in the pipeline.'.format(txt, model_name, output_name,
                                            sub_key, inputs))

  example_weight = to_numpy(example_weight)
  if example_weight.size != 1:
    raise ValueError(
        'expected example weight to be size = 1, but instead it has '
        'size = {}: example_weight={}, model_name={}, output_name={}, '
        'sub_key={}, StandardMetricInputs={}\n\nThis is most likely a '
        'configuration error.'.format(example_weight.size, example_weight,
                                      model_name, output_name, sub_key,
                                      inputs))

  if sub_key is not None:
    if sub_key.class_id is not None:
      label, prediction = select_class_id(sub_key.class_id, label, prediction)
    elif sub_key.k is not None:
      # Select the top k entries, then keep only the k-th one.
      label, prediction = select_top_k(sub_key.k, label, prediction)
      label = np.array([label[sub_key.k - 1]])
      prediction = np.array([prediction[sub_key.k - 1]])
    elif sub_key.top_k is not None:
      label, prediction = select_top_k(sub_key.top_k, label, prediction)

  # For consistency, make sure all outputs are arrays (i.e. convert scalars)
  if label is not None and not label.shape:
    label = label.reshape((1,))
  if prediction is not None and not prediction.shape:
    prediction = prediction.reshape((1,))
  if example_weight is not None and not example_weight.shape:
    example_weight = example_weight.reshape((1,))

  # Number of per-class weights to emit. Predictions take precedence and
  # labels are the fallback; either may be None when allow_none is set, so
  # neither can be dereferenced unconditionally.
  if prediction is not None and prediction.shape[-1]:
    num_classes = prediction.shape[-1]
  elif label is not None:
    num_classes = label.shape[-1]
  else:
    num_classes = None

  if class_weights:
    if not flatten:
      raise ValueError(
          'class_weights can only be used when flatten is also used. This is '
          'likely caused by a configuration error (i.e. micro averaging being '
          "applied to metrics that don't support micro averaging): "
          'class_weights={}, flatten={}, StandardMetricInputs={}'.format(
              class_weights, flatten, inputs))
    if num_classes is not None:
      # The weight was validated above to hold exactly one element; item()
      # extracts it without relying on implicit array -> scalar conversion.
      base_weight = float(example_weight.item())
      # Classes without an explicit weight contribute nothing.
      example_weight = np.array([
          base_weight * class_weights[i] if i in class_weights else 0.0
          for i in range(num_classes)
      ])
  elif flatten and num_classes is not None:
    # Repeat the single example weight once per class.
    example_weight = np.array([float(example_weight.item())] * num_classes)

  if (not flatten or (label is None and prediction is None) or
      (label is not None and prediction is not None and label.size == 1 and
       prediction.size == 1)):
    yield label, prediction, example_weight
  elif label is None:
    for p, w in zip(prediction.flatten(), example_weight.flatten()):
      yield label, np.array([p]), np.array([w])
  elif prediction is None:
    for l, w in zip(label.flatten(), example_weight.flatten()):
      yield np.array([l]), prediction, np.array([w])
  elif label.size == prediction.size:
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  elif label.shape[-1] == 1:
    # Sparse label: expand to a dense encoding shaped like the predictions.
    label = one_hot(label, prediction)
    for l, p, w in zip(label.flatten(), prediction.flatten(),
                       example_weight.flatten()):
      yield np.array([l]), np.array([p]), np.array([w])
  else:
    raise ValueError(
        'unable to pair labels with predictions: labels={}, predictions={}, '
        'model_name={}, output_name={}, sub_key={}, StandardMetricInputs={} '
        '\n\nThis is most likely a configuration error.'.format(
            label, prediction, model_name, output_name, sub_key, inputs))