def _confusion_matrix_metric_ops(
    features_dict,
    predictions_dict,
    labels_dict,
    example_weight_key,
    thresholds,
):
    """Metric ops for computing confusion matrix at the given thresholds.

    This is factored out because it's common to AucPlots and
    ConfusionMatrixAtThresholds.

    Args:
      features_dict: Features dict.
      predictions_dict: Predictions dict.
      labels_dict: Labels dict.
      example_weight_key: Example weight key (into features_dict).
      thresholds: List of thresholds to compute the confusion matrix at.

    Returns:
      (value_op, update_op) for the metric. Note that the value_op produces a
      matrix as described in the comments below.
    """
    # Note that we have to squeeze predictions, labels, weights so they are all
    # N element vectors (otherwise some of them might be N x 1 tensors, and
    # multiplying a N element vector with a N x 1 tensor uses matrix
    # multiplication rather than element-wise multiplication).
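    # For example (NumPy shapes, illustration only, not part of the original
    # code): an [N] vector times an [N, 1] column broadcasts to [N, N]:
    #   np.ones(3) * np.ones((3, 1))            # shape (3, 3) -- wrong
    #   np.ones(3) * np.ones((3, 1)).squeeze()  # shape (3,)   -- intended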
    squeezed_weights = None
    if example_weight_key:
        squeezed_weights = tf.squeeze(features_dict[example_weight_key])
    prediction_tensor = tf.cast(_get_prediction_tensor(predictions_dict),
                                tf.float64)
    values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
        tf.squeeze(labels_dict), tf.squeeze(prediction_tensor), thresholds,
        squeezed_weights)

    values['precision'] = values['tp'] / (values['tp'] + values['fp'])
    values['recall'] = values['tp'] / (values['tp'] + values['fn'])

    # The final matrix will look like the following:
    #
    # [ fn@threshold_0 tn@threshold_0 ... recall@threshold_0 ]
    # [ fn@threshold_1 tn@threshold_1 ... recall@threshold_1 ]
    # [       :              :        ...         :          ]
    # [       :              :        ...         :          ]
    # [ fn@threshold_k tn@threshold_k ... recall@threshold_k ]
    #
    value_op = tf.transpose(
        tf.stack([
            values['fn'], values['tn'], values['fp'], values['tp'],
            values['precision'], values['recall']
        ]))
    update_op = tf.group(update_ops['fn'], update_ops['tn'], update_ops['fp'],
                         update_ops['tp'])

    return (value_op, update_op)
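# A minimal usage sketch (an assumption about typical consumption, not part of
# the original module): run `update_op` over the evaluation batches, then fetch
# `value_op` once and read its columns in the order they were stacked above.
def _example_confusion_matrix_usage(features_dict, predictions_dict, labels_dict,
                                    num_eval_batches):
    """Illustrative only; all arguments are placeholders."""
    value_op, update_op = _confusion_matrix_metric_ops(
        features_dict, predictions_dict, labels_dict,
        example_weight_key='example_weight',  # hypothetical weight feature
        thresholds=[0.25, 0.5, 0.75])
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        for _ in range(num_eval_batches):
            sess.run(update_op)
        matrix = sess.run(value_op)  # shape [num_thresholds, 6]
    # Columns follow the tf.stack order: fn, tn, fp, tp, precision, recall.
    fn, tn, fp, tp, precision, recall = matrix.T
    return fn, tn, fp, tp, precision, recall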
def f1_score(labels, predictions, weights=None, num_thresholds=200,
             metrics_collections=None, updates_collections=None, name=None):
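    """Computes the best F1 score over thresholds, together with the precision,
    recall, and decision threshold at which it occurs, each paired with the
    shared update op.
    """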
    with variable_scope.variable_scope(
            name, 'f1', (labels, predictions, weights)):
        predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
            predictions=predictions, labels=labels, weights=weights)
        # To account for floating point imprecisions / avoid division by zero.
        epsilon = 1e-7
        thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                      for i in range(num_thresholds - 2)]
        thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]
        thresholds_tensor = tf.constant(thresholds)

        # Confusion matrix.
        values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
            labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn'))

        # Compute precision and recall at various thresholds.
        def compute_best_f1_score(tp, fp, fn, name):
            precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                          name='precision_' + name)
            recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
            # Compute F1 score.
            f1_at_thresholds = (
                    2.0 * precision_at_t * recall_at_t /
                    (precision_at_t + recall_at_t + epsilon))

            best_f1 = math_ops.reduce_max(f1_at_thresholds)
            best_f1_index = tf.math.argmax(f1_at_thresholds)
            precision = precision_at_t[best_f1_index]
            recall = recall_at_t[best_f1_index]
            threshold = thresholds_tensor[best_f1_index]
            return best_f1, precision, recall, threshold

        def f1_across_replicas(_, values):
            best_f1, precision, recall, threshold = compute_best_f1_score(tp=values['tp'], fp=values['fp'],
                                                                          fn=values['fn'], name='value')
            if metrics_collections:
                ops.add_to_collections(metrics_collections, best_f1, precision, recall, threshold)
            return best_f1, precision, recall, threshold

        best_f1, precision, recall, threshold = distribution_strategy_context.get_replica_context().merge_call(
            f1_across_replicas, args=(values,))

        update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'],
                                          fn=update_ops['fn'], name='update')
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)

        # return (best_f1, precision, recall, threshold), update_op
        return (best_f1, update_op), (precision, update_op), (recall, update_op), (threshold, update_op)
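# Unlike the stock contrib f1_score further below, this variant also exposes
# the precision, recall, and decision threshold at the best F1. A minimal
# unpacking sketch (`labels` and `predictions` here are placeholder arguments):
def _example_best_f1_usage(labels, predictions):
    (f1, f1_update), (prec, _), (rec, _), (thr, _) = f1_score(labels, predictions)
    # `f1_update` is itself a (best_f1, precision, recall, threshold) tuple
    # recomputed from the streaming update ops; fetching it runs the underlying
    # tp/fp/fn accumulator updates.
    return f1, prec, rec, thr, f1_update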
Example #3
    def get_metric_ops(self, features_dict, predictions_dict, labels_dict):
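        """Returns metric ops for the AUC plot matrices and thresholds."""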
        # Note that we have to squeeze predictions, labels, weights so they are all
        # N element vectors (otherwise some of them might be N x 1 tensors, and
        # multiplying a N element vector with a N x 1 tensor uses matrix
        # multiplication rather than element-wise multiplication).
        squeezed_weights = None
        if self._example_weight_key:
            squeezed_weights = tf.squeeze(
                features_dict[self._example_weight_key])
        thresholds = [
            i * 1.0 / self._HISTOGRAM_NUM_BUCKETS
            for i in range(0, self._HISTOGRAM_NUM_BUCKETS + 1)
        ]
        thresholds = [-1e-6] + thresholds
        prediction_tensor = tf.cast(_get_prediction_tensor(predictions_dict),
                                    tf.float64)
        values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
            tf.squeeze(labels_dict), tf.squeeze(prediction_tensor), thresholds,
            squeezed_weights)

        values['precision'] = values['tp'] / (values['tp'] + values['fp'])
        values['recall'] = values['tp'] / (values['tp'] + values['fn'])

        # The final matrix will look like the following:
        #
        # [ fn@threshold_0 tn@threshold_0 ... recall@threshold_0 ]
        # [ fn@threshold_1 tn@threshold_1 ... recall@threshold_1 ]
        # [       :              :        ...         :          ]
        # [       :              :        ...         :          ]
        # [ fn@threshold_k tn@threshold_k ... recall@threshold_k ]
        #
        value_op = tf.transpose(
            tf.stack([
                values['fn'], values['tn'], values['fp'], values['tp'],
                values['precision'], values['recall']
            ]))
        update_op = tf.group(update_ops['fn'], update_ops['tn'],
                             update_ops['fp'], update_ops['tp'])

        return {
            metric_keys.AUC_PLOTS_MATRICES: (value_op, update_op),
            metric_keys.AUC_PLOTS_THRESHOLDS:
            (tf.identity(thresholds), tf.no_op()),
        }
    def confusion_matrix_metric_ops(
        self,
        features_dict,
        predictions_dict,
        labels_dict,
    ):
        """Metric ops for computing confusion matrix at the given thresholds.

        This is factored out because it's common to AucPlots and
        ConfusionMatrixAtThresholds.

        Args:
          features_dict: Features dict.
          predictions_dict: Predictions dict.
          labels_dict: Labels dict.

        Returns:
          (value_ops, update_ops) for the confusion matrix.
        """
        # Note that we have to squeeze predictions, labels, weights so they are all
        # N element vectors (otherwise some of them might be N x 1 tensors, and
        # multiplying a N element vector with a N x 1 tensor uses matrix
        # multiplication rather than element-wise multiplication).
        squeezed_weights = None
        if self._example_weight_key:
            squeezed_weights = tf.squeeze(
                features_dict[self._example_weight_key])
        prediction_tensor = tf.cast(_get_prediction_tensor(predictions_dict),
                                    tf.float64)
        values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
            tf.squeeze(labels_dict), tf.squeeze(prediction_tensor),
            self._thresholds, squeezed_weights)

        values['precision'] = values['tp'] / (values['tp'] + values['fp'])
        values['recall'] = values['tp'] / (values['tp'] + values['fn'])
        return (values, update_ops)  # pytype: disable=bad-return-type
Example #5
def f1_score(labels, predictions, weights=None, num_thresholds=200,
             metrics_collections=None, updates_collections=None, name=None):
  """Computes the approximately best F1-score across different thresholds.

  The f1_score function applies a range of thresholds to the predictions to
  convert them from [0, 1] to bool. Precision and recall are computed by
  comparing them to the labels. The F1-Score is then defined as
  2 * precision * recall / (precision + recall). The best one across the
  thresholds is returned.

  Disclaimer: In practice it may be desirable to choose the best threshold on
  the validation set and evaluate the F1 score with this threshold on a
  separate test set. Or it may be desirable to use a fixed threshold (e.g. 0.5).

  This function internally creates four local variables, `true_positives`,
  `true_negatives`, `false_positives` and `false_negatives` that are used to
  compute the pairs of recall and precision values for a linearly spaced set of
  thresholds from which the best f1-score is derived.

  This value is ultimately returned as `f1-score`, an idempotent operation that
  computes the F1-score (computed using the aforementioned variables). The
  `num_thresholds` variable controls the degree of discretization with larger
  numbers of thresholds more closely approximating the true best F1-score.

  For estimation of the metric over a stream of data, the function creates an
  `update_op` operation that updates these variables and returns the F1-score.

  Example usage with a custom estimator:
  def model_fn(features, labels, mode):
    predictions = make_predictions(features)
    loss = make_loss(predictions, labels)
    train_op = tf.contrib.training.create_train_op(
          total_loss=loss,
          optimizer='Adam')
    eval_metric_ops = {'f1': f1_score(labels, predictions)}
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
  estimator = tf.estimator.Estimator(model_fn=model_fn)

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

  Args:
    labels: A `Tensor` whose shape matches `predictions`. Will be cast to
      `bool`.
    predictions: A floating point `Tensor` of arbitrary shape and whose values
      are in the range `[0, 1]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `labels` dimension).
    num_thresholds: The number of thresholds to use when discretizing the roc
      curve.
    metrics_collections: An optional list of collections that `f1_score` should
      be added to.
    updates_collections: An optional list of collections that `update_op` should
      be added to.
    name: An optional variable_scope name.

  Returns:
    f1_score: A scalar `Tensor` representing the current best f1-score across
      different thresholds.
    update_op: An operation that increments the `true_positives`,
      `true_negatives`, `false_positives` and `false_negatives` variables
      appropriately and whose value matches the `f1_score`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or if
      either `metrics_collections` or `updates_collections` are not a list or
      tuple.
  """
  with variable_scope.variable_scope(
      name, 'f1', (labels, predictions, weights)):
    predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
        predictions=predictions, labels=labels, weights=weights)
    # To account for floating point imprecisions / avoid division by zero.
    epsilon = 1e-7
    thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                  for i in range(num_thresholds - 2)]
    thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]

    # Confusion matrix.
    values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
        labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn'))

    # Compute precision and recall at various thresholds.
    def compute_best_f1_score(tp, fp, fn, name):
      precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                    name='precision_' + name)
      recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
      # Compute F1 score.
      f1_at_thresholds = (
          2.0 * precision_at_t * recall_at_t /
          (precision_at_t + recall_at_t + epsilon))
      return math_ops.reduce_max(f1_at_thresholds)

    def f1_across_towers(_, values):
      best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'],
                                      fn=values['fn'], name='value')
      if metrics_collections:
        ops.add_to_collections(metrics_collections, best_f1)
      return best_f1

    best_f1 = distribution_strategy_context.get_tower_context().merge_call(
        f1_across_towers, values)

    update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'],
                                      fn=update_ops['fn'], name='update')
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return best_f1, update_op
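# For intuition, a small NumPy sketch of the search that the streaming metric
# above approximates: sweep the same linearly spaced thresholds and keep the
# best F1 (unweighted, with the same epsilon guard against division by zero).
# This is an illustration, not part of the original module.
import numpy as np

def _best_f1_numpy(labels, predictions, num_thresholds=200):
    eps = 1e-7
    thresholds = np.concatenate(
        [[-eps], np.arange(1, num_thresholds - 1) / (num_thresholds - 1), [1.0 + eps]])
    best = 0.0
    for t in thresholds:
        pred_pos = predictions > t
        tp = np.sum(pred_pos & (labels == 1))
        fp = np.sum(pred_pos & (labels == 0))
        fn = np.sum(~pred_pos & (labels == 1))
        precision = tp / (eps + tp + fp)
        recall = tp / (eps + tp + fn)
        best = max(best, 2.0 * precision * recall / (precision + recall + eps))
    return best

# e.g. _best_f1_numpy(np.array([1, 0, 1, 1, 0]),
#                     np.array([0.9, 0.4, 0.6, 0.2, 0.1])) ~= 0.857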
def _auc(labels,
         predictions,
         weights=None,
         num_thresholds=200,
         metrics_collections=None,
         updates_collections=None,
         curve='ROC',
         name=None,
         summation_method='trapezoidal'):
    """Computes the approximate AUC via a Riemann sum.

  Modified version of tf.metrics.auc. Adds support for AUC computation
  of the recall curve.
  """
    with tf.variable_scope(name, 'auc', (labels, predictions, weights)):
        if curve != 'ROC' and curve != 'PR' and curve != 'R':
            raise ValueError('curve must be either ROC, PR or R, %s unknown' %
                             (curve))
        kepsilon = 1e-7  # to account for floating point imprecisions
        thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                      for i in range(num_thresholds - 2)]
        thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]

        values, update_ops = _confusion_matrix_at_thresholds(
            labels, predictions, thresholds, weights)

        # Add epsilons to avoid dividing by 0.
        epsilon = 1.0e-6

        def compute_auc(tp, fn, tn, fp, name):
            """Computes the roc-auc or pr-auc based on confusion counts."""
            rec = tf.div(tp + epsilon, tp + fn + epsilon)
            if curve == 'ROC':
                fp_rate = tf.div(fp, fp + tn + epsilon)
                x = fp_rate
                y = rec
            elif curve == 'R':  # recall auc
                x = tf.linspace(1., 0., num_thresholds)
                y = rec
            else:  # curve == 'PR'.
                prec = tf.div(tp + epsilon, tp + fp + epsilon)
                x = rec
                y = prec
            if summation_method == 'trapezoidal':
                return tf.reduce_sum(tf.multiply(
                    x[:num_thresholds - 1] - x[1:],
                    (y[:num_thresholds - 1] + y[1:]) / 2.),
                                     name=name)
            elif summation_method == 'minoring':
                return tf.reduce_sum(tf.multiply(
                    x[:num_thresholds - 1] - x[1:],
                    tf.minimum(y[:num_thresholds - 1], y[1:])),
                                     name=name)
            elif summation_method == 'majoring':
                return tf.reduce_sum(tf.multiply(
                    x[:num_thresholds - 1] - x[1:],
                    tf.maximum(y[:num_thresholds - 1], y[1:])),
                                     name=name)
            else:
                raise ValueError('Invalid summation_method: %s' %
                                 summation_method)

        # sum up the areas of all the trapeziums
        auc_value = compute_auc(values['tp'], values['fn'], values['tn'],
                                values['fp'], 'value')
        update_op = compute_auc(update_ops['tp'], update_ops['fn'],
                                update_ops['tn'], update_ops['fp'],
                                'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, auc_value)

        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)

        return auc_value, update_op
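# A small NumPy sketch of the three summation rules used in compute_auc above,
# applied to precomputed per-threshold curve points. With increasing thresholds
# the x values (recall or FPR) decrease, so x[:-1] - x[1:] gives positive bin
# widths. Illustration only; the points below are made up.
import numpy as np

def _riemann_auc(x, y, summation_method='trapezoidal'):
    widths = x[:-1] - x[1:]
    if summation_method == 'trapezoidal':
        heights = (y[:-1] + y[1:]) / 2.0
    elif summation_method == 'minoring':
        heights = np.minimum(y[:-1], y[1:])
    elif summation_method == 'majoring':
        heights = np.maximum(y[:-1], y[1:])
    else:
        raise ValueError('Invalid summation_method: %s' % summation_method)
    return np.sum(widths * heights)

# A perfect classifier's ROC points, ordered by increasing threshold:
# FPR falls from 1 to 0 while TPR stays at 1, then TPR drops.
fpr = np.array([1.0, 0.5, 0.0, 0.0])
tpr = np.array([1.0, 1.0, 1.0, 0.0])
print(_riemann_auc(fpr, tpr))  # 1.0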
def f1_score(labels, predictions, weights=None, num_thresholds=200,
             metrics_collections=None, updates_collections=None, name=None):
  """Computes the approximately best F1-score across different thresholds.

  The f1_score function applies a range of thresholds to the predictions to
  convert them from [0, 1] to bool. Precision and recall are computed by
  comparing them to the labels. The F1-Score is then defined as
  2 * precision * recall / (precision + recall). The best one across the
  thresholds is returned.

  Disclaimer: In practice it may be desirable to choose the best threshold on
  the validation set and evaluate the F1 score with this threshold on a
  separate test set. Or it may be desirable to use a fixed threshold (e.g. 0.5).

  This function internally creates four local variables, `true_positives`,
  `true_negatives`, `false_positives` and `false_negatives` that are used to
  compute the pairs of recall and precision values for a linearly spaced set of
  thresholds from which the best f1-score is derived.

  This value is ultimately returned as `f1-score`, an idempotent operation that
  computes the F1-score (computed using the aforementioned variables). The
  `num_thresholds` variable controls the degree of discretization with larger
  numbers of thresholds more closely approximating the true best F1-score.

  For estimation of the metric over a stream of data, the function creates an
  `update_op` operation that updates these variables and returns the F1-score.

  Example usage with a custom estimator:
  def model_fn(features, labels, mode):
    predictions = make_predictions(features)
    loss = make_loss(predictions, labels)
    train_op = tf.contrib.training.create_train_op(
          total_loss=loss,
          optimizer='Adam')
    eval_metric_ops = {'f1': f1_score(labels, predictions)}
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
  estimator = tf.estimator.Estimator(model_fn=model_fn)

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

  Args:
    labels: A `Tensor` whose shape matches `predictions`. Will be cast to
      `bool`.
    predictions: A floating point `Tensor` of arbitrary shape and whose values
      are in the range `[0, 1]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `labels` dimension).
    num_thresholds: The number of thresholds to use when discretizing the roc
      curve.
    metrics_collections: An optional list of collections that `f1_score` should
      be added to.
    updates_collections: An optional list of collections that `update_op` should
      be added to.
    name: An optional variable_scope name.

  Returns:
    f1_score: A scalar `Tensor` representing the current best f1-score across
      different thresholds.
    update_op: An operation that increments the `true_positives`,
      `true_negatives`, `false_positives` and `false_negatives` variables
      appropriately and whose value matches the `f1_score`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or if
      either `metrics_collections` or `updates_collections` are not a list or
      tuple.
  """
  with variable_scope.variable_scope(
      name, 'f1', (labels, predictions, weights)):
    predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
        predictions=predictions, labels=labels, weights=weights)
    # To account for floating point imprecisions / avoid division by zero.
    epsilon = 1e-7
    thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                  for i in range(num_thresholds - 2)]
    thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]

    # Confusion matrix.
    values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
        labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn'))

    # Compute precision and recall at various thresholds.
    def compute_best_f1_score(tp, fp, fn, name):
      precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                    name='precision_' + name)
      recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
      # Compute F1 score.
      f1_at_thresholds = (
          2.0 * precision_at_t * recall_at_t /
          (precision_at_t + recall_at_t + epsilon))
      return math_ops.reduce_max(f1_at_thresholds)

    def f1_across_replicas(_, values):
      best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'],
                                      fn=values['fn'], name='value')
      if metrics_collections:
        ops.add_to_collections(metrics_collections, best_f1)
      return best_f1

    best_f1 = distribution_strategy_context.get_replica_context().merge_call(
        f1_across_replicas, values)

    update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'],
                                      fn=update_ops['fn'], name='update')
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return best_f1, update_op
def _auc(labels, predictions, weights=None, num_thresholds=200,
         metrics_collections=None, updates_collections=None,
         curve='ROC', name=None, summation_method='trapezoidal'):
  """Computes the approximate AUC via a Riemann sum.

  Modified version of tf.metrics.auc. Adds support for AUC computation
  of the recall curve.
  """
  with tf.variable_scope(
      name, 'auc', (labels, predictions, weights)):
    if curve != 'ROC' and curve != 'PR' and curve != 'R':
      raise ValueError('curve must be either ROC, PR or R, %s unknown' %
                       (curve))
    kepsilon = 1e-7  # to account for floating point imprecisions
    thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                  for i in range(num_thresholds - 2)]
    thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]

    values, update_ops = _confusion_matrix_at_thresholds(
      labels, predictions, thresholds, weights)

    # Add epsilons to avoid dividing by 0.
    epsilon = 1.0e-6

    def compute_auc(tp, fn, tn, fp, name):
      """Computes the roc-auc or pr-auc based on confusion counts."""
      rec = tf.div(tp + epsilon, tp + fn + epsilon)
      if curve == 'ROC':
        fp_rate = tf.div(fp, fp + tn + epsilon)
        x = fp_rate
        y = rec
      elif curve == 'R':  # recall auc
        x = tf.linspace(1., 0., num_thresholds)
        y = rec
      else:  # curve == 'PR'.
        prec = tf.div(tp + epsilon, tp + fp + epsilon)
        x = rec
        y = prec
      if summation_method == 'trapezoidal':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      (y[:num_thresholds - 1] + y[1:]) / 2.),
          name=name)
      elif summation_method == 'minoring':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      tf.minimum(y[:num_thresholds - 1], y[1:])),
          name=name)
      elif summation_method == 'majoring':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      tf.maximum(y[:num_thresholds - 1], y[1:])),
          name=name)
      else:
        raise ValueError('Invalid summation_method: %s' % summation_method)

    # sum up the areas of all the trapeziums
    auc_value = compute_auc(
      values['tp'], values['fn'], values['tn'], values['fp'], 'value')
    update_op = compute_auc(
      update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'],
      'update_op')

    if metrics_collections:
      ops.add_to_collections(metrics_collections, auc_value)

    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return auc_value, update_op