def raw_data_pb(
    name,
    true_positive_counts,
    false_positive_counts,
    true_negative_counts,
    false_negative_counts,
    precision,
    recall,
    num_thresholds=None,
    display_name=None,
    description=None):
  """Create a PR curves summary protobuf from raw data values.

  Unlike the op-based variants, this function builds the protobuf directly
  from already-computed numpy values; nothing is added to a TensorFlow graph.

  Args:
    name: A tag attached to the summary. Used by TensorBoard for organization.
        The emitted value is tagged `<name>/pr_curves`.
    true_positive_counts: A rank-1 numpy array of true positive counts. Must
        contain `num_thresholds` elements and be castable to float32.
    false_positive_counts: A rank-1 numpy array of false positive counts. Must
        contain `num_thresholds` elements and be castable to float32.
    true_negative_counts: A rank-1 numpy array of true negative counts. Must
        contain `num_thresholds` elements and be castable to float32.
    false_negative_counts: A rank-1 numpy array of false negative counts. Must
        contain `num_thresholds` elements and be castable to float32.
    precision: A rank-1 numpy array of precision values. Must contain
        `num_thresholds` elements and be castable to float32.
    recall: A rank-1 numpy array of recall values. Must contain `num_thresholds`
        elements and be castable to float32.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, that
        the supplied arrays were computed for. Should be an int `>= 2`. It is
        only forwarded to the summary metadata; no metrics are computed here.
    display_name: Optional name for this summary in TensorBoard, as a `str`.
        Defaults to `name`.
    description: Optional long-form description for this summary, as a `str`.
        Markdown is supported. Defaults to empty.

  Returns:
    A `tf.Summary` protobuf (not a graph op) holding a single value. Its
    tensor is the float32 stack of the six input arrays, one row per array,
    in the order: true positives, false positives, true negatives, false
    negatives, precision, recall.
  """
  summary_metadata = metadata.create_summary_metadata(
      display_name=display_name if display_name is not None else name,
      description=description or '',
      num_thresholds=num_thresholds)

  # Stack along a new leading axis so each metric occupies one row. np.stack
  # requires all six arrays to share the same shape.
  data = np.stack(
      (true_positive_counts,
       false_positive_counts,
       true_negative_counts,
       false_negative_counts,
       precision,
       recall))
  tensor = tf.make_tensor_proto(np.float32(data), dtype=tf.float32)

  summary = tf.Summary()
  summary.value.add(tag='%s/pr_curves' % name,
                    metadata=summary_metadata,
                    tensor=tensor)
  return summary
Exemplo n.º 2
0
def raw_data_pb(
    name,
    true_positive_counts,
    false_positive_counts,
    true_negative_counts,
    false_negative_counts,
    precision,
    recall,
    num_thresholds=None,
    display_name=None,
    description=None):
  """Create a PR curves summary protobuf from raw data values.

  The values are taken as already-computed numpy arrays and packed straight
  into a protobuf; no TensorFlow graph ops are created.

  Args:
    name: A tag attached to the summary. Used by TensorBoard for organization.
        The emitted value is tagged `<name>/pr_curves`.
    true_positive_counts: A rank-1 numpy array of true positive counts. Must
        contain `num_thresholds` elements and be castable to float32.
    false_positive_counts: A rank-1 numpy array of false positive counts. Must
        contain `num_thresholds` elements and be castable to float32.
    true_negative_counts: A rank-1 numpy array of true negative counts. Must
        contain `num_thresholds` elements and be castable to float32.
    false_negative_counts: A rank-1 numpy array of false negative counts. Must
        contain `num_thresholds` elements and be castable to float32.
    precision: A rank-1 numpy array of precision values. Must contain
        `num_thresholds` elements and be castable to float32.
    recall: A rank-1 numpy array of recall values. Must contain `num_thresholds`
        elements and be castable to float32.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, that
        the supplied arrays were computed for. Should be an int `>= 2`. It is
        only forwarded to the summary metadata; no metrics are computed here.
    display_name: Optional name for this summary in TensorBoard, as a `str`.
        Defaults to `name`.
    description: Optional long-form description for this summary, as a `str`.
        Markdown is supported. Defaults to empty.

  Returns:
    A `tf.Summary` protobuf (not a graph op) holding a single value. Its
    tensor is the float32 stack of the six input arrays, one row per array,
    in the order: true positives, false positives, true negatives, false
    negatives, precision, recall.
  """
  summary_metadata = metadata.create_summary_metadata(
      display_name=display_name if display_name is not None else name,
      description=description or '',
      num_thresholds=num_thresholds)

  # One row per metric: np.stack joins the arrays along a new leading axis
  # and requires them all to have the same shape.
  data = np.stack(
      (true_positive_counts,
       false_positive_counts,
       true_negative_counts,
       false_negative_counts,
       precision,
       recall))
  tensor = tf.make_tensor_proto(np.float32(data), dtype=tf.float32)

  summary = tf.Summary()
  summary.value.add(tag='%s/pr_curves' % name,
                    metadata=summary_metadata,
                    tensor=tensor)
  return summary
Exemplo n.º 3
0
def _create_tensor_summary(
    name,
    true_positive_counts,
    false_positive_counts,
    true_negative_counts,
    false_negative_counts,
    precision,
    recall,
    num_thresholds=None,
    display_name=None,
    description=None,
    collections=None,
):
    """Build the tensor summary op that carries PR curve data.

    This exists as a separate helper, rather than having `op` call
    `raw_data_op` directly, so that the scope of `raw_data_op` does not end
    up embedded within `op`.

    Arguments are the same as for raw_data_op.

    Returns:
      A tensor summary that collects data for PR curves.
    """
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    # The threshold count is constant across all PR curve summaries that
    # share a tag, so it is recorded in the metadata rather than the tensor.
    shown_name = name if display_name is None else display_name
    pr_metadata = metadata.create_summary_metadata(
        display_name=shown_name,
        description=description or "",
        num_thresholds=num_thresholds,
    )

    # Row order of the stacked tensor: true positives, false positives,
    # true negatives, false negatives, precision, recall.
    metric_rows = (
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
    )
    stacked = tf.stack([tf.cast(row, tf.float32) for row in metric_rows])

    return tf.summary.tensor_summary(
        name="pr_curves",
        tensor=stacked,
        collections=collections,
        summary_metadata=pr_metadata,
    )
Exemplo n.º 4
0
def _create_tensor_summary(
    name,
    true_positive_counts,
    false_positive_counts,
    true_negative_counts,
    false_negative_counts,
    precision,
    recall,
    num_thresholds=None,
    display_name=None,
    description=None,
    collections=None):
  """Assemble the tensor summary op holding PR curve data.

  `op` goes through this helper instead of calling `raw_data_op` directly so
  that the scope of `raw_data_op` is not embedded within `op`.

  Arguments are the same as for raw_data_op.

  Returns:
    A tensor summary that collects data for PR curves.
  """
  # All PR curve summaries sharing a tag use the same number of thresholds,
  # which is why that number travels in the summary metadata.
  if display_name is None:
    display_name = name
  pr_metadata = metadata.create_summary_metadata(
      display_name=display_name,
      description=description or '',
      num_thresholds=num_thresholds)

  # Cast every series to float32. The list order fixes the row order of the
  # stacked tensor: true positives, false positives, true negatives, false
  # negatives, precision, recall.
  float_rows = [
      tf.cast(series, tf.float32)
      for series in (
          true_positive_counts,
          false_positive_counts,
          true_negative_counts,
          false_negative_counts,
          precision,
          recall)
  ]

  return tf.summary.tensor_summary(
      name='pr_curves',
      tensor=tf.stack(float_rows),
      collections=collections,
      summary_metadata=pr_metadata)
Exemplo n.º 5
0
def _create_tensor_summary(tag,
                           true_positive_counts,
                           false_positive_counts,
                           true_negative_counts,
                           false_negative_counts,
                           precision,
                           recall,
                           num_thresholds=None,
                           display_name=None,
                           description=None,
                           collections=None):
    """Produce the tensor summary op that stores PR curve data.

    A dedicated helper is used, instead of `op` calling `raw_metrics_op`
    directly, to keep the scope of `raw_metrics_op` from being embedded
    within `op`.

    Arguments are the same as for raw_metrics_op.

    Returns:
      A tensor summary that collects data for PR curves.
    """
    # The number of thresholds is the same for every PR curve summary with a
    # given tag, so it is kept in the summary metadata.
    label = tag if display_name is None else display_name
    text = description or ''
    pr_metadata = metadata.create_summary_metadata(
        display_name=label, description=text, num_thresholds=num_thresholds)

    # Convert each series to float32 before stacking.
    tp = tf.cast(true_positive_counts, tf.float32)
    fp = tf.cast(false_positive_counts, tf.float32)
    tn = tf.cast(true_negative_counts, tf.float32)
    fn = tf.cast(false_negative_counts, tf.float32)
    prec = tf.cast(precision, tf.float32)
    rec = tf.cast(recall, tf.float32)

    # Row order: true positives, false positives, true negatives, false
    # negatives, precision, recall.
    stacked = tf.stack([tp, fp, tn, fn, prec, rec])

    return tf.summary.tensor_summary(name='pr_curves',
                                     tensor=stacked,
                                     collections=collections,
                                     summary_metadata=pr_metadata)
Exemplo n.º 6
0
def op(tag,
       labels,
       predictions,
       num_thresholds=None,
       weights=None,
       display_name=None,
       description=None,
       collections=None):
    """Create a PR curve summary op for a single binary classifier.

    Computes true/false positive/negative values for the given `predictions`
    against the ground truth `labels`, against a list of evenly distributed
    threshold values in `[0, 1]` of length `num_thresholds`.

    Each number in `predictions`, a float in `[0, 1]`, is compared with its
    corresponding boolean label in `labels`, and counts as a single
    tp/fp/tn/fn value at each threshold. This is then multiplied with
    `weights` which can be used to reweight certain values, or more commonly
    used for masking values.

    Args:
      tag: A tag attached to the summary. Used by TensorBoard for
          organization. Also used as the name scope for the created ops.
      labels: The ground truth values. A Tensor of `bool` values with
          arbitrary shape.
      predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
          Values outside that range are clipped into it. Dimensions must
          match those of `labels`.
      num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
          compute PR metrics for. Should be `>= 2`. This value should be a
          constant integer value, not a Tensor that stores an integer.
          Defaults to 200.
      weights: Optional float32 `Tensor`. Individual counts are multiplied by
          this value. This tensor must be either the same shape as or
          broadcastable to the `labels` tensor. Defaults to 1.0.
      display_name: Optional name for this summary in TensorBoard, as a
          constant `str`. Defaults to `tag`.
      description: Optional long-form description for this summary, as a
          constant `str`. Markdown is supported. Defaults to empty.
      collections: Optional list of graph collections keys. The new
          summary op is added to these collections. Defaults to
          `[GraphKeys.SUMMARIES]`.

    Returns:
      A summary operation for use in a TensorFlow graph. The float32 tensor
      produced by the summary operation is of dimension (6, num_thresholds).
      The first dimension (of length 6) is of the order: true positives,
      false positives, true negatives, false negatives, precision, recall.
    """
    if num_thresholds is None:
        num_thresholds = 200

    if weights is None:
        weights = 1.0

    dtype = predictions.dtype

    with tf.name_scope(tag, values=[labels, predictions, weights]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weights
        false_labels = (1.0 - f_labels) * weights

        # Before we begin, flatten predictions.
        predictions = tf.reshape(predictions, [-1])

        # Shape the labels so they are broadcast-able for later multiplication.
        true_labels = tf.reshape(true_labels, [-1, 1])
        false_labels = tf.reshape(false_labels, [-1, 1])

        # To compute TP/FP/TN/FN, we are measuring a binary classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it fast,
        # observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact that
        # our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so we can fill the buckets in one pass by one-hot encoding each
        # prediction's bucket index and summing the weighted encodings.

        # Compute the bucket indices for each prediction value.
        bucket_indices = tf.cast(tf.floor(predictions * (num_thresholds - 1)),
                                 tf.int32)

        # One row per prediction, with a 1 in the column of its bucket. Built
        # once and shared by both sums below. The dtype follows the weighted
        # labels so the multiplications also type-check when `predictions` is
        # not float32 (tf.one_hot would otherwise default to float32).
        bucket_one_hot = tf.one_hot(bucket_indices,
                                    depth=num_thresholds,
                                    dtype=true_labels.dtype)

        # Bucket predictions.
        tp_buckets = tf.reduce_sum(bucket_one_hot * true_labels, axis=0)
        fp_buckets = tf.reduce_sum(bucket_one_hot * false_labels, axis=0)

        # Set up the cumulative sums to compute the actual metrics.
        tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
        fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
        # fn = sum(true_labels) - tp
        #    = sum(tp_buckets) - tp
        #    = tp[0] - tp
        # Similarly,
        # tn = fp[0] - fp
        tn = fp[0] - fp
        fn = tp[0] - tp

        # Store the number of thresholds within the summary metadata because
        # that value is constant for all pr curve summaries with the same tag.
        summary_metadata = metadata.create_summary_metadata(
            display_name=display_name if display_name is not None else tag,
            description=description or '',
            num_thresholds=num_thresholds)

        # Clamp the denominators from below by _MINIMUM_COUNT.
        precision = tp / tf.maximum(_MINIMUM_COUNT, tp + fp)
        recall = tp / tf.maximum(_MINIMUM_COUNT, tp + fn)

        # Store values within a tensor. We store them in the order:
        # true positives, false positives, true negatives, false
        # negatives, precision, and recall.
        combined_data = tf.stack([tp, fp, tn, fn, precision, recall])

        return tf.summary.tensor_summary(name='pr_curves',
                                         tensor=combined_data,
                                         collections=collections,
                                         summary_metadata=summary_metadata)