Example #1
def discounted_cumulative_gain(labels,
                               predictions,
                               weights=None,
                               topn=None,
                               name=None):
    """Computes discounted cumulative gain (DCG).

    Args:
      labels: A `Tensor` of the same shape as `predictions`.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
        The former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted discounted cumulative gain of the batch.
    """
    with ops.name_scope(name, 'discounted_cumulative_gain',
                        (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                             [labels, weights],
                                                             topn=topn)
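        # Note: log1p(1.0) == ln(2); multiplying by it rescales the
        # natural-log rank discount applied inside
        # _discounted_cumulative_gain to the conventional log2 discount.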
        dcg = _discounted_cumulative_gain(sorted_labels,
                                          sorted_weights) * math_ops.log1p(1.0)
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights=weights,
            relevance=math_ops.pow(2.0, math_ops.to_float(labels)) - 1.0)
        return metrics.mean(_safe_div(dcg, per_list_weights), per_list_weights)
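For intuition, the per-list quantity being averaged here corresponds to the standard DCG formula with gain 2^label - 1 and discount 1/log2(1 + rank). Below is a minimal, unweighted NumPy sketch of that formula; the function name and example values are illustrative only, not part of tensorflow_ranking.

import numpy as np

def dcg_numpy(labels, predictions, topn=None):
    # Sort labels by descending prediction score, keep the top `topn`.
    order = np.argsort(-np.asarray(predictions))
    sorted_labels = np.asarray(labels, dtype=float)[order][:topn]
    ranks = np.arange(1, len(sorted_labels) + 1)
    gains = np.power(2.0, sorted_labels) - 1.0
    discounts = 1.0 / np.log2(1.0 + ranks)
    return np.sum(gains * discounts)

print(dcg_numpy([1.0, 0.0, 2.0], [0.1, 0.9, 0.5]))  # ~2.3928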
Example #2
def _sort_and_normalize(labels, logits, weights=None):
    """Sorts `labels` and `logits` and normalize `weights`.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1], or a `Tensor` with
      the same shape as `labels`.

  Returns:
    A tuple of (sorted_labels, sorted_logits, sorted_weights).
  """
    labels = ops.convert_to_tensor(labels)
    logits = ops.convert_to_tensor(logits)
    logits.get_shape().assert_has_rank(2)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
    weights = array_ops.ones_like(labels) * weights
    _, topn = array_ops.unstack(array_ops.shape(logits))

    # Only sort entries with valid labels (>= 0); entries with invalid labels
    # get a score slightly below the per-list minimum so they sort to the end.
    scores = array_ops.where(
        math_ops.greater_equal(labels, 0.), logits,
        -1e-6 * array_ops.ones_like(logits) +
        math_ops.reduce_min(logits, axis=1, keepdims=True))
    sorted_labels, sorted_logits, sorted_weights = utils.sort_by_scores(
        scores, [labels, logits, weights], topn=topn)
    return sorted_labels, sorted_logits, sorted_weights
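The masking trick above can be seen in isolation with a small NumPy analogue: entries whose label is negative receive a score just below the row minimum, so the descending sort pushes them last. The values below are illustrative only.

import numpy as np

labels = np.array([[1.0, -1.0, 2.0]])           # -1 marks a padded/invalid entry
logits = np.array([[0.4, 0.9, 0.1]])
masked = np.where(labels >= 0.0, logits,
                  logits.min(axis=1, keepdims=True) - 1e-6)
order = np.argsort(-masked, axis=1)
print(labels[0][order[0]])                      # [ 1.  2. -1.]: invalid entry is last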
Example #3
def average_relevance_position(labels, predictions, weights=None, name=None):
    """Computes average relevance position (ARP).

    This could also be called average_relevance_rank, but that acronym would be
    easily confused with the one for mean_reciprocal_rank. The chosen name is
    more distinctive and has historically been used for binary relevance as
    average_click_position.

    Args:
      labels: A `Tensor` of the same shape as `predictions`.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
        The former case is per-example and the latter case is per-list.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted average relevance position.
    """
    with ops.name_scope(name, 'average_relevance_position',
                        (labels, predictions, weights)):
        _, list_size = array_ops.unstack(array_ops.shape(predictions))
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, list_size)
        sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                             [labels, weights],
                                                             topn=topn)
        relevance = sorted_labels * sorted_weights
        position = math_ops.to_float(math_ops.range(1, topn + 1))
        # TODO(xuanhui): Consider adding a cap position of topn + 1 when there
        # are no relevant examples.
        return metrics.mean(position * array_ops.ones_like(relevance),
                            relevance)
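In other words, ARP is the relevance-weighted average of the rank positions. A rough single-list NumPy sketch follows; the helper name and values are hypothetical, not part of the library.

import numpy as np

def arp_numpy(labels, predictions):
    # Average rank position, weighted by graded relevance.
    order = np.argsort(-np.asarray(predictions))
    relevance = np.asarray(labels, dtype=float)[order]
    positions = np.arange(1, len(relevance) + 1, dtype=float)
    return np.sum(positions * relevance) / np.sum(relevance)

print(arp_numpy([0.0, 1.0, 1.0], [0.3, 0.9, 0.1]))  # relevant at ranks 1 and 3 -> 2.0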
Example #4
def mean_reciprocal_rank(labels, predictions, weights=None, name=None):
    """Computes mean reciprocal rank (MRR).

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means
        a relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
        The former case is per-example and the latter case is per-list.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted mean reciprocal rank of the batch.
    """
    with ops.name_scope(name, 'mean_reciprocal_rank',
                        (labels, predictions, weights)):
        _, list_size = array_ops.unstack(array_ops.shape(predictions))
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, list_size)
        sorted_labels, = utils.sort_by_scores(predictions, [labels], topn=topn)
        # Relevance = 1.0 when labels >= 1.0 to accommodate graded relevance.
        relevance = math_ops.to_float(
            math_ops.greater_equal(sorted_labels, 1.0))
        reciprocal_rank = 1.0 / math_ops.to_float(math_ops.range(1, topn + 1))
        # MRR has a shape of [batch_size, 1]
        mrr = math_ops.reduce_max(relevance * reciprocal_rank,
                                  axis=1,
                                  keepdims=True)
        return metrics.mean(mrr * array_ops.ones_like(weights), weights)
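The `reduce_max` over relevance * reciprocal_rank picks out exactly the reciprocal rank of the first relevant item, since 1/rank is strictly decreasing. A minimal single-list NumPy sketch (names and values illustrative only):

import numpy as np

def mrr_numpy(labels, predictions):
    # The largest value of relevance / rank belongs to the first relevant item.
    order = np.argsort(-np.asarray(predictions))
    relevance = (np.asarray(labels, dtype=float)[order] >= 1.0).astype(float)
    reciprocal_ranks = 1.0 / np.arange(1, len(relevance) + 1, dtype=float)
    return np.max(relevance * reciprocal_ranks)

print(mrr_numpy([0.0, 2.0, 1.0], [0.9, 0.2, 0.5]))  # first relevant at rank 2 -> 0.5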
Example #5
    def test_sort_by_scores(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        positions = [[1, 2, 3], [4, 5, 6]]
        names = [['a', 'b', 'c'], ['d', 'e', 'f']]
        with session.Session() as sess:
            sorted_positions, sorted_names = sess.run(
                utils.sort_by_scores(scores, [positions, names]))
            self.assertAllEqual(sorted_positions, [[2, 3, 1], [6, 5, 4]])
            self.assertAllEqual(sorted_names,
                                [[b'b', b'c', b'a'], [b'f', b'e', b'd']])

            sorted_positions, sorted_names = sess.run(
                utils.sort_by_scores(scores, [positions, names], topn=2))
            self.assertAllEqual(sorted_positions, [[2, 3], [6, 5]])
            self.assertAllEqual(sorted_names, [[b'b', b'c'], [b'f', b'e']])

            sorted_positions, sorted_names = sess.run(
                utils.sort_by_scores([scores[0]],
                                     [[positions[0]], [names[0]]]))
            self.assertAllEqual(sorted_positions, [[2, 3, 1]])
            self.assertAllEqual(sorted_names, [[b'b', b'c', b'a']])
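For reference, here is a hedged NumPy analogue of the behavior this test exercises: several feature lists are reordered by a shared score list via a per-row descending argsort, optionally truncated to the top n. The helper name is hypothetical, not the library's API.

import numpy as np

def sort_by_scores_numpy(scores, features_list, topn=None):
    # Per-row descending argsort, optionally truncated to the top `topn`.
    order = np.argsort(-np.asarray(scores), axis=1)[:, :topn]
    return [np.take_along_axis(np.asarray(f), order, axis=1)
            for f in features_list]

scores = [[1., 3., 2.], [1., 2., 3.]]
positions = [[1, 2, 3], [4, 5, 6]]
print(sort_by_scores_numpy(scores, [positions], topn=2)[0])  # [[2 3] [6 5]]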
Example #6
    def _inverse_max_dcg(self, labels):
        """Computes the inverse of max DCG."""
        ideal_sorted_labels, = utils.sort_by_scores(labels, [labels],
                                                    topn=self._topn)
        rank = math_ops.range(array_ops.shape(ideal_sorted_labels)[1]) + 1
        discounted_gain = self._gain_fn(
            ideal_sorted_labels) * self._rank_discount_fn(
                math_ops.to_float(rank))
        discounted_gain = math_ops.reduce_sum(discounted_gain, 1,
                                              keepdims=True)
        return array_ops.where(math_ops.greater(discounted_gain, 0.),
                               1. / discounted_gain,
                               array_ops.zeros_like(discounted_gain))
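The max (ideal) DCG is obtained by sorting the labels by themselves, i.e. the best achievable ordering. A NumPy sketch under the common choices gain 2^l - 1 and discount 1/log2(1 + rank) follows; in the class above, both functions are configurable, so these choices are an assumption of this sketch.

import numpy as np

def inverse_max_dcg_numpy(labels):
    ideal = np.sort(np.asarray(labels, dtype=float))[::-1]  # best possible ordering
    ranks = np.arange(1, len(ideal) + 1)
    max_dcg = np.sum((np.power(2.0, ideal) - 1.0) / np.log2(1.0 + ranks))
    return 1.0 / max_dcg if max_dcg > 0.0 else 0.0  # guard against all-zero labels

print(inverse_max_dcg_numpy([1.0, 0.0, 2.0]))  # 1 / (3 + 1/log2(3)) ~= 0.2754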
Example #7
def precision(labels, predictions, weights=None, topn=None, name=None):
    """Computes precision as weighted average of relevant examples.

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means
        a relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
        The former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted precision of the batch.
    """
    with ops.name_scope(name, 'precision', (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                             [labels, weights],
                                                             topn=topn)
        # Relevance = 1.0 when labels >= 1.0.
        relevance = math_ops.to_float(
            math_ops.greater_equal(sorted_labels, 1.0))
        per_list_precision = _safe_div(
            math_ops.reduce_sum(relevance * sorted_weights, 1, keepdims=True),
            math_ops.reduce_sum(array_ops.ones_like(relevance) *
                                sorted_weights,
                                1,
                                keepdims=True))
        # per_list_weights are computed from the whole list to avoid a weight
        # of 0 when there happens to be no relevant example in the topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights, math_ops.to_float(math_ops.greater_equal(labels, 1.0)))
        return metrics.mean(per_list_precision, per_list_weights)
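Ignoring weights, the per-list quantity is simply the fraction of the top-k sorted items whose label is >= 1. A minimal single-list NumPy sketch (name and values illustrative only):

import numpy as np

def precision_at_k_numpy(labels, predictions, topn):
    # Fraction of the top `topn` ranked items that are relevant (label >= 1).
    order = np.argsort(-np.asarray(predictions))[:topn]
    relevance = (np.asarray(labels, dtype=float)[order] >= 1.0).astype(float)
    return relevance.mean()

print(precision_at_k_numpy([0.0, 1.0, 1.0, 0.0], [0.9, 0.8, 0.1, 0.5], topn=2))  # 0.5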
Example #8
def _list_mle_loss(labels,
                   logits,
                   weights=None,
                   lambda_weight=None,
                   reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                   name=None,
                   seed=None):
    """Computes the ListMLE loss [Xia et al.

  2008] for a list.

  Given the labels of graded relevance l_i and the logits s_i, we calculate
  the ListMLE loss for the given list.

  The `lambda_weight` re-weights examples based on l_i and r_i.
  The recommended weighting scheme is the formulation presented in the
  "Position-Aware ListMLE" paper (Lan et. al) and available using
  create_p_list_mle_lambda_weight() factory function above.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights.
    lambda_weight: A `DCGLambdaWeight` instance.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    seed: A randomization seed used when shuffling ground truth permutations.

  Returns:
    An op for the ListMLE loss.
  """
    with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)):
        is_label_valid = utils.is_label_valid(labels)
        # Reset invalid labels to 0, and reset invalid logits to a value whose
        # contribution to the loss is approximately 0.
        labels = array_ops.where(is_label_valid, labels,
                                 array_ops.zeros_like(labels))
        logits = array_ops.where(
            is_label_valid, logits,
            math_ops.log(_EPSILON) * array_ops.ones_like(logits))
        weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
        weights = array_ops.squeeze(weights)

        # Shuffle labels and logits to add randomness to sort.
        shuffled_indices = utils.shuffle_valid_indices(is_label_valid, seed)
        shuffled_labels = array_ops.gather_nd(labels, shuffled_indices)
        shuffled_logits = array_ops.gather_nd(logits, shuffled_indices)

        sorted_labels, sorted_logits = utils.sort_by_scores(
            shuffled_labels, [shuffled_labels, shuffled_logits])

        raw_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True)
        sorted_logits = sorted_logits - raw_max
        sums = math_ops.cumsum(math_ops.exp(sorted_logits),
                               axis=1,
                               reverse=True)
        sums = math_ops.log(sums) - sorted_logits

        if lambda_weight is not None and isinstance(lambda_weight,
                                                    ListMLELambdaWeight):
            sums *= lambda_weight.individual_weights(sorted_labels)

        negative_log_likelihood = math_ops.reduce_sum(sums, 1)

        return core_losses.compute_weighted_loss(negative_log_likelihood,
                                                 weights=weights,
                                                 reduction=reduction)
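The cumulative-sum trick above implements the Plackett-Luce log-likelihood: once the list is sorted by label, position i contributes log(sum_{j >= i} exp(s_j)) - s_i, the negative log-probability of choosing item i among the items still remaining. A compact single-list NumPy sketch of the unweighted loss (name and values illustrative only):

import numpy as np

def list_mle_nll_numpy(sorted_logits):
    # `sorted_logits` are assumed to be already ordered by descending label.
    s = np.asarray(sorted_logits, dtype=float)
    s = s - s.max()                               # stabilize exp, as raw_max does above
    tail_sums = np.cumsum(np.exp(s)[::-1])[::-1]  # reverse cumulative sums
    return np.sum(np.log(tail_sums) - s)

print(list_mle_nll_numpy([2.0, 1.0, 0.5]))  # small when logits agree with label order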