def discounted_cumulative_gain(labels,
                               predictions,
                               weights=None,
                               topn=None,
                               name=None):
  """Computes discounted cumulative gain (DCG).

  Args:
    labels: A `Tensor` of the same shape as `predictions`.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted discounted cumulative gain of the batch.
  """
  with ops.name_scope(name, 'discounted_cumulative_gain',
                      (labels, predictions, weights)):
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, topn)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    dcg = _discounted_cumulative_gain(sorted_labels,
                                      sorted_weights) * math_ops.log1p(1.0)
    per_list_weights = _per_example_weights_to_per_list_weights(
        weights=weights,
        relevance=math_ops.pow(2.0, math_ops.to_float(labels)) - 1.0)
    return metrics.mean(_safe_div(dcg, per_list_weights), per_list_weights)
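
# Worked example (an illustrative sketch, not part of the original module):
# with labels = [[0., 1., 2.]], predictions = [[3., 1., 2.]] and no weights,
# sorting by score gives labels [0., 2., 1.]. With gain 2^l - 1 and rank
# discount 1 / log2(1 + rank) (the log1p(1.0) = ln(2) factor above rescales
# the natural-log discount used in `_discounted_cumulative_gain` to base 2):
#   DCG = 0 / log2(2) + 3 / log2(3) + 1 / log2(4)
#       ~= 0 + 1.893 + 0.5 = 2.393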
def _sort_and_normalize(labels, logits, weights=None):
  """Sorts `labels` and `logits` and normalizes `weights`.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1], or a `Tensor`
      with the same shape as `labels`.

  Returns:
    A tuple of (sorted_labels, sorted_logits, sorted_weights).
  """
  labels = ops.convert_to_tensor(labels)
  logits = ops.convert_to_tensor(logits)
  logits.get_shape().assert_has_rank(2)
  logits.get_shape().assert_is_compatible_with(labels.get_shape())
  weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
  weights = array_ops.ones_like(labels) * weights
  _, topn = array_ops.unstack(array_ops.shape(logits))

  # Only sort entries with valid labels, i.e. labels >= 0.
  scores = array_ops.where(
      math_ops.greater_equal(labels, 0.), logits,
      -1e-6 * array_ops.ones_like(logits) +
      math_ops.reduce_min(logits, axis=1, keepdims=True))
  sorted_labels, sorted_logits, sorted_weights = utils.sort_by_scores(
      scores, [labels, logits, weights], topn=topn)
  return sorted_labels, sorted_logits, sorted_weights
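
# Worked example (an illustrative sketch, not part of the original module):
# with labels = [[1., -1., 0.]] and logits = [[1., 3., 2.]], the entry with
# the invalid label (-1.) has its sorting score replaced by
# min(logits) - 1e-6 = 1 - 1e-6, so it sorts after every valid entry:
#   sorted_labels = [[0., 1., -1.]], sorted_logits = [[2., 1., 3.]]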
def average_relevance_position(labels, predictions, weights=None, name=None):
  """Computes average relevance position (ARP).

  This metric could also be called average_relevance_rank, but the acronym
  would be easy to confuse with mean_reciprocal_rank. The current name is
  more distinctive and has been used historically for binary relevance as
  average_click_position.

  Args:
    labels: A `Tensor` of the same shape as `predictions`.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted average relevance position.
  """
  with ops.name_scope(name, 'average_relevance_position',
                      (labels, predictions, weights)):
    _, list_size = array_ops.unstack(array_ops.shape(predictions))
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, list_size)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    relevance = sorted_labels * sorted_weights
    position = math_ops.to_float(math_ops.range(1, topn + 1))
    # TODO(xuanhui): Consider adding a cap position of topn + 1 when there are
    # no relevant examples.
    return metrics.mean(position * array_ops.ones_like(relevance), relevance)
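
# Worked example (an illustrative sketch, not part of the original module):
# with labels = [[0., 1., 2.]], predictions = [[3., 1., 2.]] and unit
# weights, the sorted labels are [0., 2., 1.] at positions [1, 2, 3], so
# relevance = [0., 2., 1.] and
#   ARP = (1*0 + 2*2 + 3*1) / (0 + 2 + 1) = 7 / 3 ~= 2.33,
# i.e. the average position weighted by graded relevance.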
def mean_reciprocal_rank(labels, predictions, weights=None, name=None):
  """Computes mean reciprocal rank (MRR).

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means
      a relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted mean reciprocal rank of the batch.
  """
  with ops.name_scope(name, 'mean_reciprocal_rank',
                      (labels, predictions, weights)):
    _, list_size = array_ops.unstack(array_ops.shape(predictions))
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, list_size)
    sorted_labels, = utils.sort_by_scores(predictions, [labels], topn=topn)
    # Relevance = 1.0 when labels >= 1.0 to accommodate graded relevance.
    relevance = math_ops.to_float(math_ops.greater_equal(sorted_labels, 1.0))
    reciprocal_rank = 1.0 / math_ops.to_float(math_ops.range(1, topn + 1))
    # MRR has a shape of [batch_size, 1].
    mrr = math_ops.reduce_max(
        relevance * reciprocal_rank, axis=1, keepdims=True)
    return metrics.mean(mrr * array_ops.ones_like(weights), weights)
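
# Worked example (an illustrative sketch, not part of the original module):
# with labels = [[0., 0., 1.]] and predictions = [[3., 1., 2.]], the sorted
# labels are [0., 1., 0.], so relevance = [0., 1., 0.] and its element-wise
# product with the reciprocal ranks [1, 1/2, 1/3] is [0., 0.5, 0.]; the
# row-wise max gives MRR = 0.5 (first relevant example at rank 2).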
def test_sort_by_scores(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  positions = [[1, 2, 3], [4, 5, 6]]
  names = [['a', 'b', 'c'], ['d', 'e', 'f']]
  with session.Session() as sess:
    sorted_positions, sorted_names = sess.run(
        utils.sort_by_scores(scores, [positions, names]))
    self.assertAllEqual(sorted_positions, [[2, 3, 1], [6, 5, 4]])
    self.assertAllEqual(sorted_names,
                        [[b'b', b'c', b'a'], [b'f', b'e', b'd']])

    sorted_positions, sorted_names = sess.run(
        utils.sort_by_scores(scores, [positions, names], topn=2))
    self.assertAllEqual(sorted_positions, [[2, 3], [6, 5]])
    self.assertAllEqual(sorted_names, [[b'b', b'c'], [b'f', b'e']])

    sorted_positions, sorted_names = sess.run(
        utils.sort_by_scores([scores[0]], [[positions[0]], [names[0]]]))
    self.assertAllEqual(sorted_positions, [[2, 3, 1]])
    self.assertAllEqual(sorted_names, [[b'b', b'c', b'a']])
def _inverse_max_dcg(self, labels):
  """Computes the inverse of max DCG."""
  ideal_sorted_labels, = utils.sort_by_scores(
      labels, [labels], topn=self._topn)
  rank = math_ops.range(array_ops.shape(ideal_sorted_labels)[1]) + 1
  discounted_gain = self._gain_fn(
      ideal_sorted_labels) * self._rank_discount_fn(math_ops.to_float(rank))
  discounted_gain = math_ops.reduce_sum(discounted_gain, 1, keepdims=True)
  return array_ops.where(
      math_ops.greater(discounted_gain, 0.), 1. / discounted_gain,
      array_ops.zeros_like(discounted_gain))
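
# Worked example (an illustrative sketch, not part of the original module,
# assuming the standard DCG choices gain_fn(l) = 2^l - 1 and
# rank_discount_fn(r) = 1 / log2(1 + r); both are configurable on the
# instance): with labels = [[3., 1., 0.]], the ideal ordering is already
# [3., 1., 0.], so
#   max DCG = 7 / log2(2) + 1 / log2(3) + 0 / log2(4) ~= 7.631
# and the returned value is 1 / 7.631 ~= 0.131. All-zero labels return 0
# instead of dividing by zero.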
def precision(labels, predictions, weights=None, topn=None, name=None):
  """Computes precision as a weighted average of relevant examples.

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means
      a relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted precision of the batch.
  """
  with ops.name_scope(name, 'precision', (labels, predictions, weights)):
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, topn)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    # Relevance = 1.0 when labels >= 1.0.
    relevance = math_ops.to_float(math_ops.greater_equal(sorted_labels, 1.0))
    per_list_precision = _safe_div(
        math_ops.reduce_sum(relevance * sorted_weights, 1, keepdims=True),
        math_ops.reduce_sum(
            array_ops.ones_like(relevance) * sorted_weights, 1,
            keepdims=True))
    # per_list_weights are computed from the whole list to avoid a weight of 0
    # when there is no relevant example in the top-n.
    per_list_weights = _per_example_weights_to_per_list_weights(
        weights, math_ops.to_float(math_ops.greater_equal(labels, 1.0)))
    return metrics.mean(per_list_precision, per_list_weights)
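
# Worked example (an illustrative sketch, not part of the original module):
# with labels = [[0., 1., 1.]], predictions = [[3., 1., 2.]], unit weights
# and topn=2, the top-2 sorted labels are [0., 1.], so relevance = [0., 1.]
# and
#   precision@2 = (0 + 1) / 2 = 0.5.
# The per-list weight is computed over all three examples, so a list with no
# relevant example in the top-n still contributes a well-defined weight.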
def _list_mle_loss(labels,
                   logits,
                   weights=None,
                   lambda_weight=None,
                   reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                   name=None,
                   seed=None):
  """Computes the ListMLE loss [Xia et al. 2008] for a list.

  Given the labels of graded relevance l_i and the logits s_i, we calculate
  the ListMLE loss for the given list. The `lambda_weight` re-weights
  examples based on l_i and r_i. The recommended weighting scheme is the
  formulation presented in the "Position-Aware ListMLE" paper (Lan et al.)
  and is available via the create_p_list_mle_lambda_weight() factory function
  above.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights.
    lambda_weight: A `ListMLELambdaWeight` instance.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    seed: A randomization seed used when shuffling ground truth permutations.

  Returns:
    An op for the ListMLE loss.
  """
  with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)):
    is_label_valid = utils.is_label_valid(labels)
    # Reset the invalid labels to 0 and reset the invalid logits to a logit
    # with ~= 0 contribution.
    labels = array_ops.where(is_label_valid, labels,
                             array_ops.zeros_like(labels))
    logits = array_ops.where(
        is_label_valid, logits,
        math_ops.log(_EPSILON) * array_ops.ones_like(logits))
    weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
    weights = array_ops.squeeze(weights)

    # Shuffle labels and logits to add randomness to sort.
    shuffled_indices = utils.shuffle_valid_indices(is_label_valid, seed)
    shuffled_labels = array_ops.gather_nd(labels, shuffled_indices)
    shuffled_logits = array_ops.gather_nd(logits, shuffled_indices)

    sorted_labels, sorted_logits = utils.sort_by_scores(
        shuffled_labels, [shuffled_labels, shuffled_logits])

    raw_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True)
    sorted_logits = sorted_logits - raw_max
    sums = math_ops.cumsum(math_ops.exp(sorted_logits), axis=1, reverse=True)
    sums = math_ops.log(sums) - sorted_logits

    if lambda_weight is not None and isinstance(lambda_weight,
                                                ListMLELambdaWeight):
      sums *= lambda_weight.individual_weights(sorted_labels)

    negative_log_likelihood = math_ops.reduce_sum(sums, 1)

    return core_losses.compute_weighted_loss(
        negative_log_likelihood, weights=weights, reduction=reduction)
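
# Worked example (an illustrative sketch, not part of the original module):
# for a two-item list whose label-sorted logits are [2., 1.], ListMLE is the
# negative log-likelihood of the ground-truth permutation under a
# Plackett-Luce model:
#   loss = -log( e^2 / (e^2 + e^1) ) - log( e^1 / e^1 )
#        = log(1 + e^-1) ~= 0.313.
# The max-subtraction and reversed cumulative sum above compute exactly this
# quantity in a numerically stable way.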