Example #1
 def test_make_mean_reciprocal_rank_fn(self):
     scores = [[1., 3., 2.], [1., 2., 3.]]
     # Note that scores are ranked in descending order.
     # ranks = [[3, 1, 2], [3, 2, 1]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     # Note that the definition of MRR only uses the highest ranked
     # relevant item, where an item is relevant if its label is > 0.
     rel_rank = [2, 1]
     weights = [[1., 2., 3.], [4., 5., 6.]]
     num_queries = len(scores)
     weights_feature_name = 'weights'
     features = {weights_feature_name: torch.tensor(weights)}
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MRR)
     m_w = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MRR,
         weights_feature_name=weights_feature_name)
     m_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MRR, topn=1)
     self._check_metrics([
         (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
            features), 0.5),
         (m(torch.tensor(labels), torch.tensor(scores),
            features), (0.5 + 1.0) / 2),
         (m_w(torch.tensor(labels), torch.tensor(scores), features),
          (3. * 0.5 + (6. + 5.) / 2. * 1.) / (3. + (6. + 5.) / 2.)),
         (m_2(torch.tensor(labels), torch.tensor(scores),
              features), (sum([0., 1. / rel_rank[1], 0.]) / num_queries)),
     ])
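The _check_metrics helper is not shown in this listing; it presumably iterates over the (metric value, expected value) pairs and asserts near-equality. The expected numbers themselves can be reproduced with a small reference computation. The sketch below is illustrative only, not the library implementation: the function name and the exact weighting scheme (each query's reciprocal rank weighted by the mean weight of its relevant items) are assumptions inferred from the hand-computed expectations above.

def reference_mrr(labels, scores, weights=None, topn=None):
    # Illustrative sketch, not the library implementation. Per query, take the
    # reciprocal rank of the best-ranked relevant item (label > 0), zeroed out
    # when that rank falls outside topn, and weight it by the mean weight of
    # the query's relevant items, as in the hand-computed expectations above.
    if weights is None:
        weights = [[1.] * len(q) for q in labels]
    total, norm = 0., 0.
    for q_labels, q_scores, q_weights in zip(labels, scores, weights):
        # Rank of item i = 1 + number of items with a strictly higher score.
        ranks = [1 + sum(s > q_scores[i] for s in q_scores)
                 for i in range(len(q_scores))]
        rel = [i for i, label in enumerate(q_labels) if label > 0]
        if not rel:
            continue
        best = min(ranks[i] for i in rel)
        rr = 1. / best if (topn is None or best <= topn) else 0.
        w = sum(q_weights[i] for i in rel) / len(rel)
        total += w * rr
        norm += w
    return total / norm

With the lists above, reference_mrr(labels, scores) evaluates to (0.5 + 1.0) / 2 = 0.75, matching the second check.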
Example #2
    def test_make_discounted_cumulative_gain_fn(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        # Note that scores are ranked in descending order.
        ranks = [[3, 1, 2], [3, 2, 1]]
        labels = [[0., 0., 1.], [0., 1., 2.]]
        weights = [[1., 1., 1.], [2., 2., 1.]]
        weights_feature_name = 'weights'
        features = {weights_feature_name: torch.tensor(weights)}
        m = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.DCG)
        m_w = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.DCG,
            weights_feature_name=weights_feature_name)
        expected_dcg_1 = _dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)
        self._check_metrics([
            (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), expected_dcg_1),
        ])
        expected_dcg_2 = _dcg(2., 1) + _dcg(1., 2)
        expected_dcg_2_weighted = _dcg(2., 1) + _dcg(1., 2) * 2.
        expected_weight_2 = ((4 - 1) * 1. + (2 - 1) * 2.) / (4 - 1 + 2 - 1)
        self._check_metrics([
            (m(torch.tensor(labels), torch.tensor(scores),
               features), (expected_dcg_1 + expected_dcg_2) / 2.0),
            (m_w(torch.tensor(labels), torch.tensor(scores),
                 features), (expected_dcg_1 + expected_dcg_2_weighted) /
             (1. + expected_weight_2)),
        ])
        # Testing different gain and discount functions
        gain_fn = lambda rel: rel
        rank_discount_fn = lambda rank: rank

        def mod_dcg_fn(l, r):
            return _dcg(l,
                        r,
                        gain_fn=gain_fn,
                        rank_discount_fn=rank_discount_fn)

        m_mod = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.DCG,
            gain_fn=gain_fn,
            rank_discount_fn=rank_discount_fn)
        list_size = len(scores[0])
        expected_modified_dcg_1 = sum([
            mod_dcg_fn(labels[0][ind], ranks[0][ind])
            for ind in range(list_size)
        ])
        self._check_metrics([
            (m_mod(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                   features), expected_modified_dcg_1),
        ])
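The _dcg helper is also not part of this listing. Based on how it is called here, with a label, a 1-based rank, an optional item weight, and optional gain_fn / rank_discount_fn overrides, a plausible definition is the standard per-item DCG contribution; the defaults below are an assumption and may differ from the real helper.

import math

def _dcg(label, rank, weight=1.0,
         gain_fn=lambda label: math.pow(2.0, label) - 1.0,
         rank_discount_fn=lambda rank: 1.0 / math.log2(rank + 1.0)):
    # Assumed definition: contribution of one item with the given label at the
    # given 1-based rank, scaled by an optional item weight. Defaults follow
    # the standard DCG formulation (gain 2^label - 1, discount 1/log2(rank+1)).
    return gain_fn(label) * rank_discount_fn(rank) * weight

Under these defaults, expected_dcg_1 above evaluates to 1 / log2(3), roughly 0.631.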
Example #3
 def test_make_average_relevance_position_fn(self):
     scores = [[1., 3., 2.], [1., 2., 3.]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     weights = [[1., 2., 3.], [4., 5., 6.]]
     weights_feature_name = 'weights'
     features = {weights_feature_name: torch.tensor(weights)}
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.ARP)
     m_w = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.ARP,
         weights_feature_name=weights_feature_name)
     self._check_metrics([
         (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
            features), 2.),
         (m(torch.tensor(labels), torch.tensor(scores),
            features), (1. * 2. + 2. * 1. + 1. * 2.) / 4.),
         (m_w(torch.tensor(labels), torch.tensor(scores), features),
          (3. * 1. * 2. + 6. * 2. * 1. + 5 * 1. * 2.) / (3. + 12. + 5.)),
     ])
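The expected values follow a weighted average relevance position: each item contributes weight * label * rank to the numerator and weight * label to the denominator. A reference sketch under that assumption (illustrative name and definition, not the library implementation):

def reference_arp(labels, scores, weights=None):
    # Illustrative sketch of (weighted) average relevance position: the label-
    # and weight-weighted mean rank, matching the hand-computed expectations.
    if weights is None:
        weights = [[1.] * len(q) for q in labels]
    num, den = 0., 0.
    for q_labels, q_scores, q_weights in zip(labels, scores, weights):
        ranks = [1 + sum(s > q_scores[i] for s in q_scores)
                 for i in range(len(q_scores))]
        for label, rank, w in zip(q_labels, ranks, q_weights):
            num += w * label * rank
            den += w * label
    return num / den

For the lists above this gives 6/4 = 1.5 unweighted and 28/20 = 1.4 weighted, matching the checks.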
Example #4
 def test_make_precision_fn(self):
     scores = [[1., 3., 2.], [1., 2., 3.]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     features = {}
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.PRECISION)
     m_top_1 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.PRECISION, topn=1)
     m_top_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.PRECISION, topn=2)
     self._check_metrics([
         (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
            features), 1. / 3.),
         (m_top_1(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                  features), 0. / 1.),
         (m_top_2(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                  features), 1. / 2.),
         (m(torch.tensor(labels), torch.tensor(scores),
            features), (1. / 3. + 2. / 3.) / 2.),
     ])
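Precision here counts items with label > 0 among the top `topn` positions (the whole list when topn is unset), averaged over queries. A reference sketch under that assumption, with an illustrative function name:

def reference_precision(labels, scores, topn=None):
    # Illustrative sketch: fraction of relevant items (label > 0) among the
    # top `topn` positions by score, averaged over queries. Not the library
    # implementation.
    per_query = []
    for q_labels, q_scores in zip(labels, scores):
        k = topn if topn is not None else len(q_scores)
        top = sorted(range(len(q_scores)), key=lambda i: -q_scores[i])[:k]
        per_query.append(sum(q_labels[i] > 0 for i in top) / k)
    return sum(per_query) / len(per_query)

For the first query this yields 1/3, 0, and 1/2 for topn of None, 1, and 2, as checked above.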
Example #5
 def test_make_ordered_pair_accuracy_fn(self):
     scores = [[1., 3., 2.], [1., 2., 3.]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.ORDERED_PAIR_ACCURACY)
     self._check_metrics([
         (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
            {}), 1. / 2.),
         (m(torch.tensor([labels[1]]), torch.tensor([scores[1]]), {}), 1.),
         (m(torch.tensor(labels), torch.tensor(scores),
            {}), (1. + 3.) / (2. + 3.)),
     ])
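Ordered pair accuracy is, over all pairs with label_i > label_j, the fraction whose scores are in the same order, pooled across queries, which is why the two-query check is (1 + 3) / (2 + 3) rather than a per-query mean. A reference sketch under that assumption:

def reference_opa(labels, scores):
    # Illustrative sketch: over all pairs (i, j) with label_i > label_j, count
    # how often score_i > score_j, pooling pairs across queries. Not the
    # library implementation.
    correct, total = 0, 0
    for q_labels, q_scores in zip(labels, scores):
        for i in range(len(q_labels)):
            for j in range(len(q_labels)):
                if q_labels[i] > q_labels[j]:
                    total += 1
                    correct += q_scores[i] > q_scores[j]
    return correct / total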
Example #6
 def test_eval(self):
     metric_fns = {
         'metric/precision@1':
         metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.PRECISION, topn=1),
     }
     head = ranking_head.Head(loss_fn=_make_loss_fn(),
                              eval_metric_fns=metric_fns)
     loss, metrics_values = head.run(ranking_head.ModeKeys.EVAL,
                                     self._default_labels,
                                     self._default_logits,
                                     features={})
     self.assertAlmostEqual(loss.item(), self._default_loss, 5)
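_make_loss_fn, ranking_head.Head, and the self._default_* fixtures are defined elsewhere in the test module. The metric_fns dict simply maps display names to metric callables with the same (labels, logits, features) signature used throughout this listing; below is a hypothetical sketch of how such a dict could be applied during evaluation, not ranking_head's actual internals.

def eval_metrics(metric_fns, labels, logits, features):
    # Hypothetical helper, not ranking_head's implementation: apply every
    # metric callable from a metric_fns dict like the one built above.
    return {name: fn(labels, logits, features)
            for name, fn in metric_fns.items()}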
Example #7
 def test_make_mean_average_precision_fn(self):
     scores = [[1., 3., 2.], [1., 2., 3.]]
     # Note that scores are ranked in descending order, so the ranks are
     # [[3, 1, 2], [3, 2, 1]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     rels = [[0, 0, 1], [0, 1, 1]]
     features = {}
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MAP)
     m_top_1 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MAP, topn=1)
     m_top_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MAP, topn=2)
     self._check_metrics([
         (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
            features), _ap(rels[0], scores[0])),
         (m_top_1(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                  features), _ap(rels[0], scores[0], topn=1)),
         (m_top_2(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                  features), _ap(rels[0], scores[0], topn=2)),
         (m(torch.tensor(labels), torch.tensor(scores),
            features), sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.),
     ])
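The _ap helper is not shown either. A plausible definition (an assumption; the real helper may differ) is average precision over binary relevances ranked by descending score, with an optional topn cutoff on the counted positions and the total number of relevant items as the denominator:

def _ap(relevances, scores, topn=None):
    # Assumed definition of the _ap test helper: average precision of a single
    # ranked list with binary relevances. Precision is accumulated at each
    # relevant position within the top `topn` (or the full list), and the
    # denominator is the total number of relevant items.
    order = sorted(range(len(scores)), key=lambda i: -scores[i])
    if topn is not None:
        order = order[:topn]
    hits, precision_sum = 0, 0.
    for rank, idx in enumerate(order, start=1):
        if relevances[idx]:
            hits += 1
            precision_sum += hits / rank
    total_rel = sum(1 for r in relevances if r)
    return precision_sum / total_rel if total_rel else 0.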
Example #8
    def test_make_normalized_discounted_cumulative_gain_fn(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        # Note that scores are ranked in descending order.
        ranks = [[3, 1, 2], [3, 2, 1]]
        labels = [[0., 0., 1.], [0., 1., 2.]]
        weights = [[1., 2., 3.], [4., 5., 6.]]
        weights_3d = [[[1.], [2.], [3.]], [[4.], [5.], [6.]]]
        list_weights = [1., 0.]
        list_weights_2d = [[1.], [0.]]
        weights_feature_name = 'weights'
        weights_invalid_feature_name = 'weights_invalid'
        weights_3d_feature_name = 'weights_3d'
        list_weights_name = 'list_weights'
        list_weights_2d_name = 'list_weights_2d'
        features = {
            weights_feature_name: torch.tensor([weights[0]]),
            weights_invalid_feature_name: torch.tensor(weights[0]),
            weights_3d_feature_name: torch.tensor([weights_3d[0]]),
            list_weights_name: torch.tensor(list_weights),
            list_weights_2d_name: torch.tensor(list_weights_2d)
        }
        m = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG)

        expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
            _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
        self._check_metrics([
            (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), expected_ndcg),
        ])
        expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
            _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
        expected_ndcg_2 = 1.0
        expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
        self._check_metrics([
            (m(torch.tensor(labels), torch.tensor(scores),
               features), expected_ndcg),
        ])

        # With item-wise weights.
        m_top = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_feature_name,
            topn=1)
        m_weight = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_feature_name)
        m_weights_3d = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_3d_feature_name)
        self._check_metrics([
            (m_top(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                   features), _dcg(0., 1, 2.) / _dcg(1., 1, 3.)),
            (m_weight(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                      features),
             (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
             (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
            (m_weights_3d(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                          features),
             (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
             (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
        ])
        with self.assertRaises(ValueError):
            m_weight_invalid = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=weights_invalid_feature_name)
            m_weight_invalid(torch.tensor([labels[0]]),
                             torch.tensor([scores[0]]), features)

        # With list-wise weights.
        m_list_weight = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=list_weights_name)
        m_list_weight_2d = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=list_weights_2d_name)
        self._check_metrics([
            (m_list_weight(torch.tensor(labels), torch.tensor(scores),
                           features),
             (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
             (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
            (m_list_weight_2d(torch.tensor(labels), torch.tensor(scores),
                              features),
             (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
             (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
        ])

        # Testing different gain and discount functions
        gain_fn = lambda rel: rel
        rank_discount_fn = lambda rank: 1. / rank

        def mod_dcg_fn(l, r):
            return _dcg(l,
                        r,
                        gain_fn=gain_fn,
                        rank_discount_fn=rank_discount_fn)

        m_mod = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            gain_fn=gain_fn,
            rank_discount_fn=rank_discount_fn)
        list_size = len(scores[0])
        expected_modified_dcg_1 = sum([
            mod_dcg_fn(labels[0][ind], ranks[0][ind])
            for ind in range(list_size)
        ])
        self._check_metrics([
            (m_mod(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                   features), expected_modified_dcg_1),
        ])
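NDCG divides the DCG of the score-ranked list by the DCG of the ideal (descending-label) ordering. Using a _dcg helper with the (label, rank, gain_fn, rank_discount_fn) signature assumed earlier, the unweighted expectations above can be reproduced with the following illustrative sketch; it is not the library implementation, and the weighted cases additionally scale each item's contribution and the ideal ordering by the item weights.

def reference_ndcg(labels, scores, **dcg_kwargs):
    # Illustrative, unweighted NDCG: DCG of the list ranked by descending
    # score, divided by the DCG of the same labels in ideal order, averaged
    # over queries. Relies on the assumed _dcg helper sketched above.
    ndcgs = []
    for q_labels, q_scores in zip(labels, scores):
        order = sorted(range(len(q_scores)), key=lambda i: -q_scores[i])
        dcg = sum(_dcg(q_labels[i], rank, **dcg_kwargs)
                  for rank, i in enumerate(order, start=1))
        ideal = sorted(q_labels, reverse=True)
        ideal_dcg = sum(_dcg(label, rank, **dcg_kwargs)
                        for rank, label in enumerate(ideal, start=1))
        ndcgs.append(dcg / ideal_dcg if ideal_dcg else 0.)
    return sum(ndcgs) / len(ndcgs)

For the two lists above this gives (expected_ndcg_1 + 1.0) / 2, matching the unweighted two-query check.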