def test_normalized_discounted_cumulative_gain_with_zero_weights(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  weights = [[1., 2., 3.], [4., 5., 6.]]

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, [[0.], [0.]])
  self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)

  metric_ = metrics_lib.NDCGMetric(topn=1)
  metric_.update_state([[0., 0., 0.]], [scores[0]], weights[0])
  self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)
def test_normalized_discounted_cumulative_gain(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  # Note that scores are ranked in descending order.
  ranks = [[3, 1, 2], [3, 2, 1]]
  labels = [[0., 0., 1.], [0., 1., 2.]]

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state([labels[0]], [scores[0]])
  expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
      _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores)
  expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
      _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
  expected_ndcg_2 = 1.0
  expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  # Test different gain and discount functions.
  gain_fn = lambda rel: rel
  rank_discount_fn = lambda rank: rank

  metric_ = metrics_lib.NDCGMetric(
      gain_fn=gain_fn, rank_discount_fn=rank_discount_fn)
  metric_.update_state([labels[0]], [scores[0]])

  def mod_dcg_fn(l, r):
    return _dcg(l, r, gain_fn=gain_fn, rank_discount_fn=rank_discount_fn)

  list_size = len(scores[0])
  ideal_labels = sorted(labels[0], reverse=True)
  list_dcgs = [
      mod_dcg_fn(labels[0][ind], ranks[0][ind]) for ind in range(list_size)
  ]
  ideal_dcgs = [
      mod_dcg_fn(ideal_labels[ind], ind + 1) for ind in range(list_size)
  ]
  expected_modified_ndcg_1 = sum(list_dcgs) / sum(ideal_dcgs)
  self.assertAlmostEqual(
      metric_.result().numpy(), expected_modified_ndcg_1, places=5)
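# The `_dcg` helper used throughout these tests is not shown in this
# excerpt. Below is a minimal sketch reconstructed from the call sites
# above (positional `weight`, overridable gain and discount functions);
# the canonical definition lives elsewhere in the test module.
import math


def _dcg(label,
         rank,
         weight=1.0,
         gain_fn=lambda label: math.pow(2.0, label) - 1.0,
         rank_discount_fn=lambda rank: 1.0 / math.log2(rank + 1.0)):
  """Returns the weighted DCG contribution of one (label, rank) pair."""
  return weight * gain_fn(label) * rank_discount_fn(rank)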
def setUp(self):
  super(FunctionalRankingModelTest, self).setUp()
  self.context_feature_columns = _context_feature_columns()
  self.example_feature_columns = _example_feature_columns()
  self.optimizer = tf.keras.optimizers.Adagrad()
  self.loss = losses.SoftmaxLoss()
  self.metrics = [metrics.NDCGMetric("ndcg_5", topn=5)]
def test_normalized_discounted_cumulative_gain_with_zero_relevance(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 0.], [0., 1., 2.]]

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores)
  self.assertAlmostEqual(metric_.result().numpy(), (0. + 1.) / 2.0, places=5)
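# Convention pinned down by the test above: a list with no relevant items
# has an ideal DCG of zero, and NDCGMetric scores it 0 rather than NaN.
# The second list is already in ideal order, so its NDCG is exactly 1 and
# the batch mean is (0. + 1.) / 2. A standalone re-derivation (a sketch,
# using the standard gain/discount assumed in the `_dcg` sketch above):
import math

# DCG of list 2 under descending-score order (labels 2, 1, 0 at ranks 1-3).
dcg = (2.**2 - 1.) / math.log2(2.) + (2.**1 - 1.) / math.log2(3.) + 0.
# Ideal DCG sorts labels descending, which yields the same ordering here.
ideal_dcg = (2.**2 - 1.) / math.log2(2.) + (2.**1 - 1.) / math.log2(3.) + 0.
assert abs(dcg / ideal_dcg - 1.0) < 1e-9  # NDCG of list 2 is exactly 1.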
def build_metrics(self, training=None):
  del training
  metrics = [tfr_metrics.MeanAveragePrecisionMetric(name='MAP')]
  for topn in [1, 5, 10]:
    metrics.append(
        tfr_metrics.NDCGMetric(name='NDCG@{}'.format(topn), topn=topn))
  for topn in [1, 5, 10]:
    metrics.append(
        tfr_metrics.MRRMetric(name='MRR@{}'.format(topn), topn=topn))
  return metrics
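# A hedged usage sketch for `build_metrics`: each returned object is a
# standard Keras metric, so it can be driven directly with batched
# labels/scores. `task` (an instance of the class above) and the toy
# values here are assumptions for illustration only.
labels = [[0., 1., 2.]]
scores = [[1., 2., 3.]]
for metric in task.build_metrics():
  metric.update_state(labels, scores)
  print('{} = {:.4f}'.format(metric.name, metric.result().numpy()))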
def test_model_compile_keras(self):
  # Specify the training configuration (optimizer, loss, metrics).
  optimizer = tf.keras.optimizers.RMSprop()
  loss = losses.SoftmaxLoss()
  eval_metrics = [metrics.NDCGMetric("ndcg_5", topn=5)]
  ranker = model.create_keras_model(
      network=self.network,
      loss=loss,
      metrics=eval_metrics,
      optimizer=optimizer,
      size_feature_name=None)
  self.assertIs(ranker.optimizer, optimizer)
  self.assertIs(ranker.loss, loss)
def test_normalized_discounted_cumulative_gain_with_weights_zero_relevance(
    self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 0.], [0., 1., 2.]]
  weights = [[1., 2., 3.], [4., 5., 6.]]
  expected_ndcg_1 = 0.0
  expected_ndcg_2 = 1.0
  as_list_weights = _example_weights_to_list_weights(weights, labels, 'NDCG')
  self.assertAllClose(as_list_weights, [5.75, 5.75])
  expected_ndcg = (expected_ndcg_1 * as_list_weights[0] +
                   expected_ndcg_2 * as_list_weights[1]) / sum(as_list_weights)

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, weights)
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  # Test zero NDCG cases.
  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, [[0.], [0.]])
  self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)
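# `_example_weights_to_list_weights` is referenced but not shown. A sketch
# consistent with the [5.75, 5.75] assertion above: each list's weight is
# the gain-weighted mean of its example weights, and lists with zero total
# gain (no relevant examples) fall back to the mean weight of the other
# lists. The signature and exact fallback rule are inferred from the
# assertions here, not authoritative.
import numpy as np


def _example_weights_to_list_weights(weights, relevances, boost_form):
  """Returns per-list weights derived from per-example weights."""
  weights = np.asarray(weights, dtype=float)
  gains = np.asarray(relevances, dtype=float)
  if boost_form == 'NDCG':
    gains = np.power(2.0, gains) - 1.0  # Same gain as the NDCG metric.
  per_list_gain = gains.sum(axis=1)
  safe_gain = np.where(per_list_gain > 0., per_list_gain, 1.)
  list_weights = (weights * gains).sum(axis=1) / safe_gain
  # Zero-gain lists inherit the average weight of the remaining lists.
  if (per_list_gain > 0.).any():
    fallback = list_weights[per_list_gain > 0.].mean()
    list_weights = np.where(per_list_gain > 0., list_weights, fallback)
  return list_weights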
def _calculate_aggregated_metrics(self, flattened_aggregated_logs,
                                  query_feature_name):
  """Calculates metrics where lists are grouped by `query_feature_name`."""
  qid2labels = {}
  qid2preds = {}

  qids = flattened_aggregated_logs[query_feature_name]
  preds = flattened_aggregated_logs[_PREDICTION]
  labels = flattened_aggregated_logs[_LABEL]
  for qid, pred, label in zip(qids, preds, labels):
    qid2labels[qid] = qid2labels.get(qid, []) + [label]
    qid2preds[qid] = qid2preds.get(qid, []) + [pred]

  metrics = [tfr_metrics.MeanAveragePrecisionMetric(name='Aggregated_MAP')]
  for topn in [1, 5, 10]:
    metrics.append(
        tfr_metrics.NDCGMetric(
            name='Aggregated_NDCG@{}'.format(topn), topn=topn))
  for topn in [1, 5, 10]:
    metrics.append(
        tfr_metrics.MRRMetric(
            name='Aggregated_MRR@{}'.format(topn), topn=topn))

  output_results = {}
  for metric in metrics:
    for qid in qid2preds:
      preds = np.expand_dims(qid2preds[qid], 0)
      labels = np.expand_dims(qid2labels[qid], 0)
      metric.update_state(labels, preds)
    output_results.update({
        'aggregated_metrics/{}'.format(metric.name): metric.result().numpy()
    })
    logging.info('aggregated_metrics/%s = %f', metric.name,
                 metric.result().numpy())
  return output_results
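# Illustrative call, assuming the flattened-log layout implied above.
# `task`, the query ids, and the toy scores are hypothetical; _PREDICTION
# and _LABEL are the module-level key constants used in the method.
flattened_logs = {
    'query_id': ['q1', 'q1', 'q2', 'q2'],
    _PREDICTION: [0.9, 0.1, 0.2, 0.7],
    _LABEL: [1., 0., 0., 1.],
}
results = task._calculate_aggregated_metrics(flattened_logs, 'query_id')
# e.g. results['aggregated_metrics/Aggregated_MRR@1'] == 1.0 here, since
# the top-scored example of each query is the relevant one.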
def test_normalized_discounted_cumulative_gain_with_weights(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  weights = [[1., 2., 3.], [4., 5., 6.]]
  list_weights = [[1.], [2.]]

  metric_ = metrics_lib.NDCGMetric(topn=1)
  metric_.update_state([labels[0]], [scores[0]], weights[0])
  expected_result = _dcg(0., 1, 2.) / _dcg(1., 1, 3.)
  self.assertAlmostEqual(metric_.result().numpy(), expected_result, places=5)

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state([labels[0]], [scores[0]], weights[0])
  expected_result = (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) / (
      _dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))
  self.assertAlmostEqual(metric_.result().numpy(), expected_result, places=5)

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, weights)
  expected_ndcg_1 = (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) / (
      _dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))
  expected_ndcg_2 = 1.0
  as_list_weights = _example_weights_to_list_weights(weights, labels, 'NDCG')
  expected_ndcg = (expected_ndcg_1 * as_list_weights[0] +
                   expected_ndcg_2 * as_list_weights[1]) / sum(as_list_weights)
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  metric_ = metrics_lib.NDCGMetric(topn=1)
  metric_.update_state(labels, scores, weights)
  expected_ndcg_1 = _dcg(0., 1, 2.) / _dcg(1., 1, 3.)
  expected_ndcg_2 = 1.0
  expected_ndcg = (expected_ndcg_1 * as_list_weights[0] +
                   expected_ndcg_2 * as_list_weights[1]) / sum(as_list_weights)
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, list_weights)
  expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
      _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
  expected_ndcg_2 = 1.0
  expected_ndcg = (expected_ndcg_1 + 2. * expected_ndcg_2) / 3.0
  self.assertAlmostEqual(metric_.result().numpy(), expected_ndcg, places=5)

  # Test zero NDCG cases.
  metric_ = metrics_lib.NDCGMetric()
  metric_.update_state(labels, scores, [[0.], [0.]])
  self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)

  metric_ = metrics_lib.NDCGMetric(topn=1)
  metric_.update_state([[0., 0., 0.]], [scores[0]], weights[0])
  self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)
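# Cross-check of the `_example_weights_to_list_weights` sketch against this
# test: with these labels the gains are [[0, 0, 1], [0, 1, 3]], so the
# derived list weights are (3*1)/1 = 3.0 and (5*1 + 6*3)/4 = 5.75, which
# are the `as_list_weights` entering the weighted-mean expectations above.
assert list(
    _example_weights_to_list_weights([[1., 2., 3.], [4., 5., 6.]],
                                     [[0., 0., 1.], [0., 1., 2.]],
                                     'NDCG')) == [3.0, 5.75]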