def test_recall_at_max_recs(self):
    """Checks Recall@k when a user has exactly ``k`` recommendations.

    When all users have exactly ``k`` recommendations, sorting for the
    largest ``k`` scores does not generate an extra ``user_id`` index.
    The single user here has four recommendations, so ``k=4`` hits that
    case; the smaller cut-offs are checked alongside it.
    """
    relevant = pd.DataFrame({
        Constants.user_id: [0, 0],
        Constants.item_id: [1, 2],
        'click': [True, True]
    })
    recommended = pd.DataFrame({
        Constants.user_id: [0, 0, 0, 0],
        Constants.item_id: [0, 1, 2, 3],
        'click': [0, -1, -2, -3]
    })

    # (k, expected recall): the relevant items 1 and 2 hold the 2nd and 3rd
    # highest scores, so they enter the top-k one at a time.
    expected_by_k = ((1, 0.), (2, 0.5), (3, 1.), (4, 1.))
    for k, expected in expected_by_k:
        metric = RankingRecoMetrics.Recall('click', k=k)
        self.assertEqual(expected, metric.get_score(relevant, recommended))
def test_recall(self):
    """Checks Recall@k and its support with missing relevant items or recommendations."""
    # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2, so they should be included in the support.
    # User 2 has no relevant items, so they shouldn't be included in the support.
    # Users 3 and 4 have no recommendations, but they should still be included in the support.
    actual = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 1, 3, 4],
        Constants.item_id: [1, 2, 3, 4, 3, 1],
        'click': [1, 1, 0, 1, 1, 1]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 2],
        Constants.item_id: [1, 2, 3, 3],
        'click': [0.8, 0.7, 0.75, 0.7]
    })

    metric = RankingRecoMetrics.Recall(click_column='click', k=2)
    results = metric.get_score(actual, predicted, return_extended_results=True)

    # 1/9 and 2/9 are not exactly representable in binary floating point, and
    # the metric value is a computed average, so compare with a tolerance
    # instead of exact equality to avoid failures from last-bit rounding.
    self.assertAlmostEqual(1. / 9, results['recall'])
    # The support is a count, so exact comparison is appropriate.
    self.assertEqual(3, results['support'])

    recall_3 = RankingRecoMetrics.Recall(click_column='click', k=3).get_score(actual, predicted)
    self.assertAlmostEqual(2. / 9, recall_3)
def test_change_column_names(self):
    """Checks that metrics work with custom user and item column names."""
    user_id_column = 'uid'
    item_id_column = 'iid'
    # Every metric below receives the same pair of column overrides.
    column_kwargs = {
        'user_id_column': user_id_column,
        'item_id_column': item_id_column,
    }

    actual = pd.DataFrame({
        user_id_column: [0, 0],
        item_id_column: [1, 2],
        'click': [True, True]
    })
    predicted = pd.DataFrame({
        user_id_column: [0, 0, 0, 0],
        item_id_column: [0, 1, 2, 3],
        'click': [0, -1, -2, -3]
    })

    # Test that the output is the same
    recall_1 = RankingRecoMetrics.Recall('click', k=1, **column_kwargs)
    self.assertEqual(0., recall_1.get_score(actual, predicted))

    recall_2 = RankingRecoMetrics.Recall('click', k=2, **column_kwargs)
    self.assertEqual(0.5, recall_2.get_score(actual, predicted))

    # Test that none of the metrics crash
    metrics = CombinedMetrics(
        BinaryRecoMetrics.CTR('click', k=1, **column_kwargs),
        RankingRecoMetrics.MAP('click', k=1, **column_kwargs),
        RankingRecoMetrics.NDCG('click', k=1, **column_kwargs),
        RankingRecoMetrics.Precision('click', k=1, **column_kwargs),
        RankingRecoMetrics.Recall('click', k=1, **column_kwargs))
    metrics.get_score(actual, predicted)
def test_reco_quick_start_example(self):
    """Checks the score of each metric on the quick-start example data."""
    # Data
    actual = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 0, 3],
        "clicks": [0, 1, 0, 0]
    })
    predicted = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 2, 3],
        "clicks": [0.8, 0.7, 0.8, 0.7]
    })

    # Metrics
    auc = BinaryRecoMetrics.AUC(click_column="clicks")
    ctr = BinaryRecoMetrics.CTR(click_column="clicks")
    ndcg_k = RankingRecoMetrics.NDCG(click_column="clicks", k=3)
    precision_k = RankingRecoMetrics.Precision(click_column="clicks", k=2)
    recall_k = RankingRecoMetrics.Recall(click_column="clicks", k=2)
    map_k = RankingRecoMetrics.MAP(click_column="clicks", k=2)

    # Scores (expected value first, matching the other tests in this file)
    self.assertEqual(0.25, auc.get_score(actual, predicted))
    # 1/3 is not exactly representable as a float, so compare with a
    # tolerance rather than against a hard-coded 16-digit literal.
    self.assertAlmostEqual(1. / 3, ctr.get_score(actual, predicted))
    self.assertEqual(1, ndcg_k.get_score(actual, predicted))
    self.assertEqual(1, precision_k.get_score(actual, predicted))
    self.assertEqual(1, recall_k.get_score(actual, predicted))
    self.assertEqual(1, map_k.get_score(actual, predicted))
def test_accumulate_when_no_results_this_batch(self):
    """Checks batch accumulation when a batch's actual and predicted users differ.

    Both the per-batch and the accumulated extended results are expected to
    contain two entries after every batch.
    """
    metrics = CombinedMetrics(
        RankingRecoMetrics.Recall(click_column='click', k=1),
        BinaryRecoMetrics.CTR(click_column='click', k=1))

    # Each entry is ((actual user, actual item), (predicted user, predicted item)).
    # The first batch uses the same user and item on both sides; in the second
    # batch the actual user (1) and the predicted user (2) do not overlap.
    batches = (
        ((0, 0), (0, 0)),
        ((1, 1), (2, 2)),
    )
    for (actual_user, actual_item), (predicted_user, predicted_item) in batches:
        actual = pd.DataFrame({
            Constants.user_id: [actual_user],
            Constants.item_id: [actual_item],
            'click': [True]
        })
        predicted = pd.DataFrame({
            Constants.user_id: [predicted_user],
            Constants.item_id: [predicted_item],
            'click': [0]
        })

        batch_res, acc_res = metrics.get_score(
            actual, predicted, batch_accumulate=True, return_extended_results=True)
        self.assertEqual(2, len(batch_res))
        self.assertEqual(2, len(acc_res))
def test_inter_list_diversity_in_use(self):
    """Checks ``InterListDiversity`` inside ``CombinedMetrics``.

    Without batch accumulation the combined metrics return three results,
    including the inter-list diversity score. With ``batch_accumulate=True``
    the call is expected to raise ``ValueError``.
    """
    actual = pd.DataFrame({
        Constants.user_id: [0, 0, 1, 2],
        Constants.item_id: [4, 5, 1, 4],
        'score': [True, True, True, True]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
        Constants.item_id: [0, 1, 2, 3, 0, 1, 2, 4, 0, 1, 2, 5],
        'score': [0.9, 0.7, 0.6, 0.3, 0.9, 0.7, 0.4, 0.1, 0.9, 0.8, 0.6, 0.6]
    })

    metrics = CombinedMetrics(
        RankingRecoMetrics.Recall(click_column='score', k=4),
        BinaryRecoMetrics.CTR(click_column='score', k=4),
        DiversityRecoMetrics.InterListDiversity(click_column='score', k=4, user_sample_size=2, num_runs=5))

    acc_res = metrics.get_score(actual, predicted, batch_accumulate=False, return_extended_results=True)
    self.assertEqual(3, len(acc_res))
    self.assertEqual(
        0.25,
        acc_res['Inter-List Diversity@4']['inter-list diversity'])

    with self.assertRaises(ValueError):
        # This should fail when `batch_accumulate=True`, and `InterListDiversity` gets applied in combined metrics.
        # The return value is deliberately not unpacked: tuple unpacking can
        # itself raise ValueError, which would satisfy this assertion even if
        # `get_score` did not raise.
        metrics.get_score(
            actual, predicted, batch_accumulate=True, return_extended_results=True)