def test_precision_at_max_recs(self):
    """Tests Precision@k for the case when all users have exactly k recommendations.

    When all users have exactly k recommendations, there isn't an extra
    ``user_id`` index generated when sorting for the largest ``k`` scores.
    """
    actual = pd.DataFrame({
        Constants.user_id: [0],
        Constants.item_id: [0],
        'click': [True]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [0, 0, 0],
        Constants.item_id: [0, 1, 2],
        'click': [0, -1, -2]
    })

    # The single relevant item (item 0) carries the highest predicted score,
    # so the expected Precision@k is 1 / k for k = 1, 2, 3.
    for k in (1, 2, 3):
        score = RankingRecoMetrics.Precision('click', k=k).get_score(actual, predicted)
        # 1 / 3 is not exactly representable, so compare with a tolerance
        # instead of relying on bit-for-bit float equality.
        self.assertAlmostEqual(1. / k, score, msg='Precision@{}'.format(k))
def test_reco_quick_start_example(self):
    """Runs the quick-start example end to end and pins each metric's score."""
    # Data
    actual = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 0, 3],
        "clicks": [0, 1, 0, 0]
    })
    predicted = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 2, 3],
        "clicks": [0.8, 0.7, 0.8, 0.7]
    })

    # Metrics
    auc = BinaryRecoMetrics.AUC(click_column="clicks")
    ctr = BinaryRecoMetrics.CTR(click_column="clicks")
    ndcg_k = RankingRecoMetrics.NDCG(click_column="clicks", k=3)
    precision_k = RankingRecoMetrics.Precision(click_column="clicks", k=2)
    recall_k = RankingRecoMetrics.Recall(click_column="clicks", k=2)
    map_k = RankingRecoMetrics.MAP(click_column="clicks", k=2)

    # Scores
    # The scores are floats, so compare with a tolerance rather than against
    # hard-coded digits; expected values come first, as in the other tests.
    self.assertAlmostEqual(0.25, auc.get_score(actual, predicted))
    self.assertAlmostEqual(1. / 3, ctr.get_score(actual, predicted))
    self.assertAlmostEqual(1, ndcg_k.get_score(actual, predicted))
    self.assertAlmostEqual(1, precision_k.get_score(actual, predicted))
    self.assertAlmostEqual(1, recall_k.get_score(actual, predicted))
    self.assertAlmostEqual(1, map_k.get_score(actual, predicted))
def test_precision(self):
    """Checks Precision@2 (score and support) and Precision@3 on a small fixture."""
    # User 1 was recommended items 1, 3, 2 and has items 1, 4 relevant
    # User 2 and 3 check for no relevant items
    # User 4 checks for no recommendations
    actual = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 3, 4],
        Constants.item_id: [1, 2, 4, 1, 3],
        'click': [1, 0, 1, 0, 1]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 2, 3],
        Constants.item_id: [1, 2, 3, 3, 1],
        'click': [0.8, 0.7, 0.75, 0.7, 0.5]
    })

    metric = RankingRecoMetrics.Precision(click_column='click', k=2)
    results = metric.get_score(actual, predicted, return_extended_results=True)
    self.assertAlmostEqual(0.5, results['precision'])
    # Support is a count, so it is compared exactly.
    self.assertEqual(1, results['support'])

    precision_3 = RankingRecoMetrics.Precision(click_column='click', k=3)
    result_3 = precision_3.get_score(actual, predicted)
    # 1 / 3 is not exactly representable; use a tolerance, not float equality.
    self.assertAlmostEqual(1. / 3, result_3)
def test_recall(self):
    """Checks Recall@2 (score and support) and Recall@3 on a small fixture."""
    # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2,
    # so they should be included in the support
    # User 2 checks for no relevant items, so they shouldn't be included in the support
    # User 3 & 4 checks for no recommendations, but they should be included in the support
    actual = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 1, 3, 4],
        Constants.item_id: [1, 2, 3, 4, 3, 1],
        'click': [1, 1, 0, 1, 1, 1]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 2],
        Constants.item_id: [1, 2, 3, 3],
        'click': [0.8, 0.7, 0.75, 0.7]
    })

    metric = RankingRecoMetrics.Recall(click_column='click', k=2)
    results = metric.get_score(actual, predicted, return_extended_results=True)
    # Ninths are not exactly representable; use a tolerance, not float equality.
    self.assertAlmostEqual(1. / 9, results['recall'])
    # Support is a count, so it is compared exactly.
    self.assertEqual(3, results['support'])

    recall_3 = RankingRecoMetrics.Recall(click_column='click', k=3).get_score(actual, predicted)
    self.assertAlmostEqual(2. / 9, recall_3)
def test_map(self):
    """Checks MAP@2 (score and support), MAP@3 and MAP@4 on a small fixture."""
    # User 1 got items 1,3,2,4 as recommendations. Items 1 and 4 are relevant.
    # User 2 checks for no relevant items
    # User 3 checks for no recommendations
    ground_truth = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 3],
        Constants.item_id: [1, 2, 4, 3],
        'click': [1, 0, 1, 1]
    })
    recommendations = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 1, 2],
        Constants.item_id: [1, 2, 3, 4, 3],
        'click': [0.8, 0.7, 0.75, 0.65, 0.7]
    })

    # k=2: check both the score and the support from the extended results.
    extended = RankingRecoMetrics.MAP(click_column='click', k=2).get_score(
        ground_truth, recommendations, return_extended_results=True)
    self.assertEqual(0.5, extended['map'])
    self.assertEqual(1, extended['support'])

    # Larger cut-offs: only the plain score is checked.
    for k, expected_score in ((3, 0.5), (4, 0.75)):
        map_at_k = RankingRecoMetrics.MAP(click_column='click', k=k)
        self.assertEqual(expected_score, map_at_k.get_score(ground_truth, recommendations))
def test_ndcg(self):
    """Checks the ``idcg`` helper for 3 positions, then NDCG@3 score and support."""
    # First, test the IDCG value
    idcg_val = idcg(3)
    # Sums of log terms are subject to rounding; compare with a tolerance.
    self.assertAlmostEqual(1. / np.log2(2) + 1. / np.log2(3) + 1. / np.log2(4), idcg_val)

    # Then, test NDCG
    # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2
    # User 2 checks for no relevant items, shouldn't contribute to the metric
    # User 3 checks for no recommendations, is 0
    # User 4 has items 1, 2 relevant and was recommended items 3, 4; is 0
    actual = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 1, 3, 4, 4],
        Constants.item_id: [1, 2, 3, 4, 3, 1, 2],
        'click': [1, 1, 0, 1, 1, 1, 1]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 2, 4, 4],
        Constants.item_id: [1, 2, 3, 3, 3, 4],
        'click': [0.8, 0.7, 0.75, 0.7, 0.6, 0.4]
    })

    metric = RankingRecoMetrics.NDCG(click_column='click', k=3)
    results = metric.get_score(actual, predicted, return_extended_results=True)

    # User 1 scores hits at ranks 1 and 3; the other two supported users
    # score 0, hence the division by 3.
    expected_ndcg = ((1. / np.log2(2) + 1. / np.log2(4)) / idcg_val) / 3
    self.assertAlmostEqual(expected_ndcg, results['ndcg'])
    # Support is a count, so it is compared exactly.
    self.assertEqual(3, results['support'])
def test_change_column_names(self):
    """Checks that metrics accept custom user/item column names."""
    user_id_column, item_id_column = 'uid', 'iid'
    # Keyword arguments shared by every metric constructed below.
    column_kwargs = {
        'user_id_column': user_id_column,
        'item_id_column': item_id_column,
    }

    actual = pd.DataFrame({
        user_id_column: [0, 0],
        item_id_column: [1, 2],
        'click': [True, True]
    })
    predicted = pd.DataFrame({
        user_id_column: [0, 0, 0, 0],
        item_id_column: [0, 1, 2, 3],
        'click': [0, -1, -2, -3]
    })

    # Test that the output is the same
    recall_at_1 = RankingRecoMetrics.Recall('click', k=1, **column_kwargs)
    self.assertEqual(0., recall_at_1.get_score(actual, predicted))
    recall_at_2 = RankingRecoMetrics.Recall('click', k=2, **column_kwargs)
    self.assertEqual(0.5, recall_at_2.get_score(actual, predicted))

    # Test that none of the metrics crash
    metric_classes = (
        BinaryRecoMetrics.CTR,
        RankingRecoMetrics.MAP,
        RankingRecoMetrics.NDCG,
        RankingRecoMetrics.Precision,
        RankingRecoMetrics.Recall,
    )
    combined = CombinedMetrics(
        *[metric_cls('click', k=1, **column_kwargs) for metric_cls in metric_classes])
    combined.get_score(actual, predicted)
def test_accumulate_when_no_results_this_batch(self):
    """Checks batch and accumulated result sizes across two accumulated batches.

    In the second batch the actual and predicted frames refer to different
    users (1 vs. 2); both result collections must still have two entries.
    """
    combined = CombinedMetrics(
        RankingRecoMetrics.Recall(click_column='click', k=1),
        BinaryRecoMetrics.CTR(click_column='click', k=1))

    def single_row_frame(user, item, click):
        # One-row interaction frame using the default column names.
        return pd.DataFrame({
            Constants.user_id: [user],
            Constants.item_id: [item],
            'click': [click]
        })

    # (actual row, predicted row) per batch, each as (user, item, click).
    batches = (
        ((0, 0, True), (0, 0, 0)),
        ((1, 1, True), (2, 2, 0)),
    )
    for actual_row, predicted_row in batches:
        actual = single_row_frame(*actual_row)
        predicted = single_row_frame(*predicted_row)
        batch_res, acc_res = combined.get_score(
            actual, predicted, batch_accumulate=True, return_extended_results=True)
        self.assertEqual(2, len(batch_res))
        self.assertEqual(2, len(acc_res))
def test_inter_list_diversity_in_use(self):
    """Checks ``InterListDiversity`` inside ``CombinedMetrics``.

    Without batch accumulation the combined result has three entries and the
    expected diversity score; with ``batch_accumulate=True`` a ``ValueError``
    is expected.
    """
    actual = pd.DataFrame({
        Constants.user_id: [0, 0, 1, 2],
        Constants.item_id: [4, 5, 1, 4],
        'score': [True, True, True, True]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
        Constants.item_id: [0, 1, 2, 3, 0, 1, 2, 4, 0, 1, 2, 5],
        'score': [0.9, 0.7, 0.6, 0.3, 0.9, 0.7, 0.4, 0.1, 0.9, 0.8, 0.6, 0.6]
    })

    metrics = CombinedMetrics(
        RankingRecoMetrics.Recall(click_column='score', k=4),
        BinaryRecoMetrics.CTR(click_column='score', k=4),
        DiversityRecoMetrics.InterListDiversity(
            click_column='score', k=4, user_sample_size=2, num_runs=5))

    acc_res = metrics.get_score(
        actual, predicted, batch_accumulate=False, return_extended_results=True)
    self.assertEqual(3, len(acc_res))
    # The diversity score is a float; compare with a tolerance.
    self.assertAlmostEqual(
        0.25, acc_res['Inter-List Diversity@4']['inter-list diversity'])

    with self.assertRaises(ValueError):
        # This should fail when `batch_accumulate=True`, and `InterListDiversity`
        # gets applied in combined metrics.
        # The return value is deliberately not unpacked: a failed tuple
        # unpacking raises ValueError too and would satisfy this assertion
        # even if `get_score` itself never raised.
        metrics.get_score(
            actual, predicted, batch_accumulate=True, return_extended_results=True)