def test_precision_at_max_recs(self):
    """Tests Precision@k for the case when all users have exactly k recommendations.

    When all users have exactly k recommendations, there isn't an extra
    ``user_id`` index generated when sorting for the largest ``k`` scores.
    """
    actual = pd.DataFrame({
        Constants.user_id: [0],
        Constants.item_id: [0],
        'click': [True]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [0, 0, 0],
        Constants.item_id: [0, 1, 2],
        'click': [0, -1, -2]
    })

    self.assertEqual(
        1., RankingRecoMetrics.Precision('click', k=1).get_score(actual, predicted))
    self.assertEqual(
        0.5, RankingRecoMetrics.Precision('click', k=2).get_score(actual, predicted))
    self.assertEqual(
        1. / 3, RankingRecoMetrics.Precision('click', k=3).get_score(actual, predicted))
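# A hand-rolled reference for Precision@k on a single-user fixture like the
# one above (a sketch of the standard definition, not the library's internals;
# the helper name is hypothetical): keep the k highest-scored recommendations
# and divide the number of clicked items among them by k.
# e.g. _reference_precision_at_k(actual, predicted, k=2) == 0.5 on that fixture.
@staticmethod
def _reference_precision_at_k(actual, predicted, k):
    relevant = set(actual.loc[actual['click'] > 0, Constants.item_id])
    top_k = predicted.nlargest(k, 'click')[Constants.item_id]
    return sum(item in relevant for item in top_k) / k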
def test_precision(self):
    # User 1 was recommended items 1, 3, 2 (in score order) and has items 1, 4 relevant
    # Users 2 and 3 check the case of no relevant items
    # User 4 checks the case of no recommendations
    actual = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 3, 4],
        Constants.item_id: [1, 2, 4, 1, 3],
        'click': [1, 0, 1, 0, 1]
    })
    predicted = pd.DataFrame({
        Constants.user_id: [1, 1, 1, 2, 3],
        Constants.item_id: [1, 2, 3, 3, 1],
        'click': [0.8, 0.7, 0.75, 0.7, 0.5]
    })

    metric = RankingRecoMetrics.Precision(click_column='click', k=2)
    results = metric.get_score(actual, predicted, return_extended_results=True)
    self.assertEqual(0.5, results['precision'])
    self.assertEqual(1, results['support'])

    precision_3 = RankingRecoMetrics.Precision(click_column='click', k=3)
    result_3 = precision_3.get_score(actual, predicted)
    self.assertEqual(1. / 3, result_3)
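# The expected values above can be traced by hand for user 1, the only user
# with both relevant items and recommendations (hence support == 1):
#   top-2 by score: items 1 (0.8) and 3 (0.75); only item 1 is relevant
#   -> precision@2 = 1/2 = 0.5
#   top-3 by score: items 1, 3, 2; only item 1 is relevant
#   -> precision@3 = 1/3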
def test_reco_quick_start_example(self):
    # Data
    actual = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 0, 3],
        "clicks": [0, 1, 0, 0]
    })
    predicted = pd.DataFrame({
        "user_id": [1, 2, 3, 4],
        "item_id": [1, 2, 2, 3],
        "clicks": [0.8, 0.7, 0.8, 0.7]
    })

    # Metrics
    auc = BinaryRecoMetrics.AUC(click_column="clicks")
    ctr = BinaryRecoMetrics.CTR(click_column="clicks")
    ndcg_k = RankingRecoMetrics.NDCG(click_column="clicks", k=3)
    precision_k = RankingRecoMetrics.Precision(click_column="clicks", k=2)
    recall_k = RankingRecoMetrics.Recall(click_column="clicks", k=2)
    map_k = RankingRecoMetrics.MAP(click_column="clicks", k=2)

    # Scores
    self.assertEqual(auc.get_score(actual, predicted), 0.25)
    self.assertEqual(ctr.get_score(actual, predicted), 0.3333333333333333)
    self.assertEqual(ndcg_k.get_score(actual, predicted), 1)
    self.assertEqual(precision_k.get_score(actual, predicted), 1)
    self.assertEqual(recall_k.get_score(actual, predicted), 1)
    self.assertEqual(map_k.get_score(actual, predicted), 1)
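# The CTR of 1/3 above can be traced by hand (a sketch, assuming CTR here
# averages the actual click values over the (user, item) pairs present in
# both frames): users 1, 2, and 4 were scored on the item they actually saw
# (clicks 0, 1, 0), while user 3's actual item 0 was never recommended,
# giving (0 + 1 + 0) / 3 = 0.3333...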
def test_change_column_names(self):
    user_id_column = 'uid'
    item_id_column = 'iid'
    actual = pd.DataFrame({
        user_id_column: [0, 0],
        item_id_column: [1, 2],
        'click': [True, True]
    })
    predicted = pd.DataFrame({
        user_id_column: [0, 0, 0, 0],
        item_id_column: [0, 1, 2, 3],
        'click': [0, -1, -2, -3]
    })

    # Test that the output is the same as with the default column names
    recall_1 = RankingRecoMetrics.Recall('click', k=1,
                                         user_id_column=user_id_column,
                                         item_id_column=item_id_column)
    self.assertEqual(0., recall_1.get_score(actual, predicted))

    recall_2 = RankingRecoMetrics.Recall('click', k=2,
                                         user_id_column=user_id_column,
                                         item_id_column=item_id_column)
    self.assertEqual(0.5, recall_2.get_score(actual, predicted))

    # Test that none of the metrics crash
    metrics = CombinedMetrics(
        BinaryRecoMetrics.CTR('click', k=1, user_id_column=user_id_column,
                              item_id_column=item_id_column),
        RankingRecoMetrics.MAP('click', k=1, user_id_column=user_id_column,
                               item_id_column=item_id_column),
        RankingRecoMetrics.NDCG('click', k=1, user_id_column=user_id_column,
                                item_id_column=item_id_column),
        RankingRecoMetrics.Precision('click', k=1, user_id_column=user_id_column,
                                     item_id_column=item_id_column),
        RankingRecoMetrics.Recall('click', k=1, user_id_column=user_id_column,
                                  item_id_column=item_id_column))
    metrics.get_score(actual, predicted)
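# A hypothetical companion check (a sketch, not part of the original suite):
# custom column names should also flow through to extended results, reusing
# the 'precision' key exercised in test_precision above.
def test_change_column_names_extended_results(self):
    user_id_column = 'uid'
    item_id_column = 'iid'
    actual = pd.DataFrame({
        user_id_column: [0, 0],
        item_id_column: [1, 2],
        'click': [True, True]
    })
    predicted = pd.DataFrame({
        user_id_column: [0, 0, 0, 0],
        item_id_column: [0, 1, 2, 3],
        'click': [0, -1, -2, -3]
    })

    precision_1 = RankingRecoMetrics.Precision('click', k=1,
                                               user_id_column=user_id_column,
                                               item_id_column=item_id_column)
    results = precision_1.get_score(actual, predicted, return_extended_results=True)

    # The top-1 recommendation is item 0, which is not relevant -> precision 0
    self.assertEqual(0., results['precision'])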