def test_perform_greater_k(self):
    metric = RecallAtK(k=999, relevant_threshold=3)

    result = metric.perform(split_w_new_items)

    expected = Recall(relevant_threshold=3).perform(split_w_new_items)

    result = np.sort(result, axis=0)
    expected = np.sort(expected, axis=0)

    # If k is greater than the number of rows, then it's a regular Recall
    self.assertTrue(np.array_equal(expected, result))
def test_perform_i1_i4_missing(self):
    metric = RecallAtK(k=3, relevant_threshold=3)

    result_macro = metric.perform(split_missing)

    expected_u1 = 0 / 2
    result_macro_u1 = float(result_macro.query('from_id == "u1"')[str(metric)])
    self.assertAlmostEqual(expected_u1, result_macro_u1)

    expected_u2 = 2 / 3
    result_macro_u2 = float(result_macro.query('from_id == "u2"')[str(metric)])
    self.assertAlmostEqual(expected_u2, result_macro_u2)

    expected_macro_sys = (expected_u1 + expected_u2) / 2
    result_macro_sys = float(result_macro.query('from_id == "sys"')[str(metric)])
    self.assertAlmostEqual(expected_macro_sys, result_macro_sys)
def test_perform_mean(self):
    metric = RecallAtK(k=3)

    result = metric.perform(split_w_new_items)

    expected_u1 = 1 / 2
    result_mean_u1 = float(result.query('from_id == "u1"')[str(metric)])
    self.assertAlmostEqual(expected_u1, result_mean_u1)

    expected_u2 = 1 / 1
    result_mean_u2 = float(result.query('from_id == "u2"')[str(metric)])
    self.assertAlmostEqual(expected_u2, result_mean_u2)

    expected_mean_sys = (expected_u1 + expected_u2) / 2
    result_mean_sys = float(result.query('from_id == "sys"')[str(metric)])
    self.assertAlmostEqual(expected_mean_sys, result_mean_sys)
def test_all(self):
    ratings_filename = os.path.join(contents_path, '..', 'datasets', 'examples', 'new_ratings.csv')

    ratings_frame = RatingsImporter(CSVFile(ratings_filename)).import_ratings()

    # Content-based recommender trained on the 'Plot' field with a linear regressor
    rs = ContentBasedRS(
        LinearPredictor({"Plot": ['tfidf', 'embedding']}, SkLinearRegression()),
        ratings_frame,
        items_dir
    )

    # The catalog is the set of item ids of all serialized contents in items_dir
    catalog = set(os.path.splitext(f)[0]
                  for f in os.listdir(items_dir)
                  if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz'))

    em = EvalModel(
        rs,
        KFoldPartitioning(),
        metric_list=[
            Precision(sys_average='micro'),
            PrecisionAtK(1, sys_average='micro'),
            RPrecision(),
            Recall(),
            RecallAtK(3),
            FMeasure(1, sys_average='macro'),
            FMeasureAtK(2, beta=1, sys_average='micro'),

            NDCG(),
            NDCGAtK(3),
            MRR(),
            MRRAtK(5),
            Correlation('pearson', top_n=5),
            Correlation('kendall', top_n=3),
            Correlation('spearman', top_n=4),

            MAE(),
            MSE(),
            RMSE(),

            CatalogCoverage(catalog),
            CatalogCoverage(catalog, k=2),
            CatalogCoverage(catalog, top_n=3),
            GiniIndex(),
            GiniIndex(top_n=3),
            DeltaGap({'primo': 0.5, 'secondo': 0.5})
        ],
        methodology=TestItemsMethodology()
    )

    result = em.fit()
def test_eval_ranking_needed_metrics_implicit_split(self):
    # We set the split list directly via the class attribute
    c = MetricCalculator()
    RankingNeededMetric.rank_truth_list = self.rank_split_list

    system_res, each_user_res = c.eval_metrics([
        Precision(),
        PrecisionAtK(2),
        RPrecision(),
        Recall(),
        RecallAtK(2),
        FMeasure(),
        FMeasureAtK(2),
        NDCG(),
        NDCGAtK(2),
        MRR(),
        MRRAtK(2),
        Correlation('pearson'),
        Correlation('kendall'),
        Correlation('spearman'),
        PredictionCoverage(self.catalog),
        CatalogCoverage(self.catalog, top_n=2),
        GiniIndex(),
        DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),
        LongTailDistr(out_dir='test_plot'),
        PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5}, out_dir='test_plot'),
        PopRecsCorrelation(out_dir='test_plot')
    ])

    self.assertIsInstance(system_res, pd.DataFrame)
    self.assertIsInstance(each_user_res, pd.DataFrame)