def test_fit_graph_w_testrating_methodology(self):
    """Fitting a graph-based RS with the default (test-ratings) methodology
    must return system-level and per-user results as pandas DataFrames."""
    full_graph = NXFullGraph(ratings)
    graph_recsys = GraphBasedRS(NXPageRank(), full_graph)
    eval_model = EvalModel(graph_recsys, KFoldPartitioning(), metric_list=[Precision()])
    sys_result, users_result = eval_model.fit()
    # Both outputs share the same expected container type.
    for frame in (sys_result, users_result):
        self.assertIsInstance(frame, pd.DataFrame)
def test_perform_greater_k(self):
    """If k exceeds the number of rows, PrecisionAtK must degenerate to
    plain Precision (the cut-off no longer truncates anything)."""
    at_k_metric = PrecisionAtK(k=999, relevant_threshold=3)
    plain_metric = Precision(relevant_threshold=3)
    # Sort both outputs so the comparison is order-independent.
    result = np.sort(at_k_metric.perform(split_w_new_items), axis=0)
    expected = np.sort(plain_metric.perform(split_w_new_items), axis=0)
    self.assertTrue(np.array_equal(expected, result))
def test_fit_cb_w_testrating_methodology(self):
    """Fitting a content-based RS with the default (test-ratings) methodology
    must return system-level and per-user results as pandas DataFrames."""
    centroid_algorithm = CentroidVector(
        {"Plot": "tfidf"},
        CosineSimilarity(),
    )
    cb_recsys = ContentBasedRS(centroid_algorithm, ratings, items_dir)
    eval_model = EvalModel(cb_recsys, KFoldPartitioning(), metric_list=[Precision()])
    sys_result, users_result = eval_model.fit()
    for frame in (sys_result, users_result):
        self.assertIsInstance(frame, pd.DataFrame)
def test_all(self):
    """Smoke test: run a full evaluation pipeline with (almost) every metric
    at once, using the TestItems methodology, to catch integration breakage.

    Fix: build the catalog with a set comprehension instead of
    ``set([listcomp])`` (flake8-comprehensions C403), and drop the unused
    ``result`` binding.
    """
    ratings_filename = os.path.join(contents_path, '..', 'datasets', 'examples', 'new_ratings.csv')
    ratings_frame = RatingsImporter(CSVFile(ratings_filename)).import_ratings()

    rs = ContentBasedRS(
        LinearPredictor(
            {"Plot": ['tfidf', 'embedding']},
            SkLinearRegression(),
        ),
        ratings_frame,
        items_dir)

    # Catalog = ids of all serialized ('.xz') items in the items directory.
    catalog = {
        os.path.splitext(f)[0]
        for f in os.listdir(items_dir)
        if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
    }

    em = EvalModel(rs,
                   KFoldPartitioning(),
                   metric_list=[
                       Precision(sys_average='micro'),
                       PrecisionAtK(1, sys_average='micro'),
                       RPrecision(),
                       Recall(),
                       RecallAtK(3, ),
                       FMeasure(1, sys_average='macro'),
                       FMeasureAtK(2, beta=1, sys_average='micro'),
                       NDCG(),
                       NDCGAtK(3),
                       MRR(),
                       MRRAtK(5, ),
                       Correlation('pearson', top_n=5),
                       Correlation('kendall', top_n=3),
                       Correlation('spearman', top_n=4),
                       MAE(),
                       MSE(),
                       RMSE(),
                       CatalogCoverage(catalog),
                       CatalogCoverage(catalog, k=2),
                       CatalogCoverage(catalog, top_n=3),
                       GiniIndex(),
                       GiniIndex(top_n=3),
                       DeltaGap({
                           'primo': 0.5,
                           'secondo': 0.5
                       })
                   ],
                   methodology=TestItemsMethodology())
    # The test only checks that the whole pipeline runs without raising.
    em.fit()
def test_graph(self):
    """End-to-end evaluation of a graph-based RS built on a full graph with
    exogenous item/user properties, running a broad mix of metrics
    (including the plotting ones) under the TestItems methodology."""
    catalog = set(ratings.to_id)
    users_dir = os.path.join(dir_test_files, 'complex_contents', 'users_codified/')

    full_graph = NXFullGraph(
        ratings,
        user_contents_dir=users_dir,
        item_contents_dir=items_dir,
        item_exo_representation="dbpedia",
        user_exo_representation='local',
        item_exo_properties=['starring'],
        # '1' is the column in the users .DAT file which identifies the gender.
        user_exo_properties=['1']
    )
    graph_rs = GraphBasedRS(NXPageRank(), full_graph)

    pop_groups = {'popular': 0.5, 'niche': 0.5}
    metrics = [
        Precision(relevant_threshold=3),
        Recall(),
        FMeasure(beta=1),
        FMeasure(beta=2, sys_average='micro'),
        MRR(),
        Correlation('pearson'),
        GiniIndex(),
        DeltaGap(pop_groups),
        PredictionCoverage(catalog),
        PopProfileVsRecs(user_groups=pop_groups, out_dir='plots/'),
        LongTailDistr('plots/', format='svg'),
        PopRecsCorrelation('plots/')
    ]

    eval_model = EvalModel(graph_rs,
                           KFoldPartitioning(),
                           metric_list=metrics,
                           verbose_predictions=True,
                           methodology=TestItemsMethodology())
    # Only checks that the whole evaluation completes without raising.
    eval_model.fit()
def test_fit_graph_w_allitems_methodology(self):
    """Fitting a graph-based RS with the AllItems methodology must return
    system-level and per-user results as pandas DataFrames.

    Fix: build the items set with a set comprehension instead of
    ``set([listcomp])`` (flake8-comprehensions C403).
    """
    graph = NXFullGraph(ratings)
    rs = GraphBasedRS(NXPageRank(), graph)

    # All serialized ('.xz') items in the items directory form the candidate set.
    items = {
        os.path.splitext(f)[0]
        for f in os.listdir(items_dir)
        if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
    }

    em = EvalModel(rs,
                   KFoldPartitioning(),
                   metric_list=[Precision()],
                   methodology=AllItemsMethodology(items))
    sys_result, users_result = em.fit()
    self.assertIsInstance(sys_result, pd.DataFrame)
    self.assertIsInstance(users_result, pd.DataFrame)
def test_fit_cb_w_allitems_methodology(self):
    """Fitting a content-based RS with the AllItems methodology must return
    system-level and per-user results as pandas DataFrames.

    Fix: build the items set with a set comprehension instead of
    ``set([listcomp])`` (flake8-comprehensions C403).
    """
    rs = ContentBasedRS(
        CentroidVector(
            {"Plot": "tfidf"},
            CosineSimilarity(),
        ),
        ratings,
        items_dir)

    # All serialized ('.xz') items in the items directory form the candidate set.
    items = {
        os.path.splitext(f)[0]
        for f in os.listdir(items_dir)
        if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
    }

    em = EvalModel(rs,
                   KFoldPartitioning(),
                   metric_list=[Precision()],
                   methodology=AllItemsMethodology(items))
    sys_result, users_result = em.fit()
    self.assertIsInstance(sys_result, pd.DataFrame)
    self.assertIsInstance(users_result, pd.DataFrame)
def test_eval_ranking_needed_metrics_implicit_split(self):
    """Every ranking-needed metric must evaluate correctly when the split
    list is provided implicitly through the class attribute rather than
    passed explicitly to the calculator."""
    calculator = MetricCalculator()
    # We set the split_list directly by the class attribute.
    RankingNeededMetric.rank_truth_list = self.rank_split_list

    halves = {'a': 0.5, 'b': 0.5}
    ranking_metrics = [
        Precision(),
        PrecisionAtK(2),
        RPrecision(),
        Recall(),
        RecallAtK(2),
        FMeasure(),
        FMeasureAtK(2),
        NDCG(),
        NDCGAtK(2),
        MRR(),
        MRRAtK(2),
        Correlation('pearson'),
        Correlation('kendall'),
        Correlation('spearman'),
        PredictionCoverage(self.catalog),
        CatalogCoverage(self.catalog, top_n=2),
        GiniIndex(),
        DeltaGap(user_groups=halves),
        LongTailDistr(out_dir='test_plot'),
        PopProfileVsRecs(user_groups=halves, out_dir='test_plot'),
        PopRecsCorrelation(out_dir='test_plot')
    ]

    system_res, each_user_res = calculator.eval_metrics(ranking_metrics)
    for frame in (system_res, each_user_res):
        self.assertIsInstance(frame, pd.DataFrame)
def setUpClass(cls) -> None:
    """Instantiate the Precision variants shared by every test in this class."""
    shared_metrics = {
        # Per-user (macro) average with an explicit relevance cut-off.
        'metric_macro': Precision(relevant_threshold=3, sys_average='macro'),
        # Global (micro) average with the same relevance cut-off.
        'metric_micro': Precision(relevant_threshold=3, sys_average='micro'),
        # Macro average relying on the metric's default relevance handling.
        'metric_mean': Precision(sys_average='macro'),
    }
    for attr_name, metric in shared_metrics.items():
        setattr(cls, attr_name, metric)