Example #1
    def test_rank_single_representation(self, model: Regressor):
        lm = model

        # Single representation
        alg = LinearPredictor({'Plot': ['tfidf']}, lm)

        user_ratings = self.ratings.query('from_id == "A000"')

        alg.process_rated(user_ratings, self.movies_dir)
        alg.fit()

        # rank with filter_list
        res_filtered = alg.rank(user_ratings, self.movies_dir, filter_list=self.filter_list)
        item_ranked_set = set(res_filtered['to_id'])
        self.assertEqual(len(item_ranked_set), len(self.filter_list))
        self.assertCountEqual(item_ranked_set, self.filter_list)

        # rank without filter_list
        res_all_unrated = alg.rank(user_ratings, self.movies_dir)
        item_rated_set = set(user_ratings['to_id'])
        item_ranked_set = set(res_all_unrated['to_id'])
        # We expect this to be empty, since the alg should rank only unrated items (unless a filter_list is given)
        rated_in_ranked = item_ranked_set.intersection(item_rated_set)
        self.assertEqual(len(rated_in_ranked), 0)

        # rank with n_recs specified
        n_recs = 5
        res_n_recs = alg.rank(user_ratings, self.movies_dir, n_recs)
        self.assertEqual(len(res_n_recs), n_recs)
        item_rated_set = set(user_ratings['to_id'])
        item_ranked_set = set(res_n_recs['to_id'])
        # We expect this to be empty, since the alg should rank only unrated items (unless a filter_list is given)
        rated_in_ranked = item_ranked_set.intersection(item_rated_set)
        self.assertEqual(len(rated_in_ranked), 0)
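For reference, a minimal self-contained sketch (with hypothetical item ids) of the two checks this test performs: a ranking restricted by filter_list must contain exactly the items in that list, and an unrestricted ranking must be disjoint from the items the user has already rated.

filter_list = ['i1', 'i2', 'i3']               # hypothetical item ids
res_filtered = ['i3', 'i1', 'i2']              # hypothetical ranking restricted to filter_list
assert set(res_filtered) == set(filter_list)   # same items, order does not matter

rated = {'i4', 'i5'}                           # hypothetical already-rated items
res_all_unrated = ['i1', 'i2', 'i3']           # hypothetical ranking over unrated items only
assert set(res_all_unrated).isdisjoint(rated)  # no rated item may appear in the ranking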
Example #2
    def test_predict_multiple_representations(self, model: Regressor):
        lm = model

        # Multiple representations; only rated items with a score >= 2 are used (only_greater_eq=2)
        alg = LinearPredictor({'Plot': ['tfidf', 'embedding'],
                               'Genre': ['tfidf', 'embedding'],
                               'imdbRating': [0]}, lm, only_greater_eq=2)

        user_ratings = self.ratings.query('from_id == "A000"')

        alg.process_rated(user_ratings, self.movies_dir)
        alg.fit()

        # predict with filter_list
        res_filtered = alg.predict(user_ratings, self.movies_dir, filter_list=self.filter_list)
        item_scored_set = set(res_filtered['to_id'])
        self.assertEqual(len(item_scored_set), len(self.filter_list))
        self.assertCountEqual(item_scored_set, self.filter_list)

        # predict without filter_list
        res_all_unrated = alg.predict(user_ratings, self.movies_dir)
        item_rated_set = set(user_ratings['to_id'])
        item_scored_set = set(res_all_unrated['to_id'])
        # We expect this to be empty, since the alg should score only unrated items (unless a filter_list is given)
        rated_in_scored = item_scored_set.intersection(item_rated_set)
        self.assertEqual(len(rated_in_scored), 0)
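According to the inline comment above, only_greater_eq=2 keeps only ratings with a score of at least 2 when fitting. A hedged sketch of that thresholding on a toy frame follows; the 'score' column name is an assumption, since only 'from_id' and 'to_id' are visible in these tests.

import pandas as pd

# Toy ratings frame; 'score' is an assumed column name.
ratings = pd.DataFrame({'from_id': ['A000'] * 4,
                        'to_id': ['i1', 'i2', 'i3', 'i4'],
                        'score': [1.0, 2.0, 3.5, 0.5]})

kept = ratings.query('score >= 2')   # rows a threshold of 2 would retain for fitting
print(kept['to_id'].tolist())        # ['i2', 'i3']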
Example #3
    def test_all(self):
        ratings_filename = os.path.join(contents_path, '..', 'datasets',
                                        'examples', 'new_ratings.csv')

        ratings_frame = RatingsImporter(
            CSVFile(ratings_filename)).import_ratings()

        rs = ContentBasedRS(
            LinearPredictor(
                {"Plot": ['tfidf', 'embedding']},
                SkLinearRegression(),
            ), ratings_frame, items_dir)

        catalog = {
            os.path.splitext(f)[0] for f in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('.xz')
        }

        em = EvalModel(rs,
                       KFoldPartitioning(),
                       metric_list=[
                           Precision(sys_average='micro'),
                           PrecisionAtK(1, sys_average='micro'),
                           RPrecision(),
                           Recall(),
                           RecallAtK(3),
                           FMeasure(1, sys_average='macro'),
                           FMeasureAtK(2, beta=1, sys_average='micro'),
                           NDCG(),
                           NDCGAtK(3),
                           MRR(),
                           MRRAtK(5),
                           Correlation('pearson', top_n=5),
                           Correlation('kendall', top_n=3),
                           Correlation('spearman', top_n=4),
                           MAE(),
                           MSE(),
                           RMSE(),
                           CatalogCoverage(catalog),
                           CatalogCoverage(catalog, k=2),
                           CatalogCoverage(catalog, top_n=3),
                           GiniIndex(),
                           GiniIndex(top_n=3),
                           DeltaGap({
                               'primo': 0.5,
                               'secondo': 0.5
                           })
                       ],
                       methodology=TestItemsMethodology())

        result = em.fit()
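As an aside, the catalog built in test_all above is just the set of item ids obtained by stripping the extension from every serialized '.xz' item file in items_dir. A stand-alone sketch of the same idiom (not a library helper):

import os

def build_catalog(items_dir: str) -> set:
    # Item ids are the names of the '.xz' item files, without their extension.
    return {os.path.splitext(f)[0]
            for f in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('.xz')}
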
    def test_calc_scores_content_based(self):
        recsys = ContentBasedRS(
            LinearPredictor({'Plot': 'tfidf'}, SkLinearRegression()),
            self.ratings_original, movies_dir)

        # Any Metric of the ScoresNeededMetric class is enough for this test
        metric_list = [MAE()]

        valid_metric = PredictionCalculator(self.split_list,
                                            recsys).calc_predictions(
                                                self.test_items_list,
                                                metric_list)
        score_truth = ScoresNeededMetric.score_truth_list

        # We expect this to be empty, since there are no RankingNeededMetric instances in the metric list
        rank_truth = RankingNeededMetric.rank_truth_list

        self.assertEqual(valid_metric, metric_list)
        self.assertGreater(len(score_truth), 0)
        self.assertEqual(len(rank_truth), 0)
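For context on the error metrics used here (MAE in this last test, plus MSE and RMSE in the metric list of test_all), here are worked numbers under the standard definitions, which these metrics are assumed to follow:

# Hypothetical residuals (prediction - truth) for three items.
errors = [0.5, -1.0, 0.25]

mae = sum(abs(e) for e in errors) / len(errors)   # (0.5 + 1.0 + 0.25) / 3 ≈ 0.583
mse = sum(e * e for e in errors) / len(errors)    # (0.25 + 1.0 + 0.0625) / 3 = 0.4375
rmse = mse ** 0.5                                 # sqrt(0.4375) ≈ 0.661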