Exemplo n.º 1
0
    def test_perform(self):
        """Check ``CosineSimilarity.perform`` on a set of reference vector pairs.

        The expected values cover a generic pair, pairs where one or both
        vectors are all zeros (expected similarity 0), identical vectors
        (expected 1) and opposite vectors (expected -1).

        Results are floating point numbers, so every comparison goes through
        ``assertAlmostEqual``: an exact equality check could fail on a harmless
        rounding error (e.g. 1.0000000000000002 instead of 1). Each pair runs
        in its own ``subTest`` so one failing case does not hide the others.
        """
        sim = CosineSimilarity()

        # (first vector, second vector, expected similarity)
        cases = [
            # generic vectors
            ([5, 9, 7, 8, 3, 5, 4, 2, 6, 4],
             [8, 1, 3, 10, 8, 4, 9, 2, 1, 6],
             0.7552110293516224),
            # first vector is all zeros
            ([0, 0, 0], [1, 1, 1], 0),
            # second vector is all zeros
            ([1, 1, 1], [0, 0, 0], 0),
            # both vectors are all zeros
            ([0, 0, 0], [0, 0, 0], 0),
            # identical vectors
            ([1, 1, 1], [1, 1, 1], 1),
            # opposite vectors
            ([1, 1, 1], [-1, -1, -1], -1),
        ]

        for first, second, expected in cases:
            with self.subTest(a=first, b=second):
                a = np.array(first)
                b = np.array(second)
                self.assertAlmostEqual(sim.perform(a, b), expected)
Exemplo n.º 2
0
    def test_empty_frame(self):
        """``fit_rank`` for user 'A000' must yield an empty frame in degenerate cases.

        Three single-rating frames are built (one score-5 rating, one score-1
        rating, one rating on an item id "not exists"). With threshold=3,
        ClassifierRecommender is checked against all three frames, while
        CentroidVector is checked against the score-1 and the unknown-item
        frames only. A fresh algorithm instance is created for every scenario.
        """
        def single_rating_frame(item_id, score):
            # One-row ratings frame for user 'A000'.
            return pd.DataFrame.from_records(
                [("A000", item_id, score, "54654675")],
                columns=["from_id", "to_id", "score", "timestamp"])

        def new_classifier():
            return ClassifierRecommender({'Plot': ['tfidf', 'embedding']},
                                         SkSVC(),
                                         threshold=3)

        def new_centroid():
            return CentroidVector({'Plot': ['tfidf', 'embedding']},
                                  CosineSimilarity(),
                                  threshold=3)

        only_positive = single_rating_frame("tt0114576", 5)
        only_negative = single_rating_frame("tt0114576", 1)
        unknown_item = single_rating_frame("not exists", 1)

        scenarios = (
            # ClassifierRecommender returns an empty frame
            (new_classifier, only_positive),
            (new_classifier, only_negative),
            (new_classifier, unknown_item),
            # CentroidVector returns an empty frame
            (new_centroid, only_negative),
            (new_centroid, unknown_item),
        )

        for make_algorithm, ratings_frame in scenarios:
            recsys = ContentBasedRS(make_algorithm(), ratings_frame, self.movies_multiple)
            ranking = recsys.fit_rank('A000')
            self.assertTrue(ranking.empty)
Exemplo n.º 3
0
    def test_rank_multiple_representations(self):
        """Rank for user 'A000' with a CentroidVector using several fields/representations.

        No threshold is passed to the algorithm (per the original author's
        note, it is then derived from the mean of the user's ratings).
        Three rankings are checked: restricted to ``self.filter_list``,
        unrestricted, and limited to a fixed number of recommendations.
        """
        field_representations = {
            'Plot': ['tfidf', 'embedding'],
            "Genre": ['tfidf', 'embedding'],
            'imdbRating': [0],
        }
        alg = CentroidVector(field_representations, CosineSimilarity())

        ratings_a000 = self.ratings.query('from_id == "A000"')

        alg.process_rated(ratings_a000, self.movies_dir)
        alg.fit()

        # With a filter list: exactly the items of the filter list get ranked
        filtered_ranking = alg.rank(ratings_a000, self.movies_dir, filter_list=self.filter_list)
        filtered_ids = set(filtered_ranking['to_id'])
        self.assertEqual(len(filtered_ids), len(self.filter_list))
        self.assertCountEqual(filtered_ids, self.filter_list)

        # Without a filter list: only unrated items may be ranked, so the
        # ranked ids and the rated ids must not overlap
        full_ranking = alg.rank(ratings_a000, self.movies_dir)
        overlap = set(full_ranking['to_id']) & set(ratings_a000['to_id'])
        self.assertEqual(len(overlap), 0)

        # With a number of recommendations: the ranking has that many rows
        # and still contains no item the user already rated
        top_n = 5
        limited_ranking = alg.rank(ratings_a000, self.movies_dir, top_n)
        self.assertEqual(len(limited_ranking), top_n)
        overlap = set(limited_ranking['to_id']) & set(ratings_a000['to_id'])
        self.assertEqual(len(overlap), 0)
Exemplo n.º 4
0
    def test_rank_single_representation(self):
        """Rank for user 'A000' with a CentroidVector using one field and one representation.

        The algorithm uses only the 'embedding' representation of 'Genre',
        with threshold=0. Three rankings are checked: restricted to
        ``self.filter_list``, unrestricted, and limited to a fixed number of
        recommendations.
        """
        alg = CentroidVector({'Genre': ['embedding']}, CosineSimilarity(), threshold=0)

        ratings_a000 = self.ratings.query('from_id == "A000"')

        alg.process_rated(ratings_a000, self.movies_dir)
        alg.fit()

        # With a filter list: exactly the items of the filter list get ranked
        filtered_ranking = alg.rank(ratings_a000, self.movies_dir, filter_list=self.filter_list)
        filtered_ids = set(filtered_ranking['to_id'])
        self.assertEqual(len(filtered_ids), len(self.filter_list))
        self.assertCountEqual(filtered_ids, self.filter_list)

        # Without a filter list: only unrated items may be ranked, so the
        # ranked ids and the rated ids must not overlap
        full_ranking = alg.rank(ratings_a000, self.movies_dir)
        overlap = set(full_ranking['to_id']) & set(ratings_a000['to_id'])
        self.assertEqual(len(overlap), 0)

        # With a number of recommendations: the ranking has that many rows
        # and still contains no item the user already rated
        top_n = 5
        limited_ranking = alg.rank(ratings_a000, self.movies_dir, top_n)
        self.assertEqual(len(limited_ranking), top_n)
        overlap = set(limited_ranking['to_id']) & set(ratings_a000['to_id'])
        self.assertEqual(len(overlap), 0)
Exemplo n.º 5
0
    def test_predict(self):
        """``predict`` on a fitted CentroidVector must raise ``NotPredictionAlg``.

        The algorithm is first prepared on the ratings of user 'A000'
        (``process_rated`` + ``fit``) so that the call under test is reached
        with a fitted instance.
        """
        centroid = CentroidVector({'Genre': ['embedding']}, CosineSimilarity(), threshold=0)
        ratings_a000 = self.ratings.query('from_id == "A000"')
        centroid.process_rated(ratings_a000, self.movies_dir)
        centroid.fit()

        # CentroidVector is not a score prediction algorithm, so predicting must fail
        self.assertRaises(NotPredictionAlg, centroid.predict, ratings_a000, self.movies_dir)
Exemplo n.º 6
0
    def test_fit_cb_w_testrating_methodology(self):
        """Evaluate a content-based recommender without passing a methodology.

        EvalModel is built with KFoldPartitioning and a single Precision
        metric; no ``methodology`` argument is given, so its default is used
        (presumably the test-ratings methodology, judging by the test name).
        ``fit`` must return two pandas DataFrames: system results and
        per-user results.
        """
        algorithm = CentroidVector({"Plot": "tfidf"}, CosineSimilarity())
        recsys = ContentBasedRS(algorithm, ratings, items_dir)

        evaluator = EvalModel(recsys, KFoldPartitioning(), metric_list=[Precision()])

        sys_result, users_result = evaluator.fit()

        for outcome in (sys_result, users_result):
            self.assertIsInstance(outcome, pd.DataFrame)
Exemplo n.º 7
0
    def test_raise_errors(self):
        """``process_rated`` must raise when the user's ratings cannot be used.

        Two single-rating scenarios for user 'A000' are exercised, each with a
        fresh CentroidVector:

        * a single rating with score -1 -> ``OnlyNegativeItems``
        * a single rating on the item id "non existent" -> ``NoRatedItems``

        Note that every scenario replaces ``self.ratings`` on this test instance.
        """
        scenarios = (
            # Only a negative rating is available
            (("A000", "tt0112281", -1, "54654675"), OnlyNegativeItems),
            # The rated item is not available locally
            (("A000", "non existent", 1, "54654675"), NoRatedItems),
        )

        for record, expected_error in scenarios:
            self.ratings = pd.DataFrame.from_records(
                [record],
                columns=["from_id", "to_id", "score", "timestamp"])

            centroid = CentroidVector({'Plot': 'embedding'}, CosineSimilarity(), 0)
            ratings_a000 = self.ratings.query('from_id == "A000"')

            with self.assertRaises(expected_error):
                centroid.process_rated(ratings_a000, self.movies_dir)
Exemplo n.º 8
0
    def test_centroid_vector(self):
        """Exercise ContentBasedRS for user 'A000' with a CentroidVector algorithm.

        Checks that score prediction is rejected with ``NotPredictionAlg``,
        that ranking restricted to ``self.filter_list`` returns one row per
        listed item, and that a top-n ranking returns exactly n rows.
        """
        top_n = 3

        centroid = CentroidVector({'Plot': ['tfidf', 'embedding']},
                                  CosineSimilarity())
        recsys = ContentBasedRS(centroid, ratings, self.movies_multiple)

        # CentroidVector is not a score prediction algorithm: fit_predict must fail
        self.assertRaises(NotPredictionAlg, recsys.fit_predict, 'A000')

        # Ranking restricted to the items of the filter list
        filtered_ranking = recsys.fit_rank('A000', filter_list=self.filter_list)
        self.assertEqual(len(filtered_ranking), len(self.filter_list))

        # Top-n ranking
        limited_ranking = recsys.fit_rank('A000', recs_number=top_n)
        self.assertEqual(len(limited_ranking), top_n)
Exemplo n.º 9
0
    def test_fit_cb_w_allitems_methodology(self):
        """Evaluate a content-based recommender with ``AllItemsMethodology``.

        The item ids handed to the methodology are the extension-less names of
        the regular files in ``items_dir`` whose name ends with 'xz'.
        ``fit`` must return two pandas DataFrames: system results and
        per-user results.
        """
        algorithm = CentroidVector({"Plot": "tfidf"}, CosineSimilarity())
        recsys = ContentBasedRS(algorithm, ratings, items_dir)

        # File name without its last extension, for each 'xz' file in items_dir
        item_ids = {
            os.path.splitext(filename)[0]
            for filename in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, filename)) and filename.endswith('xz')
        }

        evaluator = EvalModel(recsys,
                              KFoldPartitioning(),
                              metric_list=[Precision()],
                              methodology=AllItemsMethodology(item_ids))

        sys_result, users_result = evaluator.fit()

        for outcome in (sys_result, users_result):
            self.assertIsInstance(outcome, pd.DataFrame)
    def setUp(self) -> None:
        """Store a CentroidVector instance in ``self.alg`` before each test.

        The third positional argument (0) is presumably the threshold -- TODO confirm
        against the CentroidVector signature.
        """

        # ContentBasedAlgorithm is an abstract class, so we need to instantiate
        # a subclass to test its methods. No initialization since we are not testing
        # methods that need it
        self.alg = CentroidVector({'Plot': 'tfidf'}, CosineSimilarity(), 0)