示例#1
0
    def test_predict(self):
        """Run ``NXPageRank.predict`` on a small ratings frame and log the rank.

        No assertion is made on the result: the test only requires that
        ``predict`` completes and that the returned mapping can be walked
        through ``keys()`` and indexed.
        """
        alg = NXPageRank()

        rating_rows = [
            ("A000", "tt0114576", 0.5, "54654675"),
            ("A000", "tt0112453", -0.5, "54654675"),
            ("A001", "tt0114576", 0.8, "54654675"),
            ("A001", "tt0112896", -0.4, "54654675"),
            ("A000", "tt0113041", 0.6, "54654675"),
            ("A002", "tt0112453", -0.2, "54654675"),
            ("A002", "tt0113497", 0.5, "54654675"),
            ("A003", "tt0112453", -0.8, "54654675"),
        ]
        ratings = pd.DataFrame.from_records(
            rating_rows,
            columns=["from_id", "to_id", "score", "timestamp"])

        # Probe the first candidate contents directory by opening one known
        # file in it; nothing is read. If the file is missing, switch to the
        # shorter relative path (presumably the working directory differs
        # depending on how the tests are launched -- confirm).
        path = "../../../contents/movielens_test1591885241.5520566"
        try:
            with lzma.open(os.path.join(path, "tt0114576.xz"), "r"):
                pass
        except FileNotFoundError:
            path = "contents/movielens_test1591885241.5520566"

        rank = alg.predict('A000', ratings, 1, path, ['tt0114576'])

        logger.info('pg_rk results')
        for node in rank.keys():
            logger.info('%s %s', str(node), str(rank[node]))
    def test_clean_rank(self):
        """Check ``clean_rank`` with ``remove_profile`` switched off and on.

        ``remove_from_nodes`` is enabled in both calls, and the same ``rank``
        mapping is passed to both.
        """
        # Every node starts with the same score.
        rank = dict.fromkeys(
            ["A000", "tt0114576", "A001", "tt0113497", "tt0112453"], 0.5)
        alg = NXPageRank(graph=graph)

        # Only the "from" nodes are removed from the rank.
        kept_with_profile = alg.clean_rank(
            rank, user_id="A000", remove_profile=False, remove_from_nodes=True)
        self.assertEqual(
            {"tt0114576": 0.5, "tt0113497": 0.5, "tt0112453": 0.5},
            kept_with_profile)

        # The "from" nodes and all data from user A000 are removed.
        kept_without_profile = alg.clean_rank(
            rank, user_id="A000", remove_profile=True, remove_from_nodes=True)
        self.assertEqual({"tt0113497": 0.5}, kept_without_profile)
    def test_extract_profile(self):
        """``extract_profile("A000")`` must return the expected mapping."""
        expected = {'tt0114576': 0.75, 'tt0112453': 0.25, 'tt0113041': 0.8}

        profile = NXPageRank(graph=graph).extract_profile("A000")

        self.assertEqual(expected, profile)
示例#4
0
    def test_extract_profile(self):
        """``extract_profile("3", graph)`` must return the expected mapping."""
        expected = {"tt0112453": 0.55, "55117": 0.5, "M": 0.5}

        profile = NXPageRank(graph=graph).extract_profile("3", graph)

        self.assertEqual(expected, profile)
    def test_predict(self):
        """Exercise ``predict`` without a graph, with one, and personalized.

        Each non-empty rank is dumped to stdout after a log header so it can
        be inspected when the test runs.
        """

        def report(ranking):
            # Log the header, then print one "<node> <score>" line per entry.
            logger.info('pg_rk results')
            for node in ranking.keys():
                print(str(node) + " " + str(ranking[node]))

        a001_ratings = ratings[ratings['from_id'] == 'A001']

        # Built without a graph: the returned rank must be empty.
        graphless = NXPageRank()
        self.assertEqual(graphless.predict(a001_ratings, 2), {})

        # Built on the graph.
        with_graph = NXPageRank(graph=graph)
        rank = with_graph.predict(a001_ratings, 1)
        report(rank)

        self.assertIn('tt0112453', rank.keys())

        # Disabled variant that used a feature selection algorithm:
        # alg = NXPageRank(graph=graph)
        # rank_fs = alg.predict('A001', ratings, 1, feature_selection_algorithm=NXFSPageRank())
        # logger.info('pg_rk results')
        # for r in rank_fs.keys():
        #     print(str(r) + " " + str(rank_fs[r]))

        # Built on the graph with ``personalized=True``.
        personalized = NXPageRank(graph=graph, personalized=True)
        rank_personalized = personalized.predict(a001_ratings, 1)
        report(rank_personalized)

        self.assertIn('tt0113041', rank_personalized)
示例#6
0
    def test_clean_rank(self):
        """Check ``clean_rank`` with every removal flag off, then every flag on."""
        # Every node starts with the same score.
        rank = dict.fromkeys(
            ["1", "tt0112281", "2", "tt0113497", "tt0112302"], 0.5)
        alg = NXPageRank(graph=graph)

        def set_removal_flags(enabled):
            # Toggle the three removal switches read from the algorithm object.
            alg.remove_items_in_profile = enabled
            alg.remove_properties = enabled
            alg.remove_user_nodes = enabled

        # With all flags off no node should be removed.
        # NOTE(review): the assertion below holds for a result of any size,
        # so it only verifies that the call succeeds and returns something
        # exposing ``keys()`` -- confirm whether a stricter check is intended.
        set_removal_flags(False)
        untouched = alg.clean_rank(rank, graph, "1")
        self.assertGreaterEqual(len(untouched.keys()), 0)

        # With all flags on, user nodes, property nodes and item nodes
        # already in the user profile are removed.
        set_removal_flags(True)
        cleaned = alg.clean_rank(rank, graph, "1")
        self.assertEqual({"tt0113497": 0.5}, cleaned)
示例#7
0
                           processor=NumberNormalizer(min_=1, max_=5))
    ],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp').import_ratings()

# Build the full graph from the imported ratings; only the item side gets a
# contents directory and exogenous properties.
full_graph = NXFullGraph(
    source_frame=ratings_import,
    user_contents_dir=None,
    item_contents_dir=item_contents_dir,
    user_exogenous_properties=None,
    item_exogenous_properties=['film_director', 'starring', 'producer'],
)

# Ask for 10 recommendations for user '1'.
rank = NXPageRank(graph=full_graph).predict(
    user_id='1', ratings=ratings_import, recs_number=10)

# Lay the rank out as a frame with one row per ranked entry, all attributed
# to user '1'.
rank_pd = pd.DataFrame({
    'from_id': ['1'] * len(rank.keys()),
    'to_id': list(rank.keys()),
    'rating': list(rank.values()),
})

# Ratings of user '1' from the imported frame, with 'score' renamed to
# 'rating' so the column name matches the one used in rank_pd.
truth_rank = ratings_import[ratings_import['from_id'] == '1'].rename(
    columns={'score': 'rating'})
print(truth_rank)

print(rank_pd)
示例#8
0
    def test_predict(self):
        """Check the size of the rank returned by ``predict`` in many setups.

        Covers a non-positive number of recommendations, an empty ratings
        frame, a candidate item list, several feature selection
        configurations, and ratings frames built by hand.
        """
        recommender = NXPageRank(graph)

        ratings_of_2 = ratings[ratings['from_id'] == '2']

        # Number of recommendations <= 0: nothing is returned.
        recs = recommender.predict(ratings_of_2, -10)
        self.assertEqual(len(recs), 0)

        # Standard prediction using the ratings of one user
        # (PageRank with priors).
        recs = recommender.predict(ratings_of_2, 3)
        self.assertEqual(len(recs), 3)

        # Empty dataframe (standard PageRank).
        no_ratings = pd.DataFrame()
        recs = recommender.predict(no_ratings, 3)
        self.assertEqual(len(recs), 3)

        # Restricting the output to a candidate item list: two candidates
        # are given and two entries are expected even though 3 are requested.
        candidates = ['tt0113277', 'tt0114709']
        recs = recommender.predict(
            ratings_of_2, 3, candidate_item_id_list=candidates)
        self.assertEqual(len(recs), 2)

        # Feature selection for both items and users, empty ratings.
        item_fs, user_fs = FSPageRank(1), FSPageRank(2)
        recommender = NXPageRank(graph=graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(no_ratings, 3)
        self.assertEqual(len(recs), 3)

        # Feature selection for both items and users, user ratings.
        item_fs, user_fs = FSPageRank(1), FSPageRank(2)
        recommender = NXPageRank(graph=graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(ratings_of_2, 3)
        self.assertEqual(len(recs), 3)

        # Feature selection for items only.
        item_fs = FSPageRank(1)
        recommender = NXPageRank(
            graph, item_feature_selection_algorithm=item_fs)
        recs = recommender.predict(ratings_of_2, 3)
        self.assertEqual(len(recs), 3)
        print(recs)

        # Feature selection for users only.
        user_fs = FSPageRank(1)
        recommender = NXPageRank(
            graph, user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(ratings_of_2, 3)
        self.assertEqual(len(recs), 3)
        print(recs)

        # Feature selection for items and users with k set to 0.
        item_fs, user_fs = FSPageRank(0), FSPageRank(0)
        recommender = NXPageRank(graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(ratings_of_2, 3)
        self.assertEqual(len(recs), 3)
        print(recs)

        # Only a subset of the ratings from the user is considered.
        subset_rows = [("2", "tt0112453", 0.4)]
        subset_ratings = pd.DataFrame.from_records(
            subset_rows, columns=["from_id", "to_id", "score"])
        item_fs, user_fs = FSPageRank(1), FSPageRank(1)
        recommender = NXPageRank(graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(subset_ratings, 3)
        self.assertEqual(len(recs), 3)
        print(recs)

        # Subset of ratings that also contains an item not present in the
        # original rating frame: an empty rank is expected.
        unknown_item_rows = [("2", "tt0112453", 0.4), ("2", "test", 0.3)]
        unknown_item_ratings = pd.DataFrame.from_records(
            unknown_item_rows, columns=["from_id", "to_id", "score"])
        item_fs, user_fs = FSPageRank(1), FSPageRank(1)
        recommender = NXPageRank(graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(unknown_item_ratings, 3)
        self.assertEqual(len(recs), 0)

        # User ratings containing only the most negative vote possible.
        ratings_of_8 = ratings[ratings['from_id'] == '8']
        item_fs, user_fs = FSPageRank(1), FSPageRank(0)
        recommender = NXPageRank(graph,
                                 item_feature_selection_algorithm=item_fs,
                                 user_feature_selection_algorithm=user_fs)
        recs = recommender.predict(ratings_of_8, 3)
        self.assertEqual(len(recs), 3)
        print(recs)