def test_predict(self):
    """Run ``NXPageRank.predict`` for user ``A000`` and log the ranking.

    The contents directory is resolved by probing a known ``.xz`` file at
    the relative path first and falling back to the shorter path when the
    probe raises ``FileNotFoundError``. The test makes no assertions on the
    returned ranking; it only logs each entry.
    """
    alg = NXPageRank()

    rating_rows = [
        ("A000", "tt0114576", 0.5, "54654675"),
        ("A000", "tt0112453", -0.5, "54654675"),
        ("A001", "tt0114576", 0.8, "54654675"),
        ("A001", "tt0112896", -0.4, "54654675"),
        ("A000", "tt0113041", 0.6, "54654675"),
        ("A002", "tt0112453", -0.2, "54654675"),
        ("A002", "tt0113497", 0.5, "54654675"),
        ("A003", "tt0112453", -0.8, "54654675"),
    ]
    ratings_frame = pd.DataFrame.from_records(
        rating_rows,
        columns=["from_id", "to_id", "score", "timestamp"],
    )

    # Opening the archive is used purely as an existence probe for the
    # directory layout; nothing is read from it.
    contents_dir = "../../../contents/movielens_test1591885241.5520566"
    try:
        probe_file = os.path.join(contents_dir, "tt0114576.xz")
        with lzma.open(probe_file, "r"):
            pass
    except FileNotFoundError:
        contents_dir = "contents/movielens_test1591885241.5520566"

    rank = alg.predict('A000', ratings_frame, 1, contents_dir, ['tt0114576'])

    logger.info('pg_rk results')
    for node, score in rank.items():
        logger.info('%s %s', str(node), str(score))
def test_clean_rank(self):
    """Check ``clean_rank`` for user ``A000`` with and without profile removal.

    Every node in the input ranking carries the same score (0.5), so the
    expectations only differ in which keys survive the cleaning.
    """
    scores = dict.fromkeys(
        ["A000", "tt0114576", "A001", "tt0113497", "tt0112453"],
        0.5,
    )
    alg = NXPageRank(graph=graph)

    # Only the "from" nodes are dropped: the two A00x keys disappear.
    cleaned = alg.clean_rank(
        scores,
        user_id="A000",
        remove_profile=False,
        remove_from_nodes=True,
    )
    self.assertEqual(
        {"tt0114576": 0.5, "tt0113497": 0.5, "tt0112453": 0.5},
        cleaned,
    )

    # Additionally dropping everything tied to A000's profile leaves a
    # single item.
    cleaned = alg.clean_rank(
        scores,
        user_id="A000",
        remove_profile=True,
        remove_from_nodes=True,
    )
    self.assertEqual({"tt0113497": 0.5}, cleaned)
def test_extract_profile(self):
    """Check the profile extracted for user ``A000`` from the shared graph."""
    page_rank = NXPageRank(graph=graph)
    profile = page_rank.extract_profile("A000")

    self.assertEqual(
        {'tt0114576': 0.75, 'tt0112453': 0.25, 'tt0113041': 0.8},
        profile,
    )
def test_extract_profile(self):
    """Check the profile extracted for user ``3`` when the graph is passed explicitly."""
    page_rank = NXPageRank(graph=graph)
    profile = page_rank.extract_profile("3", graph)

    # The expected mapping mixes an item id with two other node labels.
    self.assertEqual(
        {"tt0112453": 0.55, "55117": 0.5, "M": 0.5},
        profile,
    )
def test_predict(self):
    """Exercise ``predict`` for user ``A001`` without a graph, with one, and personalized."""
    user_ratings = ratings[ratings['from_id'] == 'A001']

    # An instance built without a graph is expected to return an empty rank.
    graphless_rank = NXPageRank().predict(user_ratings, 2)
    self.assertEqual(graphless_rank, {})

    # Plain prediction on the shared graph.
    plain_rank = NXPageRank(graph=graph).predict(user_ratings, 1)
    logger.info('pg_rk results')
    for node, score in plain_rank.items():
        print(" ".join((str(node), str(score))))
    self.assertIn('tt0112453', plain_rank.keys())

    # Disabled feature-selection variant, kept for reference:
    # alg = NXPageRank(graph=graph)
    # rank_fs = alg.predict('A001', ratings, 1, feature_selection_algorithm=NXFSPageRank())
    # logger.info('pg_rk results')
    # for r in rank_fs.keys():
    #     print(str(r) + " " + str(rank_fs[r]))

    # Personalized prediction on the shared graph.
    personalized_alg = NXPageRank(graph=graph, personalized=True)
    rank_personalized = personalized_alg.predict(user_ratings, 1)
    logger.info('pg_rk results')
    for node, score in rank_personalized.items():
        print(" ".join((str(node), str(score))))
    self.assertIn('tt0113041', rank_personalized)
def test_clean_rank(self):
    """Check ``clean_rank`` for user ``1`` with all removal flags off, then all on."""
    scores = {
        "1": 0.5,
        "tt0112281": 0.5,
        "2": 0.5,
        "tt0113497": 0.5,
        "tt0112302": 0.5,
    }
    alg = NXPageRank(graph=graph)

    # First pass: every removal switch disabled.
    alg.remove_items_in_profile = False
    alg.remove_properties = False
    alg.remove_user_nodes = False
    untouched = alg.clean_rank(scores, graph, "1")
    # NOTE(review): a length is never negative, so this assertion cannot
    # fail; it only proves that the call returned something with .keys().
    self.assertGreaterEqual(len(untouched.keys()), 0)

    # Second pass: user nodes, property nodes and items already in the
    # user's profile are all stripped, leaving one entry.
    alg.remove_items_in_profile = True
    alg.remove_properties = True
    alg.remove_user_nodes = True
    stripped = alg.clean_rank(scores, graph, "1")
    self.assertEqual({"tt0113497": 0.5}, stripped)
processor=NumberNormalizer(min_=1, max_=5)) ], from_field_name='user_id', to_field_name='item_id', timestamp_field_name='timestamp').import_ratings() full_graph = NXFullGraph( source_frame=ratings_import, user_contents_dir=None, item_contents_dir=item_contents_dir, user_exogenous_properties=None, item_exogenous_properties=['film_director', 'starring', 'producer']) rank = NXPageRank(graph=full_graph).predict( user_id='1', ratings=ratings_import, recs_number=10, ) rank_pd = pd.DataFrame({ 'from_id': ['1' for x in rank.keys()], 'to_id': [x for x in rank.keys()], 'rating': [x for x in rank.values()] }) truth_rank = ratings_import[ratings_import['from_id'] == '1'] truth_rank = truth_rank.rename(columns={'score': 'rating'}, inplace=False) print(truth_rank) print(rank_pd)
def test_predict(self):
    """Exercise ``predict`` across recommendation counts, candidate lists and feature selection.

    Each scenario builds (or reuses) an ``NXPageRank`` instance on the shared
    graph and checks only the number of entries in the returned ranking.
    """
    base_alg = NXPageRank(graph)
    user_ratings = ratings[ratings['from_id'] == '2']

    # A non-positive number of recommendations yields nothing.
    rank = base_alg.predict(user_ratings, -10)
    self.assertEqual(len(rank), 0)

    # Standard prediction driven by the user's ratings (PageRank with priors).
    rank = base_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)

    # An empty frame falls back to standard PageRank.
    empty_ratings = pd.DataFrame()
    rank = base_alg.predict(empty_ratings, 3)
    self.assertEqual(len(rank), 3)

    # Restricting to two candidate items caps the result at two entries.
    rank = base_alg.predict(
        user_ratings,
        3,
        candidate_item_id_list=['tt0113277', 'tt0114709'],
    )
    self.assertEqual(len(rank), 2)

    # Feature selection on both items and users, empty ratings.
    both_fs_alg = NXPageRank(
        graph=graph,
        item_feature_selection_algorithm=FSPageRank(1),
        user_feature_selection_algorithm=FSPageRank(2),
    )
    rank = both_fs_alg.predict(empty_ratings, 3)
    self.assertEqual(len(rank), 3)

    # Feature selection on both items and users, real user ratings
    # (a fresh instance is built, as in the first scenario).
    both_fs_alg = NXPageRank(
        graph=graph,
        item_feature_selection_algorithm=FSPageRank(1),
        user_feature_selection_algorithm=FSPageRank(2),
    )
    rank = both_fs_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)

    # Feature selection on items only.
    items_fs_alg = NXPageRank(graph, item_feature_selection_algorithm=FSPageRank(1))
    rank = items_fs_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)
    print(rank)

    # Feature selection on users only.
    users_fs_alg = NXPageRank(graph, user_feature_selection_algorithm=FSPageRank(1))
    rank = users_fs_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)
    print(rank)

    # Feature selection on both sides with k set to 0.
    zero_k_alg = NXPageRank(
        graph,
        item_feature_selection_algorithm=FSPageRank(0),
        user_feature_selection_algorithm=FSPageRank(0),
    )
    rank = zero_k_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)
    print(rank)

    # Only a subset of the user's ratings is supplied.
    small_ratings = pd.DataFrame.from_records(
        [("2", "tt0112453", 0.4)],
        columns=["from_id", "to_id", "score"],
    )
    subset_alg = NXPageRank(
        graph,
        item_feature_selection_algorithm=FSPageRank(1),
        user_feature_selection_algorithm=FSPageRank(1),
    )
    rank = subset_alg.predict(small_ratings, 3)
    self.assertEqual(len(rank), 3)
    print(rank)

    # Same subset plus an item ("test") that is not in the original rating
    # frame: the prediction is expected to come back empty.
    wrong_small_ratings = pd.DataFrame.from_records(
        [("2", "tt0112453", 0.4), ("2", "test", 0.3)],
        columns=["from_id", "to_id", "score"],
    )
    subset_alg = NXPageRank(
        graph,
        item_feature_selection_algorithm=FSPageRank(1),
        user_feature_selection_algorithm=FSPageRank(1),
    )
    rank = subset_alg.predict(wrong_small_ratings, 3)
    self.assertEqual(len(rank), 0)

    # User "8": ratings containing only the most negative vote possible.
    user_ratings = ratings[ratings['from_id'] == '8']
    negative_alg = NXPageRank(
        graph,
        item_feature_selection_algorithm=FSPageRank(1),
        user_feature_selection_algorithm=FSPageRank(0),
    )
    rank = negative_alg.predict(user_ratings, 3)
    self.assertEqual(len(rank), 3)
    print(rank)