def __init__(self, *args, **kwargs):
    """Construct the test case and eagerly load the merged movie ratings.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The dataset is read from hard-coded CSV locations,
    so construction only succeeds where those files exist.
    """
    super(TestDatasetOperator, self).__init__(*args, **kwargs)

    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')

    self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                    movies_file_path=movies_csv)

    # Cache the loaded data and its size; the tests compare against both.
    loaded_movie_ratings = self.dataset.load_movie_ratings()
    self.movie_ratings = loaded_movie_ratings
    self.movie_ratings_length = len(loaded_movie_ratings)
def __init__(self, *args, **kwargs):
    """Construct the test case and build the Pearson-similarity fixture.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The dataset is read from hard-coded CSV locations,
    wrapped in a ``DatasetOptimizer`` and then handed to
    ``OptimizedPearsonSimilarity`` together with the constant ``3``.
    """
    super(TestPrediction, self).__init__(*args, **kwargs)

    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')

    self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                    movies_file_path=movies_csv)
    self.optimized_dataset = DatasetOptimizer(self.dataset)
    self.pearson_similarity = OptimizedPearsonSimilarity(
        self.optimized_dataset, 3)
def test_dataset_loading(self):
    """Check that every dataset loader yields a non-empty result.

    Covers ``load_movies``, ``load_ratings``, ``load_movie_ratings`` and the
    ``Dataset.merge_ratings_and_movies_to_movie_ratings`` helper applied to
    the separately loaded ratings and movies.
    """
    dataset = MovielensDataset(
        ratings_file_path=r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv',
        movies_file_path=r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')

    # assertGreater reports the actual length on failure, unlike
    # assertEqual(<bool>, True) which only prints "False != True".
    movies = dataset.load_movies()
    self.assertGreater(len(movies), 0)

    ratings = dataset.load_ratings()
    self.assertGreater(len(ratings), 0)

    movie_ratings = dataset.load_movie_ratings()
    self.assertGreater(len(movie_ratings), 0)

    movie_ratings2 = Dataset.merge_ratings_and_movies_to_movie_ratings(
        ratings, movies)
    self.assertGreater(len(movie_ratings2), 0)
def test_get_movie_record(self):
    """Check ``get_movie_record`` for two out-of-range ids and one known id.

    Ids ``9999999999`` and ``-1`` are expected to give an empty record,
    while id ``3`` is expected to give a non-empty one.
    """
    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')
    source = MovielensDataset(ratings_file_path=ratings_csv,
                              movies_file_path=movies_csv)
    operator = DatasetMovieOperator(DatasetOptimizer(source))

    too_large = operator.get_movie_record(9999999999)
    self.assertTrue(too_large.empty)

    known = operator.get_movie_record(3)
    self.assertFalse(known.empty)

    negative = operator.get_movie_record(-1)
    self.assertTrue(negative.empty)
def __init__(self, *args, **kwargs):
    """Construct the test case and prepare the optimized dataset fixture.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The dataset is read from hard-coded CSV locations
    and wrapped in a ``DatasetOptimizer``.
    """
    super(TestMutualInformation, self).__init__(*args, **kwargs)

    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')

    self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                    movies_file_path=movies_csv)
    self.optimized_dataset = DatasetOptimizer(self.dataset)
def __init__(self, *args, **kwargs):
    """Construct the test case and prepare dataset and user-operator fixtures.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The dataset is read from hard-coded CSV locations,
    wrapped in a ``DatasetOptimizer``, and the optimizer's ratings are used
    to build a ``DatasetUserOperator``.
    """
    super(TestTimebinSimilarity, self).__init__(*args, **kwargs)

    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')

    self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                    movies_file_path=movies_csv)
    self.optimized_dataset = DatasetOptimizer(self.dataset)

    optimized_ratings = self.optimized_dataset.get_ratings()
    self.dataset_user_operator = DatasetUserOperator(optimized_ratings)
def test_movie_based_neighbourhood(self):
    """Check that the movie-based neighbour lookup returns something.

    Builds a ``KNearestNeighbours`` (constructed with ``20``) on top of an
    ``OptimizedPearsonSimilarity`` (constructed with ``3``) and asserts that
    ``get_common_movie_based_k_nearest_neighbours(448, 3)`` is non-empty.
    """
    # Adjacent raw literals are concatenated by the parser into one path.
    ratings_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\ratings.csv')
    movies_csv = (
        r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        r'\movies.csv')
    source = MovielensDataset(ratings_file_path=ratings_csv,
                              movies_file_path=movies_csv)

    similarity = OptimizedPearsonSimilarity(DatasetOptimizer(source), 3)
    neighbour_finder = KNearestNeighbours(similarity, 20)

    neighbours = neighbour_finder.get_common_movie_based_k_nearest_neighbours(
        448, 3)
    self.assertTrue(len(neighbours) > 0)
class TestDatasetOperator(unittest.TestCase):
    """Tests for ``DatasetOperator.apply_time_constraint``.

    The merged movie-ratings data is loaded from hard-coded CSV paths when
    the test case is constructed, so the suite only runs where those files
    exist.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``TestCase`` and load the fixtures."""
        super(TestDatasetOperator, self).__init__(*args, **kwargs)
        self.dataset = MovielensDataset(
            ratings_file_path=
            r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
            r'\ratings.csv',
            movies_file_path=
            r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
            r'\movies.csv')
        self.movie_ratings = self.dataset.load_movie_ratings()
        # Baseline size that the constrained results are compared against.
        self.movie_ratings_length = len(self.movie_ratings)

    def assert_data_with_time_constraint_has_less_length_dataframe(
            self, interval: Interval, assertion):
        """Assert whether applying ``interval`` shrinks the movie ratings.

        :param interval: interval passed to
            ``DatasetOperator.apply_time_constraint``.
        :param assertion: expected truth value of "the constrained result
            has fewer rows than the unconstrained data".
        """
        constrained = DatasetOperator.apply_time_constraint(
            self.movie_ratings, interval)
        constrained_length = len(constrained)
        self.assertEqual(
            constrained_length < self.movie_ratings_length,
            assertion,
            'constrained length %d vs. full length %d' %
            (constrained_length, self.movie_ratings_length))

    def test_applying_timebin_time_constraint(self):
        """Exercise ``apply_time_constraint`` with ``TimebinInterval`` bounds.

        Only the interval with both bounds set is expected to reduce the
        number of rows; intervals with a missing bound are expected not to.
        """
        movie_ratings = self.movie_ratings.copy(True)
        # The return value is discarded: this only checks that a ``None``
        # interval neither raises nor leaves the copy without rows.
        DatasetOperator.apply_time_constraint(movie_ratings, None)
        self.assertGreater(len(movie_ratings), 0)

        interval = TimebinInterval(datetime(2000, 5, 5), datetime(2020, 5, 5))
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, True)

        interval = TimebinInterval(None, datetime(2020, 5, 5))
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)

        interval = TimebinInterval(None, None)
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)

        interval = TimebinInterval(datetime(2020, 5, 5), None)
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)

    def test_applying_max_limit_time_constraint(self):
        """Exercise ``apply_time_constraint`` with ``MaxLimitInterval`` bounds.

        Only the interval with just an end bound is expected to reduce the
        number of rows; the other three combinations are expected not to.
        """
        movie_ratings = self.movie_ratings.copy(True)
        # The return value is discarded: this only checks that a ``None``
        # interval neither raises nor leaves the copy without rows.
        DatasetOperator.apply_time_constraint(movie_ratings, None)
        self.assertGreater(len(movie_ratings), 0)

        interval = MaxLimitInterval(None, datetime(2010, 5, 5))
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, True)

        interval = MaxLimitInterval(None, None)
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)

        interval = MaxLimitInterval(datetime(2010, 5, 5), None)
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)

        interval = MaxLimitInterval(datetime(2000, 1, 1), datetime(2010, 5, 5))
        self.assert_data_with_time_constraint_has_less_length_dataframe(
            interval, False)
class TestDatasetUserOperator(unittest.TestCase):
    """Tests for ``DatasetUserOperator`` built on the loaded ratings.

    Ratings and movies are loaded from hard-coded CSV paths when the test
    case is constructed, so the suite only runs where those files exist.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``TestCase`` and load the fixtures."""
        super(TestDatasetUserOperator, self).__init__(*args, **kwargs)
        self.dataset = MovielensDataset(
            ratings_file_path=
            r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
            r'\ratings.csv',
            movies_file_path=
            r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
            r'\movies.csv')
        self.ratings = self.dataset.load_ratings()
        self.movies = self.dataset.load_movies()
        self.movie_ratings = self.dataset.merge_ratings_and_movies_to_movie_ratings(
            self.ratings, self.movies)
        self.user_operator = DatasetUserOperator(self.ratings)

    def test_get_all_users(self):
        """``get_all_users`` returns at least one user."""
        users = self.user_operator.get_all_users()
        self.assertGreater(len(users), 0)

    def test_top_raters(self):
        """``get_top_n_raters`` honours ``n`` and reports a consistent count.

        The rating count reported for the first rater must match the number
        of rows that user has in the raw ratings; a negative ``n`` must
        give an empty result.
        """
        raters = self.user_operator.get_top_n_raters(3)
        self.assertEqual(len(raters), 3)

        first_rater_id = TestDatasetUserOperator.__get_first_rater_user_id(
            raters)
        user_ratings_length = len(
            self.__get_target_user_ratings(first_rater_id))
        self.assertEqual(
            user_ratings_length,
            TestDatasetUserOperator.__get_first_rater_rating_count(raters))

        raters = self.user_operator.get_top_n_raters(-1)
        self.assertEqual(len(raters), 0)

    def test_random_users(self):
        """``get_random_user_list`` respects the requested size.

        Oversized requests may return fewer users; zero and negative
        requests must return none.
        """
        self.assertEqual(len(self.user_operator.get_random_user_list(2)), 2)
        self.assertLessEqual(
            len(self.user_operator.get_random_user_list(50000)), 50000)
        self.assertEqual(len(self.user_operator.get_random_user_list(0)), 0)
        self.assertEqual(len(self.user_operator.get_random_user_list(-5)), 0)

    def test_user_history(self):
        """``get_user_rating_history`` copes with valid and invalid user ids."""
        # A length is never negative, so the first two checks only verify
        # that the call returns something sized instead of raising.
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(-1)), 0)
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(0)), 0)
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(414)), 1)
        self.assertEqual(
            len(self.user_operator.get_user_rating_history(99999999)), 0)

    # Use this as get_movie_rating
    def test_user_rating(self):
        """``get_user_rating_record`` returns the rating or an empty record."""
        self.assertEqual(
            self.user_operator.get_user_rating_record(414, 839)['rating'], 4.0)
        self.assertTrue(
            self.user_operator.get_user_rating_record(414, -1).empty)
        self.assertTrue(
            self.user_operator.get_user_rating_record(-5, -1).empty)
        self.assertTrue(
            self.user_operator.get_user_rating_record(9999999, 5).empty)
        self.assertTrue(
            self.user_operator.get_user_rating_record(414, 999999).empty)
        self.assertTrue(
            self.user_operator.get_user_rating_record(3, 39).empty)

    def test_rating_timestamp(self):
        """``get_rating_timestamp`` gives ``None`` for the unmatched pairs."""
        self.assertIsNotNone(self.user_operator.get_rating_timestamp(414, 839))
        self.assertIsNone(self.user_operator.get_rating_timestamp(3, 39))
        self.assertIsNone(
            self.user_operator.get_rating_timestamp(99999999, 39))
        self.assertIsNone(self.user_operator.get_rating_timestamp(-1, 39))
        self.assertIsNone(self.user_operator.get_rating_timestamp(414, -5))

    def test_user_avg_timestamp(self):
        """``get_user_avg_rating_timestamp`` gives ``None`` for unknown users."""
        self.assertIsNotNone(
            self.user_operator.get_user_avg_rating_timestamp(448))
        self.assertIsNone(
            self.user_operator.get_user_avg_rating_timestamp(-1))
        self.assertIsNone(self.user_operator.get_user_avg_rating_timestamp(0))
        self.assertIsNone(
            self.user_operator.get_user_avg_rating_timestamp(9999999999))

    def test_get_user_ratings_at_interval(self):
        """Ratings limited by an end datetime never exceed that datetime."""
        self.assert_user_ratings_with_max_limit_within_interval(
            448, datetime(2010, 5, 6))
        self.assert_user_ratings_with_max_limit_within_interval(
            448, datetime(2025, 5, 6))
        self.assert_user_ratings_with_max_limit_within_interval(
            448, datetime(1800, 5, 6))

    def test_get_user_avg_at_interval(self):
        """Smoke test: call ``get_user_avg_at_interval`` and print the result."""
        # NOTE(review): nothing is asserted here, so this only fails if the
        # call raises. TODO: assert on the value once its contract is known.
        print(
            self.user_operator.get_user_avg_at_interval(
                448,
                MaxLimitInterval(interval_end_datetime=datetime(2015, 5, 5))))

    def assert_user_ratings_with_max_limit_within_interval(self, user_id, dt):
        """Assert the user's last rating under the limit is not after ``dt``.

        :param user_id: id of the user whose ratings are queried.
        :param dt: end datetime given to ``MaxLimitInterval``.

        Nothing is asserted when the user has no ratings in the interval.
        """
        last_timestamp = self.__get_last_timestamp_of_user_ratings_with_max_limit_interval(
            dt, user_id)
        if last_timestamp is not None:
            # Reuse the value fetched above rather than querying again.
            self.assertLessEqual(last_timestamp, dt)

    def __get_last_timestamp_of_user_ratings_with_max_limit_interval(
            self, dt, user_id):
        """Return the timestamp of the last rating up to ``dt``, or ``None``.

        ``None`` is returned when reading the last row raises ``IndexError``,
        i.e. when the interval query produced no rows.
        """
        user_ratings = self.user_operator.get_user_ratings_at_interval(
            user_id, MaxLimitInterval(interval_end_datetime=dt))
        try:
            return TestDatasetUserOperator.__get_last_rating_timestamp_from_ratings(
                user_ratings)
        except IndexError:
            return None

    @staticmethod
    def __get_last_rating_timestamp_from_ratings(user_ratings):
        """Return the ``'timestamp'`` value of the last row of ``user_ratings``."""
        return user_ratings.iloc[-1]['timestamp']

    @staticmethod
    def __get_first_rating_timestamp_from_ratings(user_ratings):
        """Return the ``'timestamp'`` value of the first row of ``user_ratings``."""
        return user_ratings.iloc[0]['timestamp']

    def __get_target_user_ratings(self, user_id):
        """Return the raw rating rows whose ``'user_id'`` equals ``user_id``."""
        return self.ratings.loc[self.ratings['user_id'] == user_id]

    @staticmethod
    def __get_first_rater_rating_count(raters):
        """Return the rating count stored in the first row of ``raters``."""
        # NOTE(review): ``[1]`` on the row relies on how pandas resolves an
        # integer key on a Series; confirm whether positional access
        # (``.iloc[1]``) is what is intended here.
        return raters.iloc[0][1]

    @staticmethod
    def __get_first_rater_user_id(raters):
        """Return the first index label of ``raters``, used as the user id."""
        return raters.index[0]