示例#1
0
 def __init__(self, *args, **kwargs):
     """Initialise the test case and load the MovieLens movie ratings.

     All positional and keyword arguments are forwarded unchanged to the
     parent initializer. Afterwards the instance holds the dataset, the
     result of ``load_movie_ratings()`` and the length of that result.
     """
     super(TestDatasetOperator, self).__init__(*args, **kwargs)

     # Both CSV files sit in the same directory, so name it once.
     data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
     ratings_csv = data_dir + r'\ratings.csv'
     movies_csv = data_dir + r'\movies.csv'

     self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                     movies_file_path=movies_csv)
     loaded_ratings = self.dataset.load_movie_ratings()
     self.movie_ratings = loaded_ratings
     self.movie_ratings_length = len(loaded_ratings)
 def __init__(self, *args, **kwargs):
     """Initialise the test case with a dataset, its optimizer and a similarity.

     Arguments are passed straight through to the parent initializer. The
     same ``DatasetOptimizer`` instance is stored on the test case and handed
     to ``OptimizedPearsonSimilarity`` together with the constant ``3``.
     """
     super(TestPrediction, self).__init__(*args, **kwargs)

     # Both CSV files sit in the same directory, so name it once.
     data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
     self.dataset = MovielensDataset(
         ratings_file_path=data_dir + r'\ratings.csv',
         movies_file_path=data_dir + r'\movies.csv',
     )

     optimizer = DatasetOptimizer(self.dataset)
     self.optimized_dataset = optimizer
     self.pearson_similarity = OptimizedPearsonSimilarity(optimizer, 3)
示例#3
0
  def test_dataset_loading(self):
      """Check that every loading path of the dataset yields a non-empty result.

      Exercises ``load_movies()``, ``load_ratings()``, ``load_movie_ratings()``
      and ``Dataset.merge_ratings_and_movies_to_movie_ratings()`` applied to
      the separately loaded ratings and movies.
      """
      dataset = MovielensDataset(
              ratings_file_path=r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
                                r'\ratings.csv',
              movies_file_path=r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
                               r'\movies.csv')

      # assertGreater reports the actual length when it fails, whereas
      # comparing a boolean with True only ever reports "False != True".
      movies = dataset.load_movies()
      self.assertGreater(len(movies), 0)

      ratings = dataset.load_ratings()
      self.assertGreater(len(ratings), 0)

      movie_ratings = dataset.load_movie_ratings()
      self.assertGreater(len(movie_ratings), 0)

      # The merge helper is also checked on its own, fed with the pieces
      # loaded above.
      movie_ratings2 = Dataset.merge_ratings_and_movies_to_movie_ratings(ratings, movies)
      self.assertGreater(len(movie_ratings2), 0)
 def test_get_movie_record(self):
     """Look up three movie ids and check the ``empty`` flag of each record.

     Id ``3`` must give a non-empty record; ids ``9999999999`` and ``-1``
     must give empty ones.
     """
     data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
     dataset = MovielensDataset(
         ratings_file_path=data_dir + r'\ratings.csv',
         movies_file_path=data_dir + r'\movies.csv',
     )
     optimized = DatasetOptimizer(dataset)
     movie_operator = DatasetMovieOperator(optimized)

     # Lookups are issued in the same order as before: huge id, id 3, negative id.
     huge_id_record = movie_operator.get_movie_record(9999999999)
     self.assertTrue(huge_id_record.empty)

     known_record = movie_operator.get_movie_record(3)
     self.assertFalse(known_record.empty)

     negative_id_record = movie_operator.get_movie_record(-1)
     self.assertTrue(negative_id_record.empty)
 def __init__(self, *args, **kwargs):
     """Initialise the test case with a dataset and its optimizer wrapper.

     Arguments are passed straight through to the parent initializer; the
     dataset is then built from the two MovieLens CSV paths and wrapped in a
     ``DatasetOptimizer``.
     """
     super(TestMutualInformation, self).__init__(*args, **kwargs)

     # Both CSV files sit in the same directory, so name it once.
     data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
     ratings_csv = data_dir + r'\ratings.csv'
     movies_csv = data_dir + r'\movies.csv'

     self.dataset = MovielensDataset(ratings_file_path=ratings_csv,
                                     movies_file_path=movies_csv)
     self.optimized_dataset = DatasetOptimizer(self.dataset)
示例#6
0
 def __init__(self, *args, **kwargs):
     """Initialise the test case with a dataset, optimizer and user operator.

     Arguments are passed straight through to the parent initializer. The
     user operator is built from whatever ``get_ratings()`` of the optimized
     dataset returns.
     """
     super(TestTimebinSimilarity, self).__init__(*args, **kwargs)

     # Both CSV files sit in the same directory, so name it once.
     data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
     self.dataset = MovielensDataset(
         ratings_file_path=data_dir + r'\ratings.csv',
         movies_file_path=data_dir + r'\movies.csv',
     )
     self.optimized_dataset = DatasetOptimizer(self.dataset)

     optimized_ratings = self.optimized_dataset.get_ratings()
     self.dataset_user_operator = DatasetUserOperator(optimized_ratings)
示例#7
0
 def test_movie_based_neighbourhood(self):
     """Check that a common-movie-based neighbour query returns something.

     Builds ``KNearestNeighbours`` on top of an ``OptimizedPearsonSimilarity``
     and requires ``get_common_movie_based_k_nearest_neighbours(448, 3)`` to
     return a non-empty result.
     """
     dataset = MovielensDataset(
         ratings_file_path=
         r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
         r'\ratings.csv',
         movies_file_path=
         r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
         r'\movies.csv')
     pearson_similarity = OptimizedPearsonSimilarity(
         DatasetOptimizer(dataset), 3)
     knn = KNearestNeighbours(pearson_similarity, 20)

     neighbours = knn.get_common_movie_based_k_nearest_neighbours(448, 3)
     # assertGreater shows the actual neighbour count on failure instead of
     # the uninformative "False is not true".
     self.assertGreater(len(neighbours), 0)
示例#8
0
class TestDatasetOperator(unittest.TestCase):
    """Tests for ``DatasetOperator.apply_time_constraint`` on MovieLens data."""

    def setUp(self):
        """Load the movie ratings before each test.

        The fixture is built in ``setUp`` rather than in an overridden
        ``__init__``: unittest instantiates the class once per test method
        while assembling the suite, so loading in the constructor would read
        the CSV files during collection and turn a loading problem into a
        discovery failure instead of a per-test error.
        """
        data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        self.dataset = MovielensDataset(
            ratings_file_path=data_dir + r'\ratings.csv',
            movies_file_path=data_dir + r'\movies.csv')
        self.movie_ratings = self.dataset.load_movie_ratings()
        self.movie_ratings_length = len(self.movie_ratings)

    def assert_data_with_time_constraint_has_less_length_dataframe(
            self, interval: Interval, assertion):
        """Assert whether applying ``interval`` shortens the movie ratings.

        :param interval: interval passed to
            ``DatasetOperator.apply_time_constraint``.
        :param assertion: expected truth value of "the constrained result is
            strictly shorter than the full movie ratings".
        """
        constrained = DatasetOperator.apply_time_constraint(
            self.movie_ratings, interval)
        is_shorter = len(constrained) < self.movie_ratings_length
        self.assertEqual(is_shorter, assertion)

    def test_applying_timebin_time_constraint(self):
        """Apply several ``TimebinInterval`` bounds and check the result size."""
        movie_ratings = self.movie_ratings.copy(True)

        # A ``None`` interval must leave the copied input non-empty.
        DatasetOperator.apply_time_constraint(movie_ratings, None)
        self.assertGreater(len(movie_ratings), 0)

        # (start, end, expected "result is shorter than the full data")
        cases = (
            (datetime(2000, 5, 5), datetime(2020, 5, 5), True),
            (None, datetime(2020, 5, 5), False),
            (None, None, False),
            (datetime(2020, 5, 5), None, False),
        )
        for start, end, expected_shorter in cases:
            # subTest lets every case report on its own instead of stopping
            # at the first failing one.
            with self.subTest(start=start, end=end):
                interval = TimebinInterval(start, end)
                self.assert_data_with_time_constraint_has_less_length_dataframe(
                    interval, expected_shorter)

    def test_applying_max_limit_time_constraint(self):
        """Apply several ``MaxLimitInterval`` bounds and check the result size."""
        movie_ratings = self.movie_ratings.copy(True)

        # A ``None`` interval must leave the copied input non-empty.
        DatasetOperator.apply_time_constraint(movie_ratings, None)
        self.assertGreater(len(movie_ratings), 0)

        # (first argument, second argument, expected "result is shorter")
        cases = (
            (None, datetime(2010, 5, 5), True),
            (None, None, False),
            (datetime(2010, 5, 5), None, False),
            (datetime(2000, 1, 1), datetime(2010, 5, 5), False),
        )
        for first, second, expected_shorter in cases:
            with self.subTest(first=first, second=second):
                interval = MaxLimitInterval(first, second)
                self.assert_data_with_time_constraint_has_less_length_dataframe(
                    interval, expected_shorter)
示例#9
0
class TestDatasetUserOperator(unittest.TestCase):
    """Tests for ``DatasetUserOperator`` built on the MovieLens ratings."""

    def setUp(self):
        """Load ratings and movies and build the user operator for each test.

        The fixture is built in ``setUp`` rather than in an overridden
        ``__init__``: unittest instantiates the class once per test method
        while assembling the suite, so loading in the constructor would read
        the CSV files during collection and turn a loading problem into a
        discovery failure instead of a per-test error.
        """
        data_dir = r'C:\Users\Yukawa\PycharmProjects\ProjectAlpha\data\movie_datasets\ml-latest-small'
        self.dataset = MovielensDataset(
            ratings_file_path=data_dir + r'\ratings.csv',
            movies_file_path=data_dir + r'\movies.csv')
        self.ratings = self.dataset.load_ratings()
        self.movies = self.dataset.load_movies()
        self.movie_ratings = self.dataset.merge_ratings_and_movies_to_movie_ratings(
            self.ratings, self.movies)

        self.user_operator = DatasetUserOperator(self.ratings)

    def test_get_all_users(self):
        """``get_all_users()`` returns a non-empty collection."""
        users = self.user_operator.get_all_users()
        self.assertGreater(len(users), 0)

    def test_top_raters(self):
        """Top-3 raters has three entries whose first count matches the raw data.

        Also checks that asking for ``-1`` raters yields an empty result.
        """
        raters = self.user_operator.get_top_n_raters(3)
        self.assertEqual(len(raters), 3)

        top_user_id = TestDatasetUserOperator.__get_first_rater_user_id(raters)
        user_ratings_length = len(self.__get_target_user_ratings(top_user_id))
        # Comparing the two numbers directly makes a failure show both values.
        self.assertEqual(
            user_ratings_length,
            TestDatasetUserOperator.__get_first_rater_rating_count(raters))

        raters = self.user_operator.get_top_n_raters(-1)
        self.assertEqual(len(raters), 0)

    def test_random_users(self):
        """Random user lists respect the requested size, including 0 and negatives."""
        self.assertEqual(len(self.user_operator.get_random_user_list(2)), 2)
        self.assertLessEqual(
            len(self.user_operator.get_random_user_list(50000)), 50000)
        self.assertEqual(len(self.user_operator.get_random_user_list(0)), 0)
        self.assertEqual(len(self.user_operator.get_random_user_list(-5)), 0)

    def test_user_history(self):
        """Rating history lookups work for odd ids and for user 414."""
        # The two ">= 0" checks can only fail by raising; they guard against
        # exceptions for the -1 and 0 user ids.
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(-1)), 0)
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(0)), 0)
        self.assertGreaterEqual(
            len(self.user_operator.get_user_rating_history(414)), 1)
        self.assertEqual(
            len(self.user_operator.get_user_rating_history(99999999)), 0)

    # Use this as get_movie_rating
    def test_user_rating(self):
        """User 414 rated movie 839 with 4.0; all other probed pairs are empty."""
        self.assertEqual(
            self.user_operator.get_user_rating_record(414, 839)['rating'], 4.0)

        empty_pairs = (
            (414, -1),
            (-5, -1),
            (9999999, 5),
            (414, 999999),
            (3, 39),
        )
        for user_id, movie_id in empty_pairs:
            with self.subTest(user_id=user_id, movie_id=movie_id):
                record = self.user_operator.get_user_rating_record(
                    user_id, movie_id)
                self.assertTrue(record.empty)

    def test_rating_timestamp(self):
        """Only the (414, 839) pair has a rating timestamp among the probes."""
        self.assertIsNotNone(
            self.user_operator.get_rating_timestamp(414, 839))

        missing_pairs = ((3, 39), (99999999, 39), (-1, 39), (414, -5))
        for user_id, movie_id in missing_pairs:
            with self.subTest(user_id=user_id, movie_id=movie_id):
                self.assertIsNone(
                    self.user_operator.get_rating_timestamp(user_id, movie_id))

    def test_user_avg_timestamp(self):
        """User 448 has an average rating timestamp; the other probed ids do not."""
        self.assertIsNotNone(
            self.user_operator.get_user_avg_rating_timestamp(448))

        for user_id in (-1, 0, 9999999999):
            with self.subTest(user_id=user_id):
                self.assertIsNone(
                    self.user_operator.get_user_avg_rating_timestamp(user_id))

    def test_get_user_ratings_at_interval(self):
        """Ratings of user 448 limited by three end dates never exceed the limit."""
        for dt in (datetime(2010, 5, 6), datetime(2025, 5, 6),
                   datetime(1800, 5, 6)):
            with self.subTest(dt=dt):
                self.assert_user_ratings_with_max_limit_within_interval(448, dt)

    def test_get_user_avg_at_interval(self):
        """Smoke-test ``get_user_avg_at_interval`` for user 448.

        NOTE(review): this only prints the value and asserts nothing, so it
        can fail solely by raising. The expected average is not known from
        this file; add an assertion once it is.
        """
        print(
            self.user_operator.get_user_avg_at_interval(
                448,
                MaxLimitInterval(interval_end_datetime=datetime(2015, 5, 5))))

    def assert_user_ratings_with_max_limit_within_interval(self, user_id, dt):
        """Assert the user's last rating under a max-limit interval is not after ``dt``.

        :param user_id: id of the user whose ratings are queried.
        :param dt: end datetime given to ``MaxLimitInterval``.

        Nothing is asserted when no last timestamp could be obtained.
        """
        last_timestamp = self.__get_last_timestamp_of_user_ratings_with_max_limit_interval(
            dt, user_id)
        if last_timestamp is not None:
            # Reuse the value fetched above instead of querying a second time.
            self.assertLessEqual(last_timestamp, dt)

    def __get_last_timestamp_of_user_ratings_with_max_limit_interval(
            self, dt, user_id):
        """Return the last ``timestamp`` of the user's ratings up to ``dt``.

        Returns ``None`` when reading the last row raises ``IndexError``
        (presumably because the interval selected no ratings).
        """
        user_ratings = self.user_operator.get_user_ratings_at_interval(
            user_id, MaxLimitInterval(interval_end_datetime=dt))
        try:
            return TestDatasetUserOperator.__get_last_rating_timestamp_from_ratings(
                user_ratings)
        except IndexError:
            return None

    @staticmethod
    def __get_last_rating_timestamp_from_ratings(user_ratings):
        """Return the ``timestamp`` value of the last row of ``user_ratings``."""
        return user_ratings.iloc[-1]['timestamp']

    @staticmethod
    def __get_first_rating_timestamp_from_ratings(user_ratings):
        """Return the ``timestamp`` value of the first row of ``user_ratings``.

        Not called anywhere in this class at present.
        """
        return user_ratings.iloc[0]['timestamp']

    def __get_target_user_ratings(self, user_id):
        """Return the rows of ``self.ratings`` whose ``user_id`` equals ``user_id``."""
        return self.ratings.loc[self.ratings['user_id'] == user_id]

    @staticmethod
    def __get_first_rater_rating_count(raters):
        """Return the value in the first row, second column of ``raters``.

        Uses purely positional ``iloc[0, 1]``: the previous ``iloc[0][1]``
        relied on ``Series.__getitem__`` treating an integer key as a
        position, which pandas has deprecated in favour of label lookup.
        Assumes ``raters`` is a DataFrame whose second column holds the
        rating count -- TODO confirm against ``get_top_n_raters``.
        """
        return raters.iloc[0, 1]

    @staticmethod
    def __get_first_rater_user_id(raters):
        """Return the first index label of ``raters``, used by the tests as a user id."""
        return raters.index[0]