Пример #1
0
    def test_als_convergence(self):
        """Compare ALS fits that differ only in their ``converge`` setting.

        Two models are built with the same data, seed and hyper-parameters and
        fitted with ``n_iters=10``. The model using ``converge=1e-2`` must
        reach a strictly lower RMSE on the fitted ratings than the one using
        ``converge=1e-1``.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        ratings = make_ratings(n_user,
                               n_item,
                               20,
                               30,
                               self.rating_choices,
                               seed=self.seed)

        def fit_and_score(converge):
            # Everything except ``converge`` is held fixed so that the two
            # runs are directly comparable.
            model = ALS(n_user,
                        n_item,
                        n_feature,
                        reg=1e-2,
                        seed=0,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat,
                        converge=converge)
            model.fit(ratings, n_iters=10)
            # Score predictions for the first two columns against column 2.
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = fit_and_score(1e-2)
        rmse_2 = fit_and_score(1e-1)
        # assertLess reports both values on failure, unlike assertTrue(a < b).
        self.assertLess(rmse_1, rmse_2)
Пример #2
0
    def test_als_convergence(self):
        """Check that ``converge=1e-2`` gives a lower RMSE than ``converge=1e-1``.

        Both models share data, seed and all other settings and are fitted
        with ``n_iters=10``; RMSE is measured on the same ratings used to fit.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        ratings = make_ratings(
            n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

        # Settings common to both models; only ``converge`` differs below.
        common = dict(reg=1e-2,
                      seed=0,
                      max_rating=self.max_rat,
                      min_rating=self.min_rat)

        als1 = ALS(n_user, n_item, n_feature, converge=1e-2, **common)
        als1.fit(ratings, n_iters=10)
        rmse_1 = RMSE(als1.predict(ratings[:, :2]), ratings[:, 2])

        als2 = ALS(n_user, n_item, n_feature, converge=1e-1, **common)
        als2.fit(ratings, n_iters=10)
        rmse_2 = RMSE(als2.predict(ratings[:, :2]), ratings[:, 2])

        # assertLess prints both RMSE values when the comparison fails.
        self.assertLess(rmse_1, rmse_2)
Пример #3
0
    def test_als_with_random_data(self):
        """Check that more ALS iterations lower the RMSE on generated ratings.

        Two identically configured models (same seed) are fitted on the same
        ratings, one with ``n_iters=1`` and one with ``n_iters=3``; the
        3-iteration model must have the strictly lower RMSE.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        ratings = make_ratings(n_user,
                               n_item,
                               20,
                               30,
                               self.rating_choices,
                               seed=self.seed)

        def fit_and_score(n_iters):
            # Identical construction for both runs; only the iteration count
            # passed to fit() varies.
            model = ALS(n_user,
                        n_item,
                        n_feature,
                        reg=1e-2,
                        seed=0,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat)
            model.fit(ratings, n_iters=n_iters)
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = fit_and_score(1)
        rmse_2 = fit_and_score(3)
        # assertGreater shows both values on failure, unlike assertTrue(a > b).
        self.assertGreater(rmse_1, rmse_2)
Пример #4
0
    def test_als_with_missing_data(self):
        """Check that ``fit`` leaves the features of an unrated user/item alone.

        Ratings are generated for ``n_user - 1`` users and ``n_item - 1`` items
        while the model is sized for ``n_user`` and ``n_item``, so the last
        user row and the last item row have no rating data.
        """
        n_user = 10
        n_item = 20
        n_feature = self.n_feature
        ratings = make_ratings(n_user - 1,
                               n_item - 1,
                               5,
                               10,
                               self.rating_choices,
                               seed=self.seed)
        als1 = ALS(n_user,
                   n_item,
                   n_feature,
                   reg=1e-2,
                   seed=0,
                   max_rating=self.max_rat,
                   min_rating=self.min_rat)

        # Snapshot with copy(): assuming the feature matrices are NumPy arrays,
        # plain slicing returns a view that would follow any in-place update
        # made by fit(), so the comparison below could never fail.
        unuse_user_f_before = als1.user_features_[n_user - 1, :].copy()
        unuse_item_f_before = als1.item_features_[n_item - 1, :].copy()
        als1.fit(ratings, n_iters=1)
        unuse_user_f_after = als1.user_features_[n_user - 1, :]
        unuse_item_f_after = als1.item_features_[n_item - 1, :]
        # last user/item feature should be
        #  unchanged since no rating data on them
        assert_array_equal(unuse_user_f_before, unuse_user_f_after)
        assert_array_equal(unuse_item_f_before, unuse_item_f_after)
Пример #5
0
    def test_als_with_random_data(self):
        """Check that fitting for 3 iterations beats fitting for 1 iteration.

        Both models use the same generated ratings, seed and settings; RMSE is
        computed on the ratings that were used for fitting.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        ratings = make_ratings(
            n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

        # Keyword settings shared by both models.
        settings = dict(reg=1e-2,
                        seed=0,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat)

        als1 = ALS(n_user, n_item, n_feature, **settings)
        als1.fit(ratings, n_iters=1)
        rmse_1 = RMSE(als1.predict(ratings[:, :2]), ratings[:, 2])

        als2 = ALS(n_user, n_item, n_feature, **settings)
        als2.fit(ratings, n_iters=3)
        rmse_2 = RMSE(als2.predict(ratings[:, :2]), ratings[:, 2])

        # assertGreater prints both RMSE values when the comparison fails.
        self.assertGreater(rmse_1, rmse_2)
Пример #6
0
    def test_als_with_ml_100k_rating(self):
        """Fit ALS on ``self.ratings`` and require an RMSE below 0.8.

        The model is sized for 943 users and 1682 items with 10 features and
        is fitted with ``n_iters=5``; RMSE is measured on the fitted ratings.
        """
        n_user = 943
        n_item = 1682
        n_feature = 10
        ratings = self.ratings

        als = ALS(n_user, n_item, n_feature,
                  reg=1e-2,
                  max_rating=5.,
                  min_rating=1.,
                  seed=self.seed)

        als.fit(ratings, n_iters=5)
        rmse = RMSE(als.predict(ratings[:, :2]), ratings[:, 2])
        # assertLess reports the actual RMSE on failure, unlike assertTrue.
        self.assertLess(rmse, 0.8)
Пример #7
0
    def test_als_with_ml_100k_rating(self):
        """Check that 5 ALS iterations on ``self.ratings`` reach RMSE < 0.8.

        Uses a model sized for 943 users, 1682 items and 10 features, with
        ratings bounded to the range 1.0 to 5.0.
        """
        n_user = 943
        n_item = 1682
        n_feature = 10
        ratings = self.ratings

        als = ALS(n_user,
                  n_item,
                  n_feature,
                  reg=1e-2,
                  max_rating=5.,
                  min_rating=1.,
                  seed=self.seed)

        als.fit(ratings, n_iters=5)
        predictions = als.predict(ratings[:, :2])
        rmse = RMSE(predictions, ratings[:, 2])
        # assertLess includes the measured RMSE in the failure message.
        self.assertLess(rmse, 0.8)
Пример #8
0
    def test_als_with_missing_data(self):
        """Check that a user and an item without ratings keep their features.

        The model has room for ``n_user`` users and ``n_item`` items, but
        ratings are generated for only ``n_user - 1`` and ``n_item - 1`` of
        them, leaving the last user row and last item row without data.
        """
        n_user = 10
        n_item = 20
        n_feature = self.n_feature
        ratings = make_ratings(
            n_user - 1, n_item - 1, 5, 10, self.rating_choices, seed=self.seed)
        als1 = ALS(n_user, n_item, n_feature,
                   reg=1e-2,
                   seed=0,
                   max_rating=self.max_rat,
                   min_rating=self.min_rat)

        # Take real copies before fitting: assuming NumPy feature matrices,
        # a plain slice is a view and would mirror in-place updates by fit(),
        # which would make the equality checks below trivially true.
        unuse_user_f_before = als1.user_features_[n_user - 1, :].copy()
        unuse_item_f_before = als1.item_features_[n_item - 1, :].copy()
        als1.fit(ratings, n_iters=1)
        unuse_user_f_after = als1.user_features_[n_user - 1, :]
        unuse_item_f_after = als1.item_features_[n_item - 1, :]
        # last user/item feature should be
        #  unchanged since no rating data on them
        assert_array_equal(unuse_user_f_before, unuse_user_f_after)
        assert_array_equal(unuse_item_f_before, unuse_item_f_after)
Пример #9
0
    def test_als_seed(self):
        """Check that the ``seed`` argument controls the fitted features.

        Two models built with ``seed=0`` must end up with equal user and item
        features after 3 iterations; a model built with ``seed=1`` must differ
        from them in both.
        """
        n_user, n_item = 100, 200
        n_feature = self.n_feature
        ratings = make_ratings(
            n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

        def fitted(seed):
            # Build a model with the given seed and fit it for 3 iterations.
            model = ALS(n_user, n_item, n_feature,
                        reg=1e-2,
                        seed=seed,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat)
            model.fit(ratings, n_iters=3)
            return model

        # same seed twice -> equal features
        first = fitted(0)
        repeat = fitted(0)
        assert_array_equal(first.user_features_, repeat.user_features_)
        assert_array_equal(first.item_features_, repeat.item_features_)

        # different seed -> the equality assertion itself has to fail
        other = fitted(1)
        assert_raises(AssertionError, assert_array_equal,
                      first.user_features_, other.user_features_)
        assert_raises(AssertionError, assert_array_equal,
                      first.item_features_, other.item_features_)
Пример #10
0
    def test_als_seed(self):
        """Check that equal seeds give equal features and other seeds do not.

        Three models are fitted for 3 iterations on the same ratings: two with
        ``seed=0`` and one with ``seed=1``.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        ratings = make_ratings(n_user,
                               n_item,
                               20,
                               30,
                               self.rating_choices,
                               seed=self.seed)

        # Settings shared by all three models; only ``seed`` varies.
        shared = dict(reg=1e-2,
                      max_rating=self.max_rat,
                      min_rating=self.min_rat)
        feature_attrs = ("user_features_", "item_features_")

        # seed 0, twice
        als1 = ALS(n_user, n_item, n_feature, seed=0, **shared)
        als1.fit(ratings, n_iters=3)

        als2 = ALS(n_user, n_item, n_feature, seed=0, **shared)
        als2.fit(ratings, n_iters=3)
        for attr in feature_attrs:
            assert_array_equal(getattr(als1, attr), getattr(als2, attr))

        # seed 1: comparing against the seed-0 model must raise
        als3 = ALS(n_user, n_item, n_feature, seed=1, **shared)
        als3.fit(ratings, n_iters=3)
        for attr in feature_attrs:
            assert_raises(AssertionError, assert_array_equal,
                          getattr(als1, attr), getattr(als3, attr))
Пример #11
0
validation = ratings[train_size:]

# quick sanity output: split shapes and the first two training rows
print(train.shape, validation.shape)
print(train[0, :], train[1, :])
# optional visual check of the split (left disabled):
# plt.imshow(train, cmap='jet', interpolation='nearest')
# plt.show()
# plt.imshow(validation, cmap='jet', interpolation='nearest')
# plt.show()

# model settings
n_feature = 10
eval_iters = 10
print(
    "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
    % (n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
als_settings = dict(n_user=n_user,
                    n_item=n_item,
                    n_feature=n_feature,
                    reg=5e-2,
                    max_rating=5.,
                    min_rating=1.,
                    seed=0)
als = ALS(**als_settings)

# fit on the training split, then score both splits
als.fit(train, n_iters=eval_iters)
train_preds = als.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = als.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
print("after %d iterations, train RMSE: %.6f, validation RMSE: %.6f"
      % (eval_iters, train_rmse, val_rmse))
Пример #12
0
# load the MovieLens 1M ratings (download/load helpers are defined elsewhere)
rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE)
ratings = load_movielens_1m_ratings(rating_file)
# Ids are still 1-based here (they are shifted below), so the largest id is
# used as the user/item count. ndarray.max() is a vectorized reduction; the
# builtin max() would iterate the column element by element in Python.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift user_id & movie_id by 1. let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# split data to training & testing
train_pct = 0.9
rand_state.shuffle(ratings)  # shuffles in place before the split
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# models settings
n_feature = 10
eval_iters = 10
print("n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d" % (
    n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
als = ALS(n_user=n_user, n_item=n_item, n_feature=n_feature,
          reg=5e-2, max_rating=5., min_rating=1., seed=0)

# fit on the training split, then report RMSE on both splits
als.fit(train, n_iters=eval_iters)
train_preds = als.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = als.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
# no backslash needed: the open parenthesis already continues the line
print("after %d iterations, train RMSE: %.6f, validation RMSE: %.6f" %
      (eval_iters, train_rmse, val_rmse))