示例#1
0
    def test_pmf_convergence(self):
        """Compare training RMSE of two PMF models that differ only in ``converge``.

        Both models share the same seed, rating bounds and synthetic ratings
        and are fitted for five iterations. The model built with
        ``converge=1e-2`` is expected to end with a strictly lower RMSE on the
        training ratings than the one built with ``converge=1e-1``
        (presumably because the looser threshold stops training earlier --
        confirm against the PMF implementation).
        """
        n_user = 100
        n_item = 200
        ratings = make_ratings(n_user,
                               n_item,
                               20,
                               30,
                               self.rating_choices,
                               seed=self.seed)

        def fit_and_score(converge):
            # Build, fit and evaluate one model so both runs share identical
            # settings apart from the convergence threshold.
            model = PMF(n_user,
                        n_item,
                        self.n_feature,
                        seed=0,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat,
                        converge=converge)
            model.fit(ratings, n_iters=5)
            # Columns 0-1 are the inputs to predict; column 2 is the target.
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = fit_and_score(1e-2)
        rmse_2 = fit_and_score(1e-1)
        # assertLess reports both values on failure, unlike assertTrue(a < b).
        self.assertLess(rmse_1, rmse_2)
示例#2
0
    def test_pmf_with_random_data(self):
        """Check that more training iterations lower the training RMSE.

        Two identically configured and identically seeded PMF models are
        fitted on the same synthetic ratings, one for a single iteration and
        one for three. The single-iteration model must have a strictly higher
        RMSE on the training ratings.
        """
        n_user = 1000
        n_item = 2000
        ratings = make_ratings(n_user,
                               n_item,
                               20,
                               30,
                               self.rating_choices,
                               seed=self.seed)

        def fit_and_score(n_iters):
            # Build, fit and evaluate one model so both runs share identical
            # settings apart from the number of iterations.
            model = PMF(n_user,
                        n_item,
                        self.n_feature,
                        batch_size=1000.,
                        epsilon=10.,
                        seed=0,
                        max_rating=self.max_rat,
                        min_rating=self.min_rat)
            model.fit(ratings, n_iters=n_iters)
            # Columns 0-1 are the inputs to predict; column 2 is the target.
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = fit_and_score(1)
        rmse_2 = fit_and_score(3)
        # assertGreater reports both values on failure, unlike
        # assertTrue(a > b).
        self.assertGreater(rmse_1, rmse_2)
示例#3
0
 def test_pmf_not_fitted_err(self):
     """Check that ``predict`` on a PMF that was never fitted raises ``NotFittedError``."""
     # Build the fixtures outside the assertRaises context so that an
     # exception raised during setup cannot satisfy the assertion by accident.
     ratings = make_ratings(10,
                            10,
                            1,
                            5,
                            self.rating_choices,
                            seed=self.seed)
     pmf = PMF(10, 10, self.n_feature)
     with self.assertRaises(NotFittedError):
         pmf.predict(ratings[:, :2])
示例#4
0
    def test_pmf_with_ml_100k_rating(self):
        """Fit PMF on ``self.ratings`` and require a training RMSE below 0.85.

        The model is trained for 15 iterations and evaluated on the same
        ratings it was fitted on, so this checks fitting quality rather than
        generalization.
        """
        # Presumably the user/item counts of the MovieLens 100k data set that
        # self.ratings is loaded from -- confirm against the fixture setup.
        n_user = 943
        n_item = 1682
        n_feature = 10
        ratings = self.ratings

        pmf = PMF(n_user,
                  n_item,
                  n_feature,
                  batch_size=1e4,
                  epsilon=20.,
                  reg=1e-4,
                  max_rating=5.,
                  min_rating=1.,
                  seed=self.seed)

        pmf.fit(ratings, n_iters=15)
        # Columns 0-1 are the inputs to predict; column 2 is the target.
        rmse = RMSE(pmf.predict(ratings[:, :2]), ratings[:, 2])
        # assertLess reports the actual RMSE on failure, unlike
        # assertTrue(rmse < 0.85).
        self.assertLess(rmse, 0.85)
# Subtract 1 from columns 0 and 1 of `ratings` in place (presumably turning
# 1-based user/item ids into 0-based indices -- confirm against the loader).
ratings[:, (0, 1)] -= 1

# Split the ratings into a 90% training part and a 10% validation part.
train_pct = 0.9
# Shuffle the rows in place before slicing.
# NOTE(review): this uses NumPy's global RNG without a visible seed, so the
# split may differ between runs even though the model itself uses seed=0.
np.random.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# Model settings: number of latent features and training iterations.
n_feature = 10
eval_iters = 20
print(
    "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
    % (n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
pmf = PMF(n_user=n_user,
          n_item=n_item,
          n_feature=n_feature,
          epsilon=25.,
          max_rating=5.,
          min_rating=1.,
          seed=0)

# Fit on the training part, then report RMSE on both parts. Columns 0-1 are
# passed to predict; column 2 is the rating the predictions are scored against.
pmf.fit(train, n_iters=eval_iters)
train_preds = pmf.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = pmf.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
print("after %d iterations, train RMSE: %.6f, validation RMSE: %.6f" %
      (eval_iters, train_rmse, val_rmse))
示例#6
0
        print("Iteration: %d/%d" % (i + 1, Iteration_time))
    print("Time cost: %f" % (time.time() - c))
"""
Probabilistic Matrix Factorization
"""
# Emit log records at INFO level and above, formatted as "LEVEL: message".
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
# NOTE(review): rand_state is not used in the visible lines -- confirm it is
# needed further down before keeping it.
rand_state = RandomState(0)
# Model settings: number of latent features and training iterations.
n_feature = 10
eval_iters = 20

print("training PMF model ...")
# user_count, song_origin_count and train are defined earlier in the script.
# n_item is song_origin_count + 1 (presumably so that the largest song id is
# still a valid index -- confirm against how the ids are assigned).
pmf = PMF(n_user=user_count,
          n_item=song_origin_count + 1,
          n_feature=n_feature,
          epsilon=15.,
          converge=1e-8,
          momentum=0.4,
          max_rating=5.0,
          min_rating=0.,
          seed=100,
          reg=0.01)
pmf.fit(train, n_iters=eval_iters)
"""
Mix 2 model and write result to file
"""
print("Begin Writing result to file ...")
# Open (and truncate) the output file in the current working directory.
# NOTE(review): the handle is not closed in the visible lines -- confirm it is
# closed after the loop below, or manage it with a `with` block.
f = open('submission.txt', 'w')
# Loop counter and bounds for the while loop that follows; the progress total
# printed below is GroupNumber * GroupSize.
userGroup = 0
GroupSize = 1000
GroupNumber = 200
print("Finished: %d / %d" % (0, GroupNumber * GroupSize))
while (userGroup < GroupNumber):