def test_pmf_convergence(self):
    """Check that a smaller ``converge`` value gives a lower training RMSE.

    Two PMF models are built with identical arguments except for
    ``converge`` (1e-2 vs 1e-1), each is fit on the same synthetic ratings
    with ``n_iters=5``, and the RMSE of the ``converge=1e-2`` model on the
    training data must be strictly smaller.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    # The positional 20 and 30 are passed straight to make_ratings;
    # presumably per-user rating-count bounds -- TODO confirm.
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Build, fit and evaluate one model; only `converge` varies.
        model = PMF(n_user, n_item, n_feature, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=5)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue(a < b).
    self.assertLess(rmse_1, rmse_2)
def test_pmf_with_random_data(self):
    """Check that more training iterations lower the training RMSE.

    Two PMF models with identical arguments are fit on the same synthetic
    ratings, one with ``n_iters=1`` and one with ``n_iters=3``. The RMSE on
    the training data after one iteration must be strictly greater than
    after three.
    """
    n_user = 1000
    n_item = 2000
    n_feature = self.n_feature
    # The positional 20 and 30 are passed straight to make_ratings;
    # presumably per-user rating-count bounds -- TODO confirm.
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # Build, fit and evaluate one model; only `n_iters` varies.
        model = PMF(n_user, n_item, n_feature, batch_size=1000.,
                    epsilon=10., seed=0, max_rating=self.max_rat,
                    min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_pmf_not_fitted_err(self):
    """Check that ``predict`` on an unfitted PMF raises NotFittedError."""
    ratings = make_ratings(10, 10, 1, 5, self.rating_choices,
                           seed=self.seed)
    pmf = PMF(10, 10, self.n_feature)

    # Only the call under test is guarded: if fixture setup raised
    # NotFittedError inside the block, the test would pass for the wrong
    # reason.
    with self.assertRaises(NotFittedError):
        pmf.predict(ratings[:, :2])
def test_pmf_with_ml_100k_rating(self):
    """Check that PMF fits ``self.ratings`` to a training RMSE below 0.85.

    The model is fit with ``n_iters=15`` on ``self.ratings`` (per the test
    name, the MovieLens 100k ratings -- loaded outside this method) and then
    evaluated on that same data.
    """
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    pmf = PMF(n_user, n_item, n_feature, batch_size=1e4, epsilon=20.,
              reg=1e-4, max_rating=5., min_rating=1., seed=self.seed)
    pmf.fit(ratings, n_iters=15)
    rmse = RMSE(pmf.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess shows the actual RMSE on failure, unlike assertTrue(a < b).
    self.assertLess(rmse, 0.85)
# Decrement the first two columns of every row in place.
ratings[:, :2] -= 1

# Shuffle the rows in place, then cut them 90% / 10% into a training part
# and a validation part.
train_pct = 0.9
np.random.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train, validation = ratings[:train_size], ratings[train_size:]

# Model hyper-parameters.
n_feature = 10
eval_iters = 20

summary_values = (n_user, n_item, n_feature,
                  train.shape[0], validation.shape[0])
print(
    "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
    % summary_values)

# Build the model and fit it on the training rows only.
pmf = PMF(
    n_user=n_user,
    n_item=n_item,
    n_feature=n_feature,
    epsilon=25.,
    max_rating=5.,
    min_rating=1.,
    seed=0,
)
pmf.fit(train, n_iters=eval_iters)

# Score each split: the first two columns go to predict(), the third
# column is compared against the predictions.
train_preds = pmf.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = pmf.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])

report_values = (eval_iters, train_rmse, val_rmse)
print("after %d iterations, train RMSE: %.6f, validation RMSE: %.6f"
      % report_values)
print("Iteration: %d/%d" % (i + 1, Iteration_time)) print("Time cost: %f" % (time.time() - c)) """ Probabilistic Matrix Factorization """ logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) rand_state = RandomState(0) n_feature = 10 eval_iters = 20 print("training PMF model ...") pmf = PMF(n_user=user_count, n_item=song_origin_count + 1, n_feature=n_feature, epsilon=15., converge=1e-8, momentum=0.4, max_rating=5.0, min_rating=0., seed=100, reg=0.01) pmf.fit(train, n_iters=eval_iters) """ Mix 2 model and write result to file """ print("Begin Writing result to file ...") f = open('submission.txt', 'w') userGroup = 0 GroupSize = 1000 GroupNumber = 200 print("Finished: %d / %d" % (0, GroupNumber * GroupSize)) while (userGroup < GroupNumber):