def run_matrix_factorization():
    """
    Load the ratings file, split it, and fit a MatrixFactorization model.

    Reads the tab-separated file '../data/ratings2.csv', derives the user
    and item counts from its `buyer_id` and `product_id` columns, splits the
    frame with `train_test_split`, and trains a 20-factor model on the
    training part while passing the held-out part as `val`.

    NOTE(review): the user count is the number of distinct buyer ids while
    the item count is the largest product id plus one. These agree only if
    buyer ids are contiguous and zero-based -- confirm against how
    MatrixFactorization indexes users.
    """
    ratings = pd.read_csv('../data/ratings2.csv', sep='\t')

    # Dimensions handed to the model.
    user_count = len(ratings['buyer_id'].unique())
    item_count = ratings['product_id'].max() + 1

    train, val = train_test_split(ratings)

    # Hyper-parameters for the training run, gathered in one place.
    train_options = {
        'max_iter': 20,
        'learning_rate': 0.01,
        'regularize': 0.5,
        'val': val,
        'lr_scheduler': True,
    }

    model = MatrixFactorization(
        train,
        user_count,
        item_count,
        num_latent_factors=20,
    )
    model.train(**train_options)
def main():
    """
    Train a MatrixFactorization model and count exact-match test predictions.

    Command-line options (all optional):
        --data_dir  directory handed to get_rating (default "ml-100k/")
        --base      training file name (default "u1.base")
        --test      test file name (default "u1.test")
        --a, --b    floats forwarded to MatrixFactorization as `a` and `b`
        --K         int forwarded to MatrixFactorization as `K`
        --tol       float forwarded to `train` as `tol`

    Prints the number of test rows and how many of them have a rounded
    predicted rating equal to the value stored in the row.
    """
    # Command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default="ml-100k/")
    parser.add_argument("--base", default="u1.base")
    parser.add_argument("--test", default="u1.test")
    parser.add_argument("--a", default=0.01, type=float)
    parser.add_argument("--b", default=0.2, type=float)
    parser.add_argument("--K", default=50, type=int)
    parser.add_argument("--tol", default=3.e-2, type=float)
    args = parser.parse_args()

    # Load the rating data
    R, R_test, user_size, item_size = get_rating(
        data_dir=args.data_dir,
        base=args.base,
        test=args.test,
        debug=True,
    )

    # Fit the model
    mf = MatrixFactorization(
        R=R,
        R_test=R_test,
        user_size=user_size,
        item_size=item_size,
        K=args.K,
        a=args.a,
        b=args.b,
    )
    print("training...")
    mf.train(tol=args.tol, debug=True)

    print("The number of test data is {}.".format(R_test.shape[0]))

    # Each test row is read as (user, item, rating); the first two fields are
    # shifted down by one before lookup (presumably 1-based ids in the file
    # mapped to 0-based indices -- confirm against get_rating).
    hits = sum(
        1
        for row in R_test
        if np.round(mf.rating(iu=int(row[0]) - 1, ii=int(row[1]) - 1)) == row[2]
    )
    print("The number of correct predictions is {}.".format(hits))
#!/usr/bin/env python3
"""Script entry point: train MatrixFactorization and query one prediction."""
from matrix_factorization import MatrixFactorization

# Configuration passed to MatrixFactorization: a data directory plus the
# names of the rating, movie and tag files (presumably located inside that
# directory -- confirm against the MatrixFactorization loader).
DATA = {
    "DATA_DIR": "./ml-latest-small",
    "RATING_FILE": "ratings.csv",
    "MOVIE_FILE": "movies.csv",
    "TAG_FILE": "tags.csv",
}


def _run_demo():
    """Build the model from DATA, train it, and call predict_single once."""
    model = MatrixFactorization(DATA)
    model.train()
    # Arguments are presumably (user id, movie id) -- verify against
    # predict_single. The return value, if any, is not used here.
    model.predict_single(1, 110)


if __name__ == "__main__":
    _run_demo()