from BinaryProbabilisticMatrixFactorization import BPMF if __name__ == "__main__": # file_path = "data/ml-100k/u.data" file_path = "/home/lxu/Documents/StudentLearningProcess/skill_builder_data_corrected_withskills_finished.csv" pmf = BPMF() pmf.set_params({ "num_feat": 6, "epsilon": 2, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 100, "num_batches": 300, "batch_size": 1000 }) ratings, order = load_rating_data(file_path) print(len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1])), pmf.num_feat) train, test, order_train, order_test = train_test_split( ratings, order, test_size=0.2) # spilt_rating_dat(ratings) pmf.two_step_fit(train, test, order_train, order_test, len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1]))) # Check performance by plotting train and test errors plt.plot(range(pmf.maxepoch), pmf.logloss_train, marker='o', label='Training Data')
from ProbabilisticMatrixFactorization import PMF import glob from os.path import exists, basename import sys if __name__ == "__main__": for file_path in sorted(glob.glob("./monthly-data-bkp/monthly-data/"+sys.argv[1]+"*")): print(file_path) if( not exists("./monthly-pmf/"+basename(file_path)+"_u")): print("Processing ...:"+basename(file_path)); pmf = PMF() pmf.set_params({"num_feat": 30, "epsilon": 1, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 30, "num_batches": 100, "batch_size": 1000}) ratings = load_rating_data(file_path) print(len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1])), pmf.num_feat) train, test = train_test_split(ratings, test_size=0.0) # spilt_rating_dat(ratings) pmf.fit(train, test, basename(file_path)) # Check performance by plotting train and test errors #plt.plot(range(pmf.maxepoch), pmf.rmse_train, marker='o', label='Training Data') #plt.plot(range(pmf.maxepoch), pmf.rmse_test, marker='v', label='Test Data') #plt.title('The MovieLens Dataset Learning Curve') #plt.xlabel('Number of Epochs') #plt.ylabel('RMSE') #plt.legend() #plt.grid() #plt.show() #print("precision_acc,recall_acc:" + str(pmf.topK(test)))
import matplotlib.pyplot as plt import numpy as np from LoadData import load_rating_data, spilt_rating_dat from sklearn.model_selection import train_test_split from ProbabilisticMatrixFactorization import PMF if __name__ == "__main__": file_path = "data/ml-100k/u.data" pmf = PMF() pmf.set_params({"num_feat": 10, "epsilon": 1, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 10, "num_batches": 100, "batch_size": 1000}) ratings = load_rating_data(file_path) print(len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1])), pmf.num_feat) train, test = train_test_split(ratings, test_size=0.2) # spilt_rating_dat(ratings) pmf.fit(train, test) # Check performance by plotting train and test errors plt.plot(range(pmf.maxepoch), pmf.rmse_train, marker='o', label='Training Data') plt.plot(range(pmf.maxepoch), pmf.rmse_test, marker='v', label='Test Data') plt.title('The MovieLens Dataset Learning Curve') plt.xlabel('Number of Epochs') plt.ylabel('RMSE') plt.legend() plt.grid() plt.show() print("precision_acc,recall_acc:" + str(pmf.topK(test)))