"""Fit a PMF model for every monthly rating file whose name starts with a prefix.

Usage: python <this script> <file-name-prefix>
"""
import glob
import sys
from os.path import basename, exists

import numpy as np
from sklearn.model_selection import train_test_split

from LoadData import load_rating_data, spilt_rating_dat
from ProbabilisticMatrixFactorization import PMF

# Directory scanned for input files and directory checked for existing results.
INPUT_DIR = "./monthly-data-bkp/monthly-data/"
OUTPUT_DIR = "./monthly-pmf/"

# Hyper-parameters handed to PMF.set_params for every file.
PMF_PARAMS = {
    "num_feat": 30,
    "epsilon": 1,
    "_lambda": 0.1,
    "momentum": 0.8,
    "maxepoch": 30,
    "num_batches": 100,
    "batch_size": 1000,
}


def main(argv):
    """Train PMF on each not-yet-processed file matching the prefix in argv[1].

    Args:
        argv: Command-line vector; argv[1] is the file-name prefix that is
            appended to INPUT_DIR to build the glob pattern.

    Exits with a usage message when the prefix argument is missing (the
    original indexed sys.argv[1] directly and died with an IndexError).
    """
    if len(argv) < 2:
        sys.exit("usage: {} <file-name-prefix>".format(argv[0]))
    prefix = argv[1]

    for file_path in sorted(glob.glob(INPUT_DIR + prefix + "*")):
        print(file_path)
        name = basename(file_path)

        # A "<name>_u" file in OUTPUT_DIR marks the input as already handled.
        # NOTE(review): presumably written by pmf.fit via its third argument;
        # confirm against the PMF implementation.
        if exists(OUTPUT_DIR + name + "_u"):
            continue
        print("Processing ...:" + name)

        pmf = PMF()
        # Pass a copy so a set_params that mutates its argument cannot alter
        # the shared defaults between iterations.
        pmf.set_params(dict(PMF_PARAMS))

        ratings = load_rating_data(file_path)
        print(len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1])), pmf.num_feat)

        # All ratings are used for training and the test split is empty.
        # train_test_split(ratings, test_size=0.0) is rejected with a
        # ValueError by current scikit-learn (a float test_size must lie
        # strictly between 0 and 1), so shuffle the rows explicitly and pass
        # a zero-row slice that keeps the column layout.
        train = np.random.permutation(ratings)
        test = ratings[:0]
        pmf.fit(train, test, name)


if __name__ == "__main__":
    main(sys.argv)
"""Train a PMF model on data/ml-100k/u.data and plot its per-epoch RMSE."""
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF

if __name__ == "__main__":
    hyper_params = {
        "num_feat": 10,
        "epsilon": 1,
        "_lambda": 0.1,
        "momentum": 0.8,
        "maxepoch": 10,
        "num_batches": 100,
        "batch_size": 1000,
    }
    model = PMF()
    model.set_params(hyper_params)

    data = load_rating_data("data/ml-100k/u.data")

    # Report how many distinct values the first two columns hold, together
    # with the configured number of latent features.
    distinct_col0 = len(np.unique(data[:, 0]))
    distinct_col1 = len(np.unique(data[:, 1]))
    print(distinct_col0, distinct_col1, model.num_feat)

    # Hold out 20% of the rows for testing (spilt_rating_dat(data) is the
    # alternative splitter that was left disabled).
    train, test = train_test_split(data, test_size=0.2)
    model.fit(train, test)

    # Check performance by plotting train and test errors.
    # NOTE(review): nothing here calls plt.show() or plt.savefig(), so the
    # figure is only built, not displayed or written - confirm that is intended.
    epochs = range(model.maxepoch)
    curves = (
        (model.rmse_train, 'o', 'Training Data'),
        (model.rmse_test, 'v', 'Test Data'),
    )
    for errors, marker, label in curves:
        plt.plot(epochs, errors, marker=marker, label=label)
"""Train a PMF model on data/ml-100k/u.data and print ranked scores for one user."""
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF

if __name__ == "__main__":
    ratings_path = "data/ml-100k/u.data"

    # Build the model and initialise its hyper-parameters.
    model = PMF()
    model.set_params({
        "num_feat": 10,
        "alpha": 0.01,
        "lambda_u": 0.1,
        "lambda_v": 0.1,
        "max_epoch": 60,
        "num_batches": 100,
        "batch_size": 500,
    })

    # Load the data set, then split it into a training set and a test set.
    ratings = load_rating_data(ratings_path)
    train, test = train_test_split(ratings, test_size=0.2)

    # Train the model.
    model.train(train, test)

    # Scores returned for argument 3, walked from the highest to the lowest.
    # NOTE(review): presumably 3 is a user id and each index is a movie id,
    # as the printed messages suggest - confirm against PMF.predict.
    scores = model.predict(3)
    ranking = np.argsort(scores)[::-1]

    print("对用户3推荐的电影为:")
    for movie in ranking:
        print("电影:{},推荐值为:{:.2f}".format(movie, scores[movie]))
# Gaussian mixture model: fit on obs and store the per-sample component
# probabilities together with the fitted parameters.
gmm = GaussianMixture(n_components=args.K)
gmm.fit(obs)
gmm_z = gmm.predict_proba(obs)
for dataset_name, dataset_values in (
    ("gmm/z", gmm_z),
    ("gmm/components", gmm.means_),
    ("gmm/covariances", gmm.covariances_),
    ("gmm/weights", gmm.weights_),
):
    fout.create_dataset(dataset_name, data=dataset_values)

# Probabilistic Matrix Factorization
from ProbabilisticMatrixFactorization import PMF

# Reshape the data for input: one (row index, column index, value) triplet
# per entry of obs, in row-major order.
if args.PMF_bool:
    n_rows, n_cols = obs.shape[0], obs.shape[1]
    N = n_rows * n_cols

    # indexing="ij" yields grids of shape (n_rows, n_cols) whose entries are
    # the row index and the column index of each cell.
    row_grid, col_grid = np.meshgrid(
        np.arange(n_rows), np.arange(n_cols), indexing="ij"
    )
    Y = np.hstack((
        np.reshape(row_grid, (N, 1)),
        np.reshape(col_grid, (N, 1)),
        np.reshape(obs, (N, 1)),
    ))

    pmf = PMF()
    pmf.set_params({"num_feat": args.K})
    # The same triplets are passed as both the training and the test data.
    pmf.fit(Y, Y)
    fout.create_dataset("pmf/z", data=pmf.w_User)
    fout.create_dataset("pmf/components", data=pmf.w_Item.T)

fout.close()
"""Train a PMF model on data/ml-100k/u.data, plot its learning curve, print topK."""
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF


def _plot_learning_curve(model):
    """Plot the model's per-epoch train and test RMSE and show the figure."""
    epochs = range(model.maxepoch)
    plt.plot(epochs, model.rmse_train, marker='o', label='Training Data')
    plt.plot(epochs, model.rmse_test, marker='v', label='Test Data')
    plt.title('The MovieLens Dataset Learning Curve')
    plt.xlabel('Number of Epochs')
    plt.ylabel('RMSE')
    plt.legend()
    plt.grid()
    plt.show()


if __name__ == "__main__":
    settings = {
        "num_feat": 10,
        "epsilon": 1,
        "_lambda": 0.1,
        "momentum": 0.8,
        "maxepoch": 10,
        "num_batches": 100,
        "batch_size": 1000,
    }
    recommender = PMF()
    recommender.set_params(settings)

    rating_rows = load_rating_data("data/ml-100k/u.data")

    # Distinct values in the first two columns, plus the latent feature count.
    first_col_count = len(np.unique(rating_rows[:, 0]))
    second_col_count = len(np.unique(rating_rows[:, 1]))
    print(first_col_count, second_col_count, recommender.num_feat)

    # 80/20 split; spilt_rating_dat(rating_rows) is the disabled alternative.
    train, test = train_test_split(rating_rows, test_size=0.2)
    recommender.fit(train, test)

    # Check performance by plotting train and test errors.
    _plot_learning_curve(recommender)

    top_k_result = recommender.topK(test)
    print("precision_acc,recall_acc:" + str(top_k_result))