示例#1
0
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF
import glob
from os.path import exists, basename
import sys

if __name__ == "__main__":
    # Batch driver: fit one PMF model for every file under
    # ./monthly-data-bkp/monthly-data/ whose name starts with the prefix given
    # as the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: " + basename(sys.argv[0]) + " <monthly-file-prefix>")
    prefix = sys.argv[1]

    for file_path in sorted(glob.glob("./monthly-data-bkp/monthly-data/" + prefix + "*")):
        print(file_path)
        name = basename(file_path)

        # Skip inputs that already have a "<name>_u" file in ./monthly-pmf/
        # (presumably written by pmf.fit -- TODO confirm against PMF).
        if exists("./monthly-pmf/" + name + "_u"):
            continue
        print("Processing ...:" + name)

        pmf = PMF()
        pmf.set_params({"num_feat": 30, "epsilon": 1, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 30, "num_batches": 100,
                        "batch_size": 1000})
        ratings = load_rating_data(file_path)
        print(len(np.unique(ratings[:, 0])), len(np.unique(ratings[:, 1])), pmf.num_feat)

        # Train on every rating and hold nothing out.
        # train_test_split(ratings, test_size=0.0) is rejected with ValueError
        # by current scikit-learn releases (a float test_size must lie strictly
        # between 0 and 1), so shuffle the rows explicitly and pass an empty
        # test set instead.
        # NOTE(review): assumes `ratings` is a NumPy array, as the column
        # indexing above suggests -- confirm against load_rating_data.
        train = np.random.permutation(ratings)
        test = ratings[:0]
        pmf.fit(train, test, name)

    # Check performance by plotting train and test errors
    #plt.plot(range(pmf.maxepoch), pmf.rmse_train, marker='o', label='Training Data')
    #plt.plot(range(pmf.maxepoch), pmf.rmse_test, marker='v', label='Test Data')
    #plt.title('The MovieLens Dataset Learning Curve')
    #plt.xlabel('Number of Epochs')
    #plt.ylabel('RMSE')
    #plt.legend()
    #plt.grid()
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF

if __name__ == "__main__":
    # Train a PMF model on the ratings file and plot the per-epoch RMSE
    # series recorded on the model for the train and test splits.
    hyper_params = dict(
        num_feat=10,
        epsilon=1,
        _lambda=0.1,
        momentum=0.8,
        maxepoch=10,
        num_batches=100,
        batch_size=1000,
    )

    file_path = "data/ml-100k/u.data"
    pmf = PMF()
    pmf.set_params(hyper_params)

    ratings = load_rating_data(file_path)
    # Number of distinct values in columns 0 and 1, then the latent dimension.
    distinct_counts = [len(np.unique(ratings[:, col])) for col in (0, 1)]
    print(*distinct_counts, pmf.num_feat)

    # 80/20 random split; spilt_rating_dat(ratings) is the project's own
    # alternative splitter.
    train, test = train_test_split(ratings, test_size=0.2)
    pmf.fit(train, test)

    # Check performance by plotting train and test errors
    curves = (
        (pmf.rmse_train, 'o', 'Training Data'),
        (pmf.rmse_test, 'v', 'Test Data'),
    )
    for errors, marker, label in curves:
        plt.plot(range(pmf.maxepoch), errors, marker=marker, label=label)
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF

if __name__ == "__main__":
    # Train a PMF model on the ratings file, then print every index returned
    # by pmf.predict(3), ordered from highest to lowest predicted value.
    hyper_params = {
        "num_feat": 10,
        "alpha": 0.01,
        "lambda_u": 0.1,
        "lambda_v": 0.1,
        "max_epoch": 60,
        "num_batches": 100,
        "batch_size": 500
    }

    file_path = "data/ml-100k/u.data"
    pmf = PMF()  # build the model and initialise it
    pmf.set_params(hyper_params)

    ratings = load_rating_data(file_path)  # load the data set
    # Split the data set into a training set and a test set (80/20).
    train, test = train_test_split(ratings, test_size=0.2)
    pmf.train(train, test)  # train

    scores = pmf.predict(3)
    # argsort is ascending, so reverse it to list the best scores first.
    ranking = np.argsort(scores)[::-1]
    print("对用户3推荐的电影为:")

    for movie in ranking:
        print("电影:{},推荐值为:{:.2f}".format(movie, scores[movie]))
示例#4
0
文件: comparison.py 项目: ajbc/ndm
# Gaussian mixture: fit on the observations and write the per-observation
# component probabilities and the fitted parameters to the output file.
gmm = GaussianMixture(n_components=args.K)
gmm.fit(obs)
gmm_z = gmm.predict_proba(obs)
for dataset_name, values in (
    ("gmm/z", gmm_z),
    ("gmm/components", gmm.means_),
    ("gmm/covariances", gmm.covariances_),
    ("gmm/weights", gmm.weights_),
):
    fout.create_dataset(dataset_name, data=values)

# Probabilistic Matrix Factorization
from ProbabilisticMatrixFactorization import PMF

if args.PMF_bool:
    # Reshape the observation matrix for input: one row per matrix cell,
    # laid out as [row index, column index, value] in row-major order.
    n_rows, n_cols = obs.shape[0], obs.shape[1]
    N = n_rows * n_cols
    row_grid, col_grid = np.meshgrid(
        np.arange(0, n_rows, 1),
        np.arange(0, n_cols, 1),
        indexing="ij",  # grids are (n_rows, n_cols), so no transpose is needed
    )
    Y = np.concatenate(
        (
            np.reshape(row_grid, (N, 1)),
            np.reshape(col_grid, (N, 1)),
            np.reshape(obs, (N, 1)),
        ),
        axis=1,
    )

    pmf = PMF()
    pmf.set_params({"num_feat": args.K})
    # The same triples are passed as both the training and the test argument.
    pmf.fit(Y, Y)
    fout.create_dataset("pmf/z", data=pmf.w_User)
    fout.create_dataset("pmf/components", data=pmf.w_Item.T)
fout.close()
import matplotlib.pyplot as plt
import numpy as np
from LoadData import load_rating_data, spilt_rating_dat
from sklearn.model_selection import train_test_split
from ProbabilisticMatrixFactorization import PMF

if __name__ == "__main__":
    # Train a PMF model on the ratings file, show the train/test RMSE
    # learning curves, then print the result of pmf.topK on the test split.
    hyper_params = {
        "num_feat": 10,
        "epsilon": 1,
        "_lambda": 0.1,
        "momentum": 0.8,
        "maxepoch": 10,
        "num_batches": 100,
        "batch_size": 1000,
    }

    file_path = "data/ml-100k/u.data"
    pmf = PMF()
    pmf.set_params(hyper_params)

    ratings = load_rating_data(file_path)
    # Number of distinct values in columns 0 and 1, then the latent dimension.
    distinct_counts = [len(np.unique(ratings[:, col])) for col in (0, 1)]
    print(*distinct_counts, pmf.num_feat)

    # 80/20 random split; spilt_rating_dat(ratings) is the project's own
    # alternative splitter.
    train, test = train_test_split(ratings, test_size=0.2)
    pmf.fit(train, test)

    # Check performance by plotting train and test errors
    curves = (
        (pmf.rmse_train, 'o', 'Training Data'),
        (pmf.rmse_test, 'v', 'Test Data'),
    )
    for errors, marker, label in curves:
        plt.plot(range(pmf.maxepoch), errors, marker=marker, label=label)
    plt.title('The MovieLens Dataset Learning Curve')
    plt.xlabel('Number of Epochs')
    plt.ylabel('RMSE')
    plt.legend()
    plt.grid()
    plt.show()

    top_k_metrics = pmf.topK(test)
    print("precision_acc,recall_acc:" + str(top_k_metrics))