Пример #1
0
def svdpp_algorithm() -> SVDpp:
    """Interactively build an ``SVDpp`` model from console input.

    Asks whether the default parameters should be used. Answering ``y``
    (case-insensitive, surrounding whitespace ignored) returns ``SVDpp()``.
    Any other answer prompts for the number of factors, the number of
    epochs and the learning rate, and builds the model from those values.

    Returns:
        A new ``SVDpp`` instance; this function does not fit it.

    Raises:
        ValueError: If a numeric prompt receives text that ``int`` /
            ``float`` cannot parse.
    """
    user_input = input(
        'Do you want to continue with the default parameters? Y/N')
    if user_input.strip().lower() == 'y':
        return SVDpp()

    n_factors = int(input('Enter total number of factors: '))
    n_epochs = int(input('Enter number of epochs: '))
    lr_all = float(
        input('Enter the learning rate for all the paramaters: '))
    # Pass by keyword: in Surprise's documented SVDpp signature the third
    # positional parameter is not ``lr_all``, so a positional call would
    # silently assign the learning rate to a different hyperparameter.
    return SVDpp(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all)
    def train(self, df, model_path=''):
        '''
        Train a latent factor model (SVD++).

        :param df: data with these three columns --> user id, item id, rating
        :param model_path: where to persist the model; empty (the default)
            means the model is not persisted
        :return: the trained model
        '''
        # Convert the data into the format surprise expects
        full_trainset = Dataset.load_from_df(
            df, self.reader).build_full_trainset()

        fitted_model = SVDpp()
        fitted_model.fit(full_trainset)

        # Nothing to persist when no path was given.
        if not model_path:
            return fitted_model

        surprise.dump.dump(model_path, algo=fitted_model, verbose=1)
        return fitted_model
Пример #3
0
 def slot_select_algo_combobox(self):
     """React to a new selection in the algorithm combo box.

     Marks the algorithm as changed and not yet trained. If the selected
     text names a known algorithm, a fresh instance is stored in
     ``self.algo`` and a loading message is appended to
     ``self.display_process_label``. Any other text leaves ``self.algo``
     untouched.
     """
     self.algo_change_flag = True
     self.algo_trained_flag = False

     selected = self.select_algo_comboBox.currentText()

     # Combo-box label -> algorithm class.
     known_algorithms = {
         'SVD': SVD,
         'SVD++': SVDpp,
         'NMF': NMF,
         'Slope One': SlopeOne,
         'k-NN': KNNBasic,
         'Centered k-NN': KNNWithMeans,
         'k-NN Baseline': KNNBaseline,
         'Co-Clustering': CoClustering,
         'Baseline': BaselineOnly,
         'Random': NormalPredictor,
     }

     algo_cls = known_algorithms.get(selected)
     if algo_cls is None:
         return

     self.algo = algo_cls()
     # Every loading message has the same shape around the label.
     self.display_process_label.append(f'加载{selected}模型...')
Пример #4
0
def run_svd(data, params, svdpp=False):
    '''Fit a matrix-factorization model on ``data`` and return it.

    Builds ``SVDpp`` when ``svdpp`` is true, otherwise ``SVD`` (which
    additionally receives the ``biased`` setting). Hyperparameters are read
    from ``params`` through ``utils.get_param``; both models are created
    with ``verbose=True``.
    '''
    def lookup(key):
        return utils.get_param(params, key)

    if svdpp:
        model_cls = SVDpp
        extra_kwargs = {}
    else:
        model_cls = SVD
        # Only plain SVD takes the ``biased`` switch.
        extra_kwargs = {'biased': lookup('biased')}

    model = model_cls(
        **extra_kwargs,
        n_factors=lookup('n_factors'),
        n_epochs=lookup('n_epochs'),
        lr_all=lookup('learning_rate'),
        reg_all=lookup('reg'),
        verbose=True,
    )
    model.fit(data)
    return model
def grid_search(surprise_model):
    """Build a ``GridSearchCV`` with a predefined grid for the given model.

    Args:
        surprise_model: Callable (normally the algorithm class) that can be
            called with no arguments. The exact type of the object it
            returns selects the grid; supported types are ``SVDpp``,
            ``SVD``, ``NMF`` and ``BaselineOnly``.

    Returns:
        A ``GridSearchCV`` object configured with RMSE and MAE measures,
        3-fold cross-validation, ``n_jobs=-1``, ``joblib_verbose=1`` and
        ``refit=True``. This function does not fit it.

    Raises:
        ValueError: If the model type has no predefined grid. Previously
            this case failed with ``UnboundLocalError`` on ``return``.
    """
    # Instantiate once; comparing exact types keeps the original matching
    # rule without building a throwaway model for every comparison.
    model_type = type(surprise_model())

    if model_type is SVDpp:
        param_grid = {
            'n_factors': [20],
            'n_epochs': [20],
            'lr_all': [0.005, 0.007, 0.05, 0.07, 0.5, 0.7, 1.0],
            'reg_all': [0.02, 0.05, 0.2, 0.5],
        }
    elif model_type is SVD:
        param_grid = {
            'n_epochs': [20],
            'lr_all': [0.005, 0.007, 0.05, 0.07, 0.5, 0.7, 1.0],
            'reg_all': [0.02, 0.05, 0.2, 0.5],
        }
    elif model_type is NMF:
        param_grid = {
            'n_epochs': [20],
            'reg_pu': [0.02, 0.04, 0.06, 0.08, 0.2],
            'reg_qi': [0.02, 0.04, 0.06, 0.08, 0.2],
        }
    elif model_type is BaselineOnly:
        param_grid = {
            'bsl_options': {
                'method': ['als', 'sgd'],
                'reg': [1, 2],
                'learning_rate': [0.005, 0.05, 0.5, 1.0],
            },
        }
    else:
        raise ValueError(
            f'No parameter grid defined for {model_type.__name__}')

    return GridSearchCV(surprise_model, param_grid,
                        measures=['rmse', 'mae'], cv=3, n_jobs=-1,
                        joblib_verbose=1, refit=True)
RS_data = Dataset.load_from_df(RS_ratings, RS_reader)

# Benchmark_Algorithm_Metric
# Cross-validate each algorithm with its default settings and collect one
# summary Series per algorithm in ``benchmark``.
benchmark = []
for algorithm in [
        BaselineOnly(),
        CoClustering(),
        KNNBaseline(),
        KNNBasic(),
        KNNWithMeans(),
        KNNWithZScore(),
        NMF(),
        NormalPredictor(),
        SlopeOne(),
        SVD(),
        SVDpp()
]:
    # Perform cross validation
    results = cross_validate(algorithm,
                             RS_data,
                             measures=['rmse', 'mae', 'mse', 'fcp'],
                             cv=5,
                             verbose=True)
    # Average every column of the results over the folds.
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    # Series.append was removed in pandas 2.0; pd.concat is the supported
    # way to add the algorithm-name entry.
    tmp = pd.concat([
        tmp,
        pd.Series([type(algorithm).__name__], index=['Algorithm']),
    ])
    benchmark.append(tmp)
# Results to Dataframe and .csv
import time

from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms.matrix_factorization import SVDpp

# Time a 5-fold cross-validation of SVD++ on the built-in MovieLens-1M data.
start_time = time.time()

data = Dataset.load_builtin('ml-1m')
# NOTE(review): the three values below are never passed to the model, so
# SVDpp runs with its defaults — confirm whether they were meant to be
# hyperparameters before wiring them in.
e = 15
reg = .03
init_mean = .1
algo = SVDpp(verbose=1)
# ``surprise.evaluate`` is no longer available in current Surprise
# releases; ``cross_validate`` is its replacement. The arguments spell out
# RMSE/MAE on 5 folds with per-fold output.
cross_validate(algo, data, measures=['rmse', 'mae'], cv=5, verbose=True)

running_time = time.time() - start_time
# The timed algorithm is SVD++, so label the output accordingly.
print("SVD++:", running_time, " s")
Пример #8
0
    print(f'HW5 Implementation Out-of-Sample Error: {e_out:.3}')

    # "off-the-shelf" SVD from SciPy
    U, Sigma, V = off_the_shelf.scipy_svd_train(M, N, K, Y_train)
    U = np.matmul(U, np.diag(np.sqrt(Sigma)))
    V = np.matmul(np.diag(np.sqrt(Sigma)), V)
    e_in = svd_sgd.get_err(U, V.transpose(), Y_train)
    e_out = svd_sgd.get_err(U, V.transpose(), Y_test)
    print(f'SciPy SVD In-Sample Error: {e_in:.3}')
    print(f'SciPy SVD Out-of-Sample Error: {e_out:.3}')

    # Surprise models
    svd_models = [
        ('SVD Unbiased', SVD(n_factors=20, biased=False, n_epochs=100)),
        ('SVD w/ Global and Term Bias', SVD(n_factors=20, n_epochs=100)),
        ('SVD++', SVDpp(n_factors=20)),
    ]

    def get_surprise_err(model, d):
        """Return the mean half squared error of ``model`` over ``d``.

        ``d`` is a sized iterable of ``(u, v, rating)`` triples. Each rating
        is converted with ``int`` and compared against the ``est`` attribute
        of ``model.predict(u, v)``.
        """
        total = 0.0
        for sample in d:
            user, item, rating = sample
            predicted = model.predict(user, item).est
            residual = int(rating) - predicted
            # half of the squared error
            total += 0.5 * residual**2

        return total / len(d)

    reader = Reader(line_format='user item rating', sep='\t')
    data = Dataset.load_from_folds([('../data/train.txt', '../data/test.txt')],
                                   reader)
    train, test = list(data.folds())[0]