Example #1
import pandas as pd
from surprise import (SVD, SVDpp, NMF, SlopeOne, NormalPredictor,
                      CoClustering, model_selection)


def matrix_factorization_param(data_cv):
    # Iterate over all algorithms
    benchmark = []

    for algorithm in [
            SVD(),
            SVDpp(),
            NMF(),
            SlopeOne(),
            NormalPredictor(),
            CoClustering()
    ]:
        # Perform cross validation
        results = model_selection.cross_validate(algorithm,
                                                 data_cv,
                                                 measures=['RMSE', 'MAE'],
                                                 cv=5,
                                                 verbose=False)
        # Get results & append algorithm name
        tmp = pd.DataFrame.from_dict(results).mean(axis=0)
        tmp = pd.concat(
            [tmp, pd.Series([algorithm.__class__.__name__],
                            index=['Algorithm'])])
        benchmark.append(tmp)

    benchmark_df = pd.DataFrame(benchmark).set_index('Algorithm').sort_values(
        'test_mae')
    #print(benchmark_df)

    # Parameter grid
    param_grid = {
        'n_factors': [100, 150, 200],
        'n_epochs': [20, 40],
        'lr_all': [0.001, 0.005, 0.008],
        'reg_all': [0.075, 0.1, 0.15]
    }
    algorithm_gs = model_selection.GridSearchCV(SVD,
                                                param_grid,
                                                measures=['rmse'],
                                                cv=5,
                                                n_jobs=-1)
    algorithm_gs.fit(data_cv)

    # best parameters for a model with the lowest rmse
    best_algo = algorithm_gs.best_estimator['rmse']
    return best_algo
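
# --- Usage sketch (not part of the original example) ---
# The ratings file, Reader settings and the 80/20 split below are assumptions;
# only matrix_factorization_param() comes from the code above.
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

reader = Reader(line_format='user item rating', sep=',',
                rating_scale=(1, 5), skip_lines=1)
data_cv = Dataset.load_from_file('ratings.csv', reader=reader)

best_algo = matrix_factorization_param(data_cv)

# best_estimator returns an unfitted algorithm, so train and evaluate it
trainset, testset = train_test_split(data_cv, test_size=0.2)
best_algo.fit(trainset)
accuracy.rmse(best_algo.test(testset))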
Example #2
# NOTE: the snippet begins mid-call; line_format and sep are assumed below
reader = Reader(line_format='user item rating', sep=',',
                rating_scale=(1, 5),
                skip_lines=1)

data_train = Dataset.load_from_file(file_path, reader=reader)

# %% Hyper parameter tuning and CV analysis
# Algorithm: SVD
Hyper_Params = {
    'n_epochs': [10],
    'n_factors': [50, 100, 150, 200],
    'biased': [False],
    'lr_all': [0.005],
    'reg_all': [0.01, 0.1, 0.3, 1.0]
}

Train_CV = model_selection.GridSearchCV(
    SVD, Hyper_Params, measures=['rmse', 'mae'], cv=3, n_jobs=3)

Train_CV.fit(data_train)

# %% Figures
plt.figure(figsize=(20, 12))
plt.rcParams.update({'font.size': 12})
plt.plot(Train_CV.cv_results['param_reg_all'],
         Train_CV.cv_results['mean_test_rmse'], '.k')
plt.xscale('log')
plt.xlabel(r'Regularization Parameter ($\lambda$)')
plt.ylabel('RMSE')
plt.grid()
plt.title(r'3-Fold CV - Regularization Parameter ($\lambda$)')
plt.savefig('3_fold_CV_Reg_Param.png')
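
# A possible follow-up (not in the original snippet): read off the best
# configuration found by the grid search and refit it on the full data.
print(Train_CV.best_score['rmse'])
print(Train_CV.best_params['rmse'])
best_svd = Train_CV.best_estimator['rmse']
best_svd.fit(data_train.build_full_trainset())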
Example #3
reader = Reader(line_format='item user rating', sep=',',
                rating_scale=(1, 5), skip_lines=1)

data_train = Dataset.load_from_file(file_path, reader=reader)

# %% Hyper parameter tuning and CV analysis
Hyper_Params = {'bsl_options': {'method': ['als'],
                                'n_epochs': [20],
                                'reg_u': [1, 3, 10],
                                'reg_i': [1, 3, 10]}}

start = time.time()
Train_CV = model_selection.GridSearchCV(BaselineOnly,
                                        Hyper_Params,
                                        measures=['rmse'],
                                        cv=3, n_jobs=3,
                                        return_train_measures=True,
                                        joblib_verbose=3)

Train_CV.fit(data_train)

end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)


# %% Figures
reg_i = []
reg_u = []
for i in Train_CV.cv_results['param_bsl_options']:
    # NOTE: loop body reconstructed; the original snippet is cut off here
    reg_i.append(i['reg_i'])
    reg_u.append(i['reg_u'])
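
# Sketch of one way to use the collected values (the original figure code is
# missing from the snippet); it assumes matplotlib.pyplot is imported as plt,
# as in Example #2, and the axis labels and filename are assumptions too.
plt.figure(figsize=(20, 12))
plt.plot(reg_u, Train_CV.cv_results['mean_test_rmse'], '.k')
plt.xscale('log')
plt.xlabel('reg_u')
plt.ylabel('RMSE')
plt.grid()
plt.title('3-Fold CV - BaselineOnly regularization')
plt.savefig('3_fold_CV_Baseline_Reg_Param.png')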
Example #4

from collections import defaultdict


def get_top_n(predictions, n=10):
    # NOTE: the snippet starts mid-function; the first half of get_top_n is
    # reconstructed here following the standard Surprise top-N recipe.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # keep the n items with the highest estimated rating for each user
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n


movie_train = Dataset.load_builtin('ml-100k')
print(movie_train.raw_ratings)

svd_estimator = surprise.SVD
svd_grid = {
    'n_factors': [50, 100, 150],
    'reg_all': [0, 0.4, 0.6],
    'biased': [True, False]
}
gs = model_selection.GridSearchCV(svd_estimator,
                                  svd_grid,
                                  measures=['rmse'],
                                  cv=3)
gs.fit(movie_train)

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])
final_estimator = gs.best_estimator['rmse']
#build final model on entire train data
full_trainset = movie_train.build_full_trainset()
final_estimator.fit(full_trainset)

movie_test = full_trainset.build_anti_testset()
predictions = final_estimator.test(movie_test)

top_n = get_top_n(predictions, n=3)
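
# Sketch (not part of the original example): print each user's top-3
# recommended item ids from the dict returned by get_top_n.
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])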
Example #5

movie_train = read_train_data('E:/train_v2.csv')
print(movie_train.raw_ratings)

knn_estimator = surprise.KNNWithMeans  # GridSearchCV expects the class, not an instance
knn_grid = {
    'k': [10, 20],
    'sim_options': {
        'name': ['cosine'],
        'min_support': [1, 5],
        'user_based': [False]
    }
}
knn_grid_estimator = model_selection.GridSearchCV(knn_estimator,
                                                  knn_grid,
                                                  measures=['rmse'],
                                                  cv=3)
#do grid search using cv strategy
knn_grid_estimator.fit(movie_train)
print(knn_grid_estimator.best_score['rmse'])
print(knn_grid_estimator.best_params['rmse'])
results_df = pd.DataFrame.from_dict(knn_grid_estimator.cv_results)
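# Sketch (not in the original snippet): the cv_results table can be inspected,
# e.g. mean test RMSE for every parameter combination tried.
print(results_df[['params', 'mean_test_rmse']])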
final_model = knn_grid_estimator.best_estimator['rmse']

#build final model using best parameters from grid search
trainSet = movie_train.build_full_trainset()
final_model.fit(trainSet)

rows = csv.reader(open('F:/test_v2.csv'))
rows = list(rows)
rows.pop(0)
Example #6
    return data


movie_train = read_train_data(
    'C:\\Users\\Algorithmica\\Downloads\\train_v2.csv')
print(movie_train.raw_ratings)

knn_grid = {
    'k': [10, 20],
    'sim_options': {
        'name': ['cosine'],
        'min_support': [1, 5],
        'user_based': [False]
    }
}
gs = model_selection.GridSearchCV(KNNBasic, knn_grid, measures=['rmse'], cv=3)
gs.fit(movie_train)

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])
results_df = pd.DataFrame.from_dict(gs.cv_results)
algo = gs.best_estimator['rmse']
trainSet = movie_train.build_full_trainset()
algo.fit(trainSet)

rows = csv.reader(open('F:/test_v2.csv'))
rows = list(rows)
rows.pop(0)
f = open('F:/submission.csv', 'w', newline='')
writer = csv.writer(f)
writer.writerow(['ID', 'rating'])
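
# Possible continuation (not in the original snippet): assuming each test row
# is [ID, user, movie], predict a rating for every row and write it out.
for row in rows:
    prediction = algo.predict(row[1], row[2])
    writer.writerow([row[0], prediction.est])
f.close()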