def svd(data, training, testing):
    '''
    Tune SVD parameters, then calculate RMSE, coverage and running time of SVD

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of SVD with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    param_grid = {'n_factors': [25, 50, 100, 250],
                  'n_epochs': [10, 20, 30, 40, 50]}

    # optimize parameters
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('SVD:', param)

    # fit model using the optimized parameters
    svd = SVD(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    svd.train(training)

    # evaluate the model using test data
    predictions = svd.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
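# The tuning functions in this section call a get_top_n helper that is not
# defined here. A minimal sketch of such a helper, modeled on the recipe in
# the Surprise FAQ; the author's actual implementation may differ.
from collections import defaultdict


def get_top_n(predictions, n=10):
    '''Return the n highest-estimated items for each user from a list of
    Surprise Prediction tuples (uid, iid, true_r, est, details).'''
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # sort each user's predictions by estimated rating and keep the top n
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n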
def nmf(data, training, testing):
    '''
    Tune NMF parameters, then calculate RMSE, coverage and running time of NMF

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of NMF with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    nmf_param_grid = {'n_factors': [45, 50, 55, 60],
                      'n_epochs': [45, 50, 55]}

    # optimize parameters
    grid_search = GridSearch(NMF, nmf_param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('NMF:', param)

    # fit model using the optimized parameters
    nmf = NMF(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    nmf.train(training)

    # evaluate the model using test data
    predictions = nmf.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
def knn_m(data, training, testing):
    '''
    Tune KNN with Means parameters, then calculate RMSE, coverage and running
    time of KNN with Means

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of KNN with Means with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    knn_param_grid = {'k': [5, 10, 20],
                      'sim_options': {'name': ['msd', 'cosine', 'pearson'],
                                      'min_support': [1, 5],
                                      'user_based': [False]}}

    # optimize parameters
    knnm_grid_search = GridSearch(KNNWithMeans, knn_param_grid,
                                  measures=['RMSE'], verbose=False)
    knnm_grid_search.evaluate(data)
    param = knnm_grid_search.best_params['RMSE']
    print('KNNWithMeans:', param)

    # fit model using the optimized parameters; the similarity settings must
    # be passed through sim_options, otherwise they are silently ignored
    knnm = KNNWithMeans(k=param['k'], sim_options=param['sim_options'])
    knnm.train(training)

    # evaluate the model using test data
    predictions = knnm.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
def grid_search_knn_surprise(data_train, n_epochs, reg_u, reg_i):
    print('KNN Surprise grid search')

    # Construct KNN algo with params
    param_grid = {
        'bsl_options': {
            'method': ['als'],
            'n_epochs': n_epochs,
            'reg_u': reg_u,
            'reg_i': reg_i
        },
        'k': [3],
        'sim_options': {
            'name': ['msd'],
            'min_support': [5],
            'user_based': [False]
        }
    }

    # Create the Grid search algo with the params
    grid_search = GridSearch(KNNBaseline, param_grid, measures=['RMSE'])

    # Evaluate the Grid search and print the best params
    grid_search.evaluate(data_train)
    print(grid_search.best_score['RMSE'])
    print(grid_search.best_params['RMSE'])

    # Return the best params for the ALS algo
    return grid_search.best_params['RMSE']["bsl_options"]
def test_dict_parameters(small_ml):
    """Dict parameters like bsl_options and sim_options require special
    treatment in the param_grid argument. We here test both in one shot with
    KNNBaseline."""

    param_grid = {
        'bsl_options': {
            'method': ['als', 'sgd'],
            'reg': [1, 2]
        },
        'k': [2, 3],
        'sim_options': {
            'name': ['msd', 'cosine'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }

    small_ml.split(2)

    with pytest.warns(UserWarning):
        grid_search = GridSearch(KNNBaseline, param_grid,
                                 measures=['FCP', 'mae', 'rMSE'], n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(small_ml)

    assert len(grid_search.cv_results['params']) == 32
def test_measure_is_not_case_sensitive():
    param_grid = {'n_epochs': [1], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)

    assert grid_search.best_index['fcp'] == grid_search.best_index['FCP']
    assert grid_search.best_params['mAe'] == grid_search.best_params['MaE']
    assert grid_search.best_score['RmSE'] == grid_search.best_score['RMSE']
def test_best_estimator():
    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)

    best_estimator = grid_search.best_estimator['MAE']
    assert evaluate(best_estimator, data)['MAE'] == grid_search.best_score['MAE']
def test_grid_search_cv_results():
    """Ensure that the number of parameter combinations is correct."""

    param_grid = {'n_epochs': [1, 2], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)

    assert len(grid_search.cv_results['params']) == 8
def test_grid_search_cv_results():
    param_grid = {
        'n_epochs': [1, 2],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
        'n_factors': [1]
    }
    grid_search = GridSearch(SVD, param_grid)
    grid_search.evaluate(data)

    assert len(grid_search.cv_results['params']) == 8
def test_measure_is_not_case_sensitive():
    """Ensure that all best_* dictionaries are case insensitive."""

    param_grid = {'n_epochs': [1], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid,
                                 measures=['FCP', 'mae', 'rMSE'], n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)

    assert grid_search.best_index['fcp'] == grid_search.best_index['FCP']
    assert grid_search.best_params['mAe'] == grid_search.best_params['MaE']
    assert grid_search.best_score['RmSE'] == grid_search.best_score['RMSE']
def knnz_running_time(data):
    '''
    Calculates the running times for training and predictions for KNN with Z-score

    Args:
        data(Dataset): a list of datasets with different numbers of users

    Returns:
        elapsedtime_KnnZtrain: running time for training
        elapsedtime_KnnZtest: running time for predictions on testset
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on the entire data
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }
    grid_search = GridSearch(KNNWithZScore, param_grid,
                             measures=['RMSE'], verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    # the tuned similarity settings (name, min_support, user_based) must be
    # passed back through sim_options, otherwise they are silently ignored
    sim_options = param['sim_options']

    # using the tuned parameters calculate running times
    for i in range(len(data)):
        # training running time
        training_start = time.time()
        training = data[i].build_full_trainset()
        testing = training.build_anti_testset()
        knnz = KNNWithZScore(k=k, sim_options=sim_options)
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)

    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
def test_best_rmse_fcp():
    param_grid = {'n_epochs': [1, 2], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'rmse'])
    grid_search.evaluate(data)

    assert grid_search.best_params['RMSE'] == {
        'lr_all': 0.005, 'n_factors': 1, 'reg_all': 0.4, 'n_epochs': 2,
        'init_std_dev': 0}

    assert grid_search.best_params['FCP'] == {
        'reg_all': 0.6, 'n_epochs': 2, 'lr_all': 0.002, 'n_factors': 1,
        'init_std_dev': 0}
def parameter_tuning(self):
    param_grid = {'n_epochs': [10, 20, 40],
                  'lr_all': [0.002, 0.005, 0.01],
                  'reg_all': [0.05, 0.1, 0.2]}

    print("Starting grid search...")
    start_time = time.perf_counter()
    self.grid_search = GridSearch(SVD, param_grid, measures=['RMSE'])
    self.grid_search.evaluate(self.data)
    print('Grid search took {}s'.format(time.perf_counter() - start_time))

    self.svd = self.grid_search.best_estimator['RMSE']
    print(self.grid_search.best_score['RMSE'])
    print(self.grid_search.best_params['RMSE'])
def test_best_estimator():
    """Ensure that the best estimator is the one giving the best score (by
    re-running it)"""

    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid,
                                 measures=['FCP', 'mae', 'rMSE'], n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)

    best_estimator = grid_search.best_estimator['MAE']
    with pytest.warns(UserWarning):
        assert evaluate(best_estimator, data)['MAE'] == grid_search.best_score['MAE']
def surprise_gridsearch(param_grid, model, data):
    """
    Grid search on a surprise recommender model to extract the best parameters

    Args:
        param_grid: dictionary of model parameters and candidate parameter values
        model: surprise recommender algorithm class (e.g. SVD, KNNBasic)
        data: surprise Dataset

    Returns:
        The best score from the grid search as well as the accompanying best
        parameters
    """
    grid_search = GridSearch(model, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    return grid_search.best_score['RMSE'], grid_search.best_params['RMSE']
def grid_search_rec(data, algo, param_grid):
    """
    Grid search a RS algorithm using surprise lib.

    Inputs
    ---------
    data: surprise Dataset of ratings
    algo: surprise algorithm class to grid search
    param_grid: parameter map to grid search

    Returns
    --------
    best_estimator: surprise algorithm with best hyperparameters
    """
    grid_search = GridSearch(algo, param_grid, measures=['RMSE'], verbose=True)
    grid_search.evaluate(data)
    return grid_search.best_estimator["RMSE"]
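# A hypothetical call to grid_search_rec, assuming `data` has already been
# split into folds (e.g. data.split(n_folds=3)); the grid values below are
# illustrative only, not tuned settings.
best_svd = grid_search_rec(data, SVD,
                           {'n_epochs': [10, 20], 'lr_all': [0.002, 0.005]})
# best_estimator returns an unfitted algorithm, so train it before predicting
best_svd.train(data.build_full_trainset())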
def grid_search_svd_surprise(data_train, n_epochs, lr_all, reg_all, init_mean, n_factors):
    print('SVD Surprise grid search')

    # Construct SVD algo with params
    param_grid = {
        'n_epochs': n_epochs,
        'lr_all': lr_all,
        'reg_all': reg_all,
        'init_mean': init_mean,
        'n_factors': n_factors
    }

    # Create the Grid search algo with the params
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)

    # Evaluate the Grid search and print the best params
    grid_search.evaluate(data_train)
    print(grid_search.best_score['RMSE'])
    print(grid_search.best_params['RMSE'])

    # Return the best params for the SVD algo
    return grid_search.best_params['RMSE']
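# A hypothetical call to grid_search_svd_surprise: every argument is a list of
# candidate values (GridSearch expects lists), and data_train is a surprise
# Dataset that has been split into folds. The values shown are placeholders.
best_svd_params = grid_search_svd_surprise(
    data_train,
    n_epochs=[10, 20],
    lr_all=[0.002, 0.005],
    reg_all=[0.02, 0.1],
    init_mean=[0],
    n_factors=[50, 100])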
def svdpp_running_time(data):
    '''
    Calculates the running times for training and predictions for SVD++

    Args:
        data(Dataset): a list of datasets with different numbers of users

    Returns:
        elapsedtime_SVDpptrain: running time for training
        elapsedtime_SVDpptest: running time for predictions on testset
    '''
    elapsedtime_SVDpptrain = []
    elapsedtime_SVDpptest = []

    # tune the parameters on the entire data
    param_grid = {
        'n_factors': [25, 50, 100, 250],
        'n_epochs': [10, 20, 30, 40, 50]
    }
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    n_factors = param['n_factors']
    n_epochs = param['n_epochs']

    # using the tuned parameters calculate running times
    for i in range(len(data)):
        # training running time
        training_start = time.time()
        training = data[i].build_full_trainset()
        testing = training.build_anti_testset()
        svdpp = SVDpp(n_factors=n_factors, n_epochs=n_epochs)
        svdpp.train(training)
        elapsedtime_SVDpptrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        svdpp.test(testing)
        elapsedtime_SVDpptest.append(time.time() - test_start)

    return elapsedtime_SVDpptrain, elapsedtime_SVDpptest
def test_dict_parameters():
    """Dict parameters like bsl_options and sim_options require special
    treatment. We here test both in one shot with KNNBaseline."""

    param_grid = {
        'bsl_options': {
            'method': ['als', 'sgd'],
            'reg': [1, 2]
        },
        'k': [2, 3],
        'sim_options': {
            'name': ['msd', 'cosine'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }

    grid_search = GridSearch(KNNBaseline, param_grid,
                             measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)

    assert len(grid_search.cv_results['params']) == 32
def knn(data, training, testing):
    '''
    Tune Basic KNN parameters, then calculate RMSE, coverage and running time
    of Basic KNN

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of Basic KNN with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    knn_param_grid = {'k': [5, 10, 20],
                      'sim_options': {'name': ['msd', 'cosine', 'pearson'],
                                      'min_support': [1, 5],
                                      'user_based': [False]}}

    # optimize parameters
    knn_grid_search = GridSearch(KNNBasic, knn_param_grid,
                                 measures=['RMSE'], verbose=False)
    knn_grid_search.evaluate(data)
    param = knn_grid_search.best_params['RMSE']
    print('KNNBasic:', param)

    # RMSE against parameters
    result_df = pd.DataFrame.from_dict(knn_grid_search.cv_results)
    result_df.to_csv('data/knn_rmse_against_param.csv')

    # fit model using the optimized parameters; the similarity settings must
    # be passed through sim_options, otherwise they are silently ignored
    knn = KNNBasic(k=param['k'], sim_options=param['sim_options'])
    knn.train(training)

    # evaluate the model using test data
    predictions = knn.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
def svdpp(data, training, testing):
    '''
    Tune SVD++ parameters, then calculate RMSE, coverage and running time of SVD++

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of SVD++ with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    param_grid = {'n_factors': [25, 50, 100, 250],
                  'n_epochs': [10, 20, 30, 40, 50]}

    # optimize parameters
    grid_search = GridSearch(SVDpp, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('SVDpp:', param)

    # RMSE against parameters
    result_df = pd.DataFrame.from_dict(grid_search.cv_results)
    result_df.to_csv('data/svdpp_rmse_against_param.csv')

    # fit model using the optimized parameters
    svdpp = SVDpp(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    svdpp.train(training)

    # evaluate the model using test data
    predictions = svdpp.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
def test_same_splits(small_ml):
    """Ensure that all parameter combinations are tested on the same splits
    (we check that average RMSE scores are the same, which should be
    enough)."""

    small_ml.split(3)

    # all RMSE should be the same (as param combinations are the same)
    param_grid = {'n_epochs': [1, 1], 'lr_all': [.5, .5]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], n_jobs=-1)
    grid_search.evaluate(small_ml)

    rmse_scores = [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # evaluate grid search again, to make sure that splits are still the same.
    grid_search.evaluate(small_ml)
    rmse_scores += [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1
def test_same_splits():
    """Ensure that all parameter combinations are tested on the same splits
    (we check that average RMSE scores are the same, which should be
    enough)."""

    data_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_train')
    data = Dataset.load_from_file(data_file, reader=Reader('ml-100k'))
    data.split(3)

    # all RMSE should be the same (as param combinations are the same)
    param_grid = {'n_epochs': [1, 1], 'lr_all': [.5, .5]}
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], n_jobs=-1)
    grid_search.evaluate(data)

    rmse_scores = [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # evaluate grid search again, to make sure that splits are still the same.
    grid_search.evaluate(data)
    rmse_scores += [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1
df = df[cols]

reader = Reader(rating_scale=(1, 5))
trainset = Dataset.load_from_df(df, reader)
trainset.split(n_folds=10)

# http://surprise.readthedocs.io/en/stable/getting_started.html#tune-algorithm-parameters-with-gridsearch
# best params: MEAN RMSE: 1.2525329258
param_grid = {
    'n_factors': [1],
    'n_epochs': [45],
    'lr_bu': [0.004],
    'lr_bi': [0.008],
    'lr_pu': [0.0015],
    'lr_qi': [0.000025],
    'reg_bu': [0.24],
    'reg_bi': [0.24],
    'reg_pu': [0.055],
    'reg_qi': [0.0085],
    'init_mean': [0],
    'init_std_dev': [0]
}

grid_search = GridSearch(SVD, param_grid, measures=['RMSE'])
grid_search.evaluate(trainset)
print(grid_search.best_score['RMSE'])
print(grid_search.best_params['RMSE'])
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
dataset.split(n_folds=5)

"""
# Sample Run
algo = pa.KNNBasic(k=10, min_k=5)
perf = evaluate(algo, dataset, measures=['MAE', 'RMSE', 'FCP'])
print_perf(perf)
"""

similarities = ['cosine', 'msd', 'pearson', 'pearson_baseline']
user_based = [True, False]

start_time = ('Timestamp: {:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now()))

sim_options = {'name': similarities, 'user_based': user_based}
param_grid = {
    'k': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_k': [5],
    'sim_options': sim_options
}

grid_search = GridSearch(pa.KNNBasic, param_grid=param_grid,
                         measures=['MAE', 'RMSE', 'FCP'])
grid_search.evaluate(dataset)

results_df = pd.DataFrame.from_dict(grid_search.cv_results)
results_df.to_csv("KNNBasic_Results.csv")

end_time = ('Timestamp: {:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now()))

print("Start Time: ", start_time)
print("End Time: ", end_time)
""" This module describes how to manually train and test an algorithm without using the evaluate() function. """ from __future__ import (absolute_import, division, print_function, unicode_literals) from surprise import GridSearch from surprise import SVD from surprise import Dataset param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005], 'reg_all': [0.4, 0.6]} grid_search = GridSearch(SVD, param_grid, measures={'RMSE', 'FCP'}, verbose=False) # Prepare Data data = Dataset.load_builtin('ml-100k') data.split(n_folds=3) grid_search.evaluate(data) # best RMSE score print(grid_search.best_score['RMSE']) # >>> 0.96117566386 # combination of parameters that gave the best RMSE score print(grid_search.best_params['RMSE']) # >>> {'reg_all': 0.4, 'lr_all': 0.005, 'n_epochs': 10}
print(ratings_explicit.shape)

# #### split the ratings table into training and testing datasets
ratings_train, ratings_test = train_test_split(
    ratings_explicit, stratify=ratings_explicit['UserID'],
    test_size=0.30, random_state=0)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_train[['UserID', 'ISBN', 'Rating']], reader)

parameter_grid = {'n_factors': [50, 100, 150, 200, 250, 300]}
grid_search = GridSearch(SVD, parameter_grid, measures=['RMSE', 'MAE'])
grid_search.evaluate(data)

best_parameters = grid_search.best_params
print(best_parameters)

# best RMSE and MAE score
best_result = grid_search.best_score
print(best_result)
# shuffle ratings if you want
random.shuffle(raw_ratings)

# A = 90% of the data, B = 10% of the data
threshold = int(.9 * len(raw_ratings))
A_raw_ratings = raw_ratings[:threshold]
B_raw_ratings = raw_ratings[threshold:]

data.raw_ratings = A_raw_ratings  # data is now the set A
data.split(n_folds=3)

# Select your best algo with grid search.
print('Grid Search...')
param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005]}
grid_search = GridSearch(SVD, param_grid, measures={'RMSE'}, verbose=True)
grid_search.evaluate(data)

algo = grid_search.best_estimator['RMSE']

# retrain on the whole set A
trainset = data.build_full_trainset()
algo.train(trainset)

# Compute biased accuracy on A
predictions = algo.test(trainset.build_testset())
print('Biased accuracy on A,', end=' ')
accuracy.rmse(predictions)

# Compute unbiased accuracy on B
testset = data._construct_testset(B_raw_ratings)  # testset is now the set B
def recommend(self, uids, n_items=10, verbose=False):
    if verbose:
        print('■ ■ ■ {} ■ ■ ■'.format(self.algorithm.__name__))

    data = self.data
    trained_model = os.path.expanduser(self.dump_file_name)

    try:
        _, algo = dump.load(trained_model)
    except FileNotFoundError:
        if verbose:
            print('■ Performing random sampling on the dataset')
        raw_ratings = data.raw_ratings
        np.random.shuffle(raw_ratings)
        threshold = int(self.trainset_size * len(raw_ratings))
        trainset_raw_ratings = raw_ratings[:threshold]
        testset_raw_ratings = raw_ratings[threshold:]
        data.raw_ratings = trainset_raw_ratings

        if any(self.param_grid):
            if self.perf_measure not in ['rmse', 'mae', 'fcp']:
                raise ValueError('■ Invalid accuracy measurement provided')
            if verbose:
                print('■ Performing Grid Search')
            data.split(n_folds=self.n_folds)
            grid_search = GridSearch(self.algorithm,
                                     param_grid=self.param_grid,
                                     measures=[self.perf_measure],
                                     verbose=verbose)
            grid_search.evaluate(data)
            algo = grid_search.best_estimator[self.perf_measure]
            if self.sim_options is not None:
                algo.sim_options = self.sim_options
            if self.bsl_options is not None:
                algo.bsl_options = self.bsl_options
            if verbose:
                print('■ Grid Search summary')
                cv_results = grid_search.cv_results
                del cv_results['scores']
                df = pd.DataFrame.from_dict(cv_results)
                sort_column = self.perf_measure.upper()
                if sort_column in df.columns:
                    df = df.sort_values([sort_column], ascending=True)
                pretty_print(df)
                print('■ Algorithm properties')
                print_object(algo)
        else:
            algo = self.algorithm()
            algo.verbose = verbose

        if verbose:
            print('■ Training using trainset')
        trainset = data.build_full_trainset()
        algo.train(trainset)

        print('■ Evaluating using testset')
        testset = data.construct_testset(testset_raw_ratings)
        predictions = algo.test(testset)
        accuracy.rmse(predictions)

    if verbose:
        print('■ Using the best estimator on the full dataset')
    data = self.data
    trainset = data.build_full_trainset()
    if self.anti_testset:
        testset = trainset.build_anti_testset()
    else:
        testset = trainset.build_testset()

    start = default_timer()
    algo.train(trainset)
    predictions = algo.test(testset)

    if self.dump_model:
        if verbose:
            print('■ Saving the trained model')
        dump.dump(trained_model, predictions, algo, verbose)

    print('■ Accuracy scores')
    accuracy.mae(predictions)
    accuracy.rmse(predictions)
    self.print_precision_call(predictions, uids, n_items)

    recommendations = self.get_recommendations_for_users(
        uids, predictions, n_items)

    duration = default_timer() - start
    duration = datetime.timedelta(seconds=math.ceil(duration))
    print('■ Time elapsed:', duration)

    if verbose:
        print('■ Recommendations:')
        pretty_print(recommendations)

    return recommendations
axarr[1].plot(best_item_est_oma, marker='o')
axarr[1].plot(best_item_est, marker='<')
axarr[1].plot(best_item_est_svd, marker='>')
f.show()

raise SystemExit('ddd')  # deliberately stop the script here

#%% SVD
param_grid_SVD = {
    'n_factors': [5, 10, 15, 20, 40, 80],
    'n_epochs': [35],
    'lr_all': [0.007, 0.005, 0.003],
    'reg_all': [0.005, 0.01, 0.02, 0.05]
}

grid_search = GridSearch(SVD, param_grid_SVD, measures=['MAE', 'RMSE'])
grid_search.evaluate(data)

# combination of parameters that gave the best MAE score
print('best params (MAE): ' + str(grid_search.best_params['MAE']))
# combination of parameters that gave the best RMSE score
print('best params (RMSE): ' + str(grid_search.best_params['RMSE']))

params = grid_search.best_params['MAE']
algo_SVD = SVD(verbose=True,
               n_factors=params['n_factors'],
               n_epochs=params['n_epochs'],
               lr_all=params['lr_all'],
               reg_all=params['reg_all'])