def knn_z(data, training, testing):
    '''
    Tune KNN with Z-score, then evaluate the tuned model on held-out data.

    A grid search over ``k`` and the similarity options is run on ``data``
    and the combination with the best RMSE is selected. A fresh
    KNNWithZScore is then trained on ``training`` and scored on ``testing``.

    Args:
        data(Dataset): dataset handed to the grid search (per the original
            notes: the whole dataset divided into 5 folds)
        training(Dataset): training data passed to ``KNNWithZScore.train``
        testing(Dataset): test data passed to ``KNNWithZScore.test``

    Returns:
        rmse: RMSE of KNN with Z-score with optimized parameters
        top_n: result of ``get_top_n(predictions, n=5)`` (per the original
            notes: number of unique predictions for top n items)
    '''
    # candidate parameters
    knn_param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False],
        },
    }

    # optimize parameters
    knnz_grid_search = GridSearch(KNNWithZScore, knn_param_grid,
                                  measures=['RMSE'], verbose=False)
    knnz_grid_search.evaluate(data)
    param = knnz_grid_search.best_params['RMSE']
    print('KNNWithZScore:', param)

    # fit model using the optimized parameters
    # The similarity settings must be passed as one ``sim_options`` dict:
    # loose ``name=``/``min_support=``/``user_based=`` keywords are swallowed
    # by ``**kwargs`` and ignored, leaving the model on default similarity.
    # The dict is copied so the grid search's stored best_params is not
    # mutated by the algorithm.
    knnz = KNNWithZScore(k=param['k'],
                         sim_options=dict(param['sim_options']))
    knnz.train(training)

    # evaluate the model using test data
    predictions = knnz.test(testing)
    rmse = accuracy.rmse(predictions, verbose=True)
    top_n = get_top_n(predictions, n=5)
    return rmse, top_n
def knnz_running_time(data):
    '''
    Measure training and prediction running times for KNN with Z-score.

    Parameters are tuned once with a grid search on ``data[3]`` and the
    best-RMSE combination is then reused for every dataset in ``data``.

    Args:
        data(Dataset): a list of datasets with different numbers of users;
            must contain at least four entries because ``data[3]`` is used
            for tuning

    Returns:
        elapsedtime_KnnZtrain: list of running times (seconds) for training,
            one per dataset, in the order of ``data``
        elapsedtime_KnnZtest: list of running times (seconds) for predictions
            on the anti-testset, one per dataset, in the order of ``data``
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on the entire data
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False],
        },
    }
    grid_search = GridSearch(KNNWithZScore, param_grid,
                             measures=['RMSE'], verbose=False)
    # NOTE(review): presumably data[3] is the full/largest dataset - confirm
    # against the caller.
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    sim_options = param['sim_options']

    # using the tuned parameters calculate running times
    for dataset in data:
        # training running time
        # NOTE(review): this timer also covers building the full trainset
        # and the anti-testset, not just ``train``; kept as-is so results
        # stay comparable with earlier measurements.
        training_start = time.time()
        training = dataset.build_full_trainset()
        testing = training.build_anti_testset()
        # Similarity settings must go through ``sim_options``; loose
        # ``name=``/``min_support=``/``user_based=`` keywords are swallowed
        # by ``**kwargs`` and ignored. A copy is passed so each model gets
        # its own dict.
        knnz = KNNWithZScore(k=k, sim_options=dict(sim_options))
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)

    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
# Exhaustive sweep over every (name, user_based, shrinkage, k, min_k)
# combination: train KNNWithZScore on `trainset`, predict on `testset`,
# and print RMSE, FCP and MAE for each combination.
count = 1
for o, p, q, n1, n2 in (
        (o, p, q, n1, n2)
        for o in name
        for p in user_based
        for q in shrinkage
        for n1 in k
        for n2 in min_k):
    print("================================================")

    sim_options = {'name': o, 'user_based': p, 'shrinkage': q}
    algo = KNNWithZScore(k=n1, min_k=n2, sim_options=sim_options)
    algo.train(trainset)

    # The running index is printed after training and before predicting.
    print("This is the #{} parameter combination".format(count))
    predictions = algo.test(testset)
    print("name={}, user_based={}, shrinkage={}, k={}, min_k={}".format(
        o, p, q, n1, n2))

    # Each accuracy helper prints its own metric because verbose=True.
    accuracy.rmse(predictions, verbose=True)
    accuracy.fcp(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

    count += 1