示例#1
0
def knnz_running_time(data):
    '''
        Measure the training and prediction running times of KNN with Z-score.

        The hyper-parameters are tuned once with a grid search on ``data[3]``
        (presumably the complete dataset -- verify against the caller) and the
        best RMSE configuration is then reused for every dataset in ``data``.

        Args:
            data(Dataset): a list of datasets with different numbers of users;
                must contain at least four entries because ``data[3]`` is used
                for tuning

        Returns:
            elapsedtime_KnnZtrain: running time for training, one entry per
                dataset (includes building the full trainset)
            elapsedtime_KnnZtest: running time for predictions on the
                anti-testset, one entry per dataset
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on the entire data
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }
    grid_search = GridSearch(KNNWithZScore,
                             param_grid,
                             measures=['RMSE'],
                             verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    # The similarity settings must be handed over as one ``sim_options`` dict.
    # Passed as loose keyword arguments (name=..., min_support=...,
    # user_based=...) they are ignored and the defaults are used instead.
    sim_options = dict(param['sim_options'])

    # using the tuned parameters calculate running times
    for dataset in data:
        # training running time
        training_start = time.time()
        training = dataset.build_full_trainset()
        knnz = KNNWithZScore(k=k, sim_options=sim_options)
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # Building the prediction input is neither training nor prediction,
        # so it is kept outside both timed regions.
        testing = training.build_anti_testset()

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)
    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
示例#2
0
def knn_z(data, training, testing):
    '''
    Tune KNN with Z-score, then evaluate the tuned model on the test data.

    A grid search over ``k`` and the similarity options selects the
    configuration with the best RMSE on ``data``; a model with that
    configuration is trained on ``training`` and evaluated on ``testing``.

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of KNN with Z-score with optimized parameters
        top_n: result of ``get_top_n(predictions, n=5)`` for the test
            predictions
    '''

    # candidate parameters
    knn_param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False],
        },
    }

    # optimize parameters
    knnz_grid_search = GridSearch(KNNWithZScore, knn_param_grid, measures=['RMSE'], verbose=False)
    knnz_grid_search.evaluate(data)
    param = knnz_grid_search.best_params['RMSE']
    print('KNNWithZScore:', param)

    # fit model using the optimized parameters
    # The similarity settings must travel inside ``sim_options``; passed as
    # loose keyword arguments (name=..., min_support=..., user_based=...) they
    # are ignored and the model falls back to the default similarity setup.
    knnz = KNNWithZScore(k=param['k'], sim_options=dict(param['sim_options']))
    knnz.train(training)

    # evaluate the model using test data
    predictions = knnz.test(testing)
    rmse = accuracy.rmse(predictions, verbose=True)
    top_n = get_top_n(predictions, n=5)

    return rmse, top_n
示例#3
0
    def CFZ(self):
        """Cross-validate a user-based KNNWithZScore model and print P/R/F1.

        Splits ``self.data`` into 5 folds; for every fold the model is fitted
        on the training part, tested on the held-out part, and the mean
        precision, mean recall and resulting F1 score are printed.

        Returns:
            None. The metrics are only printed.
        """
        kf = KFold(n_splits=5)
        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)

        for trainset, testset in kf.split(self.data):
            algo.fit(trainset)
            predictions = algo.test(testset)
            precisions, recalls = self.precision_recall_at_k(predictions)

            # Average the per-key values; an empty mapping yields 0.0 instead
            # of raising ZeroDivisionError.
            P = sum(precisions.values()) / len(precisions) if precisions else 0.0
            R = sum(recalls.values()) / len(recalls) if recalls else 0.0
            # F1 is undefined when both precision and recall are zero;
            # report 0.0 in that case rather than crashing.
            F1 = 2 * P * R / (P + R) if (P + R) else 0.0

            print("Precision : ", P)
            print("Recall    : ", R)
            print("F1        : ", F1)
示例#4
0
文件: KNN.py 项目: LLNL/MTLRecSys
class KNN_Normalized(BaseSurpriseSTLEstimator):
    """Estimator wrapper around Surprise's ``KNNWithZScore`` algorithm.

    Args:
        k: number of neighbours passed to ``KNNWithZScore``.
        name: estimator name forwarded to the base class.
    """

    def __init__(self, k, name='KNN_Normalized'):
        super().__init__(name, 'non_feature_based')
        self.k = k
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def _fit(self, x):
        """Fit the wrapped model on ``x``."""
        self.model.fit(x)

    def _predict(self, x):
        """Return the wrapped model's predictions for ``x``."""
        return self.model.test(x)

    def get_hyper_params(self):
        """Describe the tunable hyper-parameters (only ``k``)."""
        hparams = {'k': {'type': 'integer', 'values': [2, 13]}}
        return hparams

    def set_hyper_params(self, **kwargs):
        """Set ``k`` on both the wrapper and the wrapped model.

        Raises:
            KeyError: if ``k`` is not supplied.
        """
        self.k = kwargs['k']
        # The model was built in __init__ with the old k; without this the
        # new value would never reach the algorithm. Updating the attribute
        # (rather than rebuilding the model) keeps any fitted state intact.
        self.model.k = self.k

    def similarity_matrix(self):
        """Return the result of the wrapped model's ``compute_similarities``."""
        return self.model.compute_similarities()
示例#5
0
        # Innermost three levels of a parameter sweep: shrinkage x k x min_k.
        # NOTE(review): `o`, `p`, `count`, `trainset` and `testset` are bound
        # outside this fragment -- presumably `o`/`p` are loop variables over
        # the similarity names and user_based flags; confirm in the full file.
        for q in shrinkage:
            for n1 in k:
                for n2 in min_k:
                    print("================================================")
                    sim_options = {'name': o, 'user_based': p, 'shrinkage': q}

                    # Build a fresh model for this parameter combination.
                    algo = KNNWithZScore(k=n1,
                                         min_k=n2,
                                         sim_options=sim_options)

                    algo.train(trainset)

                    print("This is the #" + str(count) +
                          " parameter combination")

                    predictions = algo.test(testset)

                    # Echo the combination so the metrics below can be matched to it.
                    print("name=" + str(o) + ", user_based=" + str(p) +
                          ", shrinkage=" + str(q) + ", k=" + str(n1) +
                          ", min_k=" + str(n2))

                    # Report RMSE, FCP and MAE for the test-set predictions.
                    accuracy.rmse(predictions, verbose=True)
                    accuracy.fcp(predictions, verbose=True)
                    accuracy.mae(predictions, verbose=True)
                    # Advance the running combination counter.
                    count = count + 1

# Candidate similarity measures (default is 'msd').
name = [
    'cosine',
    'pearson',
    'msd',
]
# Whether similarities are user based or item based.
user_based = [
    False,
]
# Maximum number of neighbours (default is 40).
k = [
    20,
    40,
]
# Minimum number of neighbours (default is 1).
min_k = [
    1,
    5,
]
示例#6
0
    We set the minimum number of neighbours (min_k) to 1 and the maximum number of neighbours (k) to 40.
    We train the model on the train set. '''

algo2 = KNNBasic(sim_options=sim_options, k=40, min_k=1)
algo2.fit(trainset)

predictions2 = algo2.test(testset)
print("RMSE for KNNBasic:", accuracy.rmse(predictions2, verbose=True))

# In[ ]:
''' We build the model using KNNBaseline, a collaborative-filtering algorithm.
    We set the minimum number of neighbours (min_k) to 1 and the maximum number of neighbours (k) to 40.
    We train the model on the train set. '''

algo3 = KNNBaseline(sim_options=sim_options, k=40, min_k=1)
algo3.fit(trainset)

predictions3 = algo3.test(testset)
print("RMSE for KNNBaseline:", accuracy.rmse(predictions3, verbose=True))

# In[ ]:
''' We build the model using KNNWithZScore, a collaborative-filtering algorithm.
    We set the minimum number of neighbours (min_k) to 1 and the maximum number of neighbours (k) to 40.
    We train the model on the train set. '''

algo4 = KNNWithZScore(sim_options=sim_options, k=40, min_k=1)
algo4.fit(trainset)

predictions4 = algo4.test(testset)
# Label fixed: this result belongs to KNNWithZScore, not KNNBasic.
print("RMSE for KNNWithZScore:", accuracy.rmse(predictions4, verbose=True))
@author: lishuang
@description: Predict MovieLens ratings with neighbourhood-based collaborative filtering, evaluated with K-fold cross-validation
"""

from surprise import KNNWithZScore, Reader, Dataset
from surprise import accuracy
from surprise.model_selection import KFold

# Load the ratings file (comma separated, first line is a header).
reader = Reader(line_format='user item rating timestamp',
                sep=',',
                skip_lines=1)
data = Dataset.load_from_file('data/ratings.csv', reader)

# Item-based collaborative filtering: score using only the k most similar
# neighbours. `verbose` is an argument of the algorithm itself, not a
# similarity option, and must be a real boolean rather than the string 'True'.
algo = KNNWithZScore(k=40,
                     sim_options={
                         'user_based': False,
                     },
                     verbose=True)

kf = KFold(n_splits=3)

# Fit and evaluate on each fold, reporting RMSE and MAE.
for train_set, test_set in kf.split(data):
    algo.fit(train_set)
    pred = algo.test(test_set)
    rmse = accuracy.rmse(pred, verbose=True)
    accuracy.mae(pred, verbose=True)
    print(rmse)