Example #1
def co_clustering():
    print('CoClustering algorithm...')
    print('Which dataset do you want to use?')
    print('(1) Android')
    print('(2) WordPress')
    data_utilizar = input()

    # Encoding workaround (Python 2) so the file can be read without errors.
    reload(sys)
    sys.setdefaultencoding('latin-1')

    if data_utilizar == 1:
        # The Android file needs no correction, so use it directly.
        file_path_corregido = configuration.FILE_PATH_ANDROID
        reader = Reader(line_format='user item rating', sep='\t')
        reader = Reader(line_format='user item rating', sep='\t')
    else:
        file_path = configuration.FILE_PATH_WORDPRESS
        file_path_corregido = configuration.FILE_PATH_WORDPRESS_CORREGIDA
        util.corregir_csv(file_path, file_path_corregido, sep="|")
        reader = Reader(line_format='user item rating', sep='|')

    data = Dataset.load_from_file(file_path_corregido, reader=reader)
    data.split(n_folds=10)

    algo = CoClustering()

    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #2
def surpriseTesting():
    """scikit-surprise library testing"""
    # Load the movielens-100k dataset (download it if needed),
    # and split it into 3 folds for cross-validation.
    data = surprise.Dataset.load_builtin('ml-100k')

    #  reader = surprise.Reader(line_format='user item rating', sep=',')
    #  data = surprise.Dataset.load_from_file('temp.csv', reader=reader)

    data.split(n_folds=3)
    trainSet = data.build_full_trainset()
    for rating in trainSet.all_ratings():
        print(rating)

    print(trainSet.n_items)
    algo = surprise.SVD()
    #  algo = surprise.KNNBasic()
    algo.fit(trainSet)
    # Evaluate the performance of our algorithm on the dataset.
    perf = surprise.evaluate(algo, data, measures=['RMSE', 'MAE'])

    surprise.print_perf(perf)
    uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
    iid = str(242)  # raw item id (as in the ratings file). They are **strings**!

    # Get a prediction for a specific user and item.
    pred = algo.predict(uid, iid, r_ui=-1, verbose=True)
    print(pred.est)
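
A side note on the raw-vs-inner id distinction flagged in the comments above: a Trainset maps raw ids (the strings from the ratings file) to contiguous inner ids, and exposes converters both ways. A minimal sketch, assuming the ml-100k trainSet from Example #2:

# Convert between raw ids (strings) and the Trainset's inner ids.
inner_uid = trainSet.to_inner_uid('196')   # raw -> inner
raw_uid = trainSet.to_raw_uid(inner_uid)   # inner -> raw, gives back '196'
inner_iid = trainSet.to_inner_iid('242')
print(inner_uid, raw_uid, inner_iid)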
Example #3
def ibcf_eval(co_pe):
    kfold = int(input("Enter number of folds required to evaluate: "))

    reader = Reader(line_format="user item rating",
                    sep='\t',
                    rating_scale=(1, 5))
    df = Dataset.load_from_file('ml-100k/u.data', reader=reader)

    splitter(kfold, df)

    # Similarity & algorithm definition (item-based KNN).
    sim_op = {'name': co_pe, 'user_based': False}
    algo = KNNBasic(sim_options=sim_op)

    # Run the evaluation over the folds prepared by splitter().
    start = time.time()
    perf = evaluate(algo, df, measures=['RMSE', 'MAE'])
    end = time.time()

    print_perf(perf)

    print "\nTotal Time elapsed =", (end - start)
    print "Average time per fold =", (end - start) / kfold, "\n"

    return perf
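
Hypothetical usage of ibcf_eval, assuming splitter() and the surprise imports are defined elsewhere in the module; the similarity name is passed straight into sim_options:

# Item-based CF with Pearson similarity; prompts for the fold count.
perf = ibcf_eval('pearson')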
Example #4
def batchrunSVDpp(data, al, folds):
    '''
    Run a batch of evaluations on a data file.

    Args:
        data: data file name as a string.
        al: algorithm name as a string ('SVDpp' or 'Base').
        folds: number of folds for cross-validation, integer.
    Returns:
        None
    '''

    # Load the data with the given data format
    # (path, reader and bsl_options are module-level globals).
    print("load data...")
    data = Dataset.load_from_file(path + data, reader=reader)

    # Split the data into the requested number of folds for cross-validation.
    print("Split data...")
    data.split(n_folds=folds)

    if al == 'SVDpp':
        # We'll use the famous SVD++ algorithm.
        algo = SVDpp()
    elif al == 'Base':
        algo = BaselineOnly(bsl_options=bsl_options)
    else:
        raise ValueError('unknown algorithm: %s' % al)

    # Evaluate the performance of the algorithm on the dataset.
    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])

    print_perf(perf)
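
A hypothetical call, assuming the module-level path, reader, and bsl_options globals are configured; the file name is illustrative only:

batchrunSVDpp('ratings.dat', 'SVDpp', 5)   # 5-fold CV with SVD++
batchrunSVDpp('ratings.dat', 'Base', 5)    # baseline-only comparison run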
Example #5
def knn(data):
    data.split(n_folds=3)
    # We'll use the famous KNNBasic algorithm.
    knn = KNNBasic()
    # Evaluate the performance of our algorithm on the dataset.
    perf = evaluate(knn, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #6
def svd_pp():
    print('SVD++ algorithm...')
    print('Which dataset do you want to use?')
    print('(1) Android')
    print('(2) WordPress')
    data_utilizar = input()

    # Encoding workaround (Python 2) so the file can be read without errors.
    reload(sys)
    sys.setdefaultencoding('utf8')

    if data_utilizar == 1:
        file_path = configuration.FILE_PATH_ANDROID
        reader = Reader(line_format='user item rating', sep='\t')
    else:
        file_path = configuration.FILE_PATH_WORDPRESS
        reader = Reader(line_format='user item rating', sep=',')

    # Dataset
    data = Dataset.load_from_file(file_path, reader=reader)
    data.split(n_folds=10)

    algo = SVDpp()

    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #7
def ubcf_eval(co_pe):
    kfold = int(input("Enter number of folds required to evaluate: "))

    reader = Reader(line_format="user item rating", sep='\t', rating_scale=(1, 5))
    df = Dataset.load_from_file('ml-100k/u.data', reader=reader)

    splitter(kfold,df)

    # Similarity & algorithm definition (user-based KNN).
    sim_op = {'name': co_pe, 'user_based': True}
    algo = KNNBasic(sim_options=sim_op)

    # Run the evaluation over the folds prepared by splitter().
    start = time.time()
    perf = evaluate(algo, df, measures=['RMSE', 'MAE'])
    end = time.time()

    print_perf(perf)

    print "\nTotal Time elapsed =", (end - start)
    print "Average time per fold =", (end - start)/kfold, "\n"

    print perf

    ds = pd.read_csv("pred_matrix-full_ubcf.csv")
    confusion_matrix = np.matrix(ds)

    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    TP = np.diag(confusion_matrix)
    TN = confusion_matrix.sum() - (FP + FN + TP)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)

    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)

    print "\nTrue Positive:\n", TP, "\n\nTrue Negative\n", TN, "\n\nFalse Positive\n", FP, "\n\nFalse Negative\n", FN
    print "-" * 30
    print "\nTrue Postive Ratio =", TPR, "\n\nFalse Positive Ratio =", FPR
    print "-" * 30

    print "*" * 20
    print confusion_matrix

    print "Accuracy with current Algorithm", algo, "is ", ACC.mean(axis=0)
Example #8
def q7():
    file_path = os.path.expanduser('restaurant_ratings.txt')
    reader = Reader(line_format='user item rating timestamp', sep='\t')
    data = Dataset.load_from_file(file_path, reader=reader)

    data.split(n_folds=3)

    algo = NMF()
    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #9
def IBCFpearson():
    file_path = os.path.expanduser('restaurant_ratings.txt')
    reader = Reader(line_format='user item rating timestamp', sep='\t')
    data = Dataset.load_from_file(file_path, reader=reader)

    data.split(n_folds=3)

    algo = KNNBasic(sim_options={'name': 'pearson', 'user_based': False})
    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #10
def runSurprise(algo,
                train,
                test,
                algo_string,
                n_folds=5,
                writeCSV=False,
                file_name="result.csv"):
    """
    Run the fitting procedure on the training data. Write the result for the test
    data in its "Result" data field.

    Args:
        algo: Surprise algorithm (SVD, SVDpp, NMF, etc.)
        train (pandas DataFrame): training data
        test (pandas DataFrame): test data
        algo_string (string): printable name of the algorithm
        n_folds (int): Number of k-folds
        writeCSV (bool): set to True to write a .csv submission
        file_name (string): name for the .csv file
    """
    if writeCSV:
        sub = datahelper.load_submission()

    df = pd.DataFrame(train)

    # A reader is needed, but only the rating_scale param is required.
    reader = Reader(rating_scale=(1, 5))
    # The columns must correspond to user id, item id and ratings (in that order).
    data = Dataset.load_from_df(df[['User', 'Item', 'Rating']], reader)
    random.seed(42)
    data.split(n_folds=n_folds)

    # Evaluate the performance of our algorithm on the dataset.
    perf = evaluate(algo, data, measures=['RMSE'])
    print_perf(perf)

    # estimate() takes inner ids; raw ids are assumed here to be 1-based
    # and contiguous, so subtracting 1 maps them to inner ids.
    for index, row in test.iterrows():
        test.at[index, "Result"] = algo.estimate(row['User'] - 1,
                                                 row['Item'] - 1)

    if writeCSV:
        file_out = open(file_name, 'w')
        file_out.truncate()
        file_out.write('Id,Prediction\n')

        for index, row in sub.iterrows():
            file_out.write("r{us}_c{mo},{res}\n".format(us=row['User'],
                                                        mo=row['Item'],
                                                        res=algo.estimate(
                                                            row['User'] - 1,
                                                            row['Item'] - 1)))
        file_out.close()
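
Hypothetical usage, assuming datahelper also provides the train/test frames with User, Item, and Rating columns (the load_data helper below is made up for illustration):

from surprise import SVD

train, test = datahelper.load_data()   # hypothetical helper
runSurprise(SVD(), train, test, 'SVD', n_folds=5,
            writeCSV=True, file_name='svd_submission.csv')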
Example #11
def surprise_cross_validate(algo, data, *options):
    """
    3-Fold cross-validation on surprise recommendation model.

    Args:
        algo: instansitated recommender model
        data: surprise dataframe
        *options: additional parameter options to gridsearch on

    Returns:
        Mean RMSE of 3-Fold cross-validated model.
    """
    perf = evaluate(algo, data, measures=['RMSE'])
    print_perf(perf)
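
Minimal usage sketch, assuming data is a surprise Dataset that has already been split into 3 folds:

from surprise import SVD

surprise_cross_validate(SVD(), data)   # prints per-fold and mean RMSE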
Example #12
def number15():
    data.split(n_folds=3)
    ks = []
    ubcf_rmses = []
    ibcf_rmses = []
    # Sweep the neighborhood size k from 1 to 101 in steps of 10.
    for k_num in range(1, 102, 10):
        ubcf_msd_algo = KNNBasic(k=k_num,
                                 sim_options={
                                     'name': 'MSD',
                                     'user_based': True
                                 })
        ubcf_perf = evaluate(ubcf_msd_algo, data, measures=['RMSE'])
        print_perf(ubcf_perf)
        for value in ubcf_perf.values():
            ubcf_rmses.append(sum(value) / 3)

        ibcf_msd_algo = KNNBasic(k=k_num,
                                 sim_options={
                                     'name': 'MSD',
                                     'user_based': False
                                 })
        ibcf_perf = evaluate(ibcf_msd_algo, data, measures=['RMSE'])
        print_perf(ibcf_perf)
        for value in ibcf_perf.values():
            ibcf_rmses.append(sum(value) / 3)

        ks.append(k_num)

    # Plot mean RMSE against k for user-based, then item-based CF.
    plt.bar(ks, ubcf_rmses)
    plt.show()
    plt.bar(ks, ibcf_rmses)
    plt.show()
Example #13
def knn_baseline():
    print('KNN Baseline algorithm...')
    print('Which dataset do you want to use?')
    print('(1) Android')
    print('(2) WordPress')
    data_utilizar = input()

    # Encoding workaround (Python 2) so the file can be read without errors.
    reload(sys)
    sys.setdefaultencoding('utf8')

    if data_utilizar == 1:
        file_path = configuration.FILE_PATH_ANDROID
        reader = Reader(line_format='user item rating', sep='\t')
    else:
        file_path = configuration.FILE_PATH_WORDPRESS
        reader = Reader(line_format='user item rating', sep=',')

    # Dataset
    data = Dataset.load_from_file(file_path, reader=reader)
    data.split(n_folds=10)
    """Segmento que utiliza KNN para el analisis:
        'k' Es el numero maximo de vecinos a tomar en cuenta para la agregacion
        'min_k' El numero minimo de vecinos a tomar en cuenta para la agregacion.
            Si no hay suficientes vecinos,la predicción se establece en la media global de todas las calificaciones
        'sim_options' son las opciones de similitud que utiliza el knn
        'bsl_options' configuracion de las estimaciones de base"""

    k = 40
    min_k = 1
    sim_options = {
        'name': 'pearson_baseline',
        'user_based': 0  # no shrinkage
    }
    bsl_options = {'method': 'als', 'n_epochs': 5, 'reg_u': 12, 'reg_i': 5}

    algo = KNNBaseline(k=k,
                       min_k=k,
                       sim_options=sim_options,
                       bsl_options=bsl_options)

    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #14
def number12():
    data.split(n_folds=3)
    svd_algo = SVD()
    perf = evaluate(svd_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    pmf_algo = SVD(biased=False)
    perf = evaluate(pmf_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    nmf_algo = NMF()
    perf = evaluate(nmf_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ubcf_algo = KNNBasic(sim_options={'user_based': True})
    perf = evaluate(ubcf_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ibcf_algo = KNNBasic(sim_options={'user_based': False})
    perf = evaluate(ibcf_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)
Example #15
file_path = os.path.expanduser('restaurant_ratings.txt')
reader = Reader(line_format='user item rating timestamp', sep='\t')
data3Folds = Dataset.load_from_file(file_path, reader=reader)

data3Folds.split(n_folds=3)

#
# 3-Folds Comparison
#
if threeFolds:
    print('SVD')
    algoSVD = SVD()
    start_time = time.time()
    perfSVD = evaluate(algoSVD, data3Folds, measures=['RMSE', 'MAE'])
    end_time = time.time()
    print_perf(perfSVD)
    print(end_time - start_time, '\n\n')

    print('PMF')
    algoPMF = SVD(biased=False)
    start_time = time.time()
    perfPMF = evaluate(algoPMF, data3Folds, measures=['RMSE', 'MAE'])
    end_time = time.time()
    print_perf(perfPMF)
    print(end_time - start_time, '\n\n')
    
    print('NMF')
    algoNMF = NMF()
    start_time = time.time()
Example #16
def evaluate(self):
    print_perf(self.metrics(None))
Example #17
def evaluate(self):
    print_perf(self.metrics(None))
Example #18
data = data.merge(activity_count, on=['user', 'hotel'], how='left')
data['browse'] = data.browse.fillna(0)
data = data[['user', 'hotel', 'browse']]


# tentatively CV test for some algorithms
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(data, reader)

data_cv = data  # note: an alias, not a copy; the split applies to `data` too
data_cv.split(n_folds=5)

# SVD test
svd = SVD()
perf = evaluate(svd, data, measures=['RMSE'])
print_perf(perf)      # MSE 0.052

param_svd = {'n_factors': [50, 100], 'lr_all': [0.003, 0.005],
             'reg_all': [0.05, 0.1, 0.5]}
gs = GridSearch(SVD, param_svd, measures=['RMSE'])
gs.evaluate(data_cv) # RMSE 0.2272 ~ 0.2284, after many tests notice 0.2272 is a benchmark, 100, 0.003, 0.1

# Co-clustering test
coc = CoClustering()
perf = evaluate(coc, data, measures=['RMSE'])
print_perf(perf)     # MSE 0.053

param_coc = {'n_cltr_u': [3, 5, 7], 'n_cltr_i': [3, 5, 7],
             'n_epochs': [10, 20]}
gs = GridSearch(CoClustering, param_coc, measures=['RMSE'])
gs.evaluate(data_cv)  # generally worse than SVD here, especially for larger cluster numbers
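
After gs.evaluate(data_cv), the old GridSearch object keeps the winning configuration per measure; a short sketch of pulling it out (attribute names as in the old, pre-GridSearchCV surprise API):

print(gs.best_score['RMSE'])           # best mean RMSE found on the grid
print(gs.best_params['RMSE'])          # parameter combination that achieved it
best_algo = gs.best_estimator['RMSE']  # algorithm instance with those params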
Example #19
#file_path = os.path.expanduser('restaurant_ratings')
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file('restaurant_ratings.txt', reader=reader)

data.split(n_folds=3)

# Starting DataFrame to store the needed values
df = pd.DataFrame([],
                  index=[0, 1, 2, 3, 4, 5, 6, 7],
                  columns=[
                      'Algorithm', 'RMSE Fold 1', 'RMSE Fold 2', 'RMSE Fold 3',
                      'RMSE Mean', 'MAE Fold 1', 'MAE Fold 2', 'MAE Fold 3',
                      'MAE Mean'
                  ])
'''
#SVD algorithm
algo = SVD()
perf = evaluate(algo,data,measures=['RMSE','MAE'])
print_perf(perf)
setDF(perf,'SVD',0)

print '\n'
#PMF algorithm
algo = SVD(biased=False)
perf = evaluate(algo,data,measures=['RMSE','MAE'])
print_perf(perf)
setDF(perf,'PMF',1)

print '\n'
#NMF algorithm
Example #20
import os

# Path to the data file
file_path = os.path.expanduser('Surprise.csv')
# Tell the reader what format the file is in
reader = Reader(line_format='user item rating', sep=',')
# Load the data
data = Dataset.load_from_file(file_path, reader=reader)

#data = Dataset.load_builtin('ml-100k')
### Use NormalPredictor
from surprise import NormalPredictor

algo = NormalPredictor()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE', 'FCP'], cv=3)
print_perf(perf)

### Use BaselineOnly
from surprise import BaselineOnly

algo = BaselineOnly()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE', 'FCP'], cv=3)
print_perf(perf)

### Use basic (KNN) collaborative filtering
from surprise import KNNBasic, evaluate

algo = KNNBasic()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE', 'FCP'], cv=3)
print_perf(perf)
Example #21
def surprise_algorithms_print_perf():
    print('Surprise Algorithms (final results table)...')
    print('Which dataset do you want to use?')
    print('(1) Android')
    print('(2) WordPress')
    data_utilizar = input()

    # Encoding workaround (Python 2) so the file can be read without errors.
    reload(sys)
    sys.setdefaultencoding('utf8')

    if data_utilizar == 1:
        file_path = configuration.FILE_PATH_ANDROID
        reader = Reader(line_format='user item rating', sep='\t')
    else:
        file_path = configuration.FILE_PATH_WORDPRESS
        reader = Reader(line_format='user item rating', sep=',')

    # Dataset
    data = Dataset.load_from_file(file_path, reader=reader)
    data.split(n_folds=5)

    # NormalPredictor
    algo_normal_predictor = NormalPredictor()
    perf_normal_predictor = evaluate(algo_normal_predictor,
                                     data,
                                     measures=['RMSE', 'MAE'],
                                     verbose=False)

    # SVD
    algo_svd = SVD()
    perf_svd = evaluate(algo_svd,
                        data,
                        measures=['RMSE', 'MAE'],
                        verbose=False)

    # BaselineOnly
    algo_baseline_only = BaselineOnly()
    perf_baseline_only = evaluate(algo_baseline_only,
                                  data,
                                  measures=['RMSE', 'MAE'],
                                  verbose=False)

    # SVDpp
    algo_svdpp = SVDpp()
    perf_svdpp = evaluate(algo_svdpp,
                          data,
                          measures=['RMSE', 'MAE'],
                          verbose=False)

    # NMF
    algo_nmf = NMF()
    perf_nmf = evaluate(algo_nmf,
                        data,
                        measures=['RMSE', 'MAE'],
                        verbose=False)

    # SlopeOne
    algo_slope_one = SlopeOne()
    perf_slope_one = evaluate(algo_slope_one,
                              data,
                              measures=['RMSE', 'MAE'],
                              verbose=False)

    # CoClustering
    algo_coclustering = CoClustering()
    perf_coclustering = evaluate(algo_coclustering,
                                 data,
                                 measures=['RMSE', 'MAE'],
                                 verbose=False)
    """Segmento que utiliza KNN para el analisis:
        'k' Es el numero maximo de vecinos a tomar en cuenta para la agregacion
        'min_k' El numero minimo de vecinos a tomar en cuenta para la agregacion.
            Si no hay suficientes vecinos,la predicción se establece en la media global de todas las calificaciones
        'sim_options' son las opciones de similitud que utiliza el knn
        'bsl_options' configuracion de las estimaciones de base"""

    k = 40
    min_k = 1
    sim_options = {
        'name': 'pearson_baseline',
        'user_based': 0  # no shrinkage
    }
    bsl_options = {'method': 'als', 'n_epochs': 5, 'reg_u': 12, 'reg_i': 5}

    algo_knn_basic = KNNBasic(k=k, min_k=k, sim_options=sim_options)
    perf_knn_basic = evaluate(algo_knn_basic,
                              data,
                              measures=['RMSE', 'MAE'],
                              verbose=False)

    algo_knn_with_means = KNNWithMeans(k=k, min_k=min_k, sim_options=sim_options)
    perf_knn_with_means = evaluate(algo_knn_with_means,
                                   data,
                                   measures=['RMSE', 'MAE'],
                                   verbose=False)

    algo_knn_base_line = KNNBaseline(k=k,
                                     min_k=min_k,
                                     sim_options=sim_options,
                                     bsl_options=bsl_options)
    perf_knn_base_line = evaluate(algo_knn_base_line,
                                  data,
                                  measures=['RMSE', 'MAE'],
                                  verbose=False)
    """Imprimiendo resultados de los algoritmos"""
    print('')
    print('Printing results from algorithms...')
    print('- Normal Predictor')
    print_perf(perf_normal_predictor)
    print('')
    print('- SVD')
    print_perf(perf_svd)
    print('')
    print('- Baseline Only')
    print_perf(perf_baseline_only)
    print('')
    print('- SVD++')
    print_perf(perf_svdpp)
    print('')
    print('- NMF')
    print_perf(perf_nmf)
    print('')
    print('- Slope One')
    print_perf(perf_slope_one)
    print('')
    print('- Co-Clustering')
    print_perf(perf_coclustering)
    print('')
    print('- KNN Basic')
    print_perf(perf_knn_basic)
    print('')
    print('- KNN With Means')
    print_perf(perf_knn_with_means)
    print('')
    print('- KNN Baseline')
    print_perf(perf_knn_base_line)
Example #22
import pandas as pd
from surprise import prediction_algorithms as pa
from surprise import Dataset, Reader, GridSearch
from surprise import evaluate, print_perf
import datetime

data = pd.read_csv('./movielens_small/ratings.csv')
df = pd.DataFrame(data)
df.drop('timestamp', axis=1, inplace=True)
print(df.head())

reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
dataset.split(n_folds=5)
"""
#Sample Run
algo = pa.KNNBasic(k=10, min_k=5)
perf = evaluate(algo, dataset, measures=['MAE', 'RMSE', 'FCP'])
print_perf(perf)
"""

similarities = ['cosine', 'msd', 'pearson', 'pearson_baseline']
user_based = [True, False]

start_time = ('Timestamp: {:%Y-%b-%d %H:%M:%S}'.format(
    datetime.datetime.now()))
sim_options = {'name': similarities, 'user_based': user_based}
param_grid = {
    'k': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_k': [5],
    'sim_options': sim_options
Example #23
from surprise import SVD
from surprise import Dataset, print_perf
from surprise.model_selection import cross_validate

# Load the built-in movielens dataset
data = Dataset.load_builtin('ml-100k')
algo = SVD()
# Test performance on the dataset
perf = cross_validate(algo, data, measures=['RMSE'], cv=3)  # RMSE (root mean squared error)
# Print the results
print_perf(perf)
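
For reference, the evaluate/print_perf pair used throughout the examples above was deprecated in favor of model_selection.cross_validate, which prints the same per-fold table by itself when verbose=True; a minimal sketch:

from surprise import SVD, Dataset
from surprise.model_selection import cross_validate

data = Dataset.load_builtin('ml-100k')
# Prints per-fold and mean RMSE/MAE, replacing evaluate + print_perf.
cross_validate(SVD(), data, measures=['RMSE', 'MAE'], cv=3, verbose=True)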
Example #24
def eval(self):
    # Evaluate the performance of our algorithm on the dataset.
    perf = evaluate(self.svd, self.data, measures=['RMSE'])
    print_perf(perf)
Example #25
from surprise import KNNBasic
from surprise import Dataset
from surprise import evaluate, print_perf
from surprise import Reader
import os
# Load data from a file
file_path = os.path.expanduser('restaurant_ratings.txt')
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)
data.split(n_folds=3)
algo = KNNBasic(sim_options={'name': 'pearson', 'user_based': True})
perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
print_perf(perf)
Example #26
def number14():
    data.split(n_folds=3)
    ubcf_msd_algo = KNNBasic(sim_options={'name': 'MSD', 'user_based': True})
    perf = evaluate(ubcf_msd_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ubcf_cosine_algo = KNNBasic(sim_options={
        'name': 'cosine',
        'user_based': True
    })
    perf = evaluate(ubcf_cosine_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ubcf_pearson_algo = KNNBasic(sim_options={
        'name': 'pearson',
        'user_based': True
    })
    perf = evaluate(ubcf_pearson_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ibcf_msd_algo = KNNBasic(sim_options={'name': 'MSD', 'user_based': False})
    perf = evaluate(ibcf_msd_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ibcf_cosine_algo = KNNBasic(sim_options={
        'name': 'cosine',
        'user_based': False
    })
    perf = evaluate(ibcf_cosine_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)

    ibcf_pearson_algo = KNNBasic(sim_options={
        'name': 'pearson',
        'user_based': False
    })
    perf = evaluate(ibcf_pearson_algo, data, measures=['RMSE', 'MAE'])
    print_perf(perf)