def evaluate_model(self, data, algo):
    """Fit ``algo`` on the first 90% of ``data`` and report accuracy on both parts.

    The raw ratings of ``data`` are cut at ``int(.9 * len(raw_ratings))``:
    set A is everything before the cut, set B everything after it.
    ``data.raw_ratings`` is overwritten with set A (the caller's object is
    mutated), ``algo`` is fitted on the full trainset built from A, and RMSE /
    MAE are reported first on A itself (biased) and then on B (unbiased).

    :param data: dataset exposing ``raw_ratings``, ``build_full_trainset`` and
        ``construct_testset``.
    :param algo: algorithm exposing ``fit`` and ``test``.
    :return: None; results are only printed.
    """

    def report(label, scored):
        # Same output sequence for both parts: label, RMSE, MAE, prediction count.
        print(label, end=' ')
        accuracy.rmse(scored, verbose=True)
        accuracy.mae(scored, verbose=True)
        print('len(predictions)')
        print(len(scored))

    all_ratings = data.raw_ratings
    cut = int(.9 * len(all_ratings))
    part_a, part_b = all_ratings[:cut], all_ratings[cut:]

    # From here on `data` only holds set A.
    data.raw_ratings = part_a
    fitted_on = data.build_full_trainset()
    algo.fit(fitted_on)

    # Biased: evaluated on the very ratings the model was trained on.
    report('Biased accuracy on A,', algo.test(fitted_on.build_testset()))

    # Unbiased: evaluated on the held-out set B.
    report('Unbiased accuracy on B,', algo.test(data.construct_testset(part_b)))
def KNN_pred(self, is_total=0, combin_func='avg'):
    """Compare the overall-rating model with an aggregate of the per-criterion models.

    Reads ``self.testdatas`` (columns ``uid``, ``iid``, ``total`` and
    ``c1`` .. ``cN``), ``self.algos`` (index 0 is used for the overall rating,
    index ``i`` for criterion ``i``), ``self.no_of_criteria`` and, for
    ``'total_reg'``, the coefficients ``self.k`` and intercept ``self.b``.

    :param is_total: ``0`` aggregates the criterion estimates only; any other
        value additionally folds the overall estimate into the average.
    :param combin_func: ``'avg'`` for the arithmetic mean of the estimates, or
        ``'total_reg'`` (only when ``is_total == 0``) for
        ``sum(k[j] * est_j) + b``.
    :return: ``(s_mae, m_mae, total_p, multi_p)`` where the two MAE values are
        rounded to 4 decimals and the last two items are the overall and the
        aggregated prediction lists.

    When no aggregation branch matches, ``multi_p`` stays empty and is still
    handed to ``accuracy.mae`` (presumably raising on the empty list; verify).
    """
    reader = Reader(rating_scale=(1, 5))
    df = self.testdatas

    total_test = np.array(df[['uid', 'iid', 'total']])
    total_p = self.algos[0].test(total_test)

    # Per-criterion predictions are kept in a plain list (criteria_p[j] belongs
    # to criterion j + 1) instead of being written into locals().
    criteria_p = []
    for i in range(1, self.no_of_criteria + 1):
        criterion_data = Dataset.load_from_df(
            df[['uid', 'iid', 'c' + str(i)]], reader=reader)
        # The original statement was truncated to `.build_`; completing it to
        # build_testset() yields the (uid, iid, rating) list that test() expects.
        criterion_test = criterion_data.build_full_trainset().build_testset()
        criteria_p.append(self.algos[i].test(criterion_test))

    # NOTE(review): aggregation pairs predictions by position. total_p follows
    # the row order of df, while the criterion test sets are rebuilt from a
    # trainset; confirm both orders agree for the data in use.

    def combined(base, estimate):
        # Same user/item/true rating as the overall prediction, new estimate.
        return predictions.Prediction(base.uid, base.iid, base.r_ui,
                                      estimate, base.details)

    n_criteria = self.no_of_criteria
    multi_p = []
    if is_total == 0:
        if combin_func == 'avg':
            for idx, base in enumerate(total_p):
                s = sum(preds[idx].est for preds in criteria_p)
                multi_p.append(combined(base, s / n_criteria))
        elif combin_func == 'total_reg':
            k = self.k
            b = self.b
            for idx, base in enumerate(total_p):
                s = sum(k[j] * criteria_p[j][idx].est
                        for j in range(n_criteria))
                multi_p.append(combined(base, s + b))
    else:
        if combin_func == 'avg':
            for idx, base in enumerate(total_p):
                s = sum(preds[idx].est for preds in criteria_p)
                avg = (s + base.est) / (n_criteria + 1)
                multi_p.append(combined(base, avg))
        else:
            # Message: using the overall score as a criterion is not suitable
            # for the regression aggregation function.
            print('总分作为准则不适合用于回归聚合函数')

    s_mae = round(accuracy.mae(total_p), 4)
    m_mae = round(accuracy.mae(multi_p), 4)
    return s_mae, m_mae, total_p, multi_p
def baseline(trainset, testset):
    """Fit a ``BaselineOnly`` model and score it on ``testset``.

    Prints the literal line ``Predictions``, then calls ``accuracy.rmse`` and
    ``accuracy.mae`` on the predictions (their return values are discarded).

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: the prediction list produced by ``test``.
    """
    model = BaselineOnly()
    model.fit(trainset)

    print("Predictions")
    scored = model.test(testset)

    accuracy.rmse(scored)
    accuracy.mae(scored)
    return scored
def svdalgorithm(trainset, testset):
    """Fit an ``SVD`` model and score it on ``testset``.

    Prints the literal line ``Predictions``, then calls ``accuracy.rmse`` and
    ``accuracy.mae`` on the predictions (their return values are discarded).

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: the prediction list produced by ``test``.
    """
    model = SVD()
    model.fit(trainset)

    print("Predictions")
    scored = model.test(testset)

    accuracy.rmse(scored)
    accuracy.mae(scored)
    return scored
def chose_yahoo(file_path):
    """Compare single-rating and multi-criteria KNN predictions on the Yahoo data.

    Loads one rating file per criterion (story, role, show, image, music) plus
    the overall rating, splits each with the same random state, fits one
    ``KNNWithMeans`` model per file and combines the per-criterion predictions
    through the overall-regression helpers ``combine`` / ``totalRegModel`` /
    ``totalReg``.

    :param file_path: prefix that the file names are appended to by plain
        string concatenation (so it normally ends with a path separator).
    :return: tuple ``(single_mae, multi_mae)``: MAE of the overall-rating
        model and of the combined multi-criteria predictions.
    """
    reader = Reader(line_format='timestamp user item rating', sep='\t')

    # Load the multi-criteria ratings (story, role, show, image, music) and the
    # overall rating, always in this order.
    file_names = ('story.txt', 'role.txt', 'show.txt',
                  'image.txt', 'music.txt', 'total.txt')
    datasets = [Dataset.load_from_file(file_path + file_name, reader=reader)
                for file_name in file_names]

    # Identical seed for every file. The original comment describes this as a
    # 2:8 split, which relies on train_test_split's default test size (verify).
    random_states = 180
    splits = [train_test_split(dataset, random_state=random_states)
              for dataset in datasets]

    # Pearson similarity with user_based=False, i.e. similarities are computed
    # between items. One independent model per rating file.
    sim_options = {'name': 'pearson',
                   'user_based': False}
    models = [KNNWithMeans(sim_options=sim_options) for _ in splits]

    for model, (train_part, _) in zip(models, splits):
        model.fit(train_part)

    story_p, role_p, show_p, image_p, music_p, single_p = [
        model.test(test_part)
        for model, (_, test_part) in zip(models, splits)
    ]

    # Overall regression: learn weights from the complete table, then apply
    # them to the combined per-criterion predictions.
    P = combine(story_p, role_p, show_p, image_p, music_p, single_p)
    df = pd.read_csv(file_path + 'all.txt', sep='\t',
                     names=['id', 'uid', 'mid', 'total', 'story',
                            'role', 'show', 'image', 'music'])
    k, b = totalRegModel(df)
    multi_p = totalReg(P, k, b, single_p)

    single_mae = accuracy.mae(single_p)
    multi_mae = accuracy.mae(multi_p)
    return single_mae, multi_mae
def generate_svd_recommendation_df() -> pd.DataFrame:
    """Build top-5 recommendations from an SVD model and benchmark other algorithms.

    Steps, in order:

    1. Build the dataset from ``genearte_score_df()`` via ``MyDataSet``.
    2. Cross-validate (5 folds, RMSE and MAE), fit on the full trainset and
       predict the anti-testset with ``SVD``; keep its top-5 recommendations.
    3. Repeat the same procedure with ``NMF``.
    4. Run 5-fold cross-validation for a range of other algorithms.

    The original body stored the NMF recommendations in the variable holding
    the SVD ones and then returned nothing, despite the annotation. The SVD
    recommendations are now kept separately and returned, matching the
    function name.

    :return: the value ``get_top_n`` produced for the SVD predictions
        (presumably a DataFrame, per the annotation; verify against
        ``get_top_n``).
    """
    # Prepare the input data.
    score_df = genearte_score_df()
    svd_data = MyDataSet(score_df)

    # --- SVD ---
    algo = SVD()
    full_train_set = svd_data.build_full_trainset()
    test_set = full_train_set.build_anti_testset()
    cross_validate(algo, svd_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    algo.fit(full_train_set)
    predictions = algo.test(test_set)
    accuracy.rmse(predictions)
    recommendation_df_svd = get_top_n(predictions, n=5)

    # --- NMF ---
    # The results of the cross_validate calls below are not consumed anywhere
    # in this function; the calls are kept so the sequence of work is unchanged.
    cv_scores = {}
    cv_scores['nmf_initial'] = cross_validate(NMF(), svd_data, cv=5, n_jobs=5,
                                              verbose=False)
    algo = NMF()
    full_train_set = svd_data.build_full_trainset()
    test_set = full_train_set.build_anti_testset()
    cross_validate(algo, svd_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    algo.fit(full_train_set)
    predictions = algo.test(test_set)
    accuracy.rmse(predictions)
    accuracy.mae(predictions)
    # Computed as before but no longer allowed to clobber the SVD result.
    recommendation_df_nmf = get_top_n(predictions, n=5)  # noqa: F841 (not returned)

    # --- Benchmarks ---
    # As per https://bmanohar16.github.io/blog/recsys-evaluation-in-surprise
    benchmarks = (
        # Neighbourhood-based algorithms
        ('knnbasic', KNNBasic),
        ('knnmeans', KNNWithMeans),
        ('knnz', KNNWithZScore),
        # Matrix-factorization-based algorithms
        ('svd', SVD),
        ('svdpp', SVDpp),
        ('nmf', NMF),
        # Other collaborative-filtering algorithms
        ('slope', SlopeOne),
        ('coclus', CoClustering),
    )
    for label, algo_class in benchmarks:
        cv_scores[label] = cross_validate(algo_class(), svd_data, cv=5,
                                          n_jobs=5, verbose=False)

    return recommendation_df_svd
def test_mae():
    """Tests for the MAE function."""
    # Each case pairs a prediction list with the MAE expected for it. The two
    # arguments of pred() are presumably (true rating, estimate); verify
    # against the helper's definition.
    cases = (
        ([pred(0, 0), pred(1, 1), pred(2, 2), pred(100, 100)], 0),
        ([pred(0, 0), pred(0, 2)], abs(0 - 2) / 2),
        ([pred(2, 0), pred(3, 4)], (abs(2 - 0) + abs(3 - 4)) / 2),
    )
    for batch, expected in cases:
        assert mae(batch) == expected

    # An empty prediction list must be rejected.
    with pytest.raises(ValueError):
        mae([])
def test_mae():
    """Tests for the MAE function."""
    # Estimates identical to the ratings: no error at all.
    perfect = [pred(value, value) for value in (0, 1, 2, 100)]
    assert mae(perfect) == 0

    # One exact prediction and one that is off by two.
    half_right = [pred(0, 0), pred(0, 2)]
    assert mae(half_right) == abs(0 - 2) / 2

    # Both predictions are off, by two and by one respectively.
    both_off = [pred(2, 0), pred(3, 4)]
    assert mae(both_off) == (abs(2 - 0) + abs(3 - 4)) / 2

    # An empty prediction list must be rejected.
    with pytest.raises(ValueError):
        mae([])
def test_model(model):
    """Evaluate the best-MAE estimator of ``model`` on a predefined train/test fold.

    The single fold is read from two hard-coded CSV files (comma separated,
    first line skipped). For every (train, test) pair produced by the
    predefined split, the estimator is fitted and ``accuracy.mae`` is called
    on its predictions; nothing is returned.

    :param model: object whose ``best_estimator['mae']`` is the estimator to
        evaluate.
    """
    csv_reader = Reader(line_format='user item rating', sep=',', skip_lines=1)

    train_csv = '~/Desktop/Tufts/Fall2018/COMP135/Project3/trainset.csv'
    test_csv = '~/Desktop/Tufts/Fall2018/COMP135/Project3/testset.csv'
    folds = [(train_csv, test_csv)]

    splitter = sp.model_selection.split.PredefinedKFold()
    estimator = model.best_estimator['mae']
    fold_data = Dataset.load_from_folds(folds, reader=csv_reader)

    for train_part, test_part in splitter.split(fold_data):
        estimator.fit(train_part)
        accuracy.mae(estimator.test(test_part))
def generate_test_score(test_preds, error_metric):
    """Score ``test_preds`` with the accuracy function named by ``error_metric``.

    :param test_preds: predictions handed to the accuracy function.
    :param error_metric: ``'rmse'``, ``'mae'`` or ``'fcp'``.
    :return: the value of ``accuracy.<error_metric>(test_preds)``, or ``None``
        when ``error_metric`` is not one of the three supported names.
    """
    # Unknown metric names fall through silently, exactly like an if/elif
    # chain without a final else.
    if error_metric not in ('rmse', 'mae', 'fcp'):
        return None

    scorer = getattr(accuracy, error_metric)
    return scorer(test_preds)
def use_pearson_baseline():
    """Time a ``KNNBasic`` run with the ``pearson_baseline`` similarity on ml-100k.

    The model is fitted on the full trainset and scored on the anti-testset
    built from that same trainset.

    :return: list ``[rmse, mae, elapsed_seconds]``; the elapsed time covers
        loading, fitting, predicting and scoring.
    """
    started = time.time()

    full_trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
    print('Using Pearson baseline')

    similarity = {
        'name': 'pearson_baseline',
        'shrinkage': 0,  # no shrinkage
    }
    knn = KNNBasic(sim_options=similarity)
    knn.fit(full_trainset)

    knn_predictions = knn.test(full_trainset.build_anti_testset())
    rmse_value = accuracy.rmse(knn_predictions)
    mae_value = accuracy.mae(knn_predictions)

    return [rmse_value, mae_value, time.time() - started]
def run_with_diff_k(self, algo, args, range_, folds=2, test_filter=None,
                    threshold=2, msg=None, modal_name=None):
    """Sweep one hyper-parameter, cross-validate each value and plot RMSE / MAE.

    :param algo: algorithm class (or factory) called as ``algo(**params)``.
    :param args: base keyword arguments for ``algo``. The dict is copied for
        every sweep value, so the caller's dict is no longer modified.
    :param range_: arguments unpacked into ``range(...)`` to produce the
        values to try.
    :param folds: number of ``KFold`` splits over ``self.data``.
    :param test_filter: optional callable ``test_filter(testset, threshold)``
        returning the test set actually evaluated.
    :param threshold: second argument forwarded to ``test_filter``.
    :param msg: plot title.
    :param modal_name: ``'KNN'`` (sweeps ``k``), ``'NMF'`` or ``'SVD'`` (both
        sweep ``n_factors``).
    :raises KeyError: if ``modal_name`` is not one of the three known names
        (this includes the default ``None``).
    """
    arg_name = {
        'KNN': 'k',
        'NMF': 'n_factors',
        'SVD': 'n_factors',
    }[modal_name]

    rmse_by_k = []
    mae_by_k = []
    k_values = list(range(*range_))

    for k in k_values:
        # Work on a copy: updating `args` in place leaked the swept value back
        # into the caller's dictionary.
        params = dict(args)
        params[arg_name] = k
        modal = algo(**params)

        kf = KFold(n_splits=folds)
        rmse_by_fold = []
        mae_by_fold = []
        for trainset, testset in kf.split(self.data):
            modal.fit(trainset)
            if test_filter:
                testset = test_filter(testset, threshold)
            predictions = modal.test(testset)
            rmse_by_fold.append(accuracy.rmse(predictions, verbose=True))
            mae_by_fold.append(accuracy.mae(predictions, verbose=True))

        # One averaged point per swept value.
        rmse_by_k.append(np.mean(rmse_by_fold))
        mae_by_k.append(np.mean(mae_by_fold))

    plt.plot(k_values, rmse_by_k)
    plt.plot(k_values, mae_by_k)
    plt.legend(['RMSE', 'MAE'])
    plt.title(msg)
    plt.show()
def get_svd_recommender(df, test_size=0.25, path="", exists=False):
    """
    Build and train an SVD recommender, or load a previously saved one.

    :param df: a dataframe with the columns ``user_id``, ``beer_id`` and
        ``user_score``
    :param test_size: the fraction of samples reserved for testing
    :param path: the path of a recommender previously saved to a file
    :param exists: when true, nothing is trained and only element ``[1]`` of
        ``dump.load(path)`` is returned (a single object, not a tuple)
    :return: when ``exists`` is false, the tuple (trained recommender, list of
        predictions, root mean square error on the test split)
    """
    if exists:
        loaded = dump.load(path)
        return loaded[1]

    # The reader tells surprise the rating scale; the columns must be handed
    # over in the order user, item, rating.
    rating_reader = Reader(rating_scale=(1, 5))
    ratings = df[['user_id', 'beer_id', 'user_score']]
    data = Dataset.load_from_df(ratings, rating_reader)

    train_part, test_part = train_test_split(data, test_size=test_size)

    recommender = SVD()
    recommender.fit(train_part)

    # test() yields prediction objects carrying, among others, `est` (the
    # prediction) and `r_ui` (the true rating).
    scored = recommender.test(test_part)

    # An RMSE below 1 is considered low.
    rmse_value = accuracy.rmse(scored)
    # MAE is computed as well, but it is not part of the return value.
    accuracy.mae(scored)

    return recommender, scored, rmse_value
def use_cosine_similarity():
    """Time a ``KNNBasic`` run with item-item cosine similarity on ml-100k.

    The model is fitted on the full trainset and scored on the anti-testset
    built from that same trainset.

    :return: list ``[rmse, mae, elapsed_seconds]``; the elapsed time covers
        loading, fitting, predicting and scoring.
    """
    started = time.time()

    full_trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
    print('Using cosine similarity')

    similarity = {
        'name': 'cosine',
        'user_based': False,  # compute similarities between items
    }
    knn = KNNBasic(sim_options=similarity)
    knn.fit(full_trainset)

    knn_predictions = knn.test(full_trainset.build_anti_testset())
    rmse_value = accuracy.rmse(knn_predictions)
    mae_value = accuracy.mae(knn_predictions)

    return [rmse_value, mae_value, time.time() - started]
def recommender_nmf_baseline(self, train_file, test_file, output):
    """Train a default ``NMF`` model and print its RMSE / MAE on the test data.

    :param train_file: training file forwarded to ``prepare_datasets``.
    :param test_file: test file forwarded to ``prepare_datasets``.
    :param output: accepted but not used by this method.
    :return: the fitted ``NMF`` instance.
    """
    # prepare_datasets yields four values; only the first two are used here.
    train, test, _train_dataset, _test_dataset = prepare_datasets(
        train_file, test_file)

    nmf_model = NMF()
    nmf_model.fit(train)
    nmf_predictions = nmf_model.test(test, verbose=False)

    # RMSE is evaluated before MAE, matching the order they appear in the
    # printed line.
    rmse_value = rmse(nmf_predictions, verbose=False)
    mae_value = mae(nmf_predictions, verbose=False)
    print(f'NMF BASELINE:  RMSE {rmse_value!s} MAE {mae_value!s}')

    return nmf_model
def acc(df, alg, algname, n_train, n_users, cutoff=.5):
    """Fit ``alg`` on a sub-sample of the swipe data and report its binary error.

    Estimates are binarised as ``int(est < cutoff)`` before scoring, so the
    reported value is the mean absolute error of those 0/1 choices.

    Relies on the module-level names ``subswipe_data``, ``test_count`` and
    ``df_swipes``.

    :param df: data handed to ``subswipe_data``.
    :param alg: algorithm exposing ``fit`` and ``test``.
    :param algname: label stored in the result.
    :param n_train: forwarded to ``subswipe_data``.
    :param n_users: forwarded to ``subswipe_data``.
    :param cutoff: estimates strictly below this value become 1, others 0.
    :return: dict with keys ``algname``, ``n_train``, ``n_users`` and ``acc``
        (the MAE); the dict is also printed.
    """
    np.random.seed(0)
    train, test = subswipe_data(df, n_train=n_train, n_test=test_count,
                                n_users=n_users)
    alg.fit(train)
    predictions = alg.test(test)

    # Change predictions to a binary choice of left or right. Prediction
    # derives from NamedTuple, so a new instance is built per item.
    predictions = [
        Prediction(p.uid, p.iid, p.r_ui, int(p.est < cutoff), p.details)
        for p in predictions
    ]

    # Mean absolute error.
    mae = accuracy.mae(predictions)

    # Collect the rows first and build the frame once: DataFrame.append inside
    # a loop copies the frame on every call and no longer exists in pandas 2.
    rows = [
        {
            "uid": p.uid,
            "iid": p.iid,
            "predicted": p.est,
            "actual": df_swipes[p.uid].loc[p.iid],
        }
        for p in predictions
    ]
    # NOTE(review): this frame is built but not used afterwards; kept so the
    # df_swipes lookups still happen.
    df_predicted = pd.DataFrame(  # noqa: F841
        rows, columns=["uid", "iid", "predicted", "actual"])

    acc_dict = {"algname": algname, "n_train": n_train, "n_users": n_users,
                "acc": mae}
    print(acc_dict)
    return acc_dict
def surprise_algos(train, test, svdpp=False):
    """Fit several surprise algorithms and tabulate their RMSE / MAE.

    :param train: training data forwarded to ``get_train_test``.
    :param test: test data forwarded to ``get_train_test``.
    :param svdpp: when true, ``SVDpp`` is evaluated in addition to the
        default algorithms.
    :return: DataFrame indexed by ``Method`` with the columns ``RMSE`` and
        ``MAE``, sorted by RMSE in descending order.
    """
    train_set, test_set = get_train_test(train, test)

    algos = [NormalPredictor, BaselineOnly, SlopeOne, NMF, SVD]
    if svdpp:
        algos.append(SVDpp)

    values = {'Method': [], 'RMSE': [], 'MAE': []}
    for algo_constructor in algos:
        name = get_name(algo_constructor)
        print(name)

        # Seed the algorithm when its constructor accepts `random_state`.
        # Only the "unexpected keyword" failure is tolerated; the previous
        # bare `except` also swallowed KeyboardInterrupt and real errors.
        try:
            algo = algo_constructor(random_state=0)
        except TypeError:
            algo = algo_constructor()

        algo.fit(train_set)
        predictions = algo.test(test_set)
        rmse = accuracy.rmse(predictions)
        mae = accuracy.mae(predictions)

        values['Method'].append(name)
        values['RMSE'].append(rmse)
        values['MAE'].append(mae)

    results = pd.DataFrame(values)
    return results.sort_values('RMSE', ascending=False).set_index('Method')
def use_sgd():
    """Time a ``BaselineOnly`` run with SGD baseline estimation on ml-100k.

    The model is fitted on the full trainset and scored on the anti-testset
    built from that same trainset.

    :return: list ``[rmse, mae, elapsed_seconds]``; the elapsed time covers
        loading, fitting, predicting and scoring.
    """
    started = time.time()

    full_trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
    print('Using SGD')

    baseline_options = {
        'method': 'sgd',
        'learning_rate': .005,
    }
    sgd_model = BaselineOnly(bsl_options=baseline_options)
    sgd_model.fit(full_trainset)

    sgd_predictions = sgd_model.test(full_trainset.build_anti_testset())
    rmse_value = accuracy.rmse(sgd_predictions)
    mae_value = accuracy.mae(sgd_predictions)

    return [rmse_value, mae_value, time.time() - started]
def use_als():
    """Time a ``BaselineOnly`` run with ALS baseline estimation on ml-100k.

    The model is fitted on the full trainset and scored on the anti-testset
    built from that same trainset.

    :return: list ``[rmse, mae, elapsed_seconds]``; the elapsed time covers
        loading, fitting, predicting and scoring.
    """
    started = time.time()

    full_trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
    print('Using ALS')

    baseline_options = {
        'method': 'als',
        'n_epochs': 20,
        'reg_u': 12,
        'reg_i': 5,
    }
    als_model = BaselineOnly(bsl_options=baseline_options)
    als_model.fit(full_trainset)

    als_predictions = als_model.test(full_trainset.build_anti_testset())
    rmse_value = accuracy.rmse(als_predictions)
    mae_value = accuracy.mae(als_predictions)

    return [rmse_value, mae_value, time.time() - started]
def test_model(self):
    """Score ``self.model`` against ``self.test``.

    :return: tuple ``(mae, rmse)``, in that order, both computed with
        ``verbose=False``.
    """
    scored = self.model.test(self.test)
    mae_value = accuracy.mae(scored, verbose=False)
    rmse_value = accuracy.rmse(scored, verbose=False)
    return mae_value, rmse_value
def use_algo(algo, name):
    """Fit ``algo`` and gather its test, retrieval and train metrics.

    Uses the module-level ``trainset``, ``testset``, ``trainset_for_testing``,
    ``train_affinities``, ``validation_affinities``, ``surprise_get_topk`` and
    ``items``.

    :param algo: algorithm exposing ``fit`` and ``test``.
    :param name: label stored under the ``"algo"`` key.
    :return: dict with the keys ``algo``, ``rmse``, ``mae``, ``map``,
        ``retrieval_time``, ``train_rmse``, ``train_mae`` and ``time``;
        ``time`` covers fitting plus predicting the test set only.
    """
    started = time.time()
    algo.fit(trainset)
    test_predictions = algo.test(testset)
    elapsed = time.time() - started

    test_rmse = accuracy.rmse(test_predictions, verbose=False)
    test_mae = accuracy.mae(test_predictions, verbose=False)

    retrieval = extraction_efficiency(algo, train_affinities,
                                      validation_affinities,
                                      surprise_get_topk, items)

    # Same metrics on the training data, for comparison with the test scores.
    train_predictions = algo.test(trainset_for_testing)
    train_rmse = accuracy.rmse(train_predictions, verbose=False)
    train_mae = accuracy.mae(train_predictions, verbose=False)

    return {
        "algo": name,
        "rmse": test_rmse,
        "mae": test_mae,
        "map": retrieval["map"],
        "retrieval_time": retrieval["retrieval_time"],
        "train_rmse": train_rmse,
        "train_mae": train_mae,
        "time": elapsed,
    }
def main():
    """Train the algorithm chosen on the command line and print its metrics.

    Command-line options: ``--train_file_path``, ``--test_file_path``,
    ``--approach`` (Baseline | SVD | SlopeOne | NMF | CoClustering) and
    ``--output_ranking_file``. Both files are read as tab-separated
    ``user item rating timestamp`` lines. After MAE and RMSE, the ranking is
    written out and precision, recall, F-measure, conversion rate and NDCG
    are printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--train_file_path", default="data/train.csv",
                     help="training file path")
    cli.add_argument("--test_file_path", default="data/test.csv",
                     help="testing file path")
    cli.add_argument("--approach", default="SVD",
                     help="Baseline | SVD | SlopeOne | NMF | CoClustering")
    cli.add_argument("--output_ranking_file", default="ranking",
                     help="output ranking for test")

    # Every candidate is instantiated up front; --approach picks one of them.
    bsl_options = {'method': 'sgd', 'n_epochs': 20, 'reg_u': 100, 'reg_i': 50}
    candidates = {
        "Baseline": BaselineOnly(bsl_options, verbose=True),
        "SVD": SVD(verbose=True, n_factors=20, n_epochs=3),
        "SlopeOne": SlopeOne(),
        "NMF": NMF(),
        "CoClustering": CoClustering(),
    }

    args = cli.parse_args()
    line_reader = Reader(line_format='user item rating timestamp', sep='\t')
    chosen = candidates[args.approach]

    train_data = Dataset.load_from_file(args.train_file_path,
                                        reader=line_reader)
    test_data = Dataset.load_from_file(args.test_file_path,
                                       reader=line_reader)
    train_set = train_data.build_full_trainset()
    # The test file is turned into a trainset first and then into a testset.
    test_set = test_data.build_full_trainset().build_testset()

    print("training....")
    chosen.fit(train_set)

    print("testing...")
    predictions = chosen.test(test_set)
    accuracy.mae(predictions, verbose=True)
    accuracy.rmse(predictions, verbose=True)

    # Extra credit: ranking output and ranking-quality metrics.
    ranking_path = args.output_ranking_file + "_" + args.approach + ".out"
    output_ranking(predictions, ranking_path)

    precisions, recalls = precision_recall_at_k(predictions, k=10,
                                                threshold=2.5)
    mean_precision = sum(precisions.values()) / len(precisions)
    print("Precision:", mean_precision)
    mean_recall = sum(recalls.values()) / len(recalls)
    print("Recall:", mean_recall)
    print("F-measure:", f_measure(precisions, recalls))
    print("conversion_rate:", get_conversion_rate(predictions, k=10))
    print("ndcg:", get_ndcg(predictions, k_highest_scores=10))
def svdalgorithm(trainset, testset):
    """Fit an ``SVD`` model on ``trainset`` and score it on ``testset``.

    A banner line naming the algorithm is printed first.

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: tuple ``(rmse, mae, predictions)``.
    """
    rule = "-" * 5
    print(f"\n{rule} SVD algorithm using surprise package {rule}")

    model = SVD()
    model.fit(trainset)
    scored = model.test(testset)

    rmse_value = accuracy.rmse(scored)
    mae_value = accuracy.mae(scored)
    return rmse_value, mae_value, scored
def evaluate_cold_users(self):
    """Compute accuracy and precision/recall metrics for the cold-user predictions.

    Reads ``self.cold_predictions``, ``self.ks`` and ``self.name``; stores the
    results in ``self.cold_mae``, ``self.cold_rmse``, ``self.cold_MAPs`` and
    ``self.cold_MARs`` (one entry per value of ``self.ks``). Progress and the
    rounded duration are printed.
    """
    print('evaluating cold users', self.name, '... ', end='')
    started = time.time()

    cold = self.cold_predictions
    self.cold_mae = accuracy.mae(cold, verbose=False)
    self.cold_rmse = accuracy.rmse(cold, verbose=False)

    # Each call yields one pair; transposing the pairs gives one tuple per
    # component across all k.
    per_k = [precision_recall_at_k(self.cold_predictions, k)
             for k in self.ks]
    self.cold_MAPs, self.cold_MARs = zip(*per_k)

    finished = time.time()
    print('done in ', round(finished - started), 'seconds')
def svdpp(trainset, testset):
    """Fit an ``SVDpp`` (SVD++ matrix factorization) model and score it on ``testset``.

    A banner line naming the algorithm is printed first.

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: tuple ``(rmse, mae, predictions)``.
    """
    rule = "-" * 5
    print(f"\n{rule} SVD++ algorithm using surprise package {rule}")

    model = SVDpp()
    model.fit(trainset)
    scored = model.test(testset)

    rmse_value = accuracy.rmse(scored)
    mae_value = accuracy.mae(scored)
    return rmse_value, mae_value, scored
def slopeOne(trainset, testset):
    """Fit a ``SlopeOne`` model on ``trainset`` and score it on ``testset``.

    A banner line naming the algorithm is printed first.

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: tuple ``(rmse, mae, predictions)``.
    """
    rule = "-" * 5
    print(f"\n{rule} SlopeOne algorithm using surprise package {rule}")

    model = SlopeOne()
    model.fit(trainset)
    scored = model.test(testset)

    rmse_value = accuracy.rmse(scored)
    mae_value = accuracy.mae(scored)
    return rmse_value, mae_value, scored
def coClustering(trainset, testset):
    """Fit a ``CoClustering`` model on ``trainset`` and score it on ``testset``.

    A banner line naming the algorithm is printed first.

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: tuple ``(rmse, mae, predictions)``.
    """
    rule = "-" * 5
    print(f"\n{rule} CoClustering algorithm using surprise package {rule}")

    model = CoClustering()
    model.fit(trainset)
    scored = model.test(testset)

    rmse_value = accuracy.rmse(scored)
    mae_value = accuracy.mae(scored)
    return rmse_value, mae_value, scored
def baseline(trainset, testset):
    """Fit a ``BaselineOnly`` model on ``trainset`` and score it on ``testset``.

    A banner line naming the algorithm is printed first.

    :param trainset: data passed to ``fit``.
    :param testset: data passed to ``test``.
    :return: tuple ``(rmse, mae, predictions)``.
    """
    rule = "-" * 5
    print(f"\n{rule} Baseline algorithm using surprise package {rule}")

    model = BaselineOnly()
    model.fit(trainset)
    scored = model.test(testset)

    rmse_value = accuracy.rmse(scored)
    mae_value = accuracy.mae(scored)
    return rmse_value, mae_value, scored
def main():
    """Train a ``KNNBasic`` model on the book ratings and report MAE and RMSE.

    Reads ``../data/train_update.csv`` and ``../data/test_update.csv``
    (semicolon separated), keeps the ``User-ID``, ``ISBN`` and ``Book-Rating``
    columns on a 0-10 rating scale, splits the training data and evaluates
    the model on the held-out part of that split.
    """
    # Load the ratings through pandas; this bypasses an error met when the
    # file was read directly.
    train = pd.read_csv('../data/train_update.csv', sep=';')
    test = pd.read_csv('../data/test_update.csv', sep=';')

    reader = Reader(rating_scale=(0, 10))
    # The original bare `print` is a no-op expression in Python 3; emit the
    # blank line it was evidently meant to produce.
    print('')

    train_set = Dataset.load_from_df(
        train[['User-ID', 'ISBN', 'Book-Rating']], reader=reader)
    # NOTE(review): the separate test file is loaded but not evaluated below.
    test_set = Dataset.load_from_df(  # noqa: F841
        test[['User-ID', 'ISBN', 'Book-Rating']], reader=reader)

    # Classifier: basic k-nearest-neighbours approach.
    algo = KNNBasic()

    # NOTE(review): test_size=.9 trains on only 10% of the data; confirm that
    # this is intended.
    trainset, testset = train_test_split(train_set, test_size=.9,
                                         random_state=1234)

    # Fit on the trainset produced by the split. The original passed the
    # `train_set` Dataset object here, which is not what fit() is given
    # anywhere else and would also include the held-out ratings.
    algo.fit(trainset)

    predictions = algo.test(testset)

    # Compute MAE and RMSE.
    accuracy.mae(predictions)
    accuracy.rmse(predictions)
def fit_model(data):
    """Train an SVD model on a 75/25 split of ``data``, report it and pickle it.

    RMSE and MAE on the test split are printed, then the fitted model is
    written to ``../Models/Collaborative_filtering2.model``.

    :param data: dataset accepted by ``train_test_split``.
    :return: None.
    """
    train, test = train_test_split(data, test_size=0.25)

    svd = SVD(n_epochs=25, lr_all=0.01, reg_all=0.4)
    svd.fit(train)
    pred = svd.test(test)

    print('RMSE for test set: {}'.format(accuracy.rmse(pred)))
    print('MAE for test set: {}'.format(accuracy.mae(pred)))

    # Save the model. The context manager guarantees the file is flushed and
    # closed even if pickling fails; the original never closed the handle.
    path = '../Models/Collaborative_filtering2.model'
    with open(path, 'wb') as model_file:
        pickle.dump(svd, model_file)
    print("Model is saved to: {}".format(path))
def metric(predictions, verbose=True, metric_type="rmse"):
    """Compute one accuracy metric for ``predictions``.

    :param predictions: predictions forwarded to the accuracy function.
    :param verbose: forwarded to the accuracy function.
    :param metric_type: one of ``"mse"``, ``"fcp"``, ``"mae"`` or ``"rmse"``.
    :return: the value returned by the selected accuracy function.
    :raises AssertionError: if ``metric_type`` is not one of the four names.
    """
    assert metric_type in {"mse", "fcp", "mae", "rmse"}

    # RMSE is the fallback branch: anything that is not one of the other three
    # names ends up there.
    chosen = metric_type if metric_type in ("mse", "fcp", "mae") else "rmse"
    scorer = getattr(accuracy, chosen)
    return scorer(predictions=predictions, verbose=verbose)
algos_name.append('SVDpp')
algos.append(SVDpp(n_factors=1, random_state=1))
# algos_name.append('KNN')
# algos.append(KNNBasic())

# For every registered algorithm: evaluate on a fixed 80/20 split, predict the
# submission pairs and write both a raw and a rounded output file.
for name, algo in zip(algos_name, algos):
    print('===', name)
    trainset, testset = train_test_split(data, test_size=0.2, random_state=1)

    # Train and test the algorithm.
    predictions = algo.fit(trainset).test(testset)

    # Compute and print the Mean Absolute Error.
    accuracy.mae(predictions, verbose=True)

    # Predict every (user, book) pair of the submission set and keep the
    # estimates inside the 1..10 range.
    estimates = [algo.predict(u, b).est for u, b in zip(user_test, book_test)]
    pred_test = np.clip(np.array(estimates), 1, 10)

    # Write the output: the raw estimates first, then the rounded ones.
    output_stem = os.path.join('outputs', 'subm_surprise_' + name)
    output_path_raw = output_stem + '_raw.csv'
    np.savetxt(output_path_raw, pred_test, fmt='%1.4f')
    print('Ouput written to %s' % output_path_raw)

    output_path_round = output_stem + '_round.csv'
    np.savetxt(output_path_round, np.around(pred_test), fmt='%d')