def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()

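# For reference: the snippets in this section assume python-recsys imports along
# these lines (which of these names a given snippet actually uses varies; the
# grouping below is a sketch, not taken from any one of the original files).
from recsys.algorithm.factorization import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE
from recsys.evaluation.ranking import SpearmanRho, KendallTau
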
def ex1(dat_file='./ml-1m/ratings.dat', pct_train=0.5):
    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

def evaluate(data, count=5, K=100):
    results = []
    for i in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print len(data.get()), len(train.get()), len(test.get())
        #test_in_train(test, train)
        #print train.get()
        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                    post_normalize=True)
        # Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                #print "keyerror"
                continue
        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print rsu
            results.append(rsu)
        except:
            print "one error occurred"
    return results

def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()

def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()

def eval_rmse(self):
    # Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in self.test.get():
        try:
            pred_rating = self.svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

def eval_reco(model, test):
    """Compute RMSE and MAE on the test set."""
    # Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = model.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    return rmse, mae

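# eval_reco above returns the metric objects instead of printing them; a minimal
# usage sketch (the names 'svd' and 'test' are assumed to come from an earlier
# split/compute step like the ones in the surrounding snippets):
rmse, mae = eval_reco(svd, test)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
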
def ex1(dat_file=DATA_DIR + 'ml-1m-ratings.dat', pct_train=0.5):
    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # About the format parameter:
    #   'row': 1   -> rows in the matrix come from column 1 of the ratings file
    #   'col': 0   -> cols in the matrix come from column 0 of the ratings file
    #   'value': 2 -> values (Mij) in the matrix come from column 2 of the ratings file
    #   'ids': int -> ids (row and col ids) are integers (not strings)
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # evaluate performance
    rmse = RMSE()
    # MAE is the mean absolute error; here it comes out around 1.09, i.e. an
    # error of almost one point on a five-point scale
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

def evaluate(_svd, _testData, verbose=False):
    global rmse, mae, rating, item_id, user_id, pred_rating
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = _svd.predict(item_id, user_id, MIN_VALUE=0, MAX_VALUE=10)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
            if verbose:
                print item_id, user_id, rating, pred_rating
        except Exception as e:
            print 'ERROR occurred:', e.message
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()

def evaluate(clf, _testData, verbose=False):
    rmse = RMSE()
    mae = MAE()
    numErrors = 0
    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = clf.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
            if verbose:
                print item_id, user_id, rating, pred_rating
        except KeyError as e:
            if verbose:
                print 'ERROR occurred:', e.message
            numErrors += 1
    print '\n%i/%i data points raised errors.' % (numErrors, len(_testData))
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

def ex1(dat_file='ml-1m/ratings.dat', pct_train=0.5):
    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # About the format parameter:
    #   'row': 1   -> rows in the matrix come from column 1 of ratings.dat
    #   'col': 0   -> cols in the matrix come from column 0 of ratings.dat
    #   'value': 2 -> values (Mij) in the matrix come from column 2 of ratings.dat
    #   'ids': int -> ids (row and col ids) are integers (not strings)
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()

class RecommendSystem(object):
    def __init__(self, filename, sep, **format):
        # data file information
        self.filename = filename
        self.sep = sep
        self.format = format
        # initialise the matrix factorisation
        self.svd = SVD()
        # factorisation parameters
        self.k = 100          # number of latent factors
        self.min_values = 10  # drop movies rated by fewer than 10 users
        self.post_normalize = False
        # flag: has a saved model been loaded?
        self.load_model = False
        # initialise the RMSE metric
        self.rmse = RMSE()

    def get_data(self):
        # if no saved model exists, the raw data has to be loaded
        if not os.path.exists(filename):  # 'filename' is the module-level model path
            if not os.path.exists(self.filename):
                sys.exit()
            # alternatively, let SVD load the data directly:
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            data = Data()
            data.load(self.filename, sep=self.sep, format=self.format)
            # split into train/test sets
            train, test = data.split_train_test(percent=80)
            return train, test
        else:
            # load the saved model directly
            self.svd.load_model(filename)
            # mark the model as loaded
            self.load_model = True
            return None, None

    def train(self, train):
        """
        Train on the training set.
        :param train: training set
        :return: None
        """
        if not self.load_model:
            # hand the training set to the SVD
            self.svd.set_data(train)
            # note: savefile expects the file name without its extension
            self.svd.compute(k=self.k, min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=filename[:-4])
        return None

    def recommend_to_user(self, userid):
        """
        Recommend movies to a user.
        :param userid: user id
        :return: None
        """
        recommend_list = self.svd.recommend(userid, is_row=False)
        # print the movie titles and the predicted ratings
        # build the list of movie titles
        movies_list = []
        for line in open("./data/ml-1m/movies.dat", "r"):
            movies_list.append(' '.join(line.split("::")[1:2]))
        # walk through the recommended ids
        for itemid, rating in recommend_list:
            print "Recommended movie: %s, predicted rating: %f" % (movies_list[itemid], rating)
        return None

    def rs_predict(self, userid, itemid):
        """
        Predict a rating.
        :param userid: user id
        :param itemid: item id
        :return: predicted rating
        """
        score = self.svd.predict(itemid, userid)
        return score

    def evaluation(self, test):
        """
        Evaluate the model with RMSE.
        :param test: test data
        :return: None
        """
        if not self.load_model:
            # each test entry is <rating, row (itemid), col (userid)>
            for rating, itemid, userid in test.get():
                try:
                    # 'rating' is the ground-truth value
                    score = self.rs_predict(userid, itemid)
                    # accumulate every test pair
                    self.rmse.add(rating, score)
                except KeyError:
                    continue
            error = self.rmse.compute()
            print "RMSE: %s" % error
        return None

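# A minimal driver sketch for the class above. It assumes the module-level model
# path 'filename' that get_data()/train() read, and reuses the MovieLens path and
# format from the other snippets; all concrete values here are illustrative.
filename = "/tmp/movielens-model.zip"

rs = RecommendSystem("./data/ml-1m/ratings.dat", "::",
                     col=0, row=1, value=2, ids=int)
train, test = rs.get_data()  # (None, None) if a saved model was loaded instead
rs.train(train)
rs.recommend_to_user(1)
rs.evaluation(test)
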
# (fragment — continuation of an svd.compute(...) call; the start of the call is
#  truncated in the source; see the fuller variant of this script below)
            mean_center=True, post_normalize=True)

# Evaluate
rmse_svd = RMSE()
mae_svd = MAE()
rmse_svd_neig = RMSE()
mae_svd_neig = MAE()

i = 1
total = len(test.get())
print 'Total Test ratings: %s' % total
for rating, item_id, user_id in test:
    try:
        pred_rating_svd = svd.predict(item_id, user_id)
        rmse_svd.add(rating, pred_rating_svd)
        mae_svd.add(rating, pred_rating_svd)
        pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
        if pred_rating_svd_neig is not nan:
            rmse_svd_neig.add(rating, pred_rating_svd_neig)
            mae_svd_neig.add(rating, pred_rating_svd_neig)
        print "\rProcessed test rating %d" % i,
        sys.stdout.flush()
        i += 1
    except KeyError:
        continue

result = RMSE()
i = 0
for rating, item_id, user_id in test:
    if len(dist[i]) < 5:
        if item_id in train_item.keys():
            pred_rating = statistics.mean(train_item[item_id].values())
        elif user_id in train_user.keys():
            pred_rating = statistics.mean(train_user[user_id].values())
        else:
            pred_rating = average
    else:
        ratings = []
        for j in range(0, k):
            if j == len(dist[i]):
                break
            ratings.append(train_item[dist[i][j][0]][user_id])
        pred_rating = statistics.mean(ratings)
    result.add(rating, pred_rating)
    i += 1
rmse[k].append(result.compute())
print "RMSE = " + str(rmse[k][-1])
worksheet.write(k - 2, p, rmse[k][-1])

m = 1
for k in range(3, 46):
    worksheet.write(k - 2, 0, k)
    result = statistics.mean(rmse[k])
    worksheet.write(m, 12, result)
    m += 1
    print str(k) + "NN: Average RMSE=%s" % result
workbook.close()

class TestPrediction(Test):
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)
        self.R = 3         # real rating (ground truth)
        self.R_PRED = 2.1  # predicted rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute)  # raises: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4)  # 1: GT rating, 4: predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]  # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        # even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1, 4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]  # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        assert_equal(rmse.compute(), 0.891067)

svd.compute(k=K, min_values=None, pre_normalize=None, mean_center=True,
            post_normalize=True)
svd_neig.compute(k=K, min_values=None, pre_normalize=None, mean_center=True,
                 post_normalize=True)

# Evaluate
rmse_svd = RMSE()
mae_svd = MAE()
rmse_svd_neig = RMSE()
mae_svd_neig = MAE()

i = 1
total = len(test.get())
print "Total Test ratings: %s" % total
for rating, item_id, user_id in test:
    try:
        pred_rating_svd = svd.predict(item_id, user_id)
        rmse_svd.add(rating, pred_rating_svd)
        mae_svd.add(rating, pred_rating_svd)
        pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
        if pred_rating_svd_neig is not nan:
            rmse_svd_neig.add(rating, pred_rating_svd_neig)
            mae_svd_neig.add(rating, pred_rating_svd_neig)
        print "\rProcessed test rating %d" % i,
        sys.stdout.flush()
        i += 1
    except KeyError:
        continue
rmse_svd_all.append(rmse_svd.compute())

#3.10
[items_full[str(x[0])].get_data() for x in films]

#3.11
get_name_item_reviewed(10, user_full, items_full)

#3.12
items_full[str(2628)].get_data()
users_for_star_wars = svd.recommend(2628, only_unknowns=True)
users_for_star_wars

#3.13
movies_reviewed_by_sw_rec = [get_name_item_reviewed(x[0], user_full, items_full)
                             for x in users_for_star_wars]
movies_flatten = [movie for movie_list in movies_reviewed_by_sw_rec
                  for movie in movie_list]
movie_aggregate = movies_by_category(movies_flatten, 3)
movies_sort = sorted(movie_aggregate, key=lambda x: x[1], reverse=True)
movies_sort

#3.14
from recsys.evaluation.prediction import RMSE

err = RMSE()
for rating, item_id, user_id in data.get():
    try:
        prediction = svd.predict(item_id, user_id)
        err.add(rating, prediction)
    except KeyError, k:
        continue
print 'RMSE is ' + str(err.compute())

class RecommendSystem(object):
    def __init__(self, filename, sep, **format):
        self.filename = filename
        self.sep = sep
        self.format = format
        # training parameters
        self.k = 100
        self.min_values = 10
        self.post_normalize = True
        self.svd = SVD()
        # flag: was a saved model loaded?
        self.is_load = False
        # data handling
        self.data = Data()
        # model evaluation
        self.rmse = RMSE()

    def get_data(self):
        """
        Load the data.
        :return: None
        """
        # if no saved model exists
        if not os.path.exists(tmpfile):
            # if the data file does not exist either
            if not os.path.exists(self.filename):
                sys.exit()
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            # load the data with Data()
            self.data.load(self.filename, sep=self.sep, format=self.format)
            train, test = self.data.split_train_test(percent=80)
            return train, test
        else:
            self.svd.load_model(tmpfile)
            self.is_load = True
            return None, None

    def train(self, train):
        """
        Train the model.
        :param train: training data
        :return: None
        """
        if not self.is_load:
            self.svd.set_data(train)
            self.svd.compute(k=self.k, min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=tmpfile[:-4])
        return None

    def rs_predict(self, itemid, userid):
        """
        Predict a rating.
        :param itemid: movie id
        :param userid: user id
        :return: predicted score
        """
        score = self.svd.predict(itemid, userid)
        print "Predicted score: %f" % score
        return score

    def recommend_to_user(self, userid):
        """
        Recommend movies to a user.
        :param userid: user id
        :return: None
        """
        recommend_list = self.svd.recommend(userid, is_row=False)
        # read the movie titles from the movies file
        movie_list = []
        for line in open(moviefile, "r"):
            movie_list.append(' '.join(line.split("::")[1:2]))
        # print the recommended movie titles and predicted scores
        for itemid, rate in recommend_list:
            print "Recommended %s, predicted score %s" % (movie_list[itemid], rate)
        return None

    def evaluation(self, test):
        """
        Evaluate the model.
        :param test: test set
        :return: None
        """
        # only evaluate when the model was not loaded from disk
        if not self.is_load:
            # each test entry is a <rating, movie, user> tuple
            for value, itemid, userid in test.get():
                try:
                    predict = self.rs_predict(itemid, userid)
                    self.rmse.add(value, predict)
                except KeyError:
                    continue
            # compute and report the error (RMSE)
            error = self.rmse.compute()
            print "Model error: %s" % error
        return None

# Load SVD from /tmp
svd2 = SVD(filename='/tmp/movielens')  # loading an already computed SVD model

# Predict user rating for a given user and movie
USERID = 2
ITEMID = 1  # Toy Story
rating1 = svd2.predict(ITEMID, USERID, 0.0, 5.0)
print 'Predicted rating=%f' % rating1

flag = 0
# Retrieve the actual rating for the given user and movie
for rating, item_id, user_id in data.get():
    if user_id == USERID and item_id == ITEMID:
        rat = rating
        #print 'Actual rating=%f' % rating
        flag = 1
        break
if flag == 1:
    print 'Actual rating=%f' % rat
else:
    sys.exit("No actual rating available")

# Evaluating the prediction
rmse = RMSE()
mae = MAE()
rmse.add(rating1, rat)
mae.add(rating1, rat)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()

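# For context: a saved model such as '/tmp/movielens' is produced beforehand via
# compute(savefile=...). A minimal sketch, assuming the MovieLens ratings path
# used in the earlier snippets:
svd = SVD()
svd.load_data(filename='./ml-1m/ratings.dat', sep='::',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
svd.compute(k=100, min_values=10, pre_normalize=None, mean_center=True,
            post_normalize=True, savefile='/tmp/movielens')
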
class TestPrediction(Test):
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)
        self.R = 3         # real rating (ground truth)
        self.R_PRED = 2.1  # predicted rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute)  # raises: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4)  # 1: GT rating, 4: predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]  # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        # even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1, 4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]  # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        assert_equal(rmse.compute(), 0.891067)

get_name_item_reviewed(10, user_full, items_full)

#3.12
items_full[str(2628)].get_data()
users_for_star_wars = svd.recommend(2628, only_unknowns=True)
users_for_star_wars

#3.13
movies_reviewed_by_sw_rec = [
    get_name_item_reviewed(x[0], user_full, items_full)
    for x in users_for_star_wars
]
movies_flatten = [
    movie
    for movie_list in movies_reviewed_by_sw_rec
    for movie in movie_list
]
movie_aggregate = movies_by_category(movies_flatten, 3)
movies_sort = sorted(movie_aggregate, key=lambda x: x[1], reverse=True)
movies_sort

#3.14
from recsys.evaluation.prediction import RMSE

err = RMSE()
for rating, item_id, user_id in data.get():
    try:
        prediction = svd.predict(item_id, user_id)
        err.add(rating, prediction)
    except KeyError, k:
        continue
print 'RMSE is ' + str(err.compute())

# (fragment — body of a loop over the test ratings; the enclosing "for ... :" and
#  "try:" statements are truncated in the source)
        if item_id in train_item.keys():
            for user in train_item[item_id].keys():
                sim = similarity(user_id, user)
                if sim >= 0:
                    if len(dist) < k:
                        dist[user] = sim
                    mindist = min(dist, key=dist.get)
                    if dist[mindist] < sim:
                        del dist[mindist]
                        dist[user] = sim
            ratings = []
            for user in dist.keys():
                ratings.append(train_user[user][item_id])
            print dist
            if len(dist) < 3:
                pred_rating = statistics.mean(train_user[user_id].values())
            else:
                pred_rating = statistics.mean(ratings)
            print pred_rating
            rmse.add(rating, pred_rating)
    except KeyError:
        continue

r = rmse.compute()
#rmsem.append(r)
print "RMSE=%s\n" % r
#print 'RMSE=%s' % statistics.mean(rmsem)
print("Running Time: %s seconds" % (time.time() - start_time))

# (fragment — continuation of a print of the predicted rating value; the start of
#  the call ending in "..., MAX_RATING)" is truncated in the source)
                  MAX_RATING)  # predicted rating value
print svd.get_matrix().value(ITEMID, USERID)  # real rating value

print ''
print 'GENERATING RECOMMENDATION'
print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False)

# Evaluation using prediction-based metrics
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()
kendall = KendallTau()
#decision = PrecisionRecallF1()

for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
        spearman.add(rating, pred_rating)
        kendall.add(rating, pred_rating)
    except KeyError:
        continue

print ''
print 'EVALUATION RESULT'
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
print 'Spearman\'s rho=%s' % spearman.compute()
print 'Kendall-tau=%s' % kendall.compute()
#print decision.compute()
print ''