Example #1
def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file,
              sep=',',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100,
                min_values=2,
                pre_normalize=None,
                mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #2
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K=100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #3
def evaluate(data, count=5, K=100):
    results = []

    for i in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print len(data.get()), len(train.get()), len(test.get())
        #test_in_train(test, train)
        #print train.get()
        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

        #Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                #print "keyerror: ===========================================================>"
                continue
        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print rsu
            results.append(rsu)
        except ValueError:
            # compute() raises ValueError when no predictions were collected
            print "error: no predictions were collected for this split"

    return results
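A short driver sketch for evaluate() above; the PERCENT_TRAIN constant it reads and the MovieLens path are assumptions, and the import follows python-recsys conventions:

from recsys.datamodel.data import Data

PERCENT_TRAIN = 80  # assumed module-level constant read by evaluate()

data = Data()
data.load('./ml-1m/ratings.dat', sep='::',
          format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

results = evaluate(data, count=5, K=100)
print 'mean RMSE over splits: %s' % (sum(r['RMSE'] for r in results) / len(results))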
Example #4
def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #5
def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #6
def eval_rmse(self):
    # Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in self.test.get():
        try:
            pred_rating = self.svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #7
def eval_reco(model, test):
    """ Compute RMSE and MAE on test set
    """

    #Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = model.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    return rmse, mae
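eval_reco() returns the metric objects rather than numbers, so the caller still has to invoke compute(); a minimal sketch, assuming an already trained svd model and a test split as in the surrounding examples:

rmse, mae = eval_reco(svd, test)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()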
Example #9
def ex1(dat_file=DATA_DIR + 'ml-1m-ratings.dat', pct_train=0.5):

    data = Data()
    data.load(dat_file,
              sep='::',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    # About the format parameter:
    #   'row': 1   -> matrix rows come from column 1 of the ratings file
    #   'col': 0   -> matrix columns come from column 0 of the ratings file
    #   'value': 2 -> matrix values (Mij) come from column 2 of the ratings file
    #   'ids': int -> row and column ids are integers (not strings)
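    # For instance, assuming the standard MovieLens 1M line layout
    # UserID::MovieID::Rating::Timestamp, the line "1::1193::5::978300760"
    # is stored as M[row=1193, col=1] = 5 (movies as rows, users as columns).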

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K,
                min_values=5,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    # MAE is the mean ABSOLUTE error; on this dataset it comes out around 1.09,
    # i.e. predictions are off by roughly one point on the 1-5 rating scale.
    mae = MAE()
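    # For reference (standard definitions over N test pairs):
    #   MAE  = (1/N) * sum(|rating_i - pred_i|)
    #   RMSE = sqrt((1/N) * sum((rating_i - pred_i)**2))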
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #10
def evaluate(_svd, _testData, verbose=False):
    global rmse, mae, rating, item_id, user_id, pred_rating
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = _svd.predict(item_id, user_id, MIN_VALUE=0, MAX_VALUE=10)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except Exception as e:
            print 'ERROR occurred:', e

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #11
def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #12
def evaluate(clf, _testData, verbose=False):

    rmse = RMSE()
    mae = MAE()
    numErrors = 0

    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = clf.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except KeyError as e:
            if verbose:
                print 'ERROR occurred:', e
            numErrors += 1

    print '\n%i/%i data points raised errors.' % (numErrors, len(_testData))
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #13
def ex1(dat_file='ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # About the format parameter:
    #   'row': 1   -> matrix rows come from column 1 of the ratings file
    #   'col': 0   -> matrix columns come from column 0 of the ratings file
    #   'value': 2 -> matrix values (Mij) come from column 2 of the ratings file
    #   'ids': int -> row and column ids are integers (not strings)

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(
        k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #14
File: day_07.py Project: lmlzk/ML
class RecommendSystem(object):
    def __init__(self, filename, sep, **format):
        # file information
        self.filename = filename
        self.sep = sep
        self.format = format

        # initialize the matrix factorization
        self.svd = SVD()

        # factorization parameters
        self.k = 100  # number of latent factors
        self.min_values = 10  # drop movies rated by fewer than 10 people
        self.post_normalize = False

        # flag: was a precomputed model loaded?
        self.load_model = False

        # initialize the RMSE metric
        self.rmse = RMSE()

    def get_data(self):
        # if no saved model exists yet, the data has to be loaded
        if not os.path.exists(filename):
            if not os.path.exists(self.filename):
                sys.exit()
            # load the data (using Data() rather than SVD.load_data)
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            data = Data()

            data.load(self.filename, sep=self.sep, format=self.format)

            # split the data set
            train, test = data.split_train_test(percent=80)

            return train, test

        else:
            # load the precomputed model directly
            self.svd.load_model(filename)

            # mark the model as loaded
            self.load_model = True

            return None, None

    def train(self, train):
        """
        Train the model
        :param train: training set
        :return:
        """
        if not self.load_model:
            # feed the training set to the SVD
            self.svd.set_data(train)
            # note: savefile expects the file name without its extension
            self.svd.compute(k=self.k,
                             min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=filename[:-4])
        return None

    def recommend_to_user(self, userid):
        """
        Recommend movies to a user
        :param userid: user ID
        :return: None
        """

        recommend_list = self.svd.recommend(userid, is_row=False)

        # print the movie titles and the predicted ratings

        # build the list of movie titles
        movies_list = []

        for line in open("./data/ml-1m/movies.dat", "r"):
            movies_list.append(' '.join(line.split("::")[1:2]))

        # walk through the recommended item ids
        for itemid, rating in recommend_list:

            print "Recommended movie: %s, predicted rating: %f" % (movies_list[itemid], rating)

        return None

    def rs_predict(self, userid, itemid):
        """
        Predict a rating
        :param userid: user ID
        :param itemid: item ID
        :return: predicted rating
        """
        score = self.svd.predict(itemid, userid)

        return score

    def evaluation(self, test):
        """
        Evaluate the model with RMSE
        :param test: test data
        :return: None
        """
        if not self.load_model:
            # each test tuple is <rating, row (itemid), col (userid)>
            for rating, itemid, userid in test.get():
                try:
                    # rating is the ground-truth value
                    score = self.rs_predict(userid, itemid)

                    # add every test prediction to the metric
                    self.rmse.add(rating, score)
                except KeyError:
                    continue

            error = self.rmse.compute()

            print "RMSE: %s" % error

        return None
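A minimal usage sketch for the class above; the module-level `filename` global that get_data() and train() reference, and the paths, are assumptions:

filename = './data/ml-1m/svd_model.zip'  # assumed global: path of the saved model

rs = RecommendSystem('./data/ml-1m/ratings.dat', '::',
                     col=0, row=1, value=2, ids=int)
train, test = rs.get_data()   # loads the ratings file, or the saved model if it exists
rs.train(train)               # no-op when a saved model was loaded
rs.evaluation(test)           # prints RMSE on the 20% held-out split
rs.recommend_to_user(1)       # prints recommended titles for user 1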
Example #15
                     mean_center=True,
                     post_normalize=True)

    # Evaluate
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print 'Total Test ratings: %s' % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id,
                                                    user_id)  #Koren & co.
            if pred_rating_svd_neig is not nan:
                rmse_svd_neig.add(rating, pred_rating_svd_neig)
                mae_svd_neig.add(rating, pred_rating_svd_neig)

            print "\rProcessed test rating %d" % i,
            sys.stdout.flush()

            i += 1
        except KeyError:
            continue
Example #16
        result = RMSE()
        i = 0
        for rating, item_id, user_id in test:
            if len(dist[i]) < 5:
                if item_id in train_item.keys():
                    pred_rating = statistics.mean(train_item[item_id].values())
                elif user_id in train_user.keys():
                    pred_rating = statistics.mean(train_user[user_id].values())
                else:
                    pred_rating = average
            else:
                ratings = []
                for j in range(0, k):
                    if j == len(dist[i]):
                        break
                    ratings.append(train_item[dist[i][j][0]][user_id])
                pred_rating = statistics.mean(ratings)
            result.add(rating, pred_rating)
            i += 1
        rmse[k].append(result.compute())
        print "RMSE = "+str(rmse[k][-1])
        worksheet.write(k-2, p, rmse[k][-1])

m = 1
for k in range(3, 46):
    worksheet.write(k-2, 0, k)
    result = statistics.mean(rmse[k])
    worksheet.write(m, 12, result)
    m += 1
    print str(k)+"NN: Average RMSE=%s" % result
workbook.close()
Example #17
class TestPrediction(Test):
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3  # Real Rating (ground truth)
        self.R_PRED = 2.1  # Predicted Rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute)  #Raise: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4)  #1: GT rating, 4: Predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        #Even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1, 4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        assert_equal(rmse.compute(), 0.891067)
Example #18
    svd.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)
    svd_neig.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)

    # Evaluate
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print "Total Test ratings: %s" % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
            if pred_rating_svd_neig is not nan:
                rmse_svd_neig.add(rating, pred_rating_svd_neig)
                mae_svd_neig.add(rating, pred_rating_svd_neig)

            print "\rProcessed test rating %d" % i,
            sys.stdout.flush()

            i += 1
        except KeyError:
            continue

        rmse_svd_all.append(rmse_svd.compute())
Example #19
#3.10
[items_full[str(x[0])].get_data() for x in films]

#3.11
get_name_item_reviewed(10,user_full,items_full)

#3.12
items_full[str(2628)].get_data()
users_for_star_wars = svd.recommend(2628,only_unknowns=True)
users_for_star_wars

#3.13
movies_reviewed_by_sw_rec  =[get_name_item_reviewed(x[0],user_full,items_full) for x in users_for_star_wars]
movies_flatten = [movie for movie_list in movies_reviewed_by_sw_rec for movie in movie_list]
movie_aggregate = movies_by_category(movies_flatten, 3)
movies_sort = sorted(movie_aggregate,key=lambda x: x[1], reverse=True)
movies_sort

#3.14
from recsys.evaluation.prediction import RMSE
err = RMSE()
for rating, item_id, user_id in data.get():
    try:
        prediction = svd.predict(item_id, user_id)
        err.add(rating, prediction)
    except KeyError:
        continue

print 'RMSE is ' + str(err.compute())
Example #20
class RecommendSystem(object):
    def __init__(self, filename, sep, **format):
        self.filename = filename
        self.sep = sep
        self.format = format

        # training parameters
        self.k = 100
        self.min_values = 10
        self.post_normalize = True

        self.svd = SVD()

        # flag: was a precomputed model loaded?
        self.is_load = False

        # data handling
        self.data = Data()

        # model evaluation metric
        self.rmse = RMSE()

    def get_data(self):
        """
        Load the data
        :return: None
        """
        # if no saved model exists
        if not os.path.exists(tmpfile):
            # if the data file does not exist either, bail out
            if not os.path.exists(self.filename):
                sys.exit()
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            # load the data with Data()
            self.data.load(self.filename, sep=self.sep, format=self.format)
            train, test = self.data.split_train_test(percent=80)
            return train, test
        else:
            self.svd.load_model(tmpfile)
            self.is_load = True
            return None, None

    def train(self, train):
        """
        Train the model
        :param train: training data
        :return: None
        """
        if not self.is_load:
            self.svd.set_data(train)
            self.svd.compute(k=self.k,
                             min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=tmpfile[:-4])
        return None

    def rs_predict(self, itemid, userid):
        """
        Predict a rating
        :param itemid: movie id
        :param userid: user id
        :return: predicted score
        """
        score = self.svd.predict(itemid, userid)
        print "Predicted score: %f" % score
        return score

    def recommend_to_user(self, userid):
        """
        Recommend movies to a user
        :param userid: user id
        :return: None
        """
        recommend_list = self.svd.recommend(userid, is_row=False)

        # read the movie titles from the movies file
        movie_list = []

        for line in open(moviefile, "r"):
            movie_list.append(' '.join(line.split("::")[1:2]))

        # print each recommended title with its predicted score
        for itemid, rate in recommend_list:
            print "Recommended: %s, predicted score: %s" % (movie_list[itemid], rate)
        return None

    def evaluation(self, test):
        """
        Evaluate the model
        :param test: test set
        :return: None
        """
        # only evaluate when the model was not loaded from disk
        if not self.is_load:

            # each test tuple is <rating, movie, user>
            for value, itemid, userid in test.get():
                try:
                    predict = self.rs_predict(itemid, userid)
                    self.rmse.add(value, predict)
                except KeyError:
                    continue
            # compute and return the error (RMSE)
            error = self.rmse.compute()

            print "Model RMSE: %s" % error

        return None
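A similar usage sketch for this variant; the module-level `tmpfile` and `moviefile` globals it references are assumptions shown here with illustrative paths:

tmpfile = '/tmp/movielens_svd.zip'     # assumed global: saved-model path
moviefile = './data/ml-1m/movies.dat'  # assumed global: movie-titles file

rs = RecommendSystem('./data/ml-1m/ratings.dat', '::',
                     col=0, row=1, value=2, ids=int)
train, test = rs.get_data()
rs.train(train)
rs.evaluation(test)
rs.recommend_to_user(1)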
Example #21
#Load SVD from /tmp
svd2 = SVD(filename='/tmp/movielens') # Loading already computed SVD model

#Predict User rating for given user and movie:
USERID = 2
ITEMID = 1  # Toy Story
rating1 = svd2.predict(ITEMID, USERID, 0.0, 5.0)
print 'Predicted rating=%f' % rating1

flag = 0
# Retrieve actual rating for given user and movie
for rating, item_id, user_id in data.get():
    if user_id == USERID and item_id == ITEMID:
        rat = rating
        # print 'Actual rating=%f' % rating
        flag = 1
        break

if flag == 1:
    print 'Actual rating=%f' % rat
else:
    sys.exit("No actual rating available")

#Evaluating prediction
rmse = RMSE()
mae = MAE()
rmse.add(rating1, rat)
mae.add(rating1, rat)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
Example #22
class TestPrediction(Test):
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3        # Real Rating (ground truth)
        self.R_PRED = 2.1 # Predicted Rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute) #Raise: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4) #1: GT rating, 4: Predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED)+1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        #Even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1,4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED)+1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        assert_equal(rmse.compute(), 0.891067)
Example #23
get_name_item_reviewed(10, user_full, items_full)

#3.12
items_full[str(2628)].get_data()
users_for_star_wars = svd.recommend(2628, only_unknowns=True)
users_for_star_wars

#3.13
movies_reviewed_by_sw_rec = [
    get_name_item_reviewed(x[0], user_full, items_full)
    for x in users_for_star_wars
]
movies_flatten = [
    movie for movie_list in movies_reviewed_by_sw_rec for movie in movie_list
]
movie_aggregate = movies_by_category(movies_flatten, 3)
movies_sort = sorted(movie_aggregate, key=lambda x: x[1], reverse=True)
movies_sort

#3.14
from recsys.evaluation.prediction import RMSE
err = RMSE()
for rating, item_id, user_id in data.get():
    try:
        prediction = svd.predict(item_id, user_id)
        err.add(rating, prediction)
    except KeyError:
        continue

print 'RMSE is ' + str(err.compute())
Example #24
        if item_id in train_item.keys():
            for user in train_item[item_id].keys():
                sim = similarity(user_id, user)
                if sim >= 0:
                    if len(dist) < k:
                        dist[user] = sim
                    mindist = min(dist, key=dist.get)
                    if dist[mindist] < sim:
                        del dist[mindist]
                        dist[user] = sim

            ratings = []
            for user in dist.keys():
                ratings.append(train_user[user][item_id])

        print dist
        if len(dist) < 3:
            pred_rating = statistics.mean(train_user[user_id].values())
        else:
            pred_rating = statistics.mean(ratings)
        print pred_rating

        rmse.add(rating, pred_rating)
    except KeyError:
        continue
r = rmse.compute()
#rmsem.append(r)
print "RMSE=%s\n" % r
#print 'RMSE=%s' % statistics.mean(rmsem)
print("Running Time: %s seconds" % (time.time() - start_time))
Example #25
                  MAX_RATING)  # predicted rating value
print svd.get_matrix().value(ITEMID, USERID)  # real rating value

print ''
print 'GENERATING RECOMMENDATION'
print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False)

#Evaluation using prediction-based metrics
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()
kendall = KendallTau()
#decision = PrecisionRecallF1()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
        spearman.add(rating, pred_rating)
        kendall.add(rating, pred_rating)
    except KeyError:
        continue

print ''
print 'EVALUATION RESULT'
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
print 'Spearman\'s rho=%s' % spearman.compute()
print 'Kendall-tau=%s' % kendall.compute()
#print decision.compute()
print ''