Exemplo n.º 1
0
def getAllUserRating(n=1, k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair of the test split.

    Args:
        n: Dataset selector. ``1`` loads ``ml-100k/u1.base`` and
            ``ml-100k/u1.test``; any other value reads
            ``ml-1m/ratings.dat`` and splits it at random, sending roughly
            70% of the lines to the training set and the rest to the test
            set.
        k: Forwarded to ``getRating`` (presumably the maximum number of
            neighbouring users to use -- confirm against ``getRating``).
        similarity: Similarity function forwarded to ``getRating``.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` rows.
        The same list is handed to ``SaveRecords`` before it is returned.
    """
    if n == 1:
        traindata = loadMovieLensTrain('ml-100k/u1.base')  # training set
        testdata = loadMovieLensTest('ml-100k/u1.test')  # test set
    else:
        pivot = 0.7
        traindata = {}
        testdata = {}
        for line in loadfile('ml-1m/ratings.dat'):
            user, movie, rating, _ = line.split('::')
            # Randomly assign each rating to the train or the test split.
            target = traindata if random.random() < pivot else testdata
            target.setdefault(user, {})[movie] = int(rating)

    records = []
    for userid in testdata:
        for item in testdata[userid]:
            # Forward `similarity` so a caller-supplied measure is honoured
            # instead of being silently replaced by getRating's default.
            rating = getRating(traindata, userid, item, k, similarity)
            records.append([userid, item, testdata[userid][item], rating])
    SaveRecords(records)
    return records
Exemplo n.º 2
0
def getAllUserRating(fileTrain='u1.base', fileTest='u1.test',k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair found in the test file.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        k: Forwarded to ``getRating`` (presumably the maximum number of
            neighbouring users to use -- confirm against ``getRating``).
        similarity: Similarity function forwarded to ``getRating``.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` rows.
        The number of rows is printed once all predictions are done.
    """
    traindata = loadMovieLensTrain(fileTrain)  # training set
    testdata = loadMovieLensTest(fileTest)  # test set
    records = []
    for userid in testdata:
        for item in testdata[userid]:
            # Forward `similarity` so a caller-supplied measure is honoured
            # instead of being silently replaced by getRating's default.
            rating = getRating(traindata, userid, item, k, similarity)
            records.append([userid, item, testdata[userid][item], rating])
    # One record is appended per prediction, so the list length is the count.
    print("-------------Completed!!-----------", len(records))
    return records
Exemplo n.º 3
0
def getAllUserRating(trainfile, testfile, k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair found in the test file.

    Args:
        trainfile: File name passed to ``loadMovieLensTrain``.
        testfile: File name passed to ``loadMovieLensTest``.
        k: Forwarded to ``getRating`` (presumably the maximum number of
            neighbouring users to use -- confirm against ``getRating``).
        similarity: Similarity function forwarded to ``getRating``.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` rows,
        which is also passed to ``SaveRecords`` before being returned.
    """
    traindata = loadMovieLensTrain(trainfile)  # training set
    testdata = loadMovieLensTest(testfile)  # test set

    records = []
    for userid in testdata:
        rated_items = testdata[userid]
        for item in rated_items:
            # Predict from the training data, then pair with the true rating.
            predicted = getRating(traindata, userid, item, k, similarity)
            records.append([userid, item, rated_items[item], predicted])

    SaveRecords(records)
    return records
Exemplo n.º 4
0
def getAllUserRating(fileTrain='u1.base', fileTest='u1.test', fileResult='result.txt', similarity=sim_pearson, k=20):
    """Predict every test-set rating and append the predictions to a file.

    A separator line of dashes is appended to ``fileResult`` first, followed
    by one ``userid<TAB>item<TAB>rating`` line per prediction. The number of
    predictions is printed when the run finishes.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        fileResult: Path of the output file, opened in append mode.
        similarity: Similarity function forwarded to ``getRating``.
        k: Forwarded to ``getRating``; defaults to the previously hard-coded
            value 20 (presumably the maximum number of neighbouring users --
            confirm against ``getRating``).
    """
    prefer1 = loadMovieLensTrain(fileTrain)  # training set
    prefer2 = loadMovieLensTest(fileTest)  # test set
    inAllnum = 0

    # The context manager closes the file even if a prediction raises.
    with open(fileResult, 'a') as out:
        out.write("%s\n"%("------------------------------------------------------"))
        for userid in prefer2:
            for item in prefer2[userid]:
                # Forward `similarity` so a caller-supplied measure is
                # honoured instead of being silently ignored.
                rating = getRating(prefer1, userid, item, k, similarity)
                out.write('%s\t%s\t%s\n'%(userid, item, rating))
                inAllnum += 1
    print("-------------Completed!!-----------", inAllnum)
Exemplo n.º 5
0
def getAllUserRating(fileTrain='u1.base',
                     fileTest='u1.test',
                     fileResult='result.txt',
                     similarity=sim_pearson,
                     k=20):
    """Predict every test-set rating and append the predictions to a file.

    A separator line of dashes is appended to ``fileResult`` first, followed
    by one ``userid<TAB>item<TAB>rating`` line per prediction. The number of
    predictions is printed when the run finishes.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        fileResult: Path of the output file, opened in append mode.
        similarity: Similarity function forwarded to ``getRating``.
        k: Forwarded to ``getRating``; defaults to the previously hard-coded
            value 20 (presumably the maximum number of neighbouring users --
            confirm against ``getRating``).
    """
    prefer1 = loadMovieLensTrain(fileTrain)
    prefer2 = loadMovieLensTest(fileTest)
    inAllnum = 0

    # Using `with` guarantees the handle is closed if getRating raises.
    with open(fileResult, 'a') as result_file:
        result_file.write(
            "%s\n" %
            ("------------------------------------------------------"))

        for userid in prefer2:
            for item in prefer2[userid]:
                # Predict the user's rating based on the train data, using
                # the similarity measure the caller asked for.
                rating = getRating(prefer1, userid, item, k, similarity)
                result_file.write('%s\t%s\t%s\n' % (userid, item, rating))
                inAllnum = inAllnum + 1
    print("-------------Completed!!-----------", inAllnum)
Exemplo n.º 6
0
 def readData(self, train=None, test=None):
     """Load the training and test sets into ``traindata`` / ``testdata``.

     Args:
         train: Training file name. When falsy, the file name already
             stored in ``self.trainfile`` is used.
         test: Test file name. When falsy, the file name already stored in
             ``self.testfile`` is used.
     """
     self.trainfile = train or self.trainfile
     self.testfile = test or self.testfile
     # Load from the resolved attributes, not the raw arguments: the raw
     # arguments are None whenever the caller relies on the stored names.
     self.traindata = loadMovieLensTrain(self.trainfile)  # training set
     self.testdata = loadMovieLensTest(self.testfile)  # test set
Exemplo n.º 7
0
def loadMovieLensResult(fileName='result.txt'):
    """Load predicted ratings from a tab-separated result file.

    Each non-blank line must hold exactly three tab-separated fields:
    user id, movie id and rating. Blank lines are skipped.

    Args:
        fileName: File name, resolved relative to the current directory.

    Returns:
        A nested dict ``{userid: {movieid: rating}}`` in which the ids are
        strings and the ratings are floats.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            tab-separated fields, or its rating is not a number.
    """
    str1 = './'  # directory prefix, relative to the working directory
    prefer = {}
    # The context manager closes the file even if a line fails to parse.
    with open(str1 + fileName, 'r') as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            userid, movieid, rating = line.split('\t')
            # float() ignores the trailing newline left on the last field.
            prefer.setdefault(userid, {})[movieid] = float(rating)
    return prefer


if __name__ == "__main__":
    # Compare the stored predictions with the test ratings. A prediction
    # counts as correct when it is less than one rating point away from the
    # actual value.
    print("\n--------------准确率生成中 -----------\n")
    count = 0
    win_count = 0
    prefer_result = loadMovieLensResult()
    prefer_test = loadMovieLensTest()
    for userid in prefer_result:
        # Membership tests replace the original pairwise scan over every
        # user and every item; the same pairs are compared.
        if userid not in prefer_test:
            continue
        predicted = prefer_result[userid]
        actual = prefer_test[userid]
        for movieid in predicted:
            if movieid not in actual:
                continue
            dis = abs(predicted[movieid] - actual[movieid])
            if dis < 1:
                win_count += 1
            count += 1
    # Report 0.0 instead of raising ZeroDivisionError when the two data
    # sets share no (user, movie) pair.
    accuracy = float(win_count) / count if count else 0.0
    print('准确率为:' + str(accuracy))
    print('预测次数为:' + str(count))
    print('正确预测次数为:' + str(win_count))