Exemplo n.º 1
0
def testSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Grid-search the model hyper-parameters and log the test-set RMSE.

    For every combination drawn from the four lists, a model is trained with
    ``LearningBiasLFM``, each rating of the test split is predicted with
    ``Predict``, and the RMSE is printed and appended as one comma-separated
    line (F, step, LearnRating, penalty, mu, RMSE) to ``result_ItemCF.txt``.

    Args:
        F_list: candidate values forwarded to the trainer as ``F``.
        step_list: candidate values forwarded to the trainer as ``step``.
        LearnRating_list: candidate values forwarded as ``LearnRating``.
        penalty_list: candidate values forwarded as ``penalty``.

    Raises:
        ZeroDivisionError: if a combination is evaluated while the test
            split is empty.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    # Inverted table user -> [(item, rating), ...] built from the training split.
    user_items = {}
    for pair, score in trainDataSet.items():
        user_items.setdefault(pair[0], []).append((pair[1], score))

    mu = calMu(trainDataSet)

    # Same visiting order as four nested loops, flattened for readability.
    grid = [
        (F, step, LearnRating, penalty)
        for F in F_list
        for step in step_list
        for LearnRating in LearnRating_list
        for penalty in penalty_list
    ]

    for F, step, LearnRating, penalty in grid:
        bu, bi, p, q = LearningBiasLFM(user_items, trainDataSet, testDataSet,
                                       F, step, LearnRating, penalty, mu)

        # Plain running total (not sum()) so the float result is unchanged.
        squaredError = 0
        for pair, actual in testDataSet.items():
            predicted = Predict(pair[0], pair[1], p, q, bu, bi, mu)
            squaredError += (predicted - actual) ** 2

        RMSE = math.sqrt(squaredError / len(testDataSet))
        print('RMSE=%s' % RMSE)

        fields = (F, step, LearnRating, penalty, mu, RMSE)
        with open('result_ItemCF.txt', 'a') as fileObject:
            fileObject.write(','.join(str(field) for field in fields) + '\n')
Exemplo n.º 2
0
def initializeSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Train on train+test ratings and store the parameters of every grid point.

    The two splits are merged (on duplicate keys the test-split value wins),
    a model is trained with ``LearningBiasLFM`` for each hyper-parameter
    combination, and the returned ``bu``, ``bi``, ``p`` and ``q`` are written
    through the ``FO.Write*Dict`` helpers into
    ``SVD++/Model_Parameter/<F>-<step>-<LearnRating>-<penalty>``.

    Args:
        F_list: candidate values forwarded to the trainer as ``F``.
        step_list: candidate values forwarded to the trainer as ``step``.
        LearnRating_list: candidate values forwarded as ``LearnRating``.
        penalty_list: candidate values forwarded as ``penalty``.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    dataSet = dict(trainDataSet)
    dataSet.update(testDataSet)

    mu = calMu(dataSet)

    # user -> [(item, rating), ...] over the merged ratings.
    user_items = {}
    for pair, score in dataSet.items():
        user_items.setdefault(pair[0], []).append((pair[1], score))

    # Same visiting order as four nested loops, flattened for readability.
    grid = [
        (F, step, LearnRating, penalty)
        for F in F_list
        for step in step_list
        for LearnRating in LearnRating_list
        for penalty in penalty_list
    ]

    for F, step, LearnRating, penalty in grid:
        print(F, step, LearnRating, penalty)
        # An empty dict is passed where the trainer takes its test split.
        bu, bi, p, q = LearningBiasLFM(user_items, dataSet, dict(), F, step,
                                       LearnRating, penalty, mu)

        suffix = '-'.join(str(part) for part in (F, step, LearnRating, penalty))
        path = 'SVD++/Model_Parameter/' + suffix
        if not os.path.exists(path):
            os.makedirs(path)

        FO.WriteBuDict(path, bu)
        FO.WriteBiDict(path, bi)
        FO.WritePDict(path, p, F)
        FO.WriteQDict(path, q, F)
Exemplo n.º 3
0
def fillMatrix(F_list, step_list, LearnRating_list, penalty_list, N = 30):
    """Append up to ``N`` predicted ratings per user for every stored model.

    For each hyper-parameter combination the parameters are loaded with
    ``ReadParameter``; for each user, every item the user has not rated in
    the merged train+test ratings is scored with ``Predict``. Up to ``N`` of
    the items that received a non-NaN prediction are picked at random and
    appended as ``user::item::rounded_rating`` lines to
    ``SVD++/Matrix/<F>-<step>-<LearnRating>-<penalty>/new_ratings.txt``.

    Changes compared with the previous implementation:
      * ``N`` is no longer overwritten inside the user loop, so one user with
        few candidates does not shrink the sample for all later users.
      * Items are sampled from the ones that actually have a prediction;
        sampling from all unrated items could pick an item whose NaN
        prediction had been skipped and fail with ``KeyError``.
      * ``random.sample`` receives a list; passing a set raises ``TypeError``
        on Python 3.11+.
      * The items rated by each user are grouped once up front instead of
        rescanning the whole data set for every user and grid point.

    Args:
        F_list: candidate values of ``F``.
        step_list: candidate values of ``step``.
        LearnRating_list: candidate values of ``LearnRating``.
        penalty_list: candidate values of ``penalty``.
        N: maximum number of predicted ratings written per user.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    userList = FO.readUserList('data/users.txt')
    itemList = FO.readItemList('data/movies.txt')  # item catalogue
    dataSet = {**trainDataSet, **testDataSet}
    mu = calMu(dataSet)

    # user -> set of items already rated, built in a single pass.
    ratedByUser = dict()
    for user_item in dataSet:
        ratedByUser.setdefault(user_item[0], set()).add(user_item[1])
    nothingRated = set()

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F, step, LearnRating, penalty)
                    path = 'SVD++/Matrix/' + str(F) + '-' + str(step) + '-' + str(LearnRating) + '-' + str(penalty)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    bu, bi, p, q = ReadParameter(F, step, LearnRating, penalty)
                    for user in userList:
                        print(user)
                        UnRatedList = itemList - ratedByUser.get(user, nothingRated)

                        # item -> rounded prediction; NaN predictions are dropped.
                        rank = dict()
                        for item in UnRatedList:
                            rating = Predict(user, item, p, q, bu, bi, mu)
                            if math.isnan(float(rating)):
                                continue
                            rank[item] = round(rating)
                        if not rank:
                            continue

                        # Per-user sample size; N itself stays untouched.
                        candidates = list(rank)
                        chooseList = random.sample(candidates, min(N, len(candidates)))
                        with open(path + '/new_ratings.txt', 'a') as fileObject:
                            fileObject.writelines(
                                str(user) + '::' + str(choose) + '::' + str(rank[choose]) + '\n'
                                for choose in chooseList)
Exemplo n.º 4
0
def initializeSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Train on train+test ratings and store the parameters of every grid point.

    The two splits are merged (on duplicate keys the test-split value wins),
    a model is trained with ``LearningBiasLFM`` for each hyper-parameter
    combination, and the returned ``bu``, ``bi``, ``p`` and ``q`` are written
    through the ``FO.Write*Dict`` helpers into
    ``SVD/Model_Parameter/<F>-<step>-<LearnRating>-<penalty>``.

    Args:
        F_list: candidate values forwarded to the trainer as ``F``.
        step_list: candidate values forwarded to the trainer as ``step``.
        LearnRating_list: candidate values forwarded as ``LearnRating``.
        penalty_list: candidate values forwarded as ``penalty``.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    dataSet = dict(trainDataSet)
    dataSet.update(testDataSet)
    mu = calMu(dataSet)

    # Same visiting order as four nested loops, flattened for readability.
    grid = [
        (F, step, LearnRating, penalty)
        for F in F_list
        for step in step_list
        for LearnRating in LearnRating_list
        for penalty in penalty_list
    ]

    for F, step, LearnRating, penalty in grid:
        print(F, step, LearnRating, penalty)
        # An empty dict is passed where the trainer takes its test split.
        bu, bi, p, q = LearningBiasLFM(dataSet, dict(), F, step, LearnRating,
                                       penalty, mu)

        suffix = '-'.join(str(part) for part in (F, step, LearnRating, penalty))
        path = 'SVD/Model_Parameter/' + suffix
        if not os.path.exists(path):
            os.makedirs(path)

        FO.WriteBuDict(path, bu)
        FO.WriteBiDict(path, bi)
        FO.WritePDict(path, p, F)
        FO.WriteQDict(path, q, F)
Exemplo n.º 5
0
def calUserRatingMean(dataSet):
    """Return the mean rating of every user listed in ``data/users.txt``.

    Ratings are bucketed by user in one pass over ``dataSet`` rather than
    rescanning the whole mapping once per user.

    Args:
        dataSet: mapping whose keys carry the user id at index 0 and whose
            values are the ratings.

    Returns:
        dict with one entry per listed user: the mean of that user's ratings
        in ``dataSet``, or 0 when the user has no ratings there (this case
        used to raise ``ZeroDivisionError``).
    """
    userList = FO.readUserList('data/users.txt')

    # user -> ratings, in dataSet iteration order so sums are unchanged.
    ratingsByUser = dict()
    for user_item, value in dataSet.items():
        ratingsByUser.setdefault(user_item[0], []).append(value)

    userMeanDict = dict()
    for user in userList:
        print(user)
        UserRating = ratingsByUser.get(user)
        if not UserRating:
            # NOTE(review): 0 is a placeholder for "no ratings"; confirm that
            # consumers of the written means treat it that way.
            userMeanDict[user] = 0
            continue
        userMeanDict[user] = sum(UserRating) / len(UserRating)
    return userMeanDict


if __name__ == '__main__':
    # Per-user mean rating over the merged train + test ratings
    # (on duplicate keys the test-split value wins).
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    #addDataSet = FO.readDataSet('SVD++/Matrix/5-5-0.01-0.1/new_ratings.txt')
    trainDataSet = {**trainDataSet, **testDataSet}
    userMeanDict = calUserRatingMean(trainDataSet)
    FO.WirteRatingMean('UserCF/UserMean/DataSetUserRating_mean.txt',
                       userMeanDict)
    #calUserSimilarity(trainDataSet)

    # Disabled steps, previously kept as bare string literals:
    #   userList = FO.readUserList('data/users.txt')
    #
    #   userMeanDict =calUserRatingMean(trainDataSet)
    #   FO.WirteUserRatingMean('trainDataSetUserRatingMean.txt',userMeanDict)
    #
    #   data = FO.readFromFile('data/ratings.txt', 'ratings')
    #   allUserMeanDict = calUserRatingMean(data)
    #   trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    #   testDataSet = FO.readDataSet('UserCF/testDataSet.txt')
    #   calUserSimilarity(trainDataSet,'explicit',cosSim)
    #
    #   trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    #   testDataSet = FO.readDataSet('UserCF/testDataSet.txt')
    #   itemList = FO.readItemList('data/movies.txt')
    #   rank = predictSystem(trainDataSet, testDataSet, 1, itemList)
    #   print(rank)

    timeList = []  # timestamps recorded while the program runs
    timeList.append(datetime.datetime.now())
    # Load the UserCF splits used for the evaluation grid.
    trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    testDataSet = FO.readDataSet('UserCF/testDataSet.txt')
    KList = [10, 20, 30, 40]
    NList = [10, 20, 30, 40]
    type_list = ['implicit', 'cosSim', 'ecludSim']
    # The loop variable is not called `type`, which would shadow the builtin
    # for the whole module while the script runs.
    for simType in type_list:
        for k in KList:
            for n in NList:
                recall, precision, coverage, popularity = testSystem(
                    trainDataSet, testDataSet, K=k, N=n, type=simType)
                # One CSV line per (K, N) pair, appended to the per-type file.
                row = (k, n, recall, precision, coverage, popularity)
                with open('UserCF/Result/result_' + simType + '.txt',
                          'a') as fileObject:
                    fileObject.write(','.join(map(str, row)) + '\n')
Exemplo n.º 7
0
            W = FO.readItemSimilarity(item, type)  # 获取当前用户与其他用户的相似度
            topItem = dict(
                sorted(W.items(), key=operator.itemgetter(1),
                       reverse=True)[:K])
            topSimItem = set([items[1] for items, value in topItem.items()])
            topLoveItem = userLove & topSimItem
            if len(topLoveItem) == 0:
                continue
            # print(topLoveItem)
            for LoveItem in topLoveItem:
                wji = W[item, LoveItem]
                rui = dataSet[user, LoveItem]
                rank[item] = rank.get(item, 0) + wji * rui
    return dict(
        sorted(rank.items(), key=operator.itemgetter(1), reverse=True)[:N])


if __name__ == '__main__':
    # Load the ItemCF train/test splits.
    trainDataSet = FO.readDataSet('ItemCF/trainDataSet.txt')
    testDataSet = FO.readDataSet('ItemCF/testDataSet.txt')

    # Disabled similarity pre-computation, kept for reference:
    #   calItemSimilarity(trainDataSet, 'implicit')            # implicit similarity
    #   calItemSimilarity(trainDataSet, 'explicit', cosSim)    # explicit, cosine similarity
    #   calItemSimilarity(trainDataSet, 'explicit', ecludSim)  # explicit, Euclidean distance

    itemList = FO.readItemList('data/movies.txt')

    # Print the predictSystem result for third argument 1..10
    # (presumably user ids - confirm against predictSystem).
    for i in range(1, 10 + 1):
        rank = predictSystem(trainDataSet, testDataSet, i, itemList)
        print(rank)
Exemplo n.º 8
0
            value = sum(itemRating) / len(itemRating)
        itemMeanDict[item] = value
    return itemMeanDict

def calUserRatingMean(dataSet):
    """Return the mean rating of every user listed in ``data/users.txt``.

    Ratings are bucketed by user in one pass over ``dataSet`` rather than
    rescanning the whole mapping once per user.

    Args:
        dataSet: mapping whose keys carry the user id at index 0 and whose
            values are the ratings.

    Returns:
        dict with one entry per listed user: the mean of that user's ratings
        in ``dataSet``, or 0 when the user has no ratings there (this case
        used to raise ``ZeroDivisionError``).
    """
    userList = FO.readUserList('data/users.txt')

    # user -> ratings, in dataSet iteration order so sums are unchanged.
    ratingsByUser = dict()
    for user_item, value in dataSet.items():
        ratingsByUser.setdefault(user_item[0], []).append(value)

    userMeanDict = dict()
    for user in userList:
        print(user)
        UserRating = ratingsByUser.get(user)
        if not UserRating:
            # NOTE(review): 0 is a placeholder for "no ratings"; confirm that
            # consumers of the written means treat it that way.
            userMeanDict[user] = 0
            continue
        userMeanDict[user] = sum(UserRating) / len(UserRating)
    return userMeanDict


if __name__ == '__main__':
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    addDataSet = FO.readDataSet('SVD++/Matrix/10-10-0.01-0.1/new_ratings.txt')
    trainDataSet = {**trainDataSet, **addDataSet}


    itemMeanDict = calItemRatingMean(trainDataSet)
    FO.WirteRatingMean('ItemCF/ItemMean/add_trainDataSet_mean.txt', itemMeanDict)

    userMeanDict = calUserRatingMean(trainDataSet)
    FO.WirteRatingMean('ItemCF/UserMean/trainDataSet_mean.txt', userMeanDict)


    calItemSimilarity(trainDataSet)
    '''
    itemMeanDict = calItemRatingMean(trainDataSet)