示例#1
0
 def test1(self):
     import numpy as np
     fileIndex = 1
     for i in xrange(1):
         #             userEui = np.loadtxt(ROOT_DIRECTORY + "/userEui" + str(fileIndex) + ".txt")
         userEui = np.loadtxt(ROOT_DIRECTORY + "/userEui.txt")
         #             wsEui = np.loadtxt(ROOT_DIRECTORY + "/wsEui" + str(fileIndex) + ".txt")
         wsEui = np.loadtxt(ROOT_DIRECTORY + "/wsEui.txt")
         fileIndex = fileIndex + 1
         alpha = 0.4
         count = 0
         totalQos = 0
         testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt')
         minMae = 1000
         minAlpha = 10
         for i in xrange(11):
             eui = []
             alpha = i / 10.0
             totalQos = 0
             for index, value in enumerate(userEui):
                 totalQos = totalQos + testArr[index][2]
                 eui.append(testArr[index][2] - alpha * value -
                            (1 - alpha) * wsEui[index])
             eui = np.array(eui)
             mae, rmse = paper.maeAndRmse(eui)
             if mae < minMae:
                 minMae = mae
                 minAlpha = alpha
             print "alpha:" + str(alpha) + "\t" + "MAE:" + str(
                 mae) + "\tRMSE:" + str(rmse) + "\tNMAE:" + str(
                     totalQos / len(testArr))
         print "最小mae:" + str(minMae) + "\t alpha=" + str(minAlpha)
示例#2
0
 def testTp(self):
     simCalMethod = paper.simPCC
     sparseness = 20
     fileNumbers = 10
     for i in range(1, fileNumbers + 1):
         #文件对象
         print i,
         euiFileName = 'throught/ipcc/euislopeone-%d-%d.txt' % (sparseness,
                                                                i)
         pfEui = open(euiFileName, 'w')
         #load data
         trainFileName = r'throught/training%d-%d.txt' % (sparseness, i)
         trainArrayObj = paper.createArrayObj(trainFileName)
         testFileName = r'throught/test%d-%d.txt' % (sparseness, i)
         testArrayObj = paper.loadTest(testFileName)
         #相似度矩阵数据
         wsSimFileName = 'throught/ipcc/simArrayWs-%d-%d.txt' % (sparseness,
                                                                 i)
         wsSimArrayObj = paper.createSimArray(trainArrayObj.T, simCalMethod)
         paper.save(wsSimArrayObj, wsSimFileName)
         #            wsSimArrayObj = paper.load(wsSimFileName)
         #计算预测准确
         print calMaeAndRmse(trainArrayObj, testArrayObj, wsSimArrayObj,
                             pfEui)
         pfEui.close()
     print 'ok'
示例#3
0
def calmaeAndNmae(euiFile, testFileName):
    """Return (MAE, NMAE) for the prediction errors stored in *euiFile*.

    NMAE is the MAE normalised by the mean observed QoS value (column 2)
    of the test set loaded from *testFileName*.
    """
    import numpy as np
    import paper
    # mean absolute error over the stored residuals
    errors = np.loadtxt(euiFile)
    mae = np.mean(np.abs(errors))
    # normalise by the mean observed value of the test set
    testData = paper.loadTest(testFileName)
    nmae = mae / np.mean(testData[:, 2])
    return mae, nmae
示例#4
0
 def testTp(self):
     """Evaluate matrix-factorisation throughput predictions using the
     pre-trained factor matrices (P, Q) for each split."""
     sparseness = 5
     fileNumbers = 2
     for num in range(1, fileNumbers + 1):
         p, q = loadPQTp(sparseness, num)
         # per-record error output file
         pfEui = 'throught/eui/euipq-%d-%d.txt' % (sparseness, num)
         pf = open(pfEui, 'w')
         # held-out test records
         testFileName = r'throught/test%d-%d.txt' % (sparseness, num)
         testArrayObj = paper.loadTest(testFileName)
         predWithPQTp(testArrayObj, pf, p, q)
         pf.close()
示例#5
0
 def testTp(self):
     for sparseness in  [5, 10, 15, 20]:
         for i in range(1, fileNumbers+1):
             #文件对象
             print i, 
             euiFileName = 'throught/weightedslopeone/euislopeone-%d-%d.txt' % (sparseness,i)
             pfEui = open(euiFileName, 'w')
             #load data
             trainFileName = r'throught/training%d-%d.txt' % (sparseness, i)
             trainArrayObj = paper.createArrayObj(trainFileName)
             testFileName = r'throught/test%d-%d.txt' % (sparseness, i)
             testArrayObj = paper.loadTest(testFileName)
             #计算预测准确
             print calMaeAndRmse(trainArrayObj, testArrayObj, pfEui) 
             pfEui.close()
         print 'ok'
示例#6
0
文件: user_mean.py 项目: fishsey/code
 def testRt(self):
     sparseness = 5
     fileNumbers = 10
     for i in range(1, fileNumbers + 1):
         #文件对象
         print i,
         euiFileName = 'rt/userMean/euislopeone-%d-%d.txt' % (sparseness, i)
         pfEui = open(euiFileName, 'w')
         #load data
         trainFileName = r'rt/sparseness%d/training%d.txt' % (sparseness, i)
         trainArrayObj = paper.createArrayObj(trainFileName)
         testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i)
         testArrayObj = paper.loadTest(testFileName)
         #计算预测准确
         print calMaeAndRmse(trainArrayObj, testArrayObj, pfEui)
         pfEui.close()
     print 'ok'
示例#7
0
 def testRt(self):
     for sparseness in [5, 10, 15, 20]:
         for i in range(1, fileNumbers + 1):
             #文件对象
             print i,
             euiFileName = 'rt/mf/euislopeone-%d-%d.txt' % (sparseness, i)
             pfEui = open(euiFileName, 'w')
             #load data
             trainFileName = r'rt/sparseness%d/training%d.txt' % (
                 sparseness, i)
             trainArrayObj = paper.createArrayObj(trainFileName)
             testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i)
             testArrayObj = paper.loadTest(testFileName)
             #计算预测准确
             p, q = learningAddIndicateFunctionlfm(trainArrayObj)
             maeAndRmseRt(p, q, testArrayObj, pfEui)
             pfEui.close()
         print 'ok'
示例#8
0
 def testTp(self):
     for sparseness in [5, 10, 15, 20]:
         for i in range(1, fileNumbers + 1):
             #文件对象
             print i,
             euiFileName = 'throught/mf/euislopeone-%d-%d.txt' % (
                 sparseness, i)
             pfEui = open(euiFileName, 'w')
             #load data
             trainFileName = r'throught/training%d-%d.txt' % (sparseness, i)
             trainArrayObj = paper.createArrayObj(trainFileName)
             testFileName = r'throught/test%d-%d.txt' % (sparseness, i)
             testArrayObj = paper.loadTest(testFileName)
             #计算预测准确
             trainArrayObj[trainArrayObj != NoneValue] = (
                 trainArrayObj[trainArrayObj != NoneValue] -
                 44.034) / 107.439
             p, q = learningAddIndicateFunctionlfm(trainArrayObj)
             maeAndRmseTp(p, q, testArrayObj, pfEui)
             pfEui.close()
         print 'ok'
示例#9
0
    def test1(self):
        """Hybrid user-based CF prediction weighted by classifier label
        probabilities, with an item-similarity fallback when no similar
        user contributes a usable observation.

        Reads precomputed similarity matrices, classifier labels and
        label probabilities from ROOT_DIRECTORY, prints MAE/RMSE, and
        writes the absolute prediction errors to userEui.txt.
        """
        import numpy as np
        trainingArr = createArrayObj(ROOT_DIRECTORY + '/training1.txt', 339,
                                     5825)
        simUserArr = np.loadtxt(ROOT_DIRECTORY + "/simUserArr.txt")
        simWsArr = np.loadtxt(ROOT_DIRECTORY + "/simWsArr.txt")
        webServerMapping = np.loadtxt(ROOT_DIRECTORY + "/webServerMapping.txt",
                                      dtype=str,
                                      delimiter="\t")
        # top-10 most similar users per user (argsort ascending, keep tail)
        maxSimUser = np.argsort(simUserArr, axis=1)[:, 0 - 10:]

        testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt')
        # classifier labels, one row per test record
        pui = np.loadtxt(ROOT_DIRECTORY + "/label.txt")
        # label probabilities, indexed by user then label position
        probability = np.loadtxt(ROOT_DIRECTORY + "/probability.txt")
        # take the k most similar users / services
        k_user = 10
        k_ws = 10
        result = []
        fileIndex = 1
        # outer grids over (k_user, k_ws); both ranges currently length 1
        for i in xrange(1):
            k_user = 10 + i * 10
            for j in xrange(1):
                k_ws = 50 + j * 10
                simUser = np.argsort(simUserArr, axis=1)[:, 0 - k_user:]
                simWs = np.argsort(simWsArr, axis=1)[:, 0 - k_ws:]

                eui = []
                # iterate user-service test records
                for index, value in enumerate(testArr):
                    uId = value[0]
                    wsId = value[1]
                    label = pui[index]
                    fenzi = 0  # numerator of the weighted prediction
                    fenmu = 0  # denominator of the weighted prediction
                    # similar users and their similarity weights
                    for i, otherUId in enumerate(simUser[uId]):
                        #pow(simUserArr[uId][otherUId], 3)
                        #simUserArr[uId][otherUId]
                        if trainingArr[otherUId][wsId] != NoneValue and int(
                                trainingArr[otherUId][wsId]) in label:
                            fenzi += simUserArr[uId][otherUId] * trainingArr[
                                otherUId][wsId] * probability[otherUId][
                                    np.argwhere(label == int(
                                        trainingArr[otherUId][wsId]))[0, 0]]
                            fenmu += simUserArr[uId][otherUId] * probability[
                                otherUId][np.argwhere(label == int(
                                    trainingArr[otherUId][wsId]))[0, 0]]
                    if fenmu != 0:
                        # record the absolute prediction error
                        predictValue = fenzi / fenmu
                        r = predictValue - value[2]
                        if r < 0:
                            r = 0 - r
                        eui.append(r)
                    else:
                        # fallback: average the observations of the top
                        # similar users over the most similar services
                        total = 0
                        count = 0
                        for i, otherUId in enumerate(maxSimUser[uId]):
                            for m, n in enumerate(simWs[wsId]):
                                #                                 if webServerMapping[wsId][2] != webServerMapping[n][2] and webServerMapping[wsId][4] != webServerMapping[n][4]:
                                #                                     continue;
                                #maxSimUser[uId][0]
                                if trainingArr[otherUId][n] != NoneValue:
                                    count = count + 1
                                    total += trainingArr[otherUId][n]
                        if count != 0:
                            # NOTE(review): this appends the mean observed
                            # value itself as an "error" -- confirm intent
                            avg = total / count
                            if avg < 0:
                                avg = 0 - avg
                            eui.append(avg)
                print len(eui)
                eui = np.array(eui)
                mae, rmse = paper.maeAndRmse(eui)
                result.append("相似用户数:" + str(k_user) + "\t相似服务数:" + str(k_ws) +
                              "\t" + "MAE:" + str(mae) + "\tRMSE:" + str(rmse))
                print "相似用户数:" + str(k_user) + "\t相似服务数:" + str(
                    k_ws) + "\t" + "MAE:" + str(mae) + "\tRMSE:" + str(rmse)
                #                 np.savetxt(ROOT_DIRECTORY + "/result.txt", result, fmt='%s', delimiter="\t")
                np.savetxt(ROOT_DIRECTORY + "/userEui.txt",
                           eui,
                           fmt='%s',
                           delimiter="\t")
                #                 np.savetxt(ROOT_DIRECTORY + "/userEui" + str(fileIndex) + ".txt", eui, fmt='%s', delimiter="\t")
                fileIndex = fileIndex + 1
示例#10
0
def loadDataSet(trainFile, testFile):
    """Return (training matrix, test records) loaded via *paper*."""
    import paper
    # createArrayObj builds the matrix; loadTest reads the raw records
    return paper.createArrayObj(trainFile), paper.loadTest(testFile)
示例#11
0
def loadData(*dataFile):
    """Return (train, test) records; expects at least two file paths.

    dataFile[0] is the training file, dataFile[1] the test file; any
    extra arguments are ignored.
    """
    import paper
    trainObj = paper.loadTest(dataFile[0])
    testObj = paper.loadTest(dataFile[1])
    return trainObj, testObj
示例#12
0
文件: upcc.py 项目: fishsey/code
if __name__ == '__main__':
    # UPCC (user-based PCC) throughput evaluation over all sparseness
    # levels and splits.
    simCalMethod = paper.simPCC
    fileNumbers = 10
    for sparseness in [5, 10, 15, 20]:
        # one error file per split
        for i in range(1, fileNumbers + 1):
            print i,
            #            euiFileName = 'rt/upcc/euislopeone-%d-%d.txt' % (sparseness,i)
            euiFileName = 'throught/upcc/euislopeone-%d-%d.txt' % (sparseness,
                                                                   i)
            # NOTE(review): nothing visible here writes to pfEui -- it is
            # presumably consumed as a global by calMaeAndRmse(); confirm.
            pfEui = open(euiFileName, 'w')
            # load data
            #            trainFileName = r'rt/sparseness%d/training%d.txt' % (sparseness, i)
            #            testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i)
            # throughput data set
            trainFileName = r'throught/training%d-%d.txt' % (sparseness, i)
            testFileName = r'throught/test%d-%d.txt' % (sparseness, i)
            trainArrayObj = paper.createArrayObj(trainFileName)
            testArrayObj = paper.loadTest(testFileName)
            # user-user similarity matrix for this split
            #            userSimFileName = 'rt/upcc/simArrayUser-%s-%d.txt' % (sparseness,i)
            userSimFileName = 'throught/upcc/simArrayUser-%s-%d.txt' % (
                sparseness, i)
            userSimArrayObj = paper.createSimArray(trainArrayObj, simCalMethod)
            paper.save(userSimArrayObj, userSimFileName)
            #        userSimArrayObj = paper.load(userSimFileName)
            # prediction accuracy (calMaeAndRmse takes no args; it appears
            # to read the module globals set above -- TODO confirm)
            mae, rmse = calMaeAndRmse()
            print mae, rmse
            pfEui.close()
    print 'ok'
示例#13
0
文件: slopeone.py 项目: fishsey/code
    return pui


import math
import numpy as np
import paper
if __name__ == '__main__':
    # Slope One response-time evaluation driven by a classifier.
    NoneValue = 111111.0  # sentinel marking an unobserved matrix entry
    userNum = 339
    wsNum = 5825
    for sparseness in [5]:
        for num in range(1, 2):
            # load the training matrix and the held-out test records (rt)
            trainFile = r'rt/sparseness%d/training%d.txt' % (sparseness, num)
            trainArrayObj = paper.createArrayObj(trainFile)
            testFile = r'rt/sparseness%d/test%d.txt' % (sparseness, num)
            testArrayObj = paper.loadTest(testFile)
            # load train/test info records from the data directory
            trainFileInfo = r'data/sparseness%d/training%d.txt' % (sparseness,
                                                                   num)
            trainInfoObj = paper.loadTest(trainFileInfo)
            testFileInfo = r'data/sparseness%d/test%d.txt' % (sparseness, num)
            testInfoObj = paper.loadTest(testFileInfo)
            # classification result feeds the accuracy computation
            yClassify = classify(trainInfoObj, testInfoObj)
            mae, rmse = calMaeAndRmse(trainArrayObj, testArrayObj, yClassify)
            print
            print mae, rmse
        print 'ok'
示例#14
0
    def test1(self):
        """User-based CF prediction with Minkowski-distance similarity,
        weighted by classifier label probabilities; prints MAE/RMSE.

        Test records whose weighted denominator is zero are skipped (the
        mapping-based fallback below is commented out).
        """
        import numpy as np
        trainingArr = createArrayObj(ROOT_DIRECTORY + '/training1.txt', 339,
                                     5825)
        simArr = createSimArray(trainingArr, paper.simMinkowskiDist, 2)
        webServerMapping = np.loadtxt(ROOT_DIRECTORY + "/webServerMapping.txt",
                                      dtype=str,
                                      delimiter="\t")
        # take the k most similar users (argsort ascending, keep the tail)
        k = 10
        simUser = np.argsort(simArr, axis=1)[:, 0 - k:]
        testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt')
        # classifier labels and their probabilities
        pui = np.loadtxt(ROOT_DIRECTORY + "/label.txt")
        probability = np.loadtxt(ROOT_DIRECTORY + "/probability.txt")
        eui = []
        # iterate user-service test records
        for index, value in enumerate(testArr):
            uId = value[0]
            wsId = value[1]
            label = pui[index]
            fenzi = 0  # numerator of the weighted prediction
            fenmu = 0  # denominator of the weighted prediction
            # similar users and their similarity weights
            for i, otherUId in enumerate(simUser[uId]):
                if trainingArr[otherUId][wsId] != NoneValue and int(
                        trainingArr[otherUId][wsId]) in label:
                    fenzi += simArr[uId][otherUId] * trainingArr[otherUId][
                        wsId] * probability[otherUId][np.argwhere(
                            label == int(trainingArr[otherUId][wsId]))[0, 0]]
                    fenmu += simArr[uId][otherUId] * probability[otherUId][
                        np.argwhere(
                            label == int(trainingArr[otherUId][wsId]))[0, 0]]
            if fenmu != 0:
                # signed residual (not absolute) -- maeAndRmse presumably
                # handles the sign; confirm against paper.maeAndRmse
                predictValue = fenzi / fenmu
                eui.append(predictValue - value[2])


# disabled fallback: average observations of similar users over services
# sharing the same AS/mapping entry as wsId
#             else:
#                 if webServerMapping[wsId - 1][2] != 'not found':
#                     ii = 2
#                 else:
#                     ii = 4
#                 score = [ ]
#                 for g, h in enumerate(webServerMapping):
#                     if h[ii] == webServerMapping[wsId - 1][ii]:
#                         # collect other services in the same AS as i
#                         score.append(int(h[0]))
#                 total = 0
#                 count = 0
#                 for i, otherUId in enumerate(simUser[uId]):
#                     for m, n in enumerate(score):
#                         if trainingArr[otherUId][n] != NoneValue:
#                             count = count + 1
#                             total += trainingArr[otherUId][n]
#                 if count != 0:
#                     avg = total / count;
#                     eui.append(avg)
        print len(eui)
        eui = np.array(eui)
        mae, rmse = paper.maeAndRmse(eui)
        print "MAE:" + str(mae) + "\tRMSE:" + str(rmse)