def test1(self): import numpy as np fileIndex = 1 for i in xrange(1): # userEui = np.loadtxt(ROOT_DIRECTORY + "/userEui" + str(fileIndex) + ".txt") userEui = np.loadtxt(ROOT_DIRECTORY + "/userEui.txt") # wsEui = np.loadtxt(ROOT_DIRECTORY + "/wsEui" + str(fileIndex) + ".txt") wsEui = np.loadtxt(ROOT_DIRECTORY + "/wsEui.txt") fileIndex = fileIndex + 1 alpha = 0.4 count = 0 totalQos = 0 testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt') minMae = 1000 minAlpha = 10 for i in xrange(11): eui = [] alpha = i / 10.0 totalQos = 0 for index, value in enumerate(userEui): totalQos = totalQos + testArr[index][2] eui.append(testArr[index][2] - alpha * value - (1 - alpha) * wsEui[index]) eui = np.array(eui) mae, rmse = paper.maeAndRmse(eui) if mae < minMae: minMae = mae minAlpha = alpha print "alpha:" + str(alpha) + "\t" + "MAE:" + str( mae) + "\tRMSE:" + str(rmse) + "\tNMAE:" + str( totalQos / len(testArr)) print "最小mae:" + str(minMae) + "\t alpha=" + str(minAlpha)
def testTp(self): simCalMethod = paper.simPCC sparseness = 20 fileNumbers = 10 for i in range(1, fileNumbers + 1): #文件对象 print i, euiFileName = 'throught/ipcc/euislopeone-%d-%d.txt' % (sparseness, i) pfEui = open(euiFileName, 'w') #load data trainFileName = r'throught/training%d-%d.txt' % (sparseness, i) trainArrayObj = paper.createArrayObj(trainFileName) testFileName = r'throught/test%d-%d.txt' % (sparseness, i) testArrayObj = paper.loadTest(testFileName) #相似度矩阵数据 wsSimFileName = 'throught/ipcc/simArrayWs-%d-%d.txt' % (sparseness, i) wsSimArrayObj = paper.createSimArray(trainArrayObj.T, simCalMethod) paper.save(wsSimArrayObj, wsSimFileName) # wsSimArrayObj = paper.load(wsSimFileName) #计算预测准确 print calMaeAndRmse(trainArrayObj, testArrayObj, wsSimArrayObj, pfEui) pfEui.close() print 'ok'
def calmaeAndNmae(euiFile, testFileName):
    """Return (MAE, NMAE) for a saved residual file.

    MAE is the mean absolute residual loaded from ``euiFile``; NMAE
    normalizes it by the mean observed QoS value (column 2) of the
    test set loaded from ``testFileName``.
    """
    import paper
    import numpy as np
    residuals = np.loadtxt(euiFile)
    mae = np.mean(np.abs(residuals))
    testArrayObj = paper.loadTest(testFileName)
    nmae = mae / np.mean(testArrayObj[:, 2])
    return mae, nmae
def testTp(self):
    """Evaluate the matrix-factorization (P, Q) predictor on throughput
    data at sparseness 5, folds 1..2, writing per-test residuals."""
    sparseness = 5
    fileNumbers = 2
    for fold in range(1, fileNumbers + 1):
        p, q = loadPQTp(sparseness, fold)
        # Residual output file for this fold.
        pf = open('throught/eui/euipq-%d-%d.txt' % (sparseness, fold), 'w')
        # Held-out (user, service, qos) triples.
        testArrayObj = paper.loadTest(r'throught/test%d-%d.txt' % (sparseness, fold))
        predWithPQTp(testArrayObj, pf, p, q)
        pf.close()
def testTp(self): for sparseness in [5, 10, 15, 20]: for i in range(1, fileNumbers+1): #文件对象 print i, euiFileName = 'throught/weightedslopeone/euislopeone-%d-%d.txt' % (sparseness,i) pfEui = open(euiFileName, 'w') #load data trainFileName = r'throught/training%d-%d.txt' % (sparseness, i) trainArrayObj = paper.createArrayObj(trainFileName) testFileName = r'throught/test%d-%d.txt' % (sparseness, i) testArrayObj = paper.loadTest(testFileName) #计算预测准确 print calMaeAndRmse(trainArrayObj, testArrayObj, pfEui) pfEui.close() print 'ok'
def testRt(self): sparseness = 5 fileNumbers = 10 for i in range(1, fileNumbers + 1): #文件对象 print i, euiFileName = 'rt/userMean/euislopeone-%d-%d.txt' % (sparseness, i) pfEui = open(euiFileName, 'w') #load data trainFileName = r'rt/sparseness%d/training%d.txt' % (sparseness, i) trainArrayObj = paper.createArrayObj(trainFileName) testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i) testArrayObj = paper.loadTest(testFileName) #计算预测准确 print calMaeAndRmse(trainArrayObj, testArrayObj, pfEui) pfEui.close() print 'ok'
def testRt(self): for sparseness in [5, 10, 15, 20]: for i in range(1, fileNumbers + 1): #文件对象 print i, euiFileName = 'rt/mf/euislopeone-%d-%d.txt' % (sparseness, i) pfEui = open(euiFileName, 'w') #load data trainFileName = r'rt/sparseness%d/training%d.txt' % ( sparseness, i) trainArrayObj = paper.createArrayObj(trainFileName) testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i) testArrayObj = paper.loadTest(testFileName) #计算预测准确 p, q = learningAddIndicateFunctionlfm(trainArrayObj) maeAndRmseRt(p, q, testArrayObj, pfEui) pfEui.close() print 'ok'
def testTp(self): for sparseness in [5, 10, 15, 20]: for i in range(1, fileNumbers + 1): #文件对象 print i, euiFileName = 'throught/mf/euislopeone-%d-%d.txt' % ( sparseness, i) pfEui = open(euiFileName, 'w') #load data trainFileName = r'throught/training%d-%d.txt' % (sparseness, i) trainArrayObj = paper.createArrayObj(trainFileName) testFileName = r'throught/test%d-%d.txt' % (sparseness, i) testArrayObj = paper.loadTest(testFileName) #计算预测准确 trainArrayObj[trainArrayObj != NoneValue] = ( trainArrayObj[trainArrayObj != NoneValue] - 44.034) / 107.439 p, q = learningAddIndicateFunctionlfm(trainArrayObj) maeAndRmseTp(p, q, testArrayObj, pfEui) pfEui.close() print 'ok'
def test1(self):
    """Label-probability weighted UPCC prediction with a similar-user /
    similar-service fallback; sweeps (k_user, k_ws) and reports MAE/RMSE.

    Main path: for each test (user, service) pair, average the QoS values
    of similar users, weighted by similarity * P(label) where the
    neighbor's observed value falls in the test case's candidate label
    set. Fallback (no usable neighbor): plain mean over the top-10
    similar users crossed with the top-k_ws similar services.
    """
    import numpy as np
    trainingArr = createArrayObj(ROOT_DIRECTORY + '/training1.txt', 339, 5825)
    simUserArr = np.loadtxt(ROOT_DIRECTORY + "/simUserArr.txt")
    simWsArr = np.loadtxt(ROOT_DIRECTORY + "/simWsArr.txt")
    webServerMapping = np.loadtxt(ROOT_DIRECTORY + "/webServerMapping.txt",
                                  dtype=str,
                                  delimiter="\t")
    # Top-10 most similar users per user (argsort ascending -> take tail).
    maxSimUser = np.argsort(simUserArr, axis=1)[:, 0 - 10:]
    testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt')
    # Candidate labels per test case.
    pui = np.loadtxt(ROOT_DIRECTORY + "/label.txt")
    # Per-user probability of each candidate label.
    probability = np.loadtxt(ROOT_DIRECTORY + "/probability.txt")
    # Take the k most similar users / services.
    k_user = 10
    k_ws = 10
    result = []
    fileIndex = 1
    # NOTE(review): both sweep loops run exactly once (xrange(1)), so a
    # single (k_user=10, k_ws=50) configuration is evaluated. The inner
    # loops below rebind `i`, shadowing this loop variable -- harmless
    # only because the outer loop has a single iteration.
    for i in xrange(1):
        k_user = 10 + i * 10
        for j in xrange(1):
            k_ws = 50 + j * 10
            simUser = np.argsort(simUserArr, axis=1)[:, 0 - k_user:]
            simWs = np.argsort(simWsArr, axis=1)[:, 0 - k_ws:]
            eui = []
            # One residual per test (user, service) pair.
            for index, value in enumerate(testArr):
                uId = value[0]
                wsId = value[1]
                label = pui[index]
                # fenzi/fenmu = numerator/denominator of the weighted mean.
                fenzi = 0
                fenmu = 0
                # Similar users weighted by similarity * label probability.
                for i, otherUId in enumerate(simUser[uId]):
                    #pow(simUserArr[uId][otherUId], 3)
                    #simUserArr[uId][otherUId]
                    if trainingArr[otherUId][wsId] != NoneValue and int(
                            trainingArr[otherUId][wsId]) in label:
                        fenzi += simUserArr[uId][otherUId] * trainingArr[
                            otherUId][wsId] * probability[otherUId][
                                np.argwhere(label == int(
                                    trainingArr[otherUId][wsId]))[0, 0]]
                        fenmu += simUserArr[uId][otherUId] * probability[
                            otherUId][np.argwhere(label == int(
                                trainingArr[otherUId][wsId]))[0, 0]]
                if fenmu != 0:
                    # Record the absolute prediction error.
                    predictValue = fenzi / fenmu
                    r = predictValue - value[2]
                    if r < 0:
                        r = 0 - r
                    eui.append(r)
                else:
                    # Fallback: plain mean over top similar users x services.
                    total = 0
                    count = 0
                    for i, otherUId in enumerate(maxSimUser[uId]):
                        for m, n in enumerate(simWs[wsId]):
                            # if webServerMapping[wsId][2] != webServerMapping[n][2] and webServerMapping[wsId][4] != webServerMapping[n][4]:
                            #     continue
                            #maxSimUser[uId][0]
                            if trainingArr[otherUId][n] != NoneValue:
                                count = count + 1
                                total += trainingArr[otherUId][n]
                    if count != 0:
                        # NOTE(review): this appends the mean itself, not a
                        # prediction error -- confirm that is intended.
                        avg = total / count
                        if avg < 0:
                            avg = 0 - avg
                        eui.append(avg)
            print len(eui)
            eui = np.array(eui)
            mae, rmse = paper.maeAndRmse(eui)
            result.append("相似用户数:" + str(k_user) + "\t相似服务数:" + str(k_ws) +
                          "\t" + "MAE:" + str(mae) + "\tRMSE:" + str(rmse))
            print "相似用户数:" + str(k_user) + "\t相似服务数:" + str(
                k_ws) + "\t" + "MAE:" + str(mae) + "\tRMSE:" + str(rmse)
            # np.savetxt(ROOT_DIRECTORY + "/result.txt", result, fmt='%s', delimiter="\t")
            np.savetxt(ROOT_DIRECTORY + "/userEui.txt", eui, fmt='%s', delimiter="\t")
            # np.savetxt(ROOT_DIRECTORY + "/userEui" + str(fileIndex) + ".txt", eui, fmt='%s', delimiter="\t")
            fileIndex = fileIndex + 1
def loadDataSet(trainFile, testFile):
    """Load one train/test fold.

    Returns ``(trainArray, testObj)``: the filled user-service matrix
    built from ``trainFile`` and the raw test triples from ``testFile``.
    """
    import paper
    return paper.createArrayObj(trainFile), paper.loadTest(testFile)
def loadData(trainFile, testFile):
    """Load a train/test pair as raw (user, service, qos) triples.

    Previously declared as ``loadData(*dataFile)`` and indexed
    ``dataFile[0]`` / ``dataFile[1]``, which raised ``IndexError`` when
    fewer than two paths were supplied and silently ignored extras.
    Explicit positional parameters keep existing call sites
    (``loadData(train, test)``) working while making the arity clear.

    :param trainFile: path of the training triples file
    :param testFile:  path of the test triples file
    :returns: ``(train, test)`` arrays as produced by ``paper.loadTest``
    """
    import paper
    train = paper.loadTest(trainFile)
    test = paper.loadTest(testFile)
    return train, test
if __name__ == '__main__':
    # UPCC (user-based PCC) evaluation driver over the throughput data,
    # all sparseness levels x 10 folds. RT variants are commented out.
    simCalMethod = paper.simPCC
    fileNumbers = 10
    for sparseness in [5, 10, 15, 20]:
        # Per-fold file handles and data.
        for i in range(1, fileNumbers + 1):
            print i,
            # euiFileName = 'rt/upcc/euislopeone-%d-%d.txt' % (sparseness,i)
            euiFileName = 'throught/upcc/euislopeone-%d-%d.txt' % (sparseness, i)
            # NOTE(review): pfEui is opened but never written or passed to
            # calMaeAndRmse below -- each fold leaves an empty file.
            pfEui = open(euiFileName, 'w')
            # Load data.
            # trainFileName = r'rt/sparseness%d/training%d.txt' % (sparseness, i)
            # testFileName = r'rt/sparseness%d/test%d.txt' % (sparseness, i)
            # Throughput paths:
            trainFileName = r'throught/training%d-%d.txt' % (sparseness, i)
            testFileName = r'throught/test%d-%d.txt' % (sparseness, i)
            trainArrayObj = paper.createArrayObj(trainFileName)
            testArrayObj = paper.loadTest(testFileName)
            # User-user similarity matrix (saved per fold).
            # NOTE(review): '%s' for the integer sparseness works but is
            # inconsistent with the '%d' used elsewhere.
            # userSimFileName = 'rt/upcc/simArrayUser-%s-%d.txt' % (sparseness,i)
            userSimFileName = 'throught/upcc/simArrayUser-%s-%d.txt' % (
                sparseness, i)
            userSimArrayObj = paper.createSimArray(trainArrayObj, simCalMethod)
            paper.save(userSimArrayObj, userSimFileName)
            # userSimArrayObj = paper.load(userSimFileName)
            # Compute prediction accuracy.
            # NOTE(review): calMaeAndRmse() is called with no arguments here,
            # while sibling drivers pass (train, test, sim, pfEui) -- this
            # looks like a bug; confirm the intended signature.
            mae, rmse = calMaeAndRmse()
            print mae, rmse
            pfEui.close()
    print 'ok'
    # NOTE(review): this `return pui` is the tail of a function whose
    # definition lies outside this chunk; indentation reconstructed,
    # tokens untouched.
    return pui


import math

import numpy as np

import paper

if __name__ == '__main__':
    # Classification-assisted MAE/RMSE evaluation driver on the
    # response-time data (single sparseness level, single fold).
    NoneValue = 111111.0  # sentinel marking a missing matrix entry
    userNum = 339
    wsNum = 5825
    for sparseness in [5]:
        for num in range(1, 2):
            # Load the training matrix and held-out test triples.
            trainFile = r'rt/sparseness%d/training%d.txt' % (sparseness, num)
            trainArrayObj = paper.createArrayObj(trainFile)
            testFile = r'rt/sparseness%d/test%d.txt' % (sparseness, num)
            testArrayObj = paper.loadTest(testFile)
            # Load the train/test side-information files used by the classifier.
            trainFileInfo = r'data/sparseness%d/training%d.txt' % (sparseness,
                                                                   num)
            trainInfoObj = paper.loadTest(trainFileInfo)
            testFileInfo = r'data/sparseness%d/test%d.txt' % (sparseness, num)
            testInfoObj = paper.loadTest(testFileInfo)
            # Classification result feeds the accuracy computation.
            yClassify = classify(trainInfoObj, testInfoObj)
            mae, rmse = calMaeAndRmse(trainArrayObj, testArrayObj, yClassify)
            print
            print mae, rmse
    print 'ok'
def test1(self):
    """Label-probability weighted prediction using Minkowski-distance
    user similarity; prints MAE/RMSE over the test set.

    For each test (user, service) pair, predict the QoS as a weighted
    mean over the k most similar users, weighting each neighbor by
    similarity * P(label) when the neighbor's observed value falls in
    the test case's candidate label set. Pairs with no usable neighbor
    are skipped (the residual list is shorter than the test set).
    """
    import numpy as np
    trainingArr = createArrayObj(ROOT_DIRECTORY + '/training1.txt', 339, 5825)
    simArr = createSimArray(trainingArr, paper.simMinkowskiDist, 2)
    webServerMapping = np.loadtxt(ROOT_DIRECTORY + "/webServerMapping.txt",
                                  dtype=str,
                                  delimiter="\t")
    # Take the k most similar users (argsort ascending -> take the tail).
    k = 10
    simUser = np.argsort(simArr, axis=1)[:, 0 - k:]
    testArr = paper.loadTest(ROOT_DIRECTORY + '/test1.txt')
    # Candidate labels per test case and per-user label probabilities.
    pui = np.loadtxt(ROOT_DIRECTORY + "/label.txt")
    probability = np.loadtxt(ROOT_DIRECTORY + "/probability.txt")
    eui = []
    # One signed residual per predictable test (user, service) pair.
    for index, value in enumerate(testArr):
        uId = value[0]
        wsId = value[1]
        label = pui[index]
        # fenzi/fenmu = numerator/denominator of the weighted mean.
        fenzi = 0
        fenmu = 0
        # Similar users weighted by similarity * label probability.
        for i, otherUId in enumerate(simUser[uId]):
            if trainingArr[otherUId][wsId] != NoneValue and int(
                    trainingArr[otherUId][wsId]) in label:
                fenzi += simArr[uId][otherUId] * trainingArr[otherUId][
                    wsId] * probability[otherUId][np.argwhere(
                        label == int(trainingArr[otherUId][wsId]))[0, 0]]
                fenmu += simArr[uId][otherUId] * probability[otherUId][
                    np.argwhere(
                        label == int(trainingArr[otherUId][wsId]))[0, 0]]
        if fenmu != 0:
            predictValue = fenzi / fenmu
            eui.append(predictValue - value[2])
        # NOTE(review): a disabled fallback once handled the fenmu == 0
        # case by averaging over services sharing the same AS/country in
        # webServerMapping; it was commented out here.
    print len(eui)
    eui = np.array(eui)
    mae, rmse = paper.maeAndRmse(eui)
    print "MAE:" + str(mae) + "\tRMSE:" + str(rmse)