def frequencyOnLabels(trainFile, modify, flag, userNum=339, wsNum=5825, sparess=2, lineNum=0):
    """Count, per user and per web service, how often each QoS label occurs
    in the training set.

    Parameters:
        trainFile: path of the training file, loaded via ld.loadTestList.
        modify:    passed through to ld.loadTestList.
        flag:      when truthy, only rows whose index is congruent to
                   `lineNum` modulo `sparess` are counted (sub-sampling).
        userNum:   number of distinct users (rows of userFreq).
        wsNum:     number of distinct web services (rows of wsFreq).
        sparess:   sampling modulus used when `flag` is truthy.
        lineNum:   residue class selected by the sampling filter.
                   NOTE(review): the original body referenced an undefined
                   name `lineNum`, raising NameError whenever flag was True;
                   it is now an explicit parameter defaulting to 0.

    Returns:
        (userFreq, wsFreq, trainData): two (N, 21) count matrices and the
        loaded (possibly column-trimmed) training array.
    """
    import numpy as np
    from paper.rendi2 import loadDataset as ld

    trainData = ld.loadTestList(trainFile, modify=modify)
    # 4-column files carry an extra leading column; keep (user, ws, tui).
    if trainData.shape[1] == 4:
        trainData = trainData[:, 1:]

    userFreq = np.zeros((userNum, 21))
    wsFreq = np.zeros((wsNum, 21))
    for index, line in enumerate(trainData):
        # Optional sub-sampling: keep only rows in the chosen residue class.
        if flag and index % sparess != lineNum:
            continue
        user, ws, tui = line
        user = int(user)
        ws = int(ws)
        labelIndex = int(tui) + 1  # labelIndex i means label i-1
        userFreq[user, labelIndex] += 1
        wsFreq[ws, labelIndex] += 1
    return userFreq, wsFreq, trainData
def doMapping(trainFile, testFile, modify=True, flag=True, sparess=10):
    """Build the train and test feature matrices for the label classifier.

    Each record (user, ws, tui) is mapped to one feature row consisting of
    the user's label-frequency histogram, the user's IP features (from the
    module-level `userIps`), the web service's label-frequency histogram,
    and finally the label `tui` itself as the last column.

    Parameters:
        trainFile, testFile: dataset paths loaded via ld.loadTestList.
        modify, flag, sparess: forwarded to frequencyOnLabels / loadTestList.

    Returns:
        (trainFeature, testFeature): two float numpy arrays, one row per
        record, label in the last column.
    """
    global userIps
    # NOTE(review): this import was commented out in the original although
    # np.array is used below — restored so the function is self-contained.
    import numpy as np
    from paper.rendi2 import loadDataset as ld

    userFreq, wsFreq, trainData = frequencyOnLabels(
        trainFile, modify=modify, flag=flag, sparess=sparess)

    def _buildFeatures(records):
        # Shared feature-construction loop (was duplicated verbatim for the
        # train and test sets in the original).
        rows = []
        for user, ws, tui in records:
            user = int(user)
            ws = int(ws)
            temp = []
            temp.extend(userFreq[user])
            temp.extend(userIps[user])
            temp.extend(wsFreq[ws])
            temp.append(tui)
            rows.append(temp)
        return np.array(rows, dtype=float)

    # trainFeature
    trainFeature = _buildFeatures(trainData)

    # testFeature
    testData = ld.loadTestList(testFile, modify=modify)
    testFeature = _buildFeatures(testData)

    return trainFeature, testFeature
# Experiment driver: collaborative-filtering prediction combined with
# per-label probability estimates, swept over sparseness levels and folds.
# NOTE(review): this fragment is truncated below (ends at `pui = []`) and
# relies on `ld` and `np` being imported earlier in the file — verify.
import time
from paper.rendi2 import cfPredictByDBSCAN
from paper.rendi2 import cfPredictByEuclid

start = time.time()
for sparess in [5, 10, 15, 20]:
    for fileNum in range(1, 11):
        print sparess, fileNum, '\t',
        #load the qos dataset
        trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
            sparess, fileNum)
        simFile1 = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
            sparess, fileNum)
        simFile2 = "dataset/rendi2/train/sparseness%s/training%dcooSimMatrix" % (
            sparess, fileNum)
        trainArray = ld.loadArrayObj(trainFile)
        trainData = ld.loadTestList(trainFile)
        # Precomputed user-similarity matrices: Euclidean and co-occurrence.
        simArrayEu = np.loadtxt(simFile1)
        simArrayCoo = np.loadtxt(simFile2)

        #load the prob dataset
        kProb = 1
        trainProb = "dataset/rendi2/train/sparseness%s/training%d-prob" % (
            sparess, fileNum)
        trainProbData = np.loadtxt(trainProb)
        # Top-k most probable labels per row; the -1 shift maps column index
        # back to the label value (labels appear to start at -1 — confirm).
        labels = np.argsort(trainProbData, axis=1)[:, -kProb:] - 1  #top-k labels
        labelsProb = np.sort(trainProbData, axis=1)[:, -kProb:]  #top-k labels's

        #cal eui
        euiProb = 0.0
        count = 0.0
        pui = []
if __name__ == "__main__":
    # Test-side evaluation driver: loads per-fold QoS data, a Euclidean
    # similarity matrix, and per-record label probabilities, then (below
    # this truncated fragment) presumably computes prediction error.
    from paper.rendi2 import loadDataset as ld
    import numpy as np
    import time

    start = time.time()
    for sparess in [5, 10, 15, 20]:
        for fileNum in range(1, 11):
            #load the qos dataset
            trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
                sparess, fileNum)
            testFile = "dataset/rendi2/test/sparseness%s/test%d.txt" % (
                sparess, fileNum)
            simFile = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
                sparess, fileNum)
            trainArray = ld.loadArrayObj(trainFile)
            testData = ld.loadTestList(testFile)
            simArray = np.loadtxt(simFile)

            #load the prob dataset
            kProb = 1
            testProb = "dataset/rendi2/test/sparseness%s/test%d-prob" % (
                sparess, fileNum)
            testProbData = np.loadtxt(testProb)
            # Top-k most probable labels per row; -1 shifts column index back
            # to the label value (labels appear to start at -1 — confirm).
            labels = np.argsort(testProbData, axis=1)[:, -kProb:] - 1  #top-k labels
            labelsProb = np.sort(testProbData, axis=1)[:, -kProb:]  #top-k labels's

            #cal eui
            euiProb = 0.0
            count = 0.0
            pui = []
            # k: neighborhood size for the CF step (fragment truncated here).
            k = 2
@author: root
"""
# Trains a RandomForestRegressor that maps per-record label-probability
# vectors to the observed QoS label. Fragment is truncated after clf.fit.
from paper.rendi2 import loadDataset as ld
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
import numpy as np
import time

start = time.time()

#load the prob dataset
trainProb = "dataset/rendi2/training-0501-after-prob"
testProb = "dataset/rendi2/test-0501-after-prob"
x = ld.loadTestList(trainProb)
testX = ld.loadTestList(testProb)

#load the qos dataset
# Targets: last column of each record is the QoS label.
train = "dataset/rendi2/training1.txt"
test = "dataset/rendi2/test1.txt"
y = ld.loadTestList(train)[:, -1]
testY = ld.loadTestList(test)[:, -1]

clf = RandomForestRegressor(oob_score=True,
                            n_jobs=20,
                            n_estimators=100,
                            max_features=0.1,
                            min_samples_split=10)
clf.fit(x, y.astype(float))
        # Tail of a similarity-weighted prediction helper whose `def` (and
        # the computation of rating/numbers/sums) lies above this chunk.
        # Returns None when no prediction can be made, otherwise the
        # weighted average of neighbor ratings.
        return None
    return (np.mat(rating) * np.mat(numbers).T)[0, 0] / sums


if __name__ == '__main__':
    # Driver: evaluates MAE/RMSE of the CF predictor on a pui-analysis
    # test split for one sparseness/fold combination.
    import time
    from paper.rendi2 import loadDataset as ld
    import numpy as np

    start = time.time()
    # Sentinel for "no prediction available" — TODO confirm against
    # calMaeAndRmse, which is defined elsewhere in this file.
    NoneValue = 111111
    userNum = 339
    wsNum = 5825
    for sparess in [5]:
        for fileNum in range(1, 2):
            # sampleTrainFile = "dataset/rendi2/sample/training-%d-%d" % (sparess, fileNum)
            trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
                sparess, fileNum)
            testFile = "dataset/rendi2/puiAnalyze/puiAnalyze-%d-%d" % (sparess,
                                                                       fileNum)
            simFile = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
                sparess, fileNum)
            simArray = np.loadtxt(simFile)
            trainArray = ld.loadArrayObj(trainFile, modify=True)
            testData = ld.loadTestList(testFile, modify=False)
            calMaeAndRmse(trainArray, testData, simArray)
    #end modeling
    print "during time ... ", time.time() - start