Example No. 1
def frequencyOnLabels(trainFile, modify, flag, userNum=339, wsNum=5825, sparess=2, lineNum=0):
    # lineNum is assumed to default to 0: the original snippet referenced it without defining it.
    import numpy as np
    from paper.rendi2 import loadDataset as ld
    trainData = ld.loadTestList(trainFile, modify=modify)
    if trainData.shape[1] == 4:
        trainData = trainData[:, 1:]  # drop the leading id column, keep (user, ws, label)
    userFreq = np.zeros((userNum, 21))  # per-user counts of each label bin
    wsFreq = np.zeros((wsNum, 21))      # per-service counts of each label bin
    for index, line in enumerate(trainData):
        if flag and index % sparess != lineNum:
            continue  # when flag is set, keep only one row out of every `sparess`
        user, ws, tui = line
        user = int(user)
        ws = int(ws)
        labelIndex = int(tui) + 1  # labelIndex i means label i-1
        userFreq[user, labelIndex] += 1
        wsFreq[ws, labelIndex] += 1
    return userFreq, wsFreq, trainData
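A minimal usage sketch for frequencyOnLabels; the file path follows the sparseness/training pattern used in the later examples, and the keyword values are placeholders rather than values taken from the snippet:

# Hypothetical call: count label frequencies without subsampling (flag=False).
userFreq, wsFreq, trainData = frequencyOnLabels(
    "dataset/rendi2/train/sparseness5/training1.txt",
    modify=True, flag=False)
# userFreq has shape (339, 21): per-user counts of each label bin.
# wsFreq has shape (5825, 21): per-service counts of each label bin.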
Example No. 2
def doMapping(trainFile, testFile, modify=True, flag=True, sparess=10):
    global userIps  # per-user feature vectors (set elsewhere); assumed to exist before this call

    import numpy as np  # needed for np.array below; the import was commented out in the original
    from paper.rendi2 import loadDataset as ld
    userFreq, wsFreq, trainData = frequencyOnLabels(trainFile,
                                                    modify=modify,
                                                    flag=flag,
                                                    sparess=sparess)
    # build the training feature rows: [userFreq | userIps | wsFreq | target]
    result = []
    for index, line in enumerate(trainData):
        temp = []
        user, ws, tui = line
        user = int(user)
        ws = int(ws)
        temp.extend(userFreq[user])
        temp.extend(userIps[user])
        temp.extend(wsFreq[ws])
        temp.append(tui)
        result.append(temp)
    trainFeature = np.array(result, dtype=float)
    # build the test feature rows with the same layout
    result = []
    testData = ld.loadTestList(testFile, modify=modify)
    for index, line in enumerate(testData):
        temp = []
        user, ws, tui = line
        user = int(user)
        ws = int(ws)
        temp.extend(userFreq[user])
        temp.extend(userIps[user])
        temp.extend(wsFreq[ws])
        temp.append(tui)
        result.append(temp)
    testFeature = np.array(result, dtype=float)
    return trainFeature, testFeature
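A sketch of driving doMapping. The global userIps has to be assigned before the call; the zero matrix (and its width) below is only a placeholder to make the sketch self-contained, and the file paths reuse the pattern from the other examples:

import numpy as np

# Placeholder for the per-user IP-derived features expected by doMapping.
userIps = np.zeros((339, 8))

trainFeature, testFeature = doMapping(
    "dataset/rendi2/train/sparseness5/training1.txt",
    "dataset/rendi2/test/sparseness5/test1.txt",
    modify=True, flag=True, sparess=10)
# Each row is [userFreq | userIps | wsFreq | target], so the last column is the label.
X, y = trainFeature[:, :-1], trainFeature[:, -1]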
Example No. 3
import time
import numpy as np  # used below; missing from the original excerpt
from paper.rendi2 import loadDataset as ld  # used below; missing from the original excerpt
from paper.rendi2 import cfPredictByDBSCAN
from paper.rendi2 import cfPredictByEuclid
start = time.time()
for sparess in [5, 10, 15, 20]:
    for fileNum in range(1, 11):
        print sparess, fileNum, '\t',
        # load the qos dataset
        trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
            sparess, fileNum)
        simFile1 = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
            sparess, fileNum)
        simFile2 = "dataset/rendi2/train/sparseness%s/training%dcooSimMatrix" % (
            sparess, fileNum)
        trainArray = ld.loadArrayObj(trainFile)
        trainData = ld.loadTestList(trainFile)
        simArrayEu = np.loadtxt(simFile1)
        simArrayCoo = np.loadtxt(simFile2)
        # load the prob dataset
        kProb = 1
        trainProb = "dataset/rendi2/train/sparseness%s/training%d-prob" % (
            sparess, fileNum)
        trainProbData = np.loadtxt(trainProb)
        labels = np.argsort(trainProbData,
                            axis=1)[:, -kProb:] - 1  # top-k labels
        labelsProb = np.sort(trainProbData,
                             axis=1)[:, -kProb:]  # top-k labels' probabilities
        # calculate eui
        euiProb = 0.0
        count = 0.0
        pui = []
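The top-k slicing above is the part worth keeping in mind: np.argsort(...)[:, -kProb:] keeps the column indices of the kProb largest entries per row, and the - 1 maps column i back to label i-1, undoing the labelIndex = int(tui) + 1 encoding from Example No. 1. A small standalone illustration:

import numpy as np

probs = np.array([[0.1, 0.7, 0.2],
                  [0.5, 0.2, 0.3]])
kProb = 1
labels = np.argsort(probs, axis=1)[:, -kProb:] - 1  # [[0], [-1]]
labelsProb = np.sort(probs, axis=1)[:, -kProb:]     # [[0.7], [0.5]]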
Example No. 4
if __name__ == "__main__":
    from paper.rendi2 import loadDataset as ld
    import numpy as np
    import time
    start = time.time()
    for sparess in [5, 10, 15, 20]:
        for fileNum in range(1, 11):
            #load the qos dataset
            trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
                sparess, fileNum)
            testFile = "dataset/rendi2/test/sparseness%s/test%d.txt" % (
                sparess, fileNum)
            simFile = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
                sparess, fileNum)
            trainArray = ld.loadArrayObj(trainFile)
            testData = ld.loadTestList(testFile)
            simArray = np.loadtxt(simFile)
            #load the prob dataset
            kProb = 1
            testProb = "dataset/rendi2/test/sparseness%s/test%d-prob" % (
                sparess, fileNum)
            testProbData = np.loadtxt(testProb)
            labels = np.argsort(testProbData,
                                axis=1)[:, -kProb:] - 1  # top-k labels
            labelsProb = np.sort(testProbData,
                                 axis=1)[:, -kProb:]  # top-k labels' probabilities
            # calculate eui
            euiProb = 0.0
            count = 0.0
            pui = []
            k = 2
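The snippet stops right after k = 2. In a similarity-based CF step like this one, k would usually be the number of most-similar users taken from simArray when predicting an unobserved value; that reading is an assumption, and the helper below is a generic sketch of the neighbour selection, not code from the module:

import numpy as np

def topKNeighbours(simArray, user, k=2):
    # Indices of the k users most similar to `user`, excluding the user itself.
    order = np.argsort(simArray[user])[::-1]
    return [u for u in order if u != user][:k]

The neighbours' observed QoS values can then be combined with a similarity-weighted average, which is essentially what the return expression at the top of Example No. 6 computes.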
Example No. 5
"""
@author: root
"""

from paper.rendi2 import loadDataset as ld
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
import numpy as np
import time
start = time.time()

#load the prob dataset
trainProb = "dataset/rendi2/training-0501-after-prob"
testProb = "dataset/rendi2/test-0501-after-prob"
x = ld.loadTestList(trainProb)
testX = ld.loadTestList(testProb)

#load the qos dataset
train = "dataset/rendi2/training1.txt"
test = "dataset/rendi2/test1.txt"
y = ld.loadTestList(train)[:, -1]
testY = ld.loadTestList(test)[:, -1]

# random forest regression: probability features (x) -> last column of the QoS file (y)
clf = RandomForestRegressor(oob_score=True,
                            n_jobs=20,
                            n_estimators=100,
                            max_features=0.1,
                            min_samples_split=10)
clf.fit(x, y.astype(float))
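Once fitted, the regressor can be scored on the held-out probability features. The evaluation below is a standard MAE/RMSE sketch, not part of the original snippet, and it assumes testX is numeric in the same way x is:

pred = clf.predict(testX)
trueY = testY.astype(float)
mae = np.mean(np.abs(pred - trueY))
rmse = np.sqrt(np.mean((pred - trueY) ** 2))
print("MAE %.4f  RMSE %.4f" % (mae, rmse))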
Example No. 6
        return None
    # weighted average: dot(rating, numbers) / sums
    return (np.mat(rating) * np.mat(numbers).T)[0, 0] / sums


if __name__ == '__main__':
    import time
    from paper.rendi2 import loadDataset as ld
    import numpy as np
    start = time.time()
    NoneValue = 111111
    userNum = 339
    wsNum = 5825
    for sparess in [5]:
        for fileNum in range(1, 2):
            #            sampleTrainFile = "dataset/rendi2/sample/training-%d-%d" % (sparess, fileNum)
            trainFile = "dataset/rendi2/train/sparseness%s/training%d.txt" % (
                sparess, fileNum)
            testFile = "dataset/rendi2/puiAnalyze/puiAnalyze-%d-%d" % (sparess,
                                                                       fileNum)
            simFile = "dataset/rendi2/train/sparseness%s/training%deuSimMatrix" % (
                sparess, fileNum)
            simArray = np.loadtxt(simFile)

            trainArray = ld.loadArrayObj(trainFile, modify=True)
            testData = ld.loadTestList(testFile, modify=False)

            calMaeAndRmse(trainArray, testData, simArray)

    # end modeling
    print "elapsed time ... ", time.time() - start