Пример #1
0
    #            X_train, X_test = seqRowOfLabelData[train_index], seqRowOfLabelData[test_index]
    #            y_train, y_test = labelVec[train_index], labelVec[test_index]
    #            #KNN
    ##            neigh = KNeighborsClassifier(n_neighbors=7, metric = 'cosine', weights = 'distance')
    #            clf.fit(X_train, y_train)
    #            labelPredict = clf.predict(X_test)
    #            tmpAccuracyScoreList2.append(accuracy_score(y_test,labelPredict))
    #        sumarizeAccuracy2.append(np.average(tmpAccuracyScoreList2))
    ##

    # Sweep k over 5..49. For each k, load the feature-index file
    # "knnGraph_<k>.npy" and reorder the feature rows accordingly.
    # NOTE(review): only the start of this loop body is visible in this
    # excerpt; the comments below describe the visible lines only.
    for k_KNN_feature_selection in range(5, 50):  # k neighbor seq in knn graph
        print k_KNN_feature_selection
        # Path layout: ..//outputfile//<version>//<testVer>//knnGraph_<k>.npy
        fileName = "..//outputfile//" + version + "//" + testVer + "//knnGraph_" + str(
            k_KNN_feature_selection) + ".npy"
        # Presumably feature indexes ordered by Laplacian score -- the loader
        # is defined elsewhere; verify against algFile.
        sortedLaplaFeatureIndexes = np.array(
            algFile.loadSortedLaplaFeatureIndexes(fileName))
        #        sortedFeatureRowMat = np.array([featureRowMatrix[index] for index in sortedLaplaFeatureIndexes])
        # Select the rows of featureRowMatrix in the order given by the
        # loaded index array (NumPy integer-array indexing on axis 0).
        sortedFeatureRowMat = np.array(
            featureRowMatrix)[sortedLaplaFeatureIndexes]
        # Fresh result accumulators for this value of k.
        sumarizeAccuracy = []
        sumarizeAccuracy2 = []

        # Laplacian score
        for numOfFeature in kFeatures:
            print numOfFeature
            # build seqRowMat: keep the first numOfFeature feature rows and
            # transpose so that those features become columns.
            seqRowMatrix = np.transpose(sortedFeatureRowMat[:numOfFeature])
            # Keep only the first numOfLabelData rows of the transposed matrix
            # (presumably the labelled sequences -- TODO confirm ordering).
            seqRowOfLabelData = seqRowMatrix[:numOfLabelData]

            #        #build classifier
            #        neigh = KNeighborsClassifier(n_neighbors=11, metric = 'cosine', weights = 'distance')
Пример #2
0
        # Evaluate a KNN classifier over every train/test split yielded by
        # `loo.split`. `loo` is defined outside this excerpt (presumably
        # sklearn's LeaveOneOut -- TODO confirm).
        for train_index, test_index in loo.split(seqRowOfLabelData):    
            X_train, X_test = seqRowOfLabelData[train_index], seqRowOfLabelData[test_index]
            y_train, y_test = labelVec[train_index], labelVec[test_index]
            # KNN: a new classifier is constructed and fitted for each split
            # (9 neighbors, cosine metric, distance-weighted votes).
            neigh = KNeighborsClassifier(n_neighbors=9, metric = 'cosine', weights = 'distance')
            neigh.fit(X_train, y_train)
            labelPredict = neigh.predict(X_test)     
            # Record the accuracy obtained on this split's held-out data.
            tmpAccuracyScoreList2.append(accuracy_score(y_test,labelPredict))
        # Store the mean accuracy over all splits.
        # NOTE(review): tmpAccuracyScoreList2 is initialised outside this
        # excerpt; confirm it is reset before each evaluation.
        sumarizeAccuracy2.append(np.average(tmpAccuracyScoreList2))
        
        
        
    # range(129,130) yields the single value 129, so this loop currently
    # processes only "knnGraph_129.npy".
    # NOTE(review): the loop body continues past the end of this excerpt;
    # the comments below describe the visible lines only.
    for k_KNN_Graph_Preprocess in range(129,130): # k neighbor seq in knn graph
        print k_KNN_Graph_Preprocess
        # Path layout: ..//outputfile//<version>//<testVer>//knnGraph_<k>.npy
        fileName = "..//outputfile//"+version+ "//"+testVer+"//knnGraph_"+str(k_KNN_Graph_Preprocess) + ".npy"
        # Presumably feature indexes ordered by Laplacian score -- the loader
        # is defined elsewhere; verify against algFile.
        sortedLaplaFeatureIndexes = np.array(algFile.loadSortedLaplaFeatureIndexes(fileName))
#        sortedFeatureRowMat = np.array([featureRowMatrix[index] for index in sortedLaplaFeatureIndexes])
        # Select the rows of featureRowMatrix in the order given by the
        # loaded index array (NumPy integer-array indexing on axis 0).
        sortedFeatureRowMat = np.array(featureRowMatrix)[sortedLaplaFeatureIndexes]
        # Fresh result accumulator for this value of k.
        sumarizeAccuracy = []

        # Laplacian score
        for numOfFeature in kFeatures:
            # get seqRowMat: keep the first numOfFeature feature rows and
            # transpose so that those features become columns.
            seqRowMatrix = np.transpose(sortedFeatureRowMat[:numOfFeature])       
            # Keep only the first numOfLabelData rows of the transposed matrix
            # (presumably the labelled sequences -- TODO confirm ordering).
            seqRowOfLabelData = seqRowMatrix[:numOfLabelData]
            
            # leave one out
            
            # Per-split accuracy scores for this feature count.
            tmpAccuracyScoreList = []
            
            for train_index, test_index in loo.split(seqRowOfLabelData):    
Пример #3
0
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

# Script entry point: loads the encoded feature matrix, the labels and the
# feature ordering, then prepares a variance-threshold baseline and a 20-fold
# splitter. NOTE(review): the block continues past this excerpt; only the
# visible setup is documented here.
if __name__ == '__main__':
    # Timestamp taken at start-up.
    startTime = datetime.now()
    # Open the encoding file read-only. NOTE(review): no matching close() is
    # visible in this excerpt -- confirm the handle is released later.
    fileh = tables.open_file("../outputfile/encodingFile.h5", mode="r")        
    featureRowMatrix = fileh.root.featureRowMatrix
    # Length of the matrix's second axis (presumably the number of
    # sequences, one per column -- TODO confirm).
    sizeOfSeqList = featureRowMatrix.shape[1]
    labelDict = algFile.readLabelDictFromFile()
    labelVec = np.array(classifierFile.convertLabelDict2List(labelDict))
    numOfLabelData = len(labelVec)
    # Candidate feature counts: 1..174.
    kFeatures = range(1, 175)
    accuracyScoreList = []
    accuracyScoreList2 = []
    # Loader is called with no argument here; presumably it falls back to a
    # default file -- verify against algFile.
    sortedLaplaFeatureIndexes = algFile.loadSortedLaplaFeatureIndexes()
    # Gather the feature rows one at a time, in the loaded index order.
    sortedFeatureRowMat = np.array([featureRowMatrix[index] for index in sortedLaplaFeatureIndexes])
    sumarizeAccuracy = []
    sumarizeAccuracy2 = []
    
    # variance: threshold is .8 * (1 - .8) = 0.16, i.e. the variance of a
    # Bernoulli variable with p = 0.8.
    seqRowMatrixFull = algFile.readSeqRowMatFromFile() 
    sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
    newSeqRowMatrix = sel.fit_transform(seqRowMatrixFull)
    # Keep only the first numOfLabelData rows of the reduced matrix
    # (presumably the labelled sequences -- TODO confirm ordering).
    newSeqRowOfLabelData = newSeqRowMatrix[:numOfLabelData]
    
    
    # 20-fold cross-validation splitter.
    kf = KFold(n_splits=20)
    #Laplacian score
    for numOfFeature in kFeatures:
        print numOfFeature
Пример #4
0
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

# Script entry point: loads the encoded feature matrix, the labels and the
# feature ordering stored in "3_3.npy", then prepares a variance-threshold
# baseline and a leave-one-out splitter. NOTE(review): the block continues
# past this excerpt; only the visible setup is documented here.
if __name__ == '__main__':
    # Timestamp taken at start-up.
    startTime = datetime.now()
    # Open the encoding file read-only. NOTE(review): no matching close() is
    # visible in this excerpt -- confirm the handle is released later.
    fileh = tables.open_file("../outputfile/encodingFile.h5", mode="r")
    featureRowMatrix = fileh.root.featureRowMatrix
    # Length of the matrix's second axis (presumably the number of
    # sequences, one per column -- TODO confirm).
    sizeOfSeqList = featureRowMatrix.shape[1]
    labelDict = algFile.readLabelDictFromFile()
    labelVec = np.array(classifierFile.convertLabelDict2List(labelDict))
    numOfLabelData = len(labelVec)
    # Candidate feature counts: 1..199.
    kFeatures = range(1, 200)
    accuracyScoreList = []
    accuracyScoreList2 = []
    sortedLaplaFeatureIndexes = algFile.loadSortedLaplaFeatureIndexes(
        "3_3.npy")
    # Gather the feature rows one at a time, in the loaded index order.
    sortedFeatureRowMat = np.array(
        [featureRowMatrix[index] for index in sortedLaplaFeatureIndexes])
    sumarizeAccuracy = []
    sumarizeAccuracy2 = []

    # variance: a fixed threshold of 0.2 replaced the earlier
    # .6 * (1 - .6) setting kept in the commented-out line below.
    seqRowMatrixFull = algFile.readSeqRowMatFromFile()
    #    sel = VarianceThreshold(threshold=(.6 * (1 - .6)))
    sel = VarianceThreshold(threshold=0.2)
    newSeqRowMatrix = sel.fit_transform(seqRowMatrixFull)
    # Keep only the first numOfLabelData rows of the reduced matrix
    # (presumably the labelled sequences -- TODO confirm ordering).
    newSeqRowOfLabelData = newSeqRowMatrix[:numOfLabelData]

    # Leave-one-out splitter. NOTE(review): the import of LeaveOneOut is not
    # among the import lines visible in this excerpt.
    loo = LeaveOneOut()
    #Laplacian score
    for numOfFeature in kFeatures: