Example #1
def colicTest(trainPath, testPath):
    # read data from training dataset and test dataset
    trainingInputs, trainingLabels = util.loadDataSet(trainPath)
    testInputs, testLabels = util.loadDataSet(testPath)

    logRegModel = HorseLogReg(0.01, 500)
    trainTheta = logRegModel.fit(trainingInputs, trainingLabels)
    errorCount = 0
    numTestVec = len(testLabels)
    for i in range(numTestVec):
        if int(util.classifyVector(testInputs[i],
                                   trainTheta)) != testLabels[i]:
            errorCount += 1
    errorRate = float(errorCount) / float(numTestVec)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
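
The helper `util.classifyVector` and the `HorseLogReg` class are not shown in this example; only their call sites appear above. A minimal sketch of what they might look like, assuming a standard sigmoid-threshold classifier and batch gradient-ascent logistic regression (the names and signatures mirror the calls above, but the bodies are assumptions, not the project's actual code):

import numpy as np

def sigmoid(z):
    # Plain logistic function.
    return 1.0 / (1.0 + np.exp(-z))

def classifyVector(inputs, theta):
    # Assumed helper: threshold the sigmoid of the dot product at 0.5.
    prob = sigmoid(np.dot(inputs, theta))
    return 1.0 if prob > 0.5 else 0.0

class HorseLogReg:
    # Assumed implementation: batch gradient ascent on the log-likelihood.
    def __init__(self, alpha, numIter):
        self.alpha = alpha      # learning rate (0.01 in the example)
        self.numIter = numIter  # number of passes (500 in the example)

    def fit(self, inputs, labels):
        X = np.array(inputs)
        y = np.array(labels)
        theta = np.ones(X.shape[1])
        for _ in range(self.numIter):
            error = y - sigmoid(X.dot(theta))
            theta += self.alpha * X.T.dot(error)
        return theta

Note that in this project `util.loadDataSet` returns an (inputs, labels) pair, unlike the single-value variants in the later examples.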
Example #2
    kNN = KNeighborsClassifier()
    kNN.fit(x, y)

    # Evaluate on the held-out test set; the last column holds the labels.
    testX = testingSet[:, :-1]
    testY = testingSet[:, -1]
    score = kNN.score(testX, testY)
    print("KNN score is:", score)


if __name__ == "__main__":
    # Print full arrays instead of truncated previews
    # (threshold=np.nan is rejected by current NumPy).
    np.set_printoptions(threshold=np.inf)

    trainingSet = util.loadDataSet(constants.CONST_TRAINING_FILENAME)
    testingSet = util.loadDataSet(constants.CONST_TESTING_FILENAME)
    features = util.loadDataSet(constants.CONST_FEATURE_FILENAME)
    #print(features)

    sklearnDt(trainingSet, features, testingSet)
    sklearnSVC(trainingSet, testingSet)
    sklearnKNN(trainingSet, testingSet)
    #calcEntropy(trainingSet)
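
The `sklearnDt` and `sklearnSVC` functions invoked in the main block are not shown. A sketch of what `sklearnSVC` might look like, following the same train/score pattern as the KNN snippet (only the name and arguments come from the call site; the body is an assumption):

from sklearn.svm import SVC

def sklearnSVC(trainingSet, testingSet):
    # Assumed layout, mirroring the KNN example: the last column
    # is the label, the remaining columns are features.
    x, y = trainingSet[:, :-1], trainingSet[:, -1]
    svc = SVC()
    svc.fit(x, y)
    testX, testY = testingSet[:, :-1], testingSet[:, -1]
    print("SVC score is:", svc.score(testX, testY))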
Example #3
        # Update step: recompute each center as the mean of its members
        # (cluster[0] holds the previous center, cluster[1:] the points).
        for cluster in clusters:
            vecR = cluster[1]
            for i in range(2, len(cluster)):
                vecR = list(map(lambda x: x[0] + x[1], zip(vecR, cluster[i])))
            vec = [a / (len(cluster) - 1) for a in vecR]
            Z.append(vec)
            if vec != cluster[0]:
                isEnd = False
        count += 1
        print("Round {}: converged: {}, cluster centers: {}".format(count, isEnd, Z))
        if isEnd:
            break
    return clusteri, clusters


if __name__ == "__main__":
    #dataSet = [[0,0],[1,0],[0,1],[1,1],[2,1],[1,2],[2,2],[3,2],[6,6],[7,6],[8,6],[6,7],[7,7],[8,7],[9,7],[7,8],[8,8],[9,8],[8,9],[9,9]]
    dataSet = util.loadDataSet("EEG_feature.txt")
    labels = util.loadDataSet("valence_arousal_label.txt")
    data = util.loadDataSet("EEG_pca_feature.txt")
    Z = [dataSet[0], dataSet[1], dataSet[2], dataSet[7]]
    K = 4

    clusteri, clusters = kMeans(dataSet, K, Z)

    util.plotFeature(data, clusteri)
    # for i in range(len(clusteri)):
    #     print("Cluster {:d}".format(i))
    #     for inx in clusteri[i]:
    #         print("index: {:d}, label: {}".format(inx, labels[inx]))
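
Only the update step of `kMeans` survives in the excerpt above; the initialization and the assignment step are missing. A minimal self-contained reconstruction that is consistent with the call `kMeans(dataSet, K, Z)` and the return value `clusteri, clusters` (this is a sketch under those assumptions, not the project's actual code; it assumes rows are plain Python lists and no cluster goes empty):

def kMeans(dataSet, K, Z):
    # Z holds the K initial centers; alternate assignment and update
    # steps until no center moves.
    count = 0
    while True:
        # Assignment step: each cluster list starts with its center,
        # then gathers the points nearest to that center.
        clusteri = [[] for _ in range(K)]
        clusters = [[z] for z in Z]
        for idx, point in enumerate(dataSet):
            dists = [sum((a - b) ** 2 for a, b in zip(point, z)) for z in Z]
            k = dists.index(min(dists))
            clusteri[k].append(idx)
            clusters[k].append(point)
        # Update step, as in the excerpt above.
        Z, isEnd = [], True
        for cluster in clusters:
            n = len(cluster) - 1
            vec = [sum(col) / n for col in zip(*cluster[1:])]
            Z.append(vec)
            if vec != cluster[0]:
                isEnd = False
        count += 1
        print("Round {}: converged: {}".format(count, isEnd))
        if isEnd:
            return clusteri, clusters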
Example #4
    ax = plt.axes()
    ax.set_xticks(pos + (width / 2)) # center the ticks
    ax.set_xticklabels(X_labels)
    plt.bar(pos, frequencies, width, color='r')
    plt.show()
    # TODO the histogram of predictions that predicted too many tags

def testSVM(X_train, Y_train, testingSet):
    print("TESTING SVM")
    classifier = multiLabelClassifier(X_train, Y_train)
    X_test, Y_test = mergeTitlesAndBodies(testingSet)
    print("Parsed the testing data")
    classifier.fit(X_train, Y_train)
    print("Fit the training data")
    predicted = classifier.predict(X_test)
    #printPrediction(X_test, predicted)
    return predicted

if __name__ == '__main__':
    trainingSet = util.loadDataSet('out_1000_0')
    testingSet = util.loadDataSet('out_2000_2000_0')
    #trainingSet = util.loadDataSet('out0')
    #testingSet = util.loadDataSet('out1')
    #my_nb = nb.NaiveBayes(trainingSet, 100)

    #my_nb.train()
    #my_nb.test(testingSet)
    my_base = bp.BaselinePredictor(trainingSet, 100)
    my_base.train()
    my_base.test(testingSet)
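
`multiLabelClassifier` and `mergeTitlesAndBodies` are project helpers that do not appear in the excerpt. One plausible sketch of `multiLabelClassifier` for multi-label tag prediction, using scikit-learn's one-vs-rest wrapper over a TF-IDF + linear SVM pipeline (the construction is an assumption based on the SVM naming; only the signature comes from the call above, and Y_train is assumed to be a binary indicator matrix):

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

def multiLabelClassifier(X_train, Y_train):
    # One binary LinearSVC per tag over TF-IDF features.
    # X_train and Y_train are accepted to match the call site;
    # the actual fitting happens later in testSVM.
    return OneVsRestClassifier(Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("svc", LinearSVC()),
    ]))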
Example #5
File: pca.py Project: NoneNoneMage/cluster
#coding=utf-8
import numpy as np
from sklearn.decomposition import PCA
import util

dataSet = util.loadDataSet("EEG_feature.txt")
pca = PCA(n_components=2)  # reduce to 2 dimensions
newX = pca.fit_transform(dataSet)  # fit and project in one step
# PCA(copy=True, n_components=2, whiten=False)
# print(pca.explained_variance_ratio_)  # per-component explained variance
for a in newX:
    print('{}\t{}'.format(a[0], a[1]))
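
`util.loadDataSet` appears in every example above but is never shown, and each project clearly defines its own variant (Example #1's version returns an (inputs, labels) pair, while the others return a single array). A minimal sketch consistent with the single-return call sites, assuming whitespace-delimited numeric text files:

import numpy as np

def loadDataSet(filename):
    # Assumed: one sample per line, whitespace-separated numeric fields.
    return np.loadtxt(filename)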
Example #6
File: predictor.py Project: burnyxu/aurora
def formatData(desc):
    # Load the feature vocabulary, then map the preprocessed
    # description onto it as a feature array.
    features = util.loadDataSet(constants.CONST_FEATURE_FILENAME)
    return util.formatDescToArray(util.preprocessDesc(desc), features)
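
`util.preprocessDesc` and `util.formatDescToArray` are project helpers not shown here. A hedged sketch of one plausible implementation, assuming a tokenize-and-lowercase preprocessing step and a bag-of-words indicator encoding over the loaded feature vocabulary (the names match the call sites above; the bodies are guesses):

import re

def preprocessDesc(desc):
    # Assumed: lowercase and split a raw description into word tokens.
    return re.findall(r"[a-z0-9']+", desc.lower())

def formatDescToArray(tokens, features):
    # Assumed: binary indicator vector over the feature vocabulary.
    tokenSet = set(tokens)
    return [1 if feat in tokenSet else 0 for feat in features]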