def colicTest(trainPath, testPath):
    """Fit a HorseLogReg model on one data set and measure its error on another.

    Both files are read with ``util.loadDataSet``, which is unpacked here
    into an (inputs, labels) pair. The model is built as
    ``HorseLogReg(0.01, 500)`` and the value returned by its ``fit`` call is
    passed to ``util.classifyVector`` for every test vector.

    Args:
        trainPath: Path handed to ``util.loadDataSet`` for the training data.
        testPath: Path handed to ``util.loadDataSet`` for the test data.

    Returns:
        The fraction of test vectors whose integer-cast prediction differs
        from the stored label, as a float. The same value is printed.

    Raises:
        ZeroDivisionError: If the test set contains no labels.
    """
    # read data from training dataset and test dataset
    trainingInputs, trainingLabels = util.loadDataSet(trainPath)
    testInputs, testLabels = util.loadDataSet(testPath)

    model = HorseLogReg(0.01, 500)
    theta = model.fit(trainingInputs, trainingLabels)

    total = len(testLabels)
    # Index-based on purpose: a test set with fewer inputs than labels must
    # still fail loudly instead of being silently truncated.
    misses = sum(
        1
        for idx in range(total)
        if int(util.classifyVector(testInputs[idx], theta)) != testLabels[idx]
    )

    errorRate = float(misses) / float(total)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
kNN = KNeighborsClassifier() kNN.fit(x, y) #test testX = testingSet[:, :-1] testY = testingSet[:, -1] score = kNN.score(testX, testY) print("KNN score is : ", score) if __name__ == "__main__": np.set_printoptions(threshold=np.nan) trainingSet = util.loadDataSet(constants.CONST_TRAINING_FILENAME) testingSet = util.loadDataSet(constants.CONST_TESTING_FILENAME) features = util.loadDataSet(constants.CONST_FEATURE_FILENAME) #print(features) sklearnDt(trainingSet, features, testingSet) sklearnSVC(trainingSet, testingSet) sklearnKNN(trainingSet, testingSet) #calcEntropy(trainingSet)
# NOTE(review): this line holds two pieces collapsed together: the tail of a
# function whose header is outside this view (the call in the entry point
# suggests it is kMeans), followed by the script entry point. The nesting of
# the loop statements cannot be recovered from the collapsed text -- confirm
# against the original file before reformatting.
#
# Visible tail of the function: for each cluster, elements from index 1 on are
# summed component-wise, the sum is divided by len(cluster) - 1, and the
# result is appended to Z; isEnd is set to False when that result differs
# from cluster[0]. The code then increments count, prints a progress line,
# breaks when isEnd is true, and returns (clusteri, clusters).
#
# Entry point: loads "EEG_feature.txt", "valence_arousal_label.txt" and
# "EEG_pca_feature.txt" through util.loadDataSet, seeds Z with rows 0, 1, 2
# and 7 of dataSet, calls kMeans(dataSet, K, Z) with K = 4, and passes data
# and clusteri to util.plotFeature. `labels` is only referenced by the
# commented-out reporting loop at the end.
for cluster in clusters: vecR = cluster[1] for i in range(2, len(cluster)): vecR = list(map(lambda x: x[0] + x[1], zip(vecR, cluster[i]))) vec = [a / (len(cluster) - 1) for a in vecR] Z.append(vec) if vec != cluster[0]: isEnd = False count += 1 print("第{}轮分类,是否结束:{},聚类中心:{}".format(count, isEnd, Z)) if isEnd: break return clusteri, clusters if __name__ == "__main__": #dataSet = [[0,0],[1,0],[0,1],[1,1],[2,1],[1,2],[2,2],[3,2],[6,6],[7,6],[8,6],[6,7],[7,7],[8,7],[9,7],[7,8],[8,8],[9,8],[8,9],[9,9]] dataSet = util.loadDataSet("EEG_feature.txt") labels = util.loadDataSet("valence_arousal_label.txt") data = util.loadDataSet("EEG_pca_feature.txt") Z = [dataSet[0], dataSet[1], dataSet[2], dataSet[7]] K = 4 clusteri, clusters = kMeans(dataSet, K, Z) util.plotFeature(data, clusteri) # for i in range(len(clusteri)): # print("分类:{:d}".format(i)) # for inx in clusteri[i]: # print("编号:{:d},标签:{}".format(inx,labels[inx]))
ax = plt.axes() ax.set_xticks(pos + (width / 2)) # center the ticks ax.set_xticklabels(X_labels) plt.bar(pos, frequencies, width, color='r') plt.show() # TODO the histogram of predictions that predicted too many tags def testSVM(X_train, Y_train, testingSet): print "TESTING SVM" classifier = multiLabelClassifier(X_train, Y_train) X_test, Y_test = mergeTitlesAndBodies(testingSet) print "Parsed the testing data" classifier.fit(X_train, Y_train) print "Fit the training data" predicted = classifier.predict(X_test) #printPrediction(X_test, predicted) return predicted if __name__ == '__main__': trainingSet = util.loadDataSet('out_1000_0') testingSet = util.loadDataSet('out_2000_2000_0') #trainingSet = util.loadDataSet('out0') #testingSet = util.loadDataSet('out1') #my_nb = nb.NaiveBayes(trainingSet, 100) #my_nb.train() #my_nb.test(testingSet) my_base = bp.BaselinePredictor(trainingSet, 100) my_base.train() my_base.test(testingSet)
#coding=utf-8
import numpy as np
from sklearn.decomposition import PCA

import util

# Reduce the vectors loaded from "EEG_feature.txt" to two principal
# components and print one tab-separated pair of components per row.
dataSet = util.loadDataSet("EEG_feature.txt")

pca = PCA(n_components=2)  # reduce to 2 dimensions
# fit_transform() fits the model itself, so a separate pca.fit() call
# beforehand would only repeat the same decomposition.
newX = pca.fit_transform(dataSet)  # data after dimensionality reduction
# PCA(copy=True, n_components=2, whiten=False)
# print(pca.explained_variance_ratio_)  # print the explained-variance ratio

for a in newX:
    print('{}\t{}'.format(a[0], a[1]))
def formatData(str):
    """Preprocess a description and format it against the stored feature list.

    Args:
        str: The description to convert. The name shadows the builtin but is
            kept because it is part of the function's signature.

    Returns:
        The result of ``util.formatDescToArray`` applied to the preprocessed
        description and the loaded features.
    """
    # load features from file
    featureList = util.loadDataSet(constants.CONST_FEATURE_FILENAME)
    cleaned = util.preprocessDesc(str)
    return util.formatDescToArray(cleaned, featureList)