def handwritingClassTest():
    """Classify every file in ``testDigits`` with kNN and print the error statistics.

    Every file in ``trainingDigits`` is converted with ``kNN.img2vector`` and
    stored as one 1024-wide row of the training matrix.  The label of a file
    is the integer that precedes the first ``_`` in its name (the extension
    is stripped first).  Each file in ``testDigits`` is then classified with
    ``kNN.classify0`` using k=3 and compared with the label from its name.

    Prints one line per test file, then the number of errors and the error
    rate.  When ``testDigits`` is empty the error rate is reported as 0.0
    instead of raising ``ZeroDivisionError``.
    """
    trainingFileList = listdir('trainingDigits')
    trainingMat = zeros((len(trainingFileList), 1024))
    hwLabels = []
    for row, fileNameStr in enumerate(trainingFileList):
        # "<digit>_<index>.<ext>" -> <digit>
        hwLabels.append(int(fileNameStr.split('.')[0].split('_')[0]))
        trainingMat[row, :] = kNN.img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: {0}, the real answer is: {1}".format(classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\nthe total number of errors is: {0}".format(errorCount))
    # Guard the division: an empty test directory would otherwise divide by zero.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: {0}".format(errorRate))
def mainTest():
    """Evaluate the kd-tree based kNN classifier on the handwritten-digit files.

    Every file in ``trainingDigits`` is converted with ``img2vector`` and
    stored as one 1024-wide row of the training matrix; its label is the
    integer before the first ``_`` in the file name.  A kd-tree is built from
    the matrix and labels with ``createKDTree``, and every file in
    ``testDigits`` is classified with ``KNN`` using 5 neighbours.

    Prints progress messages, one line per test file, the number of errors
    and the error rate.  When ``testDigits`` is empty the error rate is
    reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    # NOTE: every print below takes a single parenthesised expression, so the
    # output is the same whether print is a statement (Python 2) or a function.
    trainingFileList = listdir('trainingDigits')
    trainingMat = np.zeros((len(trainingFileList), 1024))
    hwLabels = []
    for row, fileNameStr in enumerate(trainingFileList):
        # "<digit>_<index>.<ext>" -> <digit>
        hwLabels.append(int(fileNameStr.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % fileNameStr)

    # build the kdtree
    print("start to build kd tree ...")
    print(hwLabels)
    kd_tree = KDNode()
    kd_tree = createKDTree(kd_tree, trainingMat, hwLabels)
    print("kd tree finished")
    #print_kdtree(kd_tree)
    #exit(0)

    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        # Only row 0 of the converted image is handed to the tree search.
        classifierResult = KNN(kd_tree, 5, vectorUnderTest[0])
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\nthe total number of errors is: %d" % errorCount)
    # Guard the division: an empty test directory would otherwise divide by zero.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: %f" % errorRate)
def testKNN3(self):
    """Check that kNN (k=3) labels the first listed test digit correctly.

    The training matrix is built from every file in ``trainingDigits`` (one
    1024-wide row per file, converted with ``kNN.img2vector``); labels are
    the integer before the first ``_`` in each file name.  The first entry
    returned by ``os.listdir('testDigits')`` is then classified with
    ``kNN.classify0`` and the result is compared with the label parsed from
    that file's own name.
    """
    trainingFileList = os.listdir('trainingDigits')
    trainingMat = numpy.zeros((len(trainingFileList), 1024))
    hwLabels = []
    for row, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[row, :] = kNN.img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = os.listdir('testDigits')
    fileNameStr = testFileList[0]
    fileStr = fileNameStr.split('.')[0]  # take off .txt
    classNumStr = int(fileStr.split('_')[0])
    vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
    c = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)

    # os.listdir returns names in arbitrary order, so the first file is not
    # guaranteed to be a "0_*" sample; compare against the label parsed from
    # the file name rather than a hard-coded 0.
    self.assertEqual(c, classNumStr)
def img2vectorTest():
    """Convert ``testDigits/0_13.txt`` with ``kNN.img2vector`` and print columns 0-31 of row 0."""
    sample = kNN.img2vector('testDigits/0_13.txt')
    leading = sample[0, 0:32]
    print(leading)
line = line.strip() print line line = line.split() print line print zero[1, ] ##########Ch02########### datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt') if (False): print datingDataMat print datingLabels[0:20] fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2], 15.0 * array(datingLabels), 15.0 * array(datingLabels)) plt.show() normMat, ranges, minVals = kNN.autoNorm(datingDataMat) print normMat print ranges print minVals kNN.datingClassTest() kNN.classifyPerson() testVector = kNN.img2vector('digits/testDigits/0_13.txt') print testVector[0, 0:31] print testVector[0, 32:63] kNN.handwritingClassTest()
from numpy import *
from os import listdir

# kNN.handwritingClassTest()

# Load the training data: each file in trainingDigits becomes one 1024-wide
# row of trainingMat, and the digit before the first "_" in its name is the label.
hwLabels = []
trainingFileList = listdir('trainingDigits')
m = len(trainingFileList)
trainingMat = zeros((m, 1024))
for i, fileNameStr in enumerate(trainingFileList):
    fileStr = fileNameStr.split('.')[0]  # take off .txt
    classNumStr = int(fileStr.split('_')[0])
    hwLabels.append(classNumStr)
    trainingMat[i, :] = kNN.img2vector('trainingDigits/%s' % fileNameStr)

# Iterate through the test set, classify each file with k=3 and count the
# predictions that disagree with the label taken from the file name.
testFileList = listdir('testDigits')
errorCount = 0.0
mTest = len(testFileList)
for i, fileNameStr in enumerate(testFileList):
    fileStr = fileNameStr.split('.')[0]  # take off .txt
    classNumStr = int(fileStr.split('_')[0])
    vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
    classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
    print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
    if classifierResult != classNumStr:
        errorCount += 1.0
ax = fig.add_subplot(111) ax.scatter(datamat[:,1], datamat[:,2], 15.0*array(labels), 15.0*array(labels)) plt.show() ''' normmat, ranges, minvals = kNN.autoNorm(datamat) print(normmat) print(ranges) print(minvals) # kNN.datingClassTest(0.2,7) def classifyperson(): result = ['not at all', 'small doses', 'large dose'] ffmiles = float(input('frequent filter miles earned per year:')) gametimepercent = float(input('% of time spent on game:')) icecream = float(input('liters of ice cream consumed per year:')) datamat, labels = kNN.file2matrix('datingTestSet.txt') normmat, ranges, minvals = kNN.autoNorm(datamat) inarry = (array([ffmiles, gametimepercent, icecream]) - minvals) / ranges classifyresult = kNN.classify0(inarry, normmat, labels, 3) print("you like this person:", result[classifyresult - 1]) return # classifyperson() testvector = kNN.img2vector('testDigits/0_13.txt') kNN.handwritingClassTest()
#!/usr/bin/python
# -*- coding:utf-8 -*-
import kNN

# Driver used to exercise the k-nearest-neighbours helpers in kNN.
# (print takes a single parenthesised expression throughout, so the output
# is the same under the Python 2 print statement.)

# Pick up the latest version of the module, then load the data from file.
reload(kNN)
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)

# Normalise the data and show the result before and after the classifier test.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print(normMat)
kNN.datingClassTest()
print(normMat)

# Convert one digit file and show columns 0-30 of row 0.
testvector = kNN.img2vector('0_13.txt')
print(testvector[0, 0:31])
import kNN
from numpy import *
import operator
# 2.2
import matplotlib
import matplotlib.pyplot as plt
# 2.3
from os import listdir

# Build the toy data set, load and normalise the dating data, and convert
# one sample digit file.
group, labels = kNN.createDataSet()
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
testVector = kNN.img2vector('machinelearninginaction/Ch02/digits/testDigits/0_0.txt')

# Optional dumps of the intermediate results (disabled).
# print(group)
# print(labels)
# print(datingDataMat)
# print("=====I'm split line.==========")
# print(datingLabels[0:20])
# print(normMat)
# print(ranges)
# print("=====I'm split line.==========")
# print(minVals)
# print("=====I'm split line.==========")
print(testVector)
# 2.1
#coding:utf-8
import kNN

# Convert one sample digit file and show two slices of row 0
# (columns 0-30 and 32-62), then run the full handwriting test.
# print takes a single parenthesised expression, so the output is the same
# under the Python 2 print statement.
testVector = kNN.img2vector('testDigits/0_13.txt')
print(testVector[0, 0:31])
print(testVector[0, 32:63])
kNN.handwritingClassTest()
import kNN

# Toy data set: classify the point [0, 0] with k=3 and print the result.
group, labels = kNN.create_dataset()
print(kNN.classify0([0, 0], group, labels, 3))

# Dating data: load, normalise, run the hold-out test and the interactive classifier.
dating_data_mat, dating_labels = kNN.file2matrix('./CH02/datingTestSet.txt')
norm_mat, ranges, min_values = kNN.auto_norm(dating_data_mat)
kNN.dating_class_test('./CH02/datingTestSet.txt')
kNN.classify_person('./CH02/datingTestSet2.txt')

# Handwritten digits: convert one sample file, then run the test over the
# training and test directories.
test_vec = kNN.img2vector('./CH02/digits/testDigits/0_13.txt')
kNN.handwriting_class_test('./CH02/digits/trainingDigits', './CH02/digits/testDigits')
# for i in range(numTestVecs):
#     classifierResult = kNN.classify0(normanDataSet[i, :], normanDataSet[numTestVecs:, :], datingLabels[numTestVecs:], 3)
#     # print(classifierResult)
#     if (classifierResult != datingLabels[i]) : errorCount += 1
#
# print("错误率为:%f" % (errorCount/(m*hoRatio)))
# persentTags = input('测试')
# print(persentTags)

# Training set: parse the class digit from each file name (the integer before
# the first "_"), load the features into one 1024-wide row of the matrix and
# store the label in a list.
trainingFileList = os.listdir('trainingDigits')
m = len(trainingFileList)
trainingMat = np.zeros((m, 1024))
hwLabels = []
for i, trainingFileName in enumerate(trainingFileList):
    classNumStr = int(trainingFileName.split('.')[0].split('_')[0])
    trainingMat[i, :] = kNN.img2vector('trainingDigits/%s' % trainingFileName)
    hwLabels.append(classNumStr)

# Test set: parse the class digit from each file name and compare it with the
# kNN prediction (k=3).
testFileList = os.listdir('testDigits')
mTest = len(testFileList)
# Sized by the number of test files (it was sized by the training count m).
# NOTE(review): testMat is not filled or read in this section.
testMat = np.zeros((mTest, 1024))
errorCount = 0
for i, testFileName in enumerate(testFileList):
    classNumStr = int(testFileName.split('.')[0].split('_')[0])
    vectorUnderTest = kNN.img2vector('testDigits/%s' % testFileName)
    classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
    if classNumStr != classifierResult:
        errorCount += 1

# Report the error rate; an empty test directory yields 0.0 instead of a
# ZeroDivisionError.
print('错误率为:%f' % (errorCount / mTest if mTest else 0.0))
# Plain scatter plot of columns 1 and 2.
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
plt.show()

# Same columns, with marker size and colour both scaled by the class label.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# Columns 0 and 1, again sized and coloured by the class label.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# Call the normalisation function to process the data set.  The bare names
# below are expression statements; they print nothing when run as a script.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
normMat
ranges
minVals

# Call the classifier test.
kNN.datingClassTest()

# Call the function that converts the image information into a vector.
# NOTE(review): 'testDights' looks like a typo for 'testDigits' -- confirm the
# directory name on disk before changing it.
tectVector = kNN.img2vector('testDights/0_13.txt')
tectVector

# Call the function that predicts the handwritten digits in the images.
kNN.handwritingClassTest()
# NOTE: each print below takes one pre-formatted string; the text is the same
# as the original two-item Python 2 print statements ("label: value").

#2.2.3 Prepare the data: normalise the values
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print('normMat: %s' % (normMat,))
print('ranges: %s' % (ranges,))
print('minVals: %s' % (minVals,))

#2.2.4 Test the algorithm: validate the classifier as a complete program
kNN.datingClassTest()

#2.2.5 Use the algorithm: build a complete, usable system
kNN.classifyPerson()

#2.3.1 Prepare the data: convert an image into a test vector
testVector = kNN.img2vector(homedir + 'testDigits/0_13.txt')
print('testVector[0,0:31]: %s' % (testVector[0, 0:31],))

#2.3.2 Test the algorithm: recognise handwritten digits with k-nearest neighbours
kNN.handwritingClassTest()

# inX=[0,0]
# dataSet=group
# k=3
#
# dataSetSize = dataSet.shape[0]
# print 'dataSetSize:',dataSetSize
# diffMat = tile(inX, (dataSetSize, 1)) - dataSet
# print 'diffMat:',diffMat
# sqDiffMat = diffMat ** 2
# print 'sqDiffMat:',sqDiffMat
# Load the dating data and show the matrix plus the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# Scatter plot of columns 0 and 1; marker size and colour are both scaled by
# the class label.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# Normalisation
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print(normMat)
print(ranges)
print(minVals)

# Test error rate
kNN.datingClassTest()

# Predict
kNN.classifyPerson()

# Handwritten digit recognition
# Load data: convert one sample file and show columns 0-30 of row 0.
testVector = kNN.img2vector('dataset/testDigits/0_13.txt')
print(testVector[0, 0:31])

# Handwriting class test
kNN.handwritingClassTest()
def main7():
    """Handwritten digit recognition system.

    Converts the sample file ``testDigits/0_0.txt`` with ``kNN.img2vector``
    and then runs ``kNN.handwritingClassTest``.
    """
    # The converted vector is not used afterwards; the call is kept so the
    # sample file is still read exactly as before.
    kNN.img2vector('testDigits/0_0.txt')
    kNN.handwritingClassTest()
#coding=utf-8
from numpy import *
import kNN

# Pick up the latest version of the module before using it.
reload(kNN)

# Read the file data and show two slices of row 0 (columns 0-30 and 32-62).
testVector = kNN.img2vector('testDigits/0_13.txt')
print(testVector[0, 0:31])
print(testVector[0, 32:63])
# coding: gbk
"""
This is a test for recognition of handwritten numbers with kNN
"""
import kNN

################################################################################
#                                                                              #
#                       Handwritten digit recognition                          #
#                                                                              #
################################################################################

# Test functions
#testVector = kNN.img2vector('./data/digits/0_0.txt')
#print(testVector[0,0:31])
#print(testVector[0,32:63])

# Classifier + test
#kNN.handwritingTest()

# Digit recognition from PNG images: each of 0.png .. 9.png is passed through
# kNN.png2vec, the returned path is converted with kNN.img2vector, and the
# vector is classified with k=3 against the training data.
trainingMat, hwLabels = kNN.handwritingClassTraining()
for i in range(10):
    filepath = kNN.png2vec("./data/digits/MyDigits/%d.png" % i)
    vecUnderTest = kNN.img2vector(filepath)
    res = kNN.classify0(vecUnderTest, trainingMat, hwLabels, 3)
    # The loop index doubles as the expected digit.
    print("The handwriting number is: %d (%d for real)" % (res, i))