def datingClassTest(): hoRatio = 0.50 #hold out 10% datingDataMat, datingLabels = kNN.file2matrix( 'datingTestSet.txt') #load data setfrom file normMat, ranges, minVals = kNN.autoNorm(datingDataMat) m = normMat.shape[0] numTestVecs = int(m * hoRatio) # 50% train set,50% test set errorCount = 0.0 errorAns = m for k in range(20): errorCount = 0 for i in range(numTestVecs): classifierResult = kNN.classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], k + 1) # print "the classifier came back with: %s, the real answer is: %s" % (classifierResult, datingLabels[i]) if (classifierResult != datingLabels[i]): errorCount += 1.0 print "when k is %d the total error rate is: %f" % (k + 1, ( errorCount / float(numTestVecs))) print errorCount if errorCount <= errorAns: errorAns = errorCount ans = k + 1 print "the best k is", ans
def test_autoNorm(self):
    """Print the raw dating matrix and the three values kNN.autoNorm returns."""
    rawMat, rawLabels = kNN.file2matrix("datingTestSet.txt")
    print("\n datingDataMat == %s" % (rawMat))
    scaled, spans, lows = kNN.autoNorm(rawMat)
    report = "\n normDataSet == %s \n ranges == %s \n minVals == %s \n"
    print(report % (scaled, spans, lows))
def paintDataSet():
    """Scatter-plot columns 1 and 2 of the dating data, styled by label."""
    samples, classes = kNN.file2matrix('datingTestSet.txt')
    axes = plt.figure().add_subplot(111)
    # 20 * label is passed twice: once as marker size, once as colour value.
    axes.scatter(samples[:, 1], samples[:, 2],
                 s=20.0 * array(classes), c=20.0 * array(classes))
    plt.show()
def test_knn_dating():
    """Train scikit-learn's kNN classifier on the dating data and check it.

    Loads 'datingTestSet2.txt' with knn.file2matrix, normalises it with
    knn.autoNorm, shuffles the row indices and splits them 90% / 10% into
    disjoint training and test sets. Prints True or False for every test
    sample, depending on whether the prediction equals its label.
    """
    # Read the data with the knn module's loader.
    x, y = knn.file2matrix('datingTestSet2.txt')
    norm_x, ranges, minVals = knn.autoNorm(x)
    norm_x = np.array(norm_x)
    y = np.array(y)

    # Shuffle once, then cut at a single point so that the training and the
    # test rows can never overlap.
    train_ratio = 0.9
    size_data = len(norm_x)
    indices = np.random.permutation(size_data)
    cut = int(train_ratio * size_data)
    train_idx, test_idx = indices[:cut], indices[cut:]
    x_train, y_train = norm_x[train_idx], y[train_idx]
    x_test, y_test = norm_x[test_idx], y[test_idx]

    # Create the classifier and feed it the training data.
    knn_classfier = neighbors.KNeighborsClassifier()
    knn_classfier.fit(x_train, y_train)

    # Check every held-out sample rather than assuming there are exactly 100.
    result = knn_classfier.predict(x_test)
    for predicted, expected in zip(result, y_test):
        print(bool(predicted == expected))
def autoNormTest():
    """Load the dating data set, normalise it and print the three results."""
    # Raw string: the path value is unchanged, but the backslashes are no
    # longer parsed as (invalid) escape sequences such as "\y" or "\P".
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(returnMat)
    print('normMat:', normMat)
    print('ranges:', ranges)
    print('minVals:', minVals)
def datingClassTest():
    """
    Hold-out test for the dating-site classifier.

    Normalises 'datingTestSet2.txt', treats the first 10% of the rows as
    test vectors and classifies each against the remaining rows with k = 3.
    Prints every prediction next to the real label, then the error rate and
    the error count.
    :return: None
    """
    test_fraction = 0.1
    samples, sample_labels = kNN.file2matrix('datingTestSet2.txt')
    scaled, _spans, _lows = kNN.autoNorm(samples)
    # Number of rows, i.e. the size of the first dimension.
    row_count = scaled.shape[0]
    test_count = int(row_count * test_fraction)
    print('numTestVecs = ', test_count)
    wrong = 0.0
    for idx in range(test_count):
        predicted = kNN.classify0(scaled[idx, :],
                                  scaled[test_count:row_count, :],
                                  sample_labels[test_count:row_count],
                                  3)
        actual = sample_labels[idx]
        print('The classifier came back with %d, the real answer is: %d'
              % (predicted, actual))
        if predicted != actual:
            wrong += 1.0
    print('The total error rate is %f' % (wrong / float(test_count)))
    print(wrong)
def test2():
    """Print the dating labels as an array and scatter-plot columns 1 and 2."""
    points, tags = kNN.file2matrix('datingTestSet.txt')
    print(array(tags))
    canvas = plt.figure()
    axes = canvas.add_subplot(111)
    xs, ys = points[:, 1], points[:, 2]
    axes.scatter(xs, ys)
    plt.show()
def draw():
    """Scatter columns 0 and 1 of the dating data and save it as '0_1.png'."""
    figure = plt.figure()
    axes = figure.add_subplot(111)
    matrix, label_list = kNN.file2matrix('datingTestSet2.txt')
    # 15 * label is used for both the marker size and the colour value.
    axes.scatter(matrix[:, 0], matrix[:, 1],
                 s=15.0 * np.array(label_list),
                 c=15.0 * np.array(label_list))
    figure.savefig('0_1.png')
def matplotlibTest():
    """Scatter-plot columns 0 and 1 of the dating data, styled by label."""
    # Raw string: the path value is unchanged, but the backslashes are no
    # longer parsed as (invalid) escape sequences such as "\y" or "\P".
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # 15 * label serves as both the marker size and the colour value.
    ax.scatter(returnMat[:, 0], returnMat[:, 1],
               15.0 * np.array(classLabelVector),
               15.0 * np.array(classLabelVector))
    plt.show()
def feature_show():
    """Show two side-by-side scatter plots of the dating features.

    The left panel plots column 1 against column 2, the right panel column 0
    against column 1. Marker size and colour both come from 15 * label.
    """
    features, tags = file2matrix(DATING_DATA)
    figure = plt.figure()
    # (subplot position, x column, y column), drawn in this order.
    for position, x_col, y_col in ((121, 1, 2), (122, 0, 1)):
        panel = figure.add_subplot(position)
        panel.scatter(features[:, x_col], features[:, y_col],
                      15.0 * array(tags), 15.0 * array(tags))
    plt.show()
def main3():
    '''
    Read the dating text data and plot it.

    Columns 1 and 2 of 'datingTestSet2.txt' are shown as one scatter plot in
    which marker size and colour are both 15 * class label.
    '''
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Draw one layer only, styled by class. The earlier unstyled scatter of
    # the same points sat underneath it and showed through behind the
    # smaller class markers.
    ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
               15.0 * array(datingLabels), 15.0 * array(datingLabels))
    plt.show()
def classifyperson():
    """Ask for three feature values and print the predicted liking level.

    The answers are scaled with the minimum and range that kNN.autoNorm
    returns for 'datingTestSet.txt' and then classified with k = 3.
    """
    result = ['not at all', 'small doses', 'large dose']
    prompts = ('frequent filter miles earned per year:',
               '% of time spent on game:',
               'liters of ice cream consumed per year:')
    # The prompts are asked in order; the answers keep that order.
    answers = [float(input(text)) for text in prompts]
    samples, tags = kNN.file2matrix('datingTestSet.txt')
    scaled, spans, lows = kNN.autoNorm(samples)
    query = (array(answers) - lows) / spans
    verdict = kNN.classify0(query, scaled, tags, 3)
    print("you like this person:", result[verdict - 1])
def classifyPerson():
    """Prompt for a person's three features and print the predicted liking.

    The answers are normalised with the minimum and range that kNN.autoNorm
    returns for 'datingTestSet2.txt' and classified with k = 3 against the
    normalised data and its label vector.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # Vote over the label vector; the feature matrix was passed here before.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # Concatenate: "text".resultList was attribute access on a str and
    # raised AttributeError.
    print("you will probably like this person: "
          + resultList[classifierResult - 1])
def classifyPerson(): resultlist=['not at all','in small doss','in large does'] percentTats=float(raw_input("percentage of time spent playing video game?")) ffMiles=float(raw_input("frequent filer miles earned per year?")) icecream=float(raw_input("liters of ice cream consumed per year?")) datingDataMat,datingLabel = kNN.file2matrix('datingTestSet2.txt') normat,rangeval,minval=kNN.autonorm(datingDataMat) print "normat:%s" %(normat) inX=array([ffMiles,percentTats,icecream]) retVal=kNN.classify0((inX - minval)/rangeval,normat,datingLabel,3) print "retval[%d]" %(retVal) print "resutl:%s " %(resultlist[retVal])
def classifyPerson(): resultlist = ['not at all','in small doses','in large doses'] games = float(raw_input( "percentage of time spent playing video games?")) flymiles = float(raw_input( "frequent flier miles earned per year?")) icecream = float(raw_input( "liters of ice cream consumed per year?")) datingdata, datinglabel = kNN.file2matrix('datingTestSet2.txt') normdata, ranges, minv = kNN.autoNorm(datingdata) inarr = array([flymiles, games, icecream]) result = kNN.classify0((inarr - minv)/ranges, normdata, datinglabel, 3) print "you will probably like this person:", resultlist[result-1]
def datingClassTest(): hoRatio = 0.10 datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt') normMat, ranges, minVals = kNN.autoNorm(datingDataMat) m = normMat.shape[0] numTestVecs = int(m*hoRatio) errorCount = 0.0 for i in range(numTestVecs): classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3) print "the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]) if classifierResult != datingLabels[i]: errorCount += 1.0 print "the total error rate is: %f" % (errorCount/float(numTestVecs))
def test_matplot(self):
    """Scatter-plot columns 1 and 2 of 'datingTestSet.txt', styled by label."""
    samples, classes = kNN.file2matrix("datingTestSet.txt")
    # Create a figure with a single set of axes.
    axes = plt.figure().add_subplot(111)
    # Columns are counted from 0. According to the original author,
    # column 1 (x axis) is the percentage of time spent playing video games
    # and column 2 (y axis) is the litres of ice cream consumed per week.
    # samples[:, 1] selects column 1 of every row.
    game_share = samples[:, 1]
    ice_cream = samples[:, 2]
    axes.scatter(game_share, ice_cream,
                 15.0 * array(classes), 15.0 * array(classes))
    plt.show()
def datingClassTest(): hoRatio = 0.550 datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt') normMat, ranges, minVals = kNN.autoNorm(datingDataMat) m = normMat.shape[0] numTestVecs = int(m * hoRatio) errorCount = 0.0 for i in range(numTestVecs): classifierResult = kNN.classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3) print 'the classifier came back with: %s, the real answer is: %s' % (classifierResult, datingLabels[i]) if (classifierResult != datingLabels[i]): errorCount += 1.0 print "the total error rate is: %f" % (errorCount / float(numTestVecs))
def showDatingInput(): # 输入测试数据 resultList = ['not at all', 'in small doses', 'in large doses'] mPercentGame = float( raw_input('the percentange of time spent playing vedio games:')) mPercentMiles = float(raw_input('the miles earned every year:')) mpercentIce = float(raw_input('the ice cream consumed per year:')) testArray = [mPercentMiles, mPercentGame, mpercentIce] mat, labels = kNN.file2matrix('datingTestSet2.txt') normMat, mRange, mMin = kNN.autoNum(mat) ansType = kNN.classify0((testArray - mMin) / mRange, normMat, labels, 5) print 'This guy is mostly', resultList[int(ansType) - 1]
def dating_class_test():
    """Hold-out test of the dating classifier.

    The first int(rows * HO_RATIO) normalised rows of DATING_DATA are
    classified with K neighbours against the remaining rows. Every
    prediction is printed next to the real label, then the error rate.
    """
    samples, tags = file2matrix(DATING_DATA)
    scaled, _spans, _lows = auto_norm(samples)
    total = scaled.shape[0]
    held_out = int(total * HO_RATIO)
    misses = 0.0
    for row in range(held_out):
        guess = classify0(scaled[row, :],
                          scaled[held_out:total, :],
                          tags[held_out:total],
                          K)
        truth = tags[row]
        print('the classifier came back with: %d, the real answer is: %d'
              % (guess, truth))
        if guess != truth:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(held_out)))
def classifyPerson(): #resultList = ['not at all','in small doses', 'in large doses'] percentTats = float(raw_input(\ "percentage of time spent playing video games?")) ffMiles = float(raw_input("frequent flier miles earned per year?") ) #使用sumlime配置的环境python27无法读取输入数据 iceCream = float(raw_input( "liters of ice cream consumed per year?")) #点击*.py运行即可,程序末尾添加待输入 datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt') normMat, ranges, minVals = kNN.autoNorm(datingDataMat) inArr = array([ffMiles, percentTats, iceCream]) classifierResult = kNN.classify0((inArr-\ minVals)/ranges,normMat,datingLabels,3) print "You will probably like this person: ",\ classifierResult
def datingSetTest(horate): datingDataMat,datingLabel=kNN.file2matrix('datingTestSet2.txt') print "data[%d]:%s,\nlabel:%s" %(datingDataMat.shape[0],datingDataMat,datingLabel) datingDataMat,rangeval,minval=kNN.autonorm(datingDataMat) print "data[%d]:%s" %(datingDataMat.shape[0],datingDataMat) m=datingDataMat.shape[0] count=int(m*horate) errcount=0.0 for i in range(1,count): retVal=kNN.classify0(datingDataMat[i,:],datingDataMat[count:m,:],datingLabel[count:m],3) print "orignal:%d,calculate:%d"%(datingLabel[i],retVal) if retVal != datingLabel[i]: errcount+=1.0 print "error." print "error rate:%f" %(errcount/float(count))
def classifyPerson(): """ 根据提示输入数据 :return: """ ll = ["不喜欢的人", "魅力一般的人", "具有魅力的人"] x1 = float(raw_input("玩视频游戏所耗时间的百分比?")) x2 = float(raw_input("每年获得的飞行常客里程数为?")) x3 = float(raw_input("每周消费的冰淇淋的功升数为?")) x, y = kNN.file2matrix("datingTestSet2.txt") normX, rage, minV = kNN.autoNorm(x) inX = (array([x1, x2, x3]) - minV) / rage ret = kNN.classify0(inX, x, y, 3) print "你对的喜欢程度可能是:", ll[int(ret) - 1]
def datingClassTest(): """ 分类器测试:约会分类网站的用户数据分类的测试 :return: """ hoRet = 0.10 x, y = kNN.file2matrix("datingTestSet.txt") normX, rage, minV = kNN.autoNorm(x) m = normX.shape[0] errorCount = 0 testNum = int(hoRet * m) for i in range(testNum): yr = kNN.classify0(normX[i, :], normX[testNum:m, :], y, 3) print "第%d个分类为%s,原来分类为%s" % (i, yr, y[i]) if yr != y[i]: errorCount += 1 print "错误数为:%d,数错误率为:%f%% " % (errorCount, float(errorCount) * 100 / m) return
def showDatingTestData(): """ 测试约会案例,文件数据转换成矩阵数据,使用10%数据作为测试集 :return: """ mRatio = 0.1 mat, labels = kNN.file2matrix('datingTestSet2.txt') normMat, mRange, mMin = kNN.autoNum(mat) # 数据归一化 mCount = mat.shape[0] # 数据行数 mTestCount = int(mRatio * mCount) # 测试集数目 mError = 0 # 错误数 for i in range(mTestCount): mResult = kNN.classify0(normMat[i, :], normMat[mTestCount:mCount, :], labels[mTestCount:mCount], 5) if (mResult != labels[i]): mError += 1 print 'The error rate is: %f' % (mError * 1.0 / mTestCount) print 'The total test count is %d and the error count is %d' % (mTestCount, mError)
def datingClassTest():
    """Hold-out test of the dating classifier with k = 3.

    The first 10% of the normalised rows of 'datingTestSet2.txt' are
    classified against the remaining rows; each prediction is printed next
    to the real label, followed by the total error rate.
    """
    # Share of the data used to test the classifier.
    holdOut = 0.10
    # Convert the raw text file.
    samples, sampleLabels = kNN.file2matrix('datingTestSet2.txt')
    # Normalise.
    # NOTE(review): autoNorm and classify0 are called unqualified, unlike
    # kNN.file2matrix - confirm they are imported into this module.
    scaled, _spans, _lows = autoNorm(samples)
    # Number of test vectors.
    rowCount = scaled.shape[0]
    testCount = int(rowCount * holdOut)
    misses = 0.0
    for idx in range(testCount):
        guess = classify0(scaled[idx, :],
                          scaled[testCount:rowCount, :],
                          sampleLabels[testCount:rowCount],
                          3)
        actual = sampleLabels[idx]
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, actual))
        if guess != actual:
            misses += 1.0
    print("the total error rate is:%f" % (misses / float(testCount)))
import os import kNN CURRENT_DIR = os.path.dirname(__file__) groups, labels = kNN.createDataset() print kNN.classify0([0,0,0],groups,labels,3) dataSetFile = os.path.join(CURRENT_DIR + '/datingTestSet.txt') datingDataMat,datingLabels = kNN.file2matrix(dataSetFile) print kNN.classify0([40920, 8.326976, 0.953952],groups,labels,3)
def main4():
    """Load 'datingTestSet2.txt' and run kNN.autoNorm on its data matrix.

    Nothing is printed or returned; all results stay local.
    """
    features, _labels = kNN.file2matrix('datingTestSet2.txt')
    # Obtain the normalisation parameters (three values are returned).
    scaled, spans, lows = kNN.autoNorm(features)
def file2matrixTest():
    """Load the dating data set and print the matrix and the label vector."""
    # Raw string: the path value is unchanged, but the backslashes are no
    # longer parsed as (invalid) escape sequences such as "\y" or "\P".
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    print('returnMat:', returnMat)
    print('classLabelVector:', classLabelVector)
'''
Created on Jul 26, 2015

@author: selaselah
'''
import numpy as np
import kNN
import matplotlib
import matplotlib.pyplot as plt

# Scatter-plot columns 1 and 2 of the dating data; marker size and colour
# are both 15 * label.
fig = plt.figure()
ax = fig.add_subplot(111)
data,labels = kNN.file2matrix('datingTestSet.txt')
# numpy is imported as np only, so the bare name array() was undefined here.
ax.scatter(data[:,1], data[:,2], 15.0*np.array(labels), 15.0*np.array(labels))
ax.axis([-2,25,-0.2,2.0])
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')
plt.show()
from numpy import *
import kNN as knnnn

# Small 3x2 sample array; nothing below reads it.
array1 = array([[1, 2], [3, 4], [5, 6]])

# Parse the dating data file; the value returned by file2matrix is discarded.
knnnn.file2matrix(
    '/Users/Colin_Zhang/Desktop/machinelearninginaction/Ch02/datingTestSet2.txt'
)
import numpy
import kNN
import matplotlib
import matplotlib.pyplot as plt

# Three stacked scatter plots, one per pair of feature columns. In each of
# them marker size and colour are both 15 * label.
fig = plt.figure()

# Top panel: column 0 against column 1.
ax1 = fig.add_subplot(311)
datingDataMat, datingLabels = kNN.file2matrix('f:\\datingTestSet.txt')
ax1.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
            s=15.0 * numpy.array(datingLabels),
            c=15.0 * numpy.array(datingLabels))
ax1.set_xlabel('fly')

# Middle panel: column 0 against column 2.
ax2 = fig.add_subplot(312)
ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 2],
            s=15.0 * numpy.array(datingLabels),
            c=15.0 * numpy.array(datingLabels))

# Bottom panel: column 1 against column 2 (the name ax2 is reused).
ax2 = fig.add_subplot(313)
ax2.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
            s=15.0 * numpy.array(datingLabels),
            c=15.0 * numpy.array(datingLabels))

plt.show()
import matplotlib
import matplotlib.pyplot as plt
import kNN
from numpy import *

# Load the dating data.
datingDataMat, datingLabels = kNN.file2matrix(
    'E:/Personal/BOOK/机器学习/机器学习实战源代码/machinelearninginaction/Ch02/datingTestSet2.txt')

# Build (but do not show) a scatter plot of columns 1 and 2; marker size and
# colour are both 15 * label.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           s=15.0 * array(datingLabels), c=15.0 * array(datingLabels))
# Disabled: plt.show() and print(datingDataMat)

# Normalise the data, then run the module's own hold-out test.
norMat, ranges, minVals = kNN.autoNorm(datingDataMat)
# Disabled: print(norMat)
kNN.datingClassTest()
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 21:20:35 2018

@author: ldz
"""

# =============================================================================
'''testDatingClassifier'''
# =============================================================================

from kNN import file2matrix, autoNorm, classify0

hoRatio = 0.10  # share of the rows held out as test vectors
k = 3

# Load the data set from file and normalise it.
datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = autoNorm(datingDataMat)

m = normMat.shape[0]
numTestVecs = int(m * hoRatio)
errorCount = 0.0

# Classify each of the first numTestVecs rows against all remaining rows.
for i in range(numTestVecs):
    classifierResult = classify0(normMat[i, :],
                                 normMat[numTestVecs:m, :],
                                 datingLabels[numTestVecs:m],
                                 k)
    # A single parenthesised argument prints the same text as the print
    # statement did.
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, datingLabels[i]))
    if classifierResult != datingLabels[i]:
        errorCount += 1.0

print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
print("number of error:" + str(errorCount))
print("number of test:" + str(numTestVecs))
'''
    File Name:    main
    Description:  Main script; mostly calls functions from kNN.py
    Author:       jwj
    Date:         2018/1/18
'''
__author__ = 'jwj'

import kNN

if __name__ == '__main__':
    # Classify the origin against the toy data set with k = 3.
    group, labels = kNN.createDataSet()
    label = kNN.classify([0, 0], group, labels, 3)
    print(label)

    # Load and normalise the dating data. autoNorm is called once; an
    # extra call whose result was thrown away has been dropped.
    dataArray, dataLabels = kNN.file2matrix("datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(dataArray)
    # Disabled checks: print(normMat), kNN.dataClassTest(),
    # kNN.classifyPerson()

    kNN.handwritingClassTest()
import sys
import kNN
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

# This project's file2matrix is called with a second argument (2) and its
# result is used as a single matrix.
mat = kNN.file2matrix('123.txt', 2)

# Column 1 goes on the x axis and column 0 on the y axis; the plot with the
# axes the other way round, scatter(mat[:,0], mat[:,1], ...), is disabled.
ax.scatter(mat[:, 1], mat[:, 0], s=2, color='blue')
plt.show()
# Testing with new points: each point is created, classified with k = 3 and
# plotted as an 'x' in the colour of its predicted class.
# (Print `answer` to see the last result.)
for testvector in ([.2, .2], [.5, .5], [.75, .75]):
    answer = kNN.classify0(testvector, group, labels, 3)
    ax1.scatter(testvector[0], testvector[1],
                s=20, c=colormap1[answer], marker='x')

# K-Nearest-Neighbour work on the datingTestSet2 data set. The data file has
# to be in the working directory.
# Load the data values and labels from datingTestSet2.txt.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
datingLabelArray = np.array(datingLabels)

# Colour map with three colours, keyed by label.
colormap2 = {1: 'red', 2: 'blue', 3: 'green'}

# One colour name per data item.
ColoredDatingLabel = []
for things in datingLabelArray:
    ColoredDatingLabel.append(colormap2[things])

# Second sub plot: scatter diagram of the loaded data (columns 0 and 1).
ax2 = FigDating.add_subplot(312, xlim=(0, 100000), ylim=(0, 25))
ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
            s=20, c=ColoredDatingLabel, marker='o')

# Third sub plot: the same two columns after normalisation.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
ax3 = FigDating.add_subplot(313, xlim=(0, 1), ylim=(0, 1))
ax3.scatter(normMat[:, 0], normMat[:, 1],
            s=20, c=ColoredDatingLabel, marker='o')
import sys import kNN from pylab import * from numpy import * import numpy as np import matplotlib from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt mat, lab = kNN.file2matrix('datingTestSet2.txt') normMat, ranges, minVals = kNN.autoNorm(mat) def randrange(n, vmin, vmax): return (vmax - vmin) * np.random.rand(n) + vmin fig = plt.figure() ax = fig.add_subplot(111, projection='3d') #ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c') n = 1 for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]: xs = randrange(n, 23, 32) ys = randrange(n, 0, 100) zs = randrange(n, zl, zh) ClassSet = lab colorSet = [] for label in ClassSet: if label is '1': colorSet.append('r')
import kNN as KNN

# Classify the origin against the toy data set with k = 3.
# Recorded output: B
group, labels = KNN.createDataSet()
clas = KNN.classify0([0, 0], group, labels, 3)

# The dating file has 3 columns:
#   - Number of frequent flyer miles earned per year
#   - Percentage of time spent playing video games
#   - Liters of ice cream consumed per week
datingDataMat, datingLabels = KNN.file2matrix('data/datingTestSet.txt')

# Recorded output:
# >>> datingDataMat
# array([[ 7.29170000e+04, 7.10627300e+00, 2.23600000e-01],
#        [ 1.42830000e+04, 2.44186700e+00, 1.90838000e-01],
#        [ 7.34750000e+04, 8.31018900e+00, 8.52795000e-01],
#        ...,
#        [ 1.24290000e+04, 4.43233100e+00, 9.24649000e-01],
#        [ 2.52880000e+04, 1.31899030e+01, 1.05013800e+00],
#        [ 4.91800000e+03, 3.01112400e+00, 1.90663000e-01]])
# >>> datingLabels[0:20]
# ['didntLike', 'smallDoses', 'didntLike', 'largeDoses', 'smallDoses',
#  'smallDoses', 'didntLike', 'smallDoses', 'didntLike', 'didntLike',
#  'largeDoses', 'largeDoses', 'largeDoses', 'didntLike', 'didntLike',
#  'smallDoses', 'smallDoses', 'didntLike', 'smallDoses', 'didntLike']
import kNN
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Quick try: classify the origin against the toy data set with k = 3.
group, labels = kNN.createDataSet()
predict = kNN.classify0([0, 0], group, labels, 3)
print(predict)

# Load the dating data and show the matrix and the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# Scatter plot of columns 0 and 1; size and colour are both 15 * label.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           s=15.0 * np.array(datingLabels),
           c=15.0 * np.array(datingLabels))
plt.show()

# Normalisation: print the three values autoNorm returns, one per line.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
for normResult in (normMat, ranges, minVals):
    print(normResult)

# Error-rate test provided by the kNN module.
kNN.datingClassTest()
if node.right_child != None: self.search(item, node.right_child, nodeList, k) return if __name__ == '__main__': dataList = np.array([[2, 4], [5, 1], [3, 6], [7, 3], [6, 4.3], [2, 1], [1, 7]]) labelList = np.array([0, 0, 0, 0, 1, 1, 1]) kdRoot = kdTree_heap(dataList, labelList) print(kdRoot.transfer_dict(kdRoot.root)) print(kdRoot.length) label, nodeList = kdRoot.knn_algo([6, 3.8], k=3) print(nodeList) datingDataMat, datingLabels = file2matrix('./data/datingTestSet.txt') from sklearn import preprocessing datingDataMat = preprocessing.MinMaxScaler().fit_transform(datingDataMat) numTest = int(0.1 * datingDataMat.shape[0]) kdRoot = kdTree_heap(datingDataMat[numTest:], datingLabels[numTest:]) errorCount = 0 for i in range(numTest): classifierResult, nodeList = kdRoot.knn_algo(datingDataMat[i], k=3) print('the classifier came back with : {}, the real answer is : {}'. format(classifierResult, datingLabels[i])) if classifierResult != datingLabels[i]: errorCount += 1.0 print(errorCount)
import kNN
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
from pylab import *

data, label = kNN.file2matrix('datingTestSet2.txt')
fig = plt.figure(1)

# Upper panel: column 0 against column 1. xlabel()/ylabel() come from pylab
# and are called right after each panel has been created.
ax = fig.add_subplot(211)
ax.scatter(data[:, 0], data[:, 1], s=15 * array(label), c=15 * array(label))
xlabel('fly km')
ylabel('play game')

# Lower panel: column 1 against column 2, on the same figure.
ax = fig.add_subplot(212)
ax.scatter(data[:, 1], data[:, 2], s=15 * array(label), c=15 * array(label))
xlabel('play game')
ylabel('consume')

plt.show()
import sys import kNN from pylab import * from numpy import * import numpy as np import matplotlib from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt mat,lab = kNN.file2matrix('datingTestSet2.txt') normMat, ranges, minVals = kNN.autoNorm(mat) def randrange(n, vmin, vmax): return (vmax - vmin)*np.random.rand(n) + vmin fig = plt.figure() ax = fig.add_subplot(111,projection='3d') #ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c') n = 1 for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]: xs = randrange(n, 23, 32) ys = randrange(n, 0, 100) zs = randrange(n, zl, zh) ClassSet=lab colorSet = [] for label in ClassSet: if label is '1': colorSet.append('r') elif label is '2': colorSet.append('b') elif label is '3':
'''
@author: laiwei
'''
import kNN
from numpy import *

# Disabled toy example: kNN.createDataSet() followed by kNN.classify0 on
# [0, 0] and on [0.7, 0.8], each with k = 3.

datingMat, datingLabels = kNN.file2matrix("datingTestSet2.txt")
# Disabled debug prints: datingMat, datingMat[:,0], datingLabels

import matplotlib
import matplotlib.pyplot as plt

# Scatter plot of columns 0 and 1; size and colour are both 15 * label.
# (A plain plot of columns 1 and 2 is disabled.)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingMat[:, 0], datingMat[:, 1],
           s=15.0 * array(datingLabels), c=15.0 * array(datingLabels))
plt.show()

# Run the module's hold-out test; kNN.handwritingClassTest() is disabled.
kNN.datingClassTest()
import sys
import kNN
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
mat, lab = kNN.file2matrix("datingTestSet2.txt")

# Convert the labels to ints through a real list. On Python 3 map() returns
# an iterator, which array() would wrap as a single object instead of
# building a numeric vector.
scaledLabels = 15.0 * array([int(value) for value in lab])

# Columns 1 and 2; the scaled labels give both marker size and colour.
ax.scatter(mat[:, 1], mat[:, 2], scaledLabels, scaledLabels)
plt.show()
# encoding: utf-8
from numpy import *
import kNN
import matplotlib
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')

# The features do not share a common range, so the feature values are
# normalised before plotting.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
gameShare = normMat[:, 1]
iceCream = normMat[:, 2]
ax.scatter(gameShare, iceCream)

# Add the axis labels.
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')
plt.show()
import kNN
import matplotlib
import matplotlib.pyplot as plt
from numpy import all
import operator
from array import array

# Load "test.txt" and draw an unstyled scatter plot of columns 1 and 2.
datingDataMat, datingLabels = kNN.file2matrix("test.txt")
fig = plt.figure()
ax = fig.add_subplot(111)
xColumn = datingDataMat[:, 1]
yColumn = datingDataMat[:, 2]
ax.scatter(xColumn, yColumn)
plt.show()
import kNN import numpy import matplotlib import matplotlib.pyplot as plt print kNN.classify0([0, 0], numpy.array([[1, 0], [2, 1]]), ['A', 'B'], 1) datingDataMat, datingLabels = kNN.file2matrix('test2.txt') # datingDataMat = numpy.zeros((3,3)) # datingDataMat[2,:] = [2,1,0] # print datingDataMat ,datingDataMat[:,2] print datingLabels print 15.0*numpy.array(datingLabels) fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],15.0*numpy.array(datingLabels),15.0*numpy.array(datingLabels)) plt.show() # ax.scatter([2,3,1],[3,1,2])
# -*- coding: utf-8 -*-
'''
Created on 2015-09-29

@author: rains
'''
import matplotlib.pyplot as plt
import numpy as np
import os
import kNN

curdir = 'f:\\project\\python\\machine-learning-in-action/Ch02'
dataPath = curdir + "/datingTestSet.txt"
mat1, fab1 = kNN.file2matrix(dataPath)

# Inspecting the training set (disabled): scatter of columns 0 and 1 with
# size and colour 15.0 * np.array(fab1).

# Try the normalisation.
# NOTE(review): mat1 is rebound to whatever autoNorm returns and is not read
# again below - confirm whether only the matrix was wanted.
mat1 = kNN.autoNorm(mat1)

# Check the accuracy of the simple classifier.
kNN.datingClassTest()

# Handwritten digit recognition (disabled): kNN.handwritingClassTest()