def train():
    """Evaluate the kNN classifier on a hold-out slice of the dating data.

    Despite its name, this function does not fit a model. It loads
    ``datingTestSet2.txt`` via ``knn.file2matrix``, passes the feature matrix
    through ``knn.autoNorm``, prints the resulting matrix and the labels, and
    then classifies the first 10% of the rows against the remaining 90%
    (``knn.classify0`` is called with 3 as its last argument, presumably k).
    Each prediction is printed next to the true label, followed by the
    overall error rate.

    Returns:
        None. All results are written to standard output.
    """
    datingDataMat, datingLables = knn.file2matrix('datingTestSet2.txt')
    # Only the normalized matrix is used below; the other two results are not.
    normMat, rangeVals, minVals = knn.autoNorm(datingDataMat)
    print(normMat)
    print(datingLables)
    # Optional visual check: knn.plotData(datingDataMat, datingLables)

    # Fraction of the data set held out for testing.
    hoRatio = 0.10
    # Total number of samples.
    m = normMat.shape[0]
    # Number of hold-out samples: rows [0, numTestVecs) are the test set.
    numTestVecs = int(m * hoRatio)

    # Rows after the hold-out slice are the reference set; the labels are
    # sliced identically so row k of trainMat matches entry k of trainLabels.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLables[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :], trainMat, trainLabels, 3)
        print('分类器返回: %d, 实际的结果是:%d' % (classifierResult, datingLables[i]))
        if classifierResult != datingLables[i]:
            errorCount += 1.0

    # With fewer than 10 rows the hold-out slice is empty and the rate is
    # undefined; report nan instead of raising ZeroDivisionError.
    if numTestVecs:
        errorRate = errorCount / float(numTestVecs)
    else:
        errorRate = float('nan')
    print('错误率是: %f' % errorRate)
def predict():
    """Prompt for one person's three features and print the predicted level.

    Reads three floats from the console, loads ``datingTestSet2.txt`` as the
    reference data, rescales the query with the minimum and range returned by
    ``knn.autoNorm``, and classifies it with ``knn.classify0`` (last argument
    3). The returned label is used as a 1-based index into the three
    liking-level descriptions.

    Returns:
        None. The prediction is written to standard output.
    """
    # Console input comes first, so a bad value fails before any file access.
    gameTime = float(raw_input('玩游戏的时间是: '))
    flightMiles = float(raw_input('每年的飞行公里数: '))
    iceCreamEaten = float(raw_input('每年消耗的冰淇淋: '))

    rawData, labels = knn.file2matrix('datingTestSet2.txt')
    scaledData, spans, lows = knn.autoNorm(rawData)

    # Feature order is [flight miles, gaming time, ice cream]
    # (presumably the column order of the data file - confirm).
    query = np.array([flightMiles, gameTime, iceCreamEaten])
    # Apply the same rescaling to the query as was applied to the data set.
    scaledQuery = (query - lows) / spans

    label = knn.classify0(scaledQuery, scaledData, labels, 3)

    levels = ('一点也不喜欢', '有点喜欢', '非常喜欢')
    # A single space separates prefix and level, as the print statement did.
    print('预测你可能喜欢这个人的程度:' + ' ' + levels[label - 1])
def predict():
    """Interactively classify one person described by three numeric features.

    The user is asked, in order, for gaming time, yearly flight distance and
    yearly ice-cream consumption. The answers are rescaled with the minimum
    and range that ``knn.autoNorm`` returns for ``datingTestSet2.txt`` and
    handed to ``knn.classify0`` (last argument 3). The resulting label,
    minus one, selects the description that is printed.

    Returns:
        None. The prediction is written to standard output.
    """
    resultList = ['一点也不喜欢', '有点喜欢', '非常喜欢']

    # Prompts are issued one after another in this exact order; each answer
    # is converted to float before the next prompt is shown.
    questions = ('玩游戏的时间是: ', '每年的飞行公里数: ', '每年消耗的冰淇淋: ')
    percentTats, ffMiles, iceCream = [float(raw_input(q)) for q in questions]

    datingDataMat, datingLabels = knn.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)

    # Normalize the input the same way as the data set. Note that the vector
    # starts with the flight distance, not with the first answer given.
    inArrNorm = (np.array([ffMiles, percentTats, iceCream]) - minVals) / ranges

    verdict = resultList[knn.classify0(inArrNorm, normMat, datingLabels, 3) - 1]
    print('%s %s' % ('预测你可能喜欢这个人的程度:', verdict))
def datingClassTest():
    """Run a hold-out test of the kNN classifier and print its error rate.

    The first 10% of the rows of the dating data set are classified against
    the remaining 90% (``knn.classify0`` with 3 as its last argument,
    presumably k). Every prediction is printed next to the true label,
    followed by the total error rate.

    Returns:
        None. All results are written to standard output.
    """
    hoRatio = 0.10
    # NOTE(review): the loader is spelled `file2matix` here; confirm that the
    # knn module really exposes that name (`file2matrix` may be intended).
    # NOTE(review): the data path is an absolute, machine-specific location.
    datingDataMat, datingLabels = knn.file2matix(
        '/home/matija/Projects/personal_projects/show-me-the-code/data-science/CollectiveIntelligence/dataSets/datingTestSet1.txt'
    )
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    m = normMat.shape[0]
    # Number of vectors used to test the kNN classifier: rows [0, numTestVecs).
    numTestVecs = int(m * hoRatio)

    # Reference rows and their labels must be sliced identically. Passing the
    # full label list would pair reference row k with the label of row k
    # instead of row numTestVecs + k, so votes would use the wrong labels.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :], trainMat, trainLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    # An empty hold-out slice (fewer than 10 rows) has no defined error rate;
    # report nan instead of raising ZeroDivisionError.
    if numTestVecs:
        errorRate = errorCount / float(numTestVecs)
    else:
        errorRate = float('nan')
    print("the total error rate is: %f" % errorRate)
def train():
    """Measure the kNN classifier's error rate on a 10% hold-out split.

    No model is fitted here. The function reads ``datingTestSet2.txt`` with
    ``knn.file2matrix``, runs the features through ``knn.autoNorm``, prints
    the resulting matrix and the labels, and classifies each of the first
    ``int(m * 0.10)`` rows against all later rows using ``knn.classify0``
    (last argument 3). Predictions, true labels and the final error rate are
    printed.

    Returns:
        None. All results are written to standard output.
    """
    datingDataMat, datingLables = knn.file2matrix('datingTestSet2.txt')
    normMat, rangeVals, minVals = knn.autoNorm(datingDataMat)
    print(normMat)
    print(datingLables)
    # Optional visual check: knn.plotData(datingDataMat, datingLables)

    # Share of the data set used as the validation (hold-out) set.
    hoRatio = 0.10
    # Total number of rows in the data set.
    m = normMat.shape[0]
    # Size of the test set; it occupies the leading rows of the matrix.
    numTestVecs = int(m * hoRatio)

    errorCount = 0.0
    for i in range(numTestVecs):
        # Reference data and labels are sliced with the same bounds so that
        # they stay aligned row for row.
        classifierResult = knn.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLables[numTestVecs:m],
                                         3)
        print('分类器返回: %d, 实际的结果是:%d' % (classifierResult, datingLables[i]))
        if classifierResult != datingLables[i]:
            errorCount += 1.0

    # Fewer than 10 rows yields an empty test set; the rate is then undefined,
    # so print nan rather than failing with ZeroDivisionError.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else float('nan')
    print('错误率是: %f' % errorRate)
# Load the dating data set and echo what was read.
datingDataMat, datingLabels = knn.file2matrix('datingTestSet.txt')
print(datingDataMat)
print(datingLabels)

# Display columns 1 and 2 of the raw feature matrix.
fig = plt.figure()
ax = fig.add_subplot(111)
# Plain scatter: no useful pattern is visible without class information.
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
# Same points again, with marker size and colour both scaled by the label.
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0 * array(datingLabels), 15.0 * array(datingLabels))
plt.show()
plt.close()

# Normalize the data and show the three values returned by autoNorm.
normMat, ranges, minVals = knn.autoNorm(datingDataMat)
print('norm mat:')
print(normMat)
print('range:')
print(ranges)
# This banner previously repeated 'norm mat:' although minVals is printed.
print('min vals:')
print(minVals)

# Test the classifier, using the first hoRatio share of the rows as test set
# and the remaining rows (with identically sliced labels) as reference data.
hoRatio = 0.10
m = normMat.shape[0]
numTestVecs = int(m * hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
    classifierResult = knn.classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
    print("the classifier came back with: %d, the real answer is: %d"
          % (classifierResult, datingLabels[i]))
    # errorCount was initialised but never updated; tally the mismatches.
    if classifierResult != datingLabels[i]:
        errorCount += 1.0

# Report the hold-out error rate; an empty test set has no defined rate.
if numTestVecs:
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
else:
    print("the total error rate is: %f" % float('nan'))
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 21 10:38:54 2018

@author: fsxn2
"""
import knn
import matplotlib
import matplotlib.pyplot as plt

# Earlier experiment with the built-in toy data set (disabled):
# group, labels = knn.createDataSet()
# print(knn.classify0([0, 0], group, labels, 3))

# Read the samples and their labels from the input file.
group, labels = knn.file2matrix("input.txt")

# Run autoNorm on the samples and print its three results in order.
auto, ranges, minval = knn.autoNorm(group)
for _part in (auto, ranges, minval):
    print(_part)

# Scatter plot of the samples (disabled):
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(group[:, 1], group[:2])
# plt.show()

# Classify one hand-written sample against the loaded data.
# NOTE(review): the classifier receives the raw `group` matrix and a raw
# query, not the `auto` matrix computed above - confirm this is intended.
_query = [1, 0, 3]
print(knn.classify0(_query, group, labels, 3))