def train():
    '''Run a hold-out evaluation of the kNN classifier on the dating data.

    Loads ``datingTestSet2.txt`` through ``knn.file2matrix``, normalises it
    with ``knn.autoNorm`` and prints the normalised matrix and the labels.
    The first 10% of the rows are then classified (k=3) against the
    remaining rows; each prediction is printed next to the real label,
    followed by the overall error rate.
    '''
    features, targets = knn.file2matrix('datingTestSet2.txt')
    scaled, _spans, _floors = knn.autoNorm(features)

    print(scaled)
    print(targets)

    # Share of the data set that is held out for validation.
    holdout_share = 0.10
    total_rows = scaled.shape[0]
    holdout_rows = int(total_rows * holdout_share)

    misses = 0.0
    for row in range(holdout_rows):
        predicted = knn.classify0(scaled[row, :],
                                  scaled[holdout_rows:total_rows, :],
                                  targets[holdout_rows:total_rows], 3)
        expected = targets[row]

        print('分类器返回: %d, 实际的结果是:%d' % (predicted, expected))

        if predicted != expected:
            misses += 1.0

    print('错误率是: %f' % (misses / (float(holdout_rows))))
示例#2
0
def digit_class_test():
    '''Print the kNN (k=3) error rate on the handwritten-digit test files.

    Every file in ``trainingDigits`` becomes one 1024-wide row of the
    training matrix; its label is the integer before the first ``_`` in the
    file name. Each file in ``testDigits`` is then classified and compared
    with the label taken from its own name.
    '''
    training_files = listdir('trainingDigits')

    # Build the training matrix and the matching label list row by row.
    train_matrix = zeros((len(training_files), 1024))
    digits_labels = []
    for row, file_name in enumerate(training_files):
        digits_labels.append(int(file_name.split('.')[0].split('_')[0]))
        train_matrix[row, :] = image2vector('trainingDigits/%s' % file_name)

    test_files = listdir('testDigits')
    misclassified = 0
    for file_name in test_files:
        expected = int(file_name.split('.')[0].split('_')[0])
        sample = image2vector('testDigits/%s' % file_name)
        predicted = knn.classify0(sample, train_matrix, digits_labels, 3)
        if predicted != expected:
            misclassified += 1

    error_rate = float(misclassified) / float(len(test_files))
    print("Error rate is: " + str(error_rate))
示例#3
0
def handwritingClassTest(k):
    """Return the kNN error rate on the handwritten-digit test files.

    Args:
        k: number of neighbours handed to ``knn.classify0``.

    Returns:
        The fraction of files in ``testDigits`` whose predicted digit differs
        from the integer before the first ``_`` in the file name.
    """
    train_names = os.listdir('trainingDigits')
    train_rows = zeros((len(train_names), 1024))
    digit_labels = []
    for row, name in enumerate(train_names):
        digit_labels.append(int(name.split('_')[0]))
        train_rows[row, :] = img2vector('trainingDigits/%s' % name)

    test_names = os.listdir('testDigits')
    wrong = 0.0
    for name in test_names:
        expected = int(name.split('_')[0])
        sample = img2vector('testDigits/%s' % name)
        if knn.classify0(sample, train_rows, digit_labels, k) != expected:
            wrong += 1.0
    return float(wrong / len(test_names))
def train(trainImagePath, testImagePath):
    """Evaluate the kNN digit classifier and print its error statistics.

    Args:
        trainImagePath: directory holding the training digit files.
        testImagePath: directory holding the test digit files.

    The label of a file is the integer before the first ``_`` in its name.
    Each test prediction is printed, followed by the error count and rate.
    """
    def digit_of(file_name):
        # "<digit>_<n>.<ext>" -> <digit>
        return int(file_name.split('.')[0].split('_')[0])

    train_files = os.listdir(trainImagePath)
    train_rows = np.zeros((len(train_files), 1024))
    known_digits = []
    for row, file_name in enumerate(train_files):
        known_digits.append(digit_of(file_name))
        train_rows[row, :] = imageTool.img2vector(
            '%s/%s' % (trainImagePath, file_name))

    test_files = os.listdir(testImagePath)
    wrong = 0.0
    for file_name in test_files:
        actual = digit_of(file_name)
        sample = imageTool.img2vector('%s/%s' % (testImagePath, file_name))
        guess = knn.classify0(sample, train_rows, known_digits, 3)

        print('分类器返回的数字是:%d, 实际的数字是:%d' % (guess, actual))

        if guess != actual:
            wrong += 1.

    print('总的错误数: %d' % wrong)
    print('错误率: %f' % (wrong / float(len(test_files))))
示例#5
0
def handwritingClassTest():
    """Evaluate the kNN digit classifier (k=3) and print its error statistics.

    Files in ``trainingDigits`` form the training matrix (one 1024-wide row
    per file); the label of a file is the integer before the first ``_`` in
    its name. Every file in ``testDigits`` is classified, the prediction is
    printed next to the real label, and finally the error rate and the error
    count are printed.
    """
    hwLabels = []
    trainingFileList = os.listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vect('trainingDigits/%s' % fileNameStr)
    testFileList = os.listdir("testDigits")
    errCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vect('testDigits/%s' % fileNameStr)
        classifierResult = knn.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        if classifierResult != classNumStr:
            errCount = errCount + 1
        print("calc is %d, real answer is %s" %
              (classifierResult, classNumStr))
    # Apply the format with %; passing the value as a second print argument
    # left the "%f" placeholder unformatted.
    print("error rate is %f" % (errCount / float(mTest)))
    print("error num is %d" % errCount)
示例#6
0
def handwritingClassTest():
    """Evaluate the kNN digit classifier (k=3) and print its error statistics.

    Files in ``trainingDigits`` form the training matrix (one 1024-wide row
    per file); the label of a file is the integer before the first ``_`` in
    its name. Every file in ``testDigits`` is classified, the prediction is
    printed next to the real label, and finally the number of errors and the
    error rate are printed.
    """
    hwLabels = []
    trainingFileSet = os.listdir('trainingDigits')
    m = len(trainingFileSet)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileSet[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)
    testFileList = os.listdir('testDigits')
    errorCount = 0.0
    # Number of test files (previously the length of a single file name).
    mTest = len(testFileList)
    # Classify every test file; the original ran this body once, on the last
    # training file name, because the loop header was missing.
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        # Labels are compared with the integer training labels.
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with:%d, the real answer is:%d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nThe total number of error is:%d" % errorCount)
    print("\nthe Total error rate is:%f" % (errorCount / float(mTest)))
    
        
示例#7
0
文件: knn_test.py 项目: z8g/common
    def test_main_hand_writing(self):
        """Classify every file in ``digits/test`` with kNN (k=3) and print the results.

        The training matrix has one 1024-wide row per file in
        ``digits/train``; labels come from ``get_label(file_name)``. Each
        prediction is printed with the real label, then a summary line with
        the test size, error count and error rate.
        """
        train_files = os.listdir('digits/train')
        train_matrix = numpy.zeros((len(train_files), 1024))
        labels = []
        for row, train_name in enumerate(train_files):  # e.g. 0_22.txt
            labels.append(get_label(train_name))
            train_matrix[row, :] = knn.read_vector('digits/train/%s' % train_name)

        test_files = os.listdir('digits/test')
        test_size = len(test_files)
        err_count = 0.0
        for test_name in test_files:  # e.g. 0_22.txt
            real_label = get_label(test_name)
            test_vector = knn.read_vector('digits/test/%s' % test_name)
            predicted = knn.classify0(test_vector, train_matrix, labels, 3)
            print("predict: %s real: %s" % (predicted, real_label))
            if predicted != real_label:
                err_count += 1.0

        err_rate = err_count / float(test_size)
        print('total: %d error: %d rate: %f' % (test_size, err_count, err_rate))
示例#8
0
def choose_action_by_knn(db, errorType, k):
    """Return the action id that kNN picks for ``errorType``.

    Args:
        db: handle passed through to ``data_select``.
        errorType: first component of the query vector.
        k: number of neighbours handed to ``knn.classify0``.

    The query vector is ``[errorType, 1, min(BuildTime)]``, where the minimum
    is read from ``actionTriggerResTb``. The middle ``1`` presumably marks a
    successful action (per the original comment) - TODO confirm.
    """
    fastest_rows = data_select(db, actionTriggerResTb,
                               whatSelect='min(BuildTime)')
    fastest_build = fastest_rows[0][0]
    query = array([errorType, 1, fastest_build])
    history = data_select(db, actionTriggerResTb)
    samples, action_ids = knn.matrix_from_mysql(history)
    return knn.classify0(query, samples, action_ids, k)
def classfiy_person():
    """Prompt for three traits and print how much the user will like the person.

    The answers are scaled with the offsets and ranges returned by
    ``knn.auto_normal`` for ``./knn/datingTestSet2.txt`` and classified with
    k=3. The class returned by ``knn.classify0`` is used as a 1-based index
    into the verdict names.
    """
    gaming_share = float(input("percentage of time spent playing video games ?"))
    flier_miles = float(input("frequent flier miles earned per year ?"))
    ice_cream_liters = float(input("liter of ice cream consumed per year ?"))

    samples, sample_labels = knn.file2matrix('./knn/datingTestSet2.txt')
    scaled_samples, spans, floors = knn.auto_normal(samples)

    query = array([flier_miles, gaming_share, ice_cream_liters])
    scaled_query = (query - floors) / spans
    verdict = knn.classify0(scaled_query, scaled_samples, sample_labels, 3)

    verdict_names = ['not at all','in small doses','in large doses']
    print("You will probably like this person: ", verdict_names[verdict - 1], "(" + str(verdict) + ")")
示例#10
0
def begin():
    """Prompt for three traits and return the predicted liking level.

    The answers are scaled with the offsets and ranges that ``autoNorm``
    reports for ``./datingTestSet.txt`` and classified with k=3; the class is
    used as a 1-based index into the result names.
    """
    percent = float(raw_input("percent"))
    miles = float(raw_input("miles"))
    liter = float(raw_input("liter"))
    # NOTE(review): the query order (percent, miles, liter) has to match the
    # column order of the data file - confirm.
    query = array([percent, miles, liter])
    samples, labels = file2matrix("./datingTestSet.txt")
    scaled_samples, spans, floors = autoNorm(samples)
    verdict = knn.classify0((query - floors) / spans, scaled_samples, labels, 3)
    outcomes = ['not at all', 'a litte like', 'like very much']
    return outcomes[verdict - 1]
示例#11
0
def begin():
    """Ask for three traits on stdin and return the predicted liking level.

    Reads ``./datingTestSet.txt`` via ``file2matrix``, normalises it with
    ``autoNorm``, scales the answers the same way and classifies them with
    k=3. The returned class selects (1-based) one of the result names.
    """
    answers = []
    for prompt in ("percent", "miles", "liter"):
        answers.append(float(raw_input(prompt)))
    # NOTE(review): the answer order has to match the column order of the
    # data file - confirm.
    inX = array(answers)

    dating_matrix, labels = file2matrix("./datingTestSet.txt")
    norm_matrix, value_ranges, minimums = autoNorm(dating_matrix)
    scaled_inX = (inX - minimums) / value_ranges
    class_index = knn.classify0(scaled_inX, norm_matrix, labels, 3)
    return ['not at all', 'a litte like', 'like very much'][class_index - 1]
def classifiy_moives():
    """Classify one hard-coded sample against four labelled points and print the label."""
    # Four training samples with two features each.
    training_points = np.array([[1, 101], [5, 89], [108, 5], [115, 8]])
    # One label per training sample.
    training_labels = ['爱情片', '爱情片', '动作片', '动作片']
    # Classify the sample with its three nearest neighbours and print the result.
    print(knn.classify0([101, 20], training_points, training_labels, 3))
示例#13
0
def classifiPerson():
    resultList = [u'完全不会', u'小概率', u'大概率']
    percentTats = float(raw_input("percent of time spent playing video game?"))
    ffMiles = float(raw_input("frequent flier miles earned per year?"))
    iceCream = float(raw_input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([percentTats, ffMiles, iceCream])
    classifierResult = knn.classify0([inArr - minVals] / ranges, normMat,
                                     datingLabels, 3)
    print "you will propabably like this persion: ", resultList[
        classifierResult - 1]
def test():
    """Smoke-test the knn module: sample data set, one classification, dating data plot."""
    samples, sample_labels = knn.create_dataset()
    print(samples)
    print(sample_labels)
    nearest_label = knn.classify0([0,0], samples, sample_labels, 3)
    print("distance is %s !"%(nearest_label))

    # Relative paths resolve differently under PyCharm; check the working
    # directory under Run -> Edit Configurations.
    dating_features, dating_targets = knn.file2matrix('./knn/datingTestSet2.txt')
    print(dating_features)
    print(dating_targets)
    knn.show_plt(dating_features, dating_targets)
示例#15
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on ``./datingTestSet.txt``.

    The first 10% of the normalised rows are classified against the
    remaining rows. Each prediction is printed with the real label, then the
    error count and the total number of rows as ``<errors>:<rows>``.
    """
    rating = 0.1
    datingDataMat, labels = file2matrix("./datingTestSet.txt")
    normDataSet, ranges, minVals = autoNorm(datingDataMat)
    m = normDataSet.shape[0]
    testNum = int(m * rating)
    err = 0.0
    for i in range(testNum):
        # Only rows outside the held-out part may serve as neighbours;
        # otherwise every test row finds itself at distance zero.
        index = knn.classify0(normDataSet[i, :], normDataSet[testNum:m, :],
                              labels[testNum:m], 3)
        print("predict:" + str(index) + " real:" + str(labels[i]))
        if index != labels[i]:
            err += 1.0
    print(str(err) + ":" + str(m))
示例#16
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on ``./datingTestSet.txt``.

    The first 10% of the normalised rows are the test set and the rest is
    the reference set. Each prediction is printed with the real label; the
    last line shows the error count and the total number of rows as
    ``<errors>:<rows>``.
    """
    rating = 0.1
    datingDataMat, labels = file2matrix("./datingTestSet.txt")
    normDataSet, ranges, minVals = autoNorm(datingDataMat)
    m = normDataSet.shape[0]
    testNum = int(m * rating)
    err = 0.0
    for i in range(testNum):
        # Exclude the held-out rows from the reference set so that a test
        # row cannot be its own nearest neighbour.
        index = knn.classify0(normDataSet[i, :], normDataSet[testNum:m, :],
                              labels[testNum:m], 3)
        print("predict:" + str(index) + " real:" + str(labels[i]))
        if index != labels[i]:
            err += 1.0
    print(str(err) + ":" + str(m))
示例#17
0
def main():
    """Demo run: classify one point with the sample data set, then call ``cp.predict()``."""
    print('Main Begin******************')

    samples, sample_labels = knn.createDataSet()

    # Same output as the comma-separated print statement: the samples, a
    # space, a line break, then the labels.
    print('%s %s%s' % (samples, '\n', sample_labels))

    predict = [1, 0.9]
    label = knn.classify0(predict, samples, sample_labels, 3)

    print('%s %s %s' % (predict, ' lable is: ', label))

    cp.predict()

    print('Main End********************')
示例#18
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on ``datingTestSet2.txt``.

    The first 10% of the normalised rows are classified against the
    remaining rows. Each line index, prediction and real label is printed,
    followed by the total error rate.
    """
    raw_matrix, labels = file2matrix('datingTestSet2.txt')
    scaled, _ranges, _minimums = autoNorm(raw_matrix)
    row_total = scaled.shape[0]
    # The leading tenth of the rows is the test set.
    test_total = int(row_total * .1)
    failures = 0.0
    for line_no in range(test_total):
        guess = knn.classify0(scaled[line_no, :],
                              scaled[test_total:row_total, :],
                              labels[test_total:row_total], 3)
        truth = labels[line_no]
        print("line is %d,classifier came back with: %d,the real answer is %d" % (
            line_no, guess, truth))
        if guess != truth:
            failures += 1.
    print("total error rate is : %f" % (failures / float(test_total)))
def predict():
    """Prompt for three traits and print the predicted liking level.

    The answers are scaled with the offsets and ranges that ``knn.autoNorm``
    reports for ``datingTestSet2.txt`` and classified with k=3; the class is
    used as a 1-based index into the level names.
    """
    gaming_share = float(raw_input('玩游戏的时间是: '))
    flier_miles = float(raw_input('每年的飞行公里数: '))
    ice_cream = float(raw_input('每年消耗的冰淇淋: '))

    samples, sample_labels = knn.file2matrix('datingTestSet2.txt')
    scaled_samples, spans, floors = knn.autoNorm(samples)

    # Scale the query exactly like the samples.
    query = np.array([flier_miles, gaming_share, ice_cream])
    scaled_query = (query - floors) / spans

    verdict = knn.classify0(scaled_query, scaled_samples, sample_labels, 3)

    levels = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    print(' '.join(['预测你可能喜欢这个人的程度:', levels[verdict - 1]]))
def predict():
    """Read three traits from stdin and print the liking level kNN predicts.

    Uses ``datingTestSet2.txt`` (via ``knn.file2matrix``) as the reference
    data, normalised by ``knn.autoNorm``; the query is normalised with the
    same minimums and ranges and classified with k=3.
    """
    answers = {}
    for key, prompt in (('games', '玩游戏的时间是: '),
                        ('miles', '每年的飞行公里数: '),
                        ('ice_cream', '每年消耗的冰淇淋: ')):
        answers[key] = float(raw_input(prompt))

    dating_matrix, dating_labels = knn.file2matrix('datingTestSet2.txt')
    norm_matrix, value_ranges, minimums = knn.autoNorm(dating_matrix)

    raw_query = np.array(
        [answers['miles'], answers['games'], answers['ice_cream']])

    # Normalise the input the same way as the data set.
    norm_query = (raw_query - minimums) / value_ranges

    class_index = knn.classify0(norm_query, norm_matrix, dating_labels, 3)

    level = ['一点也不喜欢', '有点喜欢', '非常喜欢'][class_index - 1]
    print(' '.join(['预测你可能喜欢这个人的程度:', level]))
示例#21
0
文件: knn_test.py 项目: z8g/common
    def test_main_dating(self):
        """Hold-out test of kNN (k=3) on ``dating/dataset.txt``.

        The first half of the normalised rows is classified against the
        second half. Each prediction is printed with the real label, then a
        summary line with the test size, error count and error rate.
        """
        dataset_matrix, labels = knn.read_matrix('dating/dataset.txt')
        norm_matrix, _ranges, _min_value = knn.auto_norm(dataset_matrix)
        size = norm_matrix.shape[0]
        test_num = int(size * 0.50)

        err_count = 0.0
        for row in range(test_num):
            predicted = knn.classify0(norm_matrix[row, :],
                                      norm_matrix[test_num:size, :],
                                      labels[test_num:size],
                                      3)
            actual = labels[row]
            print("predict: %d real: %d" % (predicted, actual))
            if predicted != actual:
                err_count += 1.0

        err_rate = err_count / float(test_num)
        print('total: %d error: %d rate: %f' % (test_num, err_count, err_rate))
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on the dating data set.

    The first 10% of the normalised rows are classified against the
    remaining rows. Each prediction is printed with the real label, followed
    by the total error rate.
    """
    hoRatio = 0.10
    # NOTE(review): other callers spell this helper `file2matrix`; confirm
    # that `knn.file2matix` really exists.
    datingDataMat, datingLabels = knn.file2matix(
        '/home/matija/Projects/personal_projects/show-me-the-code/data-science/CollectiveIntelligence/dataSets/datingTestSet1.txt'
    )
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    m = normMat.shape[0]
    # Number of leading rows used to test the classifier.
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        # The labels are sliced like the samples; passing the full label list
        # paired every reference row with the label numTestVecs rows earlier.
        classifierResult = knn.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
示例#23
0
def count(disc_set, W_LDA, train_final, label, test_NUM_IN):
    """Return the share of test images that kNN (k=7) assigns to ``test_NUM_IN``.

    Args:
        disc_set: first projection; its transpose is applied to the images.
        W_LDA: second projection; its transpose is applied to the result.
        train_final: projected training samples (transposed before use).
        label: training labels (transposed before use).
        test_NUM_IN: class whose test images are loaded via
            ``loadImageSet_many`` and which counts as the correct answer.

    Reads the module-level ``new_test_tol`` as the number of test images.
    The ``*`` operators presumably act on numpy matrices - TODO confirm.
    """
    print('test_num is ' + str(test_NUM_IN))
    test_images = loadImageSet_many(test_NUM_IN)
    first_projection = disc_set.T * test_images.T
    second_projection = W_LDA.T * first_projection

    test_rows = second_projection.T
    reference_rows = train_final.T
    reference_labels = label.T

    hits = 0
    position = 0
    while position < new_test_tol:
        predicted = knn.classify0(test_rows[position], reference_rows,
                                  reference_labels, 7)
        if predicted == test_NUM_IN:
            hits += 1
        position += 1

    statistic = float(hits) / new_test_tol
    print('test_NUM_IN=' + str(test_NUM_IN) + ' ' + str(statistic))
    return statistic
示例#24
0
def error_prediction_by_knn(db, k):
    """Predict the error type of the configured job with kNN and print it.

    Args:
        db: handle passed through to ``data_select``.
        k: number of neighbours handed to ``knn.classify0``.

    Returns:
        None. If fewer than ``k`` samples are available a message is printed
        and the function returns early.

    The query vector is ``[job id, conf.recentSuccessRate, conf.buildTime]``.
    """
    try:
        job_id_for_predict = data_select(db, jobNameTb, 'JobName',
                                         conf.jobName, 'id')[0][0]
    except Exception:
        # Lookup failed (presumably the job is not in the table yet -
        # confirm): fall back to the next free id. `Exception` rather than a
        # bare except so that KeyboardInterrupt/SystemExit still propagate.
        job_id_for_predict = data_select(db, jobNameTb,
                                         whatSelect='max(id)')[0][0] + 1
    inX = array([job_id_for_predict, conf.recentSuccessRate, conf.buildTime],
                dtype='float64')
    res = data_select(db, errorFeatureTb)
    dataSet, labels = knn.matrix_from_mysql(res)
    if len(dataSet) < k:
        print("There is not enough data to predict.")
        return
    knn.plot_save(dataSet, labels, 'dataPlot.png', show=True)
    error_predict = knn.classify0(inX, dataSet, labels, k)
    error_string = data_select(db, errorTypeTb, 'id', error_predict,
                               'ErrorString')[0][0]
    print('%s %s' % ("I predict the error type is", error_string))
    print('%s %s' % ("The Error collected is", conf.error))
示例#25
0
def mnist_test(numTrain=200, numTest=100, k=5):
    """Print the kNN error rate on a slice of the handwritten-digit data.

    Args:
        numTrain: number of leading training samples to use.
        numTest: number of leading test samples to classify.
        k: number of neighbours handed to ``knn.classify0``.
    """
    data = load_data()
    train_x = data['training_inputs'][:numTrain]
    train_y = data['training_labels'][:numTrain]
    test_x = data['test_inputs'][:numTest]
    test_y = data['test_labels'][:numTest]

    sample_count = test_x.shape[0]
    wrong = 0
    for idx in xrange(sample_count):
        predicted = knn.classify0(test_x[idx], dataSet=train_x,
                                  labels=train_y, k=k)
        if predicted != test_y[idx]:
            wrong += 1

    test_error = wrong / float(sample_count)

    print("测试数据错误率为 %f" % test_error)
示例#26
0
def handwritingClassTest():
    """Evaluate the kNN digit classifier (k=3) and print its error rate.

    Files in ``trainingDigits`` form the training matrix (one 1024-wide row
    per file); the label of a file is the text before the first ``_`` in its
    name and is kept as a string. Every file in ``./testDigits`` is
    classified and printed with its real label, then the error rate is
    printed.
    """
    hwLabels = []
    trainListFiles = os.listdir('trainingDigits')
    m = len(trainListFiles)
    trainMat = zeros([m, 1024])
    # Start at the first file: starting at 1 skipped a training file and left
    # the last matrix row all-zero with no matching label.
    for i, fileNameStr in enumerate(trainListFiles):
        classNum = fileNameStr.split('.')[0].split('_')[0]
        hwLabels.append(classNum)
        trainMat[i, :] = img2vector('./trainingDigits/' + fileNameStr)
    testFileList = os.listdir('./testDigits')
    errorcount = 0.0
    mTest = len(testFileList)
    # Test every file so that the denominator mTest matches the number of
    # classified samples.
    for fileNameStr in testFileList:
        classNum = fileNameStr.split('.')[0].split('_')[0]
        vectorForTest = img2vector('./testDigits/' + fileNameStr)
        classResult = knn.classify0(vectorForTest, trainMat, hwLabels, 3)
        print("result : " + classResult + " true : " + classNum)
        if classNum != classResult:
            errorcount += 1.0
    print("all is : " + str(errorcount / mTest))
示例#27
0
def datingClassTest():
    """Hold-out test of kNN (k=3) on ``datingTestSet2.txt``, followed by a scatter plot.

    The first 10% of the normalised rows are classified against the
    remaining rows; each prediction, the error rate and the error count are
    printed. Afterwards the first two raw feature columns are plotted, with
    marker size and colour both set to 15 times the label.
    """
    samples, sample_labels = file2matrix(
        "datingTestSet2.txt")
    scaled, _spans, _floors = autoNorm(samples)
    total = scaled.shape[0]
    # Part of the rows is used for testing, the rest as reference samples.
    held_out = int(total * 0.1)
    print('numTestVecs=', held_out)

    mistakes = 0
    for row in range(held_out):
        predicted = classify0(scaled[row], scaled[held_out:total],
                              sample_labels[held_out:total], 3)
        actual = sample_labels[row]
        print("the classifier came back with: %d, the real answer is: %d" %
              (predicted, actual))
        mistakes += predicted != actual
    print("the total error rate is: %f" % (mistakes / held_out))
    print(mistakes)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    marker_values = 15.0 * np.array(sample_labels)
    axes.scatter(samples[:, 0], samples[:, 1], marker_values, marker_values)
    plt.show()
示例#28
0
def train(trainImagePath, testImagePath):
    """Evaluate the kNN digit classifier (k=3) and print its error statistics.

    Args:
        trainImagePath: directory holding the training digit files.
        testImagePath: directory holding the test digit files.

    A file's label is the integer in front of the first ``_`` of its name
    (the part before the first ``.``). Every test prediction is printed with
    the real digit, then the error count and the error rate.
    """
    training_names = os.listdir(trainImagePath)
    training_matrix = np.zeros((len(training_names), 1024))
    digit_labels = []

    for row, name in enumerate(training_names):
        stem = name.partition('.')[0]
        digit_labels.append(int(stem.partition('_')[0]))
        training_matrix[row, :] = imageTool.img2vector(
            '%s/%s' % (trainImagePath, name))

    test_names = os.listdir(testImagePath)
    test_total = len(test_names)
    error_total = 0.0

    for name in test_names:
        stem = name.partition('.')[0]
        real_digit = int(stem.partition('_')[0])
        sample = imageTool.img2vector('%s/%s' % (testImagePath, name))
        predicted_digit = knn.classify0(sample, training_matrix,
                                        digit_labels, 3)

        print('分类器返回的数字是:%d, 实际的数字是:%d' % (predicted_digit, real_digit))

        if predicted_digit != real_digit:
            error_total += 1.

    print('总的错误数: %d' % error_total)
    print('错误率: %f' % (error_total / float(test_total)))
def train():
    '''Hold-out test of the kNN classifier (k=3) on ``datingTestSet2.txt``.

    Prints the normalised matrix and the labels, classifies the first 10% of
    the rows against the remaining rows, prints every prediction with the
    real label and finally the error rate.
    '''
    raw_matrix, label_column = knn.file2matrix('datingTestSet2.txt')
    norm_matrix, _range_vals, _min_vals = knn.autoNorm(raw_matrix)

    print(norm_matrix)
    print(label_column)

    # Fraction of the rows used as the validation set.
    fraction = 0.10
    # Total number of rows in the data set.
    row_count = norm_matrix.shape[0]
    # Number of rows in the test set.
    test_count = int(row_count * fraction)

    error_total = 0.0
    for index in range(test_count):
        outcome = knn.classify0(
            norm_matrix[index, :],
            norm_matrix[test_count:row_count, :],
            label_column[test_count:row_count],
            3)
        truth = label_column[index]

        print('分类器返回: %d, 实际的结果是:%d' % (outcome, truth))

        if outcome != truth:
            error_total += 1.0

    print('错误率是: %f' % (error_total / (float(test_count))))
示例#30
0
# -*- coding: UTF-8 -*-    或者  #coding=utf-8
'''
Created on 2016年8月20日

@author: xiaoyuan
'''
import knn

# Build the sample data set and print the kNN class (k=3) of the point [0, 0].
group, labels = knn.createDataSet()
print(knn.classify0([0, 0], group, labels, 3))
示例#31
0
    # 归一化数据
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    print('norm mat:')
    print(normMat)
    print('range:')
    print(ranges)
    print('norm mat:')
    print(minVals)

    # 测试分类器,使用数据集前hoRatio比例做测试集
    hoRatio = 0.10
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if (classifierResult != datingLabels[i]):
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
    print(errorCount)

    # 预测分类
    resultList = ['not at all', 'in small doses', 'in large doxes']
    ffMiles = float(input('frequent flier miles earned per year?'))
    percentTats = float(input("percentage of time spent playing video games?"))
    iceCream = float(input('liters of ice cream consumed per year?'))
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = knn.classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    print('You will probably like this person:', resultList[classifierResult - 1])
示例#32
0
文件: testknn.py 项目: solomonope/ML
import knn;
import operator;
from numpy import *;

#TESt creating  datasets
# Build the sample data set and show it.
group, labels = knn.createdataset()
print(group)
print(labels)

# Classify one point against the sample data set (k=3).
label = knn.classify0([5, 0], group, labels, 3)
print(label)

# Load the dating data file and show the parsed matrix and labels.
datingDataMat, datingLabels = knn.filetoMatrix('C:\\Users\Folorunsho Solomon\\Documents\\GitHub\\ML\\python\\datingTestSet2.txt')
print(datingDataMat)
print(datingLabels)

import matplotlib
import matplotlib.pyplot as plt

# Prepare an empty figure with a single subplot.
fig = plt.figure()
ax = fig.add_subplot(111)
示例#33
0
    #计算类间差
    temp1 = EachClassMean - total_mean
    Fai_b = sqrt(train_samplesize) * temp1

    # print EachClassMean[:,0]
    # print EachClassMean[:,1]
    #计算类内差
    for i in range(0, train_tol):
        Fai_w[:, i] = train_pro[:, i] - EachClassMean[:, int(label[0, i]) - 1]

    Sb = Fai_b * Fai_b.T
    Sw = Fai_w * Fai_w.T

    LDA_dim = ClassNum - 1

    eig_val, eig_vec = linalg.eig(Sw.I * Sb)

    eigSortIndex = argsort(-eig_val)  # 从大到小排序,默认从小到大,参数为负表示降序
    W_LDA = mat(eig_vec[:, eigSortIndex[:LDA_dim]])  # 取LDA方向

    #训练样本再次投影
    train_final = W_LDA.T * train_pro

    #调用knn邻近分类器
    newImg = cv2.imread('D:\PyCharm\PyCharmProjects\s1_5.bmp', 0)
    newImg = mat(newImg).flatten().T
    newImg_pro = disc_set.T * newImg
    newImg_final = W_LDA.T * newImg_pro

    Class = knn.classify0(newImg_final.T, train_final.T, label.T, 7)
    print Class
示例#34
0
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]
        # 找分类标签
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('./digits/trainingDigits/%s' %
                                       fileNameStr)

    # 获取测试文件列表
    testFileList = listdir('./digits/testDigits')
    errorCount = 0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]
        # 找分类标签
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('./digits/testDigits/%s' % fileNameStr)
        # 计算分类结果
        classfierResult = knn.classify0(vectorUnderTest, trainingMat, hwLabels,
                                        3)
        print('file:' + fileStr +
              ' the classifier came back with:%d, the real answer is:%d' %
              (classfierResult, classNumStr))
        if (classfierResult != classNumStr):
            errorCount += 1
    print('the total number of errors is:%d' % errorCount)
    print('the total error rate is:%f' % (errorCount / float(mTest)))
示例#35
0
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 21 10:38:54 2018

@author: fsxn2
"""

import knn
import matplotlib
import matplotlib.pyplot as plt
#group,labels=knn.createDataSet()
#print(knn.classify0([0,0],group,labels,3))
# Load the samples and labels from input.txt.
group, labels = knn.file2matrix("input.txt")
# Normalise the samples and print the normalised matrix, the ranges and the
# minimum values returned by knn.autoNorm.
auto, ranges, minval = knn.autoNorm(group)
print(auto)
print(ranges)
print(minval)
# Disabled scatter plot of the raw samples.
#fig=plt.figure()
#ax=fig.add_subplot(111)
#ax.scatter(group[:,1],group[:2])
#plt.show()
# Classify one point (k=3).
# NOTE(review): this uses the raw `group`, not the normalised `auto` - confirm this is intended.
print(knn.classify0([1, 0, 3], group, labels, 3))
示例#36
0
            vec = f.read()
        return list(vec.replace('\n', ''))

    sizeData = len(listData)  #文件的数量
    cLabel = zeros(sizeData, dtype='int16')  #文件类别
    arrTrain = zeros((sizeData, 1024), dtype='int16')  #训练样本数组
    for i, j in enumerate(listData):
        cLabel[i] = int(j[0])  #每个文件对应的类别
        arrTrain[i, :] = file2arr(dirname + '\\' + j)
    return cLabel, arrTrain


if __name__ == '__main__':
    from knn import classify0

    # Directories with the training and the test digit files.
    fTrain = r'..\data\Ch02\digits\trainingDigits'
    fTest = r'..\data\Ch02\digits\testDigits'
    cLabel, arrTrain = loadData(fTrain)
    cLabelTest, arrTest = loadData(fTest)

    # Error rate of the hand-written classifier (k=3).
    err = 0
    for expected, sample in zip(cLabelTest, arrTest):
        if expected != classify0(sample, arrTrain, cLabel, 3):
            err += 1
    print('错误率:', err / len(cLabelTest))

    # Same data through scikit-learn's kNN classifier for comparison.
    from sklearn.neighbors import KNeighborsClassifier as knn
    model = knn(n_neighbors=3, n_jobs=4, algorithm='auto')
    model.fit(arrTrain, cLabel)
    cLabelPredict = model.predict(arrTest)
    print('错误率', sum(cLabelPredict != cLabelTest) / len(cLabelTest))
示例#37
0
#!/usr/bin/env python

import knn
# import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Build the sample data set and show it.
group, labels = knn.create_data_set()
print("group=", group)
print("labels=", labels)

# Classify the point [0, 0] against the sample data set (k=3).
result = knn.classify0([0, 0], group, labels, 3)
print("result=", result)

# Load the dating data; only the first 20 labels are printed.
datingDataMat, datingLabels = knn.file2matrix('datingTestSet.txt')
print("datingDataMat=", datingDataMat)
print("datingLabels=", datingLabels[:20])
# The string literal below holds disabled plotting code; it is evaluated as a
# bare expression and otherwise has no effect.
'''
fig = plt.figure()
ax = fig.add_subplot(111)
print("datingDataMat[:, 1]=", datingDataMat[:, 1][0])
print("datingDataMat[:, 2]=", datingDataMat[:, 2][0])
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
# ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2], 15.0*array(datingLabels), 15.0*array(datingLabels))
plt.show()
'''

# Normalise the dating data and print the matrix, ranges and minimum values
# returned by knn.autoNorm.
normMat, ranges, minVals = knn.autoNorm(datingDataMat)
print("normMat=", normMat)
print("ranges=", ranges)
print("minVals=", minVals)
# coding: UTF-8


import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import knn
import mnist_test

# Classify the point [0, 0] against the sample data set with both
# implementations (k=3); only the classify0 result is printed.
group, labels = knn.createDataSet()
bb = knn.classify0([0,0], group, labels, 3)
print bb
cc = knn.knn2([0,0], group, labels, 3)

# Visualise the data: first two feature columns, marker size and colour
# derived from the labels.
dataSet, labels = knn.file2matrix('data/datingTestSet2.txt')
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(dataSet[:,0], dataSet[:,1], s=15.0*labels, c=15.0*labels)
plt.show()


# Measure the misclassification rate.
reload(knn)
# testRatio is the share of rows used as the test set, k the number of neighbours.
# NOTE(review): this path starts with '../data/' while the plot above reads 'data/' - confirm.
knn.knnTest('../data/datingTestSet2.txt',testRatio=0.2, k=3)


# Test handwritten-digit recognition.