def dating_test(test_ratio=0.1, k=6):
    """Evaluate the kNN classifier on the dating-site data with a hold-out split.

    The leading ``test_ratio`` share of the normalized rows is used as test
    samples; the remaining rows and their labels form the reference set that
    each test sample is classified against. Every prediction is printed next
    to the true label, followed by the overall error rate.

    Relies on the module-level names ``matrix`` and ``labels`` and on the
    ``kNN`` module, none of which are defined inside this function.

    :param test_ratio: fraction of all rows held out for testing (default 0.1).
    :param k: fourth argument forwarded to ``kNN.knn_classfy`` (presumably the
        number of neighbours -- confirm against kNN); default 6.
    :raises ValueError: if the split leaves no test rows or no reference rows.
    :return: None
    """
    # Normalize the feature matrix before measuring distances.
    normalized_matrix = kNN.autoNormalize(matrix)
    total_number = normalized_matrix.shape[0]

    # Number of rows used as test samples.
    test_number = int(total_number * test_ratio)
    if not 0 < test_number < total_number:
        # Previously an empty test split surfaced as a ZeroDivisionError when
        # the error rate was computed; fail early with a clear message instead.
        raise ValueError(
            'test_ratio=%r yields %d test rows out of %d; both the test set '
            'and the reference set must be non-empty'
            % (test_ratio, test_number, total_number))

    # Only the first two columns (slice 0:2) are used. The original note says
    # flight mileage and video-game time separated the classes best in earlier
    # experiments (it also said "first and third columns", which does not match
    # the slice). Using the full row would be normalized_matrix[j].
    # The reference set does not depend on the loop index, so slice it once.
    sample_data_set = normalized_matrix[test_number:total_number, 0:2]
    sample_labels = labels[test_number:total_number]

    # Count of misclassified test samples.
    error_count = 0
    for j in range(test_number):
        test_data = normalized_matrix[j, 0:2]
        # Label predicted by the kNN classifier.
        classfy_result = kNN.knn_classfy(test_data, sample_data_set,
                                         sample_labels, k)
        # Ground-truth label.
        real_result = labels[j]
        print('预测的结果:%d, 实际的结果:%d' % (classfy_result, real_result))
        if classfy_result != real_result:
            error_count += 1

    # float() keeps true division under Python 2.
    # Best result noted by the original author: 0.040000
    print('分类的错误率:%f' % (float(error_count) / test_number))
#!/usr/bin/python2.7
# _*_ coding: utf-8 _*_
# Demo script: classify one sample point with kNN and draw it together with
# the labelled reference points.
import kNN
import matplotlib.pyplot as plt

group, labels = kNN.createDataSet()
input_data = [1.1, 1.2]
result = kNN.knn_classfy(input_data, group, labels, 3)
# Same output as printing the four items separated by single spaces.
print(' '.join(['数据', str(input_data), '属于:', str(result)]))

# Keyword arguments shared by every annotation drawn below.
annotation_style = dict(
    xytext=(-10, 0),             # offset of the text from the annotated point
    textcoords='offset points',  # important: xytext is relative to xy
    ha='right',                  # horizontal alignment
    va='bottom',                 # vertical alignment
)

# Create a new figure and set its width and height.
plt.figure(figsize=(10, 10))
plt.subplot(111)

# Reference points: scatter them, then label each one.
xs, ys = group[:, 0], group[:, 1]
plt.scatter(xs, ys, c='#ef6790', s=20)
for label, x, y in zip(labels, xs, ys):
    plt.annotate(label, xy=(x, y), **annotation_style)

# Draw the classified point, larger and in red, tagged with its predicted label.
query_x, query_y = input_data
plt.scatter(query_x, query_y, c='#ff0000', s=80)
plt.annotate(result, xy=(query_x, query_y), **annotation_style)