def test_auto_norm(self):
    """Check the minimum values and ranges reported by ``knn.auto_norm``.

    Loads ``datingTestSet2.txt`` through ``knn.file2matrix``, passes the
    resulting matrix to ``knn.auto_norm`` and compares the returned minimum
    values and ranges with hard-coded expectations.
    """
    date_mat, _ = knn.file2matrix('datingTestSet2.txt')
    _, ranges, min_val = knn.auto_norm(date_mat)

    min_exp = np.array([0., 0., 0.001156])
    ranges_exp = np.array([9.1273000e+04, 2.0919349e+01, 1.6943610e+00])

    # Tolerance-based comparison: exact ``==`` on floats breaks on a one-ulp
    # rounding difference, and assert_allclose reports the mismatching
    # elements instead of a bare "True != False".
    np.testing.assert_allclose(min_val, min_exp)
    np.testing.assert_allclose(ranges, ranges_exp)

def classify_gui(k):
    """Interactively classify one person from three typed-in features.

    Loads the dating data set, prompts on stdin for three numeric features,
    rescales them with the minimum values and ranges returned by
    ``auto_norm`` and hands the result to ``classify``.

    :param k: k value, forwarded unchanged as the last argument of ``classify``
    :return: the category string selected by the label ``classify`` returns
    """
    samples, sample_labels = file_to_matrix('../data/dating_test_set_2.txt')

    # Prompts are asked in the same order as the features appear in the
    # vector built below.
    prompts = (
        "请输入飞行里程数:",
        "请输入消耗冰淇淋公升数:",
        "请输入玩游戏花费时间百分比:",
    )
    answers = [float(input(prompt)) for prompt in prompts]

    scaled_samples, ranges, min_vals = auto_norm(samples)

    # Shift by the minimums and divide by the ranges that auto_norm returned.
    person = np.array(answers)
    scaled_person = (person - min_vals) / ranges

    categories = ['不喜欢', '一般', '极具魅力']
    label = classify(scaled_person, scaled_samples, sample_labels, k)
    # NOTE(review): the ``- 1`` assumes classify returns a 1-based integer
    # label -- confirm against file_to_matrix / classify.
    return categories[label - 1]

def classify_person():
    """Classify a person from three feature values typed on stdin.

    Prompts for the three features, loads the dating data set, rescales the
    typed-in values with the minimum values and ranges returned by
    ``auto_norm`` and passes them to ``classify`` with the last argument
    fixed at 3.

    :return: whatever ``classify`` returns for the rescaled input
    """
    # NOTE(review): the original comment lists the categories as [0, 1, 2];
    # the actual label values come from classify -- confirm there.
    # Raw features are collected first; the data file is read only afterwards.
    yearly_miles = float(input("每年飞行常客里程数:"))
    weekly_ice_cream = float(input('每周消耗的冰淇淋公升数:'))
    gaming_share = float(input('玩游戏所消耗的时间百分比:'))

    features, feature_labels = file_to_matrix('../data/dating_test_set_2.txt')
    scaled_features, spans, minimums = auto_norm(features)

    # Sample to be classified, shifted by the minimums and divided by the
    # ranges that auto_norm returned.
    query = np.array([yearly_miles, weekly_ice_cream, gaming_share])
    scaled_query = (query - minimums) / spans

    return classify(scaled_query, scaled_features, feature_labels, 3)

def dating_class_test():
    """Estimate the classifier's error rate on a held-out slice of the data.

    The first 10% of the normalized rows are classified one by one against
    the remaining rows; every prediction and the final error percentage are
    printed.

    :return: None
    """
    holdout_fraction = 0.10  # share of rows set aside as test samples

    samples, labels = file_to_matrix('./data/dating_test_set_2.txt')
    scaled, _ranges, _min_vals = auto_norm(samples)

    total_rows = scaled.shape[0]                       # number of rows
    holdout_count = int(total_rows * holdout_fraction)  # rows used for testing

    mistakes = 0.0  # running count of wrong predictions
    for row in range(holdout_count):
        # Rows [0, holdout_count) are queries; the rest is the reference set.
        predicted = classify(
            scaled[row, :],
            scaled[holdout_count:total_rows],
            labels[holdout_count:total_rows],
            5,
        )
        actual = labels[row]
        print('分类器返回:%d, 真实答案为:%d' % (predicted, actual))
        if predicted != actual:
            mistakes += 1.0

    error_percent = mistakes / (float(holdout_count)) * 100
    print('分类器错误率为:%0.2f%%' % error_percent)

def test_main_dating(self):
    """Run the classifier over the first half of the dating data set.

    Loads ``dating/dataset.txt`` through ``knn.read_matrix``, normalizes it
    with ``knn.auto_norm``, classifies each of the first 50% of rows against
    the remaining rows via ``knn.classify0`` (last argument 3) and prints
    every prediction plus the overall error count and rate.
    """
    test_ratio = 0.50
    dataset_matrix, labels = knn.read_matrix('dating/dataset.txt')
    norm_matrix, _, _ = knn.auto_norm(dataset_matrix)

    size = norm_matrix.shape[0]
    test_num = int(size * test_ratio)

    err_count = 0.0
    for i in range(test_num):
        classifier_result = knn.classify0(
            norm_matrix[i, :],
            norm_matrix[test_num:size, :],
            labels[test_num:size],
            3,
        )
        # Parenthesised single-argument print: valid on Python 3 and prints
        # the same text under the Python 2 print statement.
        print("predict: %d real: %d" % (classifier_result, labels[i]))
        if classifier_result != labels[i]:
            err_count += 1.0

    err_rate = err_count / float(test_num)
    print('total: %d error: %d rate: %f' % (test_num, err_count, err_rate))