def evaluate(output=True, ratio=0.1, k_value=3):
    """Measure the kNN classifier's error rate on the dating data set.

    The rows loaded from ``dating.dataset`` are normalized with
    ``knn.auto_normalize``. The first ``int(ratio * row_count)`` rows are
    held out as test samples; the remaining rows and their labels are handed
    to ``knn.gen_classify_fun`` to build the classifier.

    Args:
        output: When true, print every misclassified sample and a final
            summary line.
        ratio: Fraction of rows, taken from the start of the data set, that
            is used as test samples. Must lie in ``[0, 1]``.
        k_value: The ``k`` passed to ``knn.gen_classify_fun``.

    Returns:
        The fraction of test samples whose predicted label differs from the
        stored label, or ``0.0`` when the hold-out slice is empty.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got %r" % (ratio,))

    data_set, labels = get_data_set_from_file("dating.dataset")
    # Only the normalized data is needed here; the two other values returned
    # by auto_normalize are not used by this function.
    data_set, _, _ = knn.auto_normalize(data_set)

    row_count = data_set.shape[0]
    row_test_count = int(ratio * row_count)
    test_data_set = data_set[0:row_test_count, :]
    classify_fun = knn.gen_classify_fun(
        [data_set[row_test_count:row_count, :],
         labels[row_test_count:row_count]],
        k_value)

    error_count = 0
    # zip() stops at the shorter input, so each test row is paired with the
    # label at the same position and the training labels are never visited.
    for label, row in zip(labels, test_data_set):
        # NOTE(review): row.tolist()[0] presumably unwraps a 1xN matrix row
        # into a flat feature list -- confirm against get_data_set_from_file.
        result = classify_fun(row.tolist()[0])
        if result != label:
            error_count += 1
            if output:
                # Single parenthesised argument: prints the same text under
                # both the Python 2 statement and the Python 3 function.
                print("the classifier came back with: %d, the real answer is: %d"
                      % (result, label))

    # Guard the division: a tiny data set (or ratio == 0) yields no test rows.
    if row_test_count:
        error_rate = error_count / float(row_test_count)
    else:
        error_rate = 0.0

    if output:
        print("the total error rate is: %f (%d / %d)"
              % (error_rate, error_count, row_test_count))
    return error_rate
def run_evaluate(output=True, k_value=3):
    """Evaluate the kNN classifier on the handwriting digit samples.

    A classifier is built from the ``training_digits`` data set and run over
    the ``test_digits`` data set through ``knn.evaluate``.

    Args:
        output: When true, print the error info and pretty-print one
            ``(file name, "actual:...", "expected:...")`` tuple per entry of
            the result.
        k_value: The ``k`` passed to both ``knn.gen_classify_fun`` and
            ``knn.evaluate``.

    Returns:
        The ``(error_list, result)`` pair returned by ``knn.evaluate``.
    """
    test_pathname = 'test_digits'
    classify_fun = knn.gen_classify_fun(
        get_handwriting_dataset('training_digits'), k_value)
    test_dataset, test_labels = get_handwriting_dataset(test_pathname)
    error_list, result = knn.evaluate(
        classify_fun, test_dataset, test_labels, k_value)

    if output:
        # %s formatting reproduces the text of the Python 2 statement
        # `print "error info:", error_list` and also works on Python 3.
        print("error info: %s" % (error_list,))
        # List the directory once instead of once per result entry; this also
        # keeps every index lookup against the same listing.
        # NOTE(review): mapping an index to a file name presumably relies on
        # get_handwriting_dataset reading files in os.listdir order -- confirm.
        file_names = os.listdir(test_pathname)
        pp([(file_names[index], "actual:" + actual, "expected:" + expected)
            for index, actual, expected, _content in result])
    return error_list, result