def distances_tests():
    """Assertion-based checks for the ``image_template`` distance helpers.

    Relies on ``temp_s``, ``temp_d`` and ``page`` being available from the
    enclosing scope. Raises ``AssertionError`` on the first failed check.
    """
    expected_s_d = 0.0673441488
    tolerance = 0.0001

    # Distance between the two reference templates matches the recorded value.
    measured = image_template.modified_hausdorff_distance(temp_s, temp_d)
    assert abs(measured - expected_s_d) < tolerance

    # A template classified against a list containing itself picks itself.
    best_match = image_template.list_classification(temp_s, [temp_d, temp_s])
    assert best_match[0] == "temp_s"

    # The vectorised classifier must agree with the plain one.
    query = page.image_templates[0]
    candidates = page.image_templates[1:50]
    vectorised = image_template.list_classification_vec(query, candidates)
    plain = image_template.list_classification(query, candidates)
    assert vectorised == plain

    # Pairwise distances over [s, d, s]: zero wherever the two templates
    # are the same one, the recorded s-d distance everywhere else.
    observed = image_template.distance_matrix([temp_s, temp_d, temp_s])
    expected = np.array([
        [0., expected_s_d, 0.],
        [expected_s_d, 0., expected_s_d],
        [0., expected_s_d, 0.],
    ])
    assert np.allclose(observed, expected)
def character_rec(dim=48, resample=True):
    """Run hold-out character classification and report its accuracy.

    Pages are loaded with ``load_pages(base_directory, dim=dim,
    resample=resample)``. For every ``(test, train)`` split yielded by
    ``holdout``, each template of the test page is labelled with the first
    element returned by ``list_classification`` against the training
    templates whose ``name`` is not ``"NO LABEL"``. Those labels are passed
    through ``distribute_labels`` and scored against ``test.labels`` with
    ``num_correct_labels``.

    Args:
        dim: forwarded unchanged to ``load_pages`` as ``dim``.
        resample: forwarded unchanged to ``load_pages`` as ``resample``.

    Returns:
        A tuple ``(accuracies, avg_accuracy)``. ``accuracies`` holds one
        value per split: correct labels divided by ``len(test.labels)``.
        ``avg_accuracy`` is the total number of correct labels divided by
        the summed ``test.num_temps`` over all splits.

    Raises:
        ZeroDivisionError: if ``holdout`` yields no splits, because the
            total template count is then still ``0.0``.
    """
    print("Building data")
    pages = load_pages(base_directory, dim=dim, resample=resample)

    accuracies = []
    total_temps = 0.0
    total_right = 0.0
    for i, (test, train) in enumerate(holdout(pages)):
        # Emits the same text as the Python 2 statement `print i, "..."`.
        print("%s %s" % (i, "Classifiying"))

        # Unlabelled training templates cannot vote for a class.
        train_images = [image
                        for train_page in train
                        for image in train_page.image_templates
                        if image.name != "NO LABEL"]
        grouped_labels = [list_classification(template, train_images)[0]
                          for template in test.image_templates]
        predicted_labels = distribute_labels(test.groups, grouped_labels,
                                             test.num_temps)
        real_labels = test.labels
        num_right = num_correct_labels(predicted_labels, real_labels)

        total_temps += test.num_temps
        total_right += num_right
        # Force true division: under Python 2 an integer count divided by
        # an integer length would be floored to 0 or 1.
        # NOTE(review): the per-split ratio uses len(test.labels) while the
        # aggregate uses test.num_temps - confirm these always agree.
        accuracies.append(num_right / float(len(test.labels)))

    avg_accuracy = total_right / total_temps
    return (accuracies, avg_accuracy)
def ensemble_rec():
    """Run hold-out classification with a grouping stage in front.

    For every ``(test, train)`` split yielded by ``holdout``, a grouping
    classifier is built from the training pages with
    ``create_grouping_classifier`` (using ``tree.DecisionTreeClassifier``)
    and applied to the test page with ``group_classify``. The resulting
    grouped templates are then labelled with ``list_classification`` against
    the training templates whose ``name`` is not ``"NO LABEL"``.

    Returns:
        A list with one ``[label_accuracy, group_accuracy]`` pair per split,
        where ``label_accuracy`` is the number of positions at which the
        predicted label equals ``test.labels`` divided by ``test.num_temps``
        and ``group_accuracy`` is the first value returned by
        ``group_classify``.
    """
    print("Building data")
    all_pages = load_pages(base_directory)

    results = []
    for fold, (test, train) in enumerate(holdout(all_pages)):
        # Emits the same text as the Python 2 statement `print fold, "..."`.
        print("%s %s" % (fold, "Grouping"))
        grouper = create_grouping_classifier(train,
                                             tree.DecisionTreeClassifier)
        group_accuracy, regrouped = group_classify(test, grouper)

        print("%s %s" % (fold, "Classifiying"))
        # Collect every labelled template from the training pages.
        labelled = []
        for train_page in train:
            for template in train_page.image_templates:
                if template.name != "NO LABEL":
                    labelled.append(template)

        group_guesses = []
        for candidate in regrouped:
            group_guesses.append(list_classification(candidate, labelled)[0])

        guessed = distribute_labels(test.groups, group_guesses,
                                    test.num_temps)
        truth = test.labels

        # One 1.0 per position where the prediction matches, 0.0 otherwise.
        hits = []
        for pos in range(len(truth)):
            hits.append(1.0 if guessed[pos] == truth[pos] else 0.0)
        num_right = np.sum(hits)

        results.append([num_right / test.num_temps, group_accuracy])
    return results