def detect_error_types(error_set, gold_tree, test_tree):
    """Aggregate individual bracket errors into groups of related errors.

    All errors in error_set are pooled into a single list, ordered with
    bracket_errors.sort_by_depth, and then offered to each aggregator in
    turn.  The aggregators are re-run until a complete pass over all of
    them reports no change.

    Arguments:
      error_set - mapping from error kind to a collection of errors.  It
                  must contain the keys 'miss' and 'extra', and the
                  assertion below requires that those two collections
                  account for every error in the mapping.
      gold_tree - the reference tree, passed through to the aggregators
                  and to bracket_errors.get_errors.
      test_tree - the tree being analysed.  The aggregators are given
                  test_tree.copy() rather than test_tree itself.

    Returns a 4-tuple:
      grouped          - the list handed to the aggregators to collect
                         error groups in
      mutable_test     - the working tree after the final pass
      remaining_errors - bracket_errors.get_errors(gold_tree, mutable_test)
      ungrouped        - the pooled error list as left by the aggregators
    """
    expected_count = len(error_set['miss']) + len(error_set['extra'])

    ungrouped = []
    for etype in error_set:
        ungrouped.extend(error_set[etype])
    bracket_errors.sort_by_depth(ungrouped)

    # Sanity check: nothing other than 'miss' and 'extra' errors was pooled.
    assert len(ungrouped) == expected_count

    grouped = []
    mutable_test = test_tree.copy()

    # Note - order of these is intentional
    aggregators = [
        s_unary.unary_error,
        s_single_word.single_word_error,
        s_attachment.attachment_error,
    ]

    # Iterate through the errors until there is no change after an iteration.
    changed = True
    while changed:
        changed = False
        for func in aggregators:
            tchanged, mutable_test = func(
                ungrouped, grouped, gold_tree, mutable_test)
            if tchanged:
                # grouped[-1] is read here, so an aggregator that reports a
                # change is expected to have appended its group to grouped.
                mutable_test = check_for_matching_errors(
                    ungrouped, grouped[-1], gold_tree, mutable_test)
                changed = True

    remaining_errors = bracket_errors.get_errors(gold_tree, mutable_test)
    return grouped, mutable_test, remaining_errors, ungrouped
def detect_error_types(error_set, gold_tree, test_tree):
    """Aggregate individual bracket errors into groups of related errors.

    All errors in error_set are pooled into a single list, ordered with
    bracket_errors.sort_by_depth, and then offered to each aggregator in
    turn.  The aggregators are re-run until a complete pass over all of
    them reports no change.

    Arguments:
      error_set - mapping from error kind to a collection of errors.  It
                  must contain the keys 'miss' and 'extra', and the
                  assertion below requires that those two collections
                  account for every error in the mapping.
      gold_tree - the reference tree, passed through to the aggregators
                  and to bracket_errors.get_errors.
      test_tree - the tree being analysed.  The aggregators are given
                  test_tree.copy() rather than test_tree itself.

    Returns a 4-tuple:
      grouped          - the list handed to the aggregators to collect
                         error groups in
      mutable_test     - the working tree after the final pass
      remaining_errors - bracket_errors.get_errors(gold_tree, mutable_test)
      ungrouped        - the pooled error list as left by the aggregators
    """
    expected_count = len(error_set['miss']) + len(error_set['extra'])

    ungrouped = []
    for etype in error_set:
        ungrouped.extend(error_set[etype])
    bracket_errors.sort_by_depth(ungrouped)

    # Sanity check: nothing other than 'miss' and 'extra' errors was pooled.
    assert len(ungrouped) == expected_count

    grouped = []
    mutable_test = test_tree.copy()

    # Note - order of these is intentional
    aggregators = [
        s_unary.unary_error,
        s_single_word.single_word_error,
        s_attachment.attachment_error,
    ]

    # Iterate through the errors until there is no change after an iteration.
    changed = True
    while changed:
        changed = False
        for func in aggregators:
            tchanged, mutable_test = func(
                ungrouped, grouped, gold_tree, mutable_test)
            if tchanged:
                # grouped[-1] is read here, so an aggregator that reports a
                # change is expected to have appended its group to grouped.
                mutable_test = check_for_matching_errors(
                    ungrouped, grouped[-1], gold_tree, mutable_test)
                changed = True

    remaining_errors = bracket_errors.get_errors(gold_tree, mutable_test)
    return grouped, mutable_test, remaining_errors, ungrouped
simple_tree = ptb.apply_collins_rules(tree) if VERBOSE: print simple_tree test_tree = error_tree.Error_Tree() test_tree.set_by_ptb(simple_tree, tree) if VERBOSE: print test_tree gold_words = gold_tree.word_yield() test_words = test_tree.word_yield() if len(test_words.split()) != len(gold_words.split()): print "Sentence lengths do not maych..." print "Gold:", gold_words print "Test:", test_words error_set = bracket_errors.get_errors(gold_tree, test_tree)[0] missing = bracket_errors.get_missing_errors(error_set, test_tree) print test_tree.colour_repr(missing=missing).strip() if len(error_set['miss']) > 0 or len(error_set['extra']) > 0: print 'initial errors:', len(error_set['miss']), len( error_set['extra']) aggregated_errors = detect_error_types(error_set, gold_tree, test_tree) for group in aggregated_errors[0]: group.determine_type() print 'Class:', group.classification print 'Fixes:', for error in group.errors: print error error_groups.append(group) error_set = bracket_errors.get_errors(gold_tree,
simple_tree = ptb.apply_collins_rules(tree) if VERBOSE: print simple_tree test_tree = error_tree.Error_Tree() test_tree.set_by_ptb(simple_tree, tree) if VERBOSE: print test_tree gold_words = gold_tree.word_yield() test_words = test_tree.word_yield() if len(test_words.split()) != len(gold_words.split()): print "Sentence lengths do not maych..." print "Gold:", gold_words print "Test:", test_words error_set = bracket_errors.get_errors(gold_tree, test_tree)[0] missing = bracket_errors.get_missing_errors(error_set, test_tree) print test_tree.colour_repr(missing=missing).strip() if len(error_set['miss']) > 0 or len(error_set['extra']) > 0: print 'initial errors:', len(error_set['miss']), len(error_set['extra']) aggregated_errors = detect_error_types(error_set, gold_tree, test_tree) for group in aggregated_errors[0]: group.determine_type() print 'Class:', group.classification print 'Fixes:', for error in group.errors: print error error_groups.append(group) error_set = bracket_errors.get_errors(gold_tree, aggregated_errors[1])[0] missing = bracket_errors.get_missing_errors(error_set, aggregated_errors[1]) print 'remaining errors:', len(error_set['miss']), len(error_set['extra'])