def extra_matching_crossing_miss(error, test_tree, shortest_error, ungrouped, to_group):
    '''Pair an extra bracket with a crossing missing bracket that ends at the
    same position, and repair the pair as one attachment error.

    The nodes lying between the start of the missing bracket and the start of
    the extra bracket are detached from their current parents and inserted as
    the leftmost children of the extra bracket's node in test_tree.

    Arguments:
      error - the error whose node is located in test_tree with
        bracket_errors.get_extra_tree (the extra bracket)
      test_tree - the tree being repaired; modified in place
      shortest_error - the missing bracket error to pair with
      ungrouped - errors not yet grouped; searched for extra errors that have
        become redundant once the nodes have moved
      to_group - list extended with the errors this group accounts for

    Returns (group_fields, test_tree) when a group is formed.  Returns
    (None, test_tree), leaving the tree untouched, when the two brackets do
    not end at the same position or when there is nothing between their
    start points to move.
    '''
    if shortest_error.node.span[1] != error.node.span[1]:
        return None, test_tree

    # Walk leftwards from the start of the extra bracket, collecting the nodes
    # that cover the gap back to the start of the missing bracket.
    moving = []
    mspan = shortest_error.node.span
    cend = error.node.span[0]
    while cend > mspan[0]:
        brac = test_tree
        done = False
        while not done:
            for subtree in brac.subtrees:
                if cend == subtree.span[1] and subtree.span[0] >= mspan[0]:
                    moving.append(subtree)
                    done = True
                    cend = subtree.span[0]
                    break
                if subtree.span[0] < cend <= subtree.span[1]:
                    # cend falls inside this subtree, so search within it
                    brac = subtree
                    break

    # Without this guard moving[0] below raises IndexError after the target
    # node has already been marked as no longer extra.
    if not moving:
        return None, test_tree

    # move them across
    group_fields = {}
    group_fields['type'] = 'attachment'
    group_fields['height'] = 'incorrect'
    group_fields['from parents'] = ''
    for node in moving:
        group_fields['from parents'] += ' ' + node.parent.label
    addendum = []
    target = bracket_errors.get_extra_tree(error, test_tree)
    target.extra = False
    group_desc = 'attachment incorrect %s_instead_of_%s' % (moving[0].parent.label, target.label)
    group_fields['to parent'] = target.label
    single_child_parents = []
    for node in moving:
        parent = node.parent
        parent.subtrees.remove(node)
        # if the parent now has only one child, look into whether it should be deleted
        if len(parent.subtrees) == 1 and parent.label == parent.subtrees[0].label:
            single_child_parents.append(parent)
        target.subtrees.insert(0, node)
        node.parent = target
        addendum.append(node.label)
    group_fields['nodes moving'] = ' '.join(addendum)
    group_desc += ' ' + '_'.join(addendum)
    test_tree.update_span()

    # A former parent left with a single, identically labelled extra child is
    # a redundant bracket; repair it and fold its error into this group.
    for parent in single_child_parents:
        if len(parent.subtrees) != 1:
            continue
        only_child = parent.subtrees[0]
        if only_child.extra and parent.label == only_child.label:
            eerror = bracket_errors.get_extra_error(ungrouped, only_child)
            if eerror is not None:
                repair_tree.repair_extra_node(eerror, test_tree)
                to_group.append(eerror)

    to_group.append(error)
    to_group.append(shortest_error)
    group_desc += ' |emcm1'
    group_fields['ID'] = 'emcm1'
    group_fields['old desc'] = group_desc
    test_tree.check_consistency()
    return group_fields, test_tree
def unary_error(ungrouped, grouped, gold, test):
    '''Group errors on spans that exist in both trees as unary production or
    node labelling errors.

    Only errors whose span covers more than one word and appears in both
    test.get_spans() and gold.get_spans() are considered.  For each such
    span:
      - only missing errors: a 'unary' / 'missing' group
      - only extra errors: a 'unary' / 'extra' group, unless there is a single
        extra bracket with a same-labelled missing bracket directly above it
      - exactly one missing and one extra error: a 'wrong label, right span'
        group
    Every grouped error is removed from ungrouped and repaired in test.

    Arguments:
      ungrouped - list of errors not yet grouped; modified in place
      grouped - list that receives the new error_group.Error_Group objects
      gold - the gold tree
      test - the test tree; repaired in place

    Returns (changed, test), where changed is True if any group was created.

    NOTE(review): unary_error is defined a second time further down this
    file; that later definition is the one bound at import time.
    '''
    nodes, span_set = test.get_spans()
    gold_nodes, gold_span_set = gold.get_spans()

    # span -> (labels at the span in test, labels at the span in gold, errors)
    relevant_errors = {}
    for error in ungrouped:
        span = error.node.span
        if span[1] - span[0] > 1 and span in span_set and span in gold_span_set:
            if span not in relevant_errors:
                relevant_errors[span] = (len(span_set[span]), len(gold_span_set[span]), [])
            relevant_errors[span][2].append(error)

    changed = False
    for span in relevant_errors:
        test_count, gold_count, errors = relevant_errors[span]
        missing_errors = 0
        extra_errors = 0
        for error in errors:
            if error.missing:
                missing_errors += 1
            else:
                extra_errors += 1

        if test_count > 0 and extra_errors == 0:
            # there is/are missing unary production(s) here
            group = error_group.Error_Group()
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    current_labels.append(node.label)
            current_labels.sort()
            missing_labels = sorted(err.node.label for err in errors)
            for error in errors:
                ungrouped.remove(error)
                group.errors.append(error)
                repair_tree.repair_missing_node(error, test)
            group.fields['type'] = 'unary'
            group.fields['subtype'] = 'missing'
            group.desc = 'unary miss %s over %s' % ('_'.join(missing_labels), '_'.join(current_labels))
            group.fields['nodes'] = ' '.join(missing_labels)
            group.fields['old desc'] = group.desc
            grouped.append(group)
            changed = True
        elif gold_count > 0 and missing_errors == 0:
            # there is/are extra unary production(s) here
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    if not node.extra:
                        current_labels.append(node.label)
            current_labels.sort()
            extra_labels = sorted(err.node.label for err in errors)

            # only use it if there isn't a matching missing error directly above
            skip = False
            if len(extra_labels) == 1:
                error = errors[0]
                for merror in ungrouped:
                    # Only missing errors can be the match.  Without this
                    # filter the extra error is compared with itself, which
                    # satisfies the checks below and always sets skip.
                    if not merror.missing:
                        continue
                    if merror.node.label != extra_labels[0]:
                        continue
                    if merror.node.span[0] == error.node.span[0]:
                        if error.node.parent.span[1] >= merror.node.span[1]:
                            skip = True
                            break
                    elif merror.node.span[1] == error.node.span[1]:
                        if error.node.parent.span[0] <= merror.node.span[0]:
                            skip = True
                            break

            if not skip:
                group = error_group.Error_Group()
                for error in errors:
                    ungrouped.remove(error)
                    group.errors.append(error)
                    repair_tree.repair_extra_node(error, test)
                group.fields['type'] = 'unary'
                group.fields['subtype'] = 'extra'
                group.fields['nodes'] = ' '.join(extra_labels)
                group.desc = 'unary extra %s over %s' % ('_'.join(extra_labels), '_'.join(current_labels))
                group.fields['old desc'] = group.desc
                grouped.append(group)
                changed = True
        elif missing_errors == 1 and extra_errors == 1:
            # We have a mislabelled node
            extra, missing = errors[0], errors[1]
            if not extra.extra:
                extra, missing = missing, extra
            group = error_group.Error_Group()
            group.fields['type'] = 'wrong label, right span'
            if test_count == 1 and gold_count == 1:
                group.desc = 'diff %s should_be %s' % (extra.node.label, missing.node.label)
            else:
                group.desc = 'unary diff %s should_be %s' % (extra.node.label, missing.node.label)
            group.fields['old desc'] = group.desc
            group.errors.append(extra)
            ungrouped.remove(extra)
            group.errors.append(missing)
            ungrouped.remove(missing)
            repair_tree.repair_extra_missing_pair(missing, extra, test)
            grouped.append(group)
            changed = True
        else:
            # Most of the other cases are either just an incorrect node labelling, or less clear
            # TODO: One case to consider is when there is a correct node with all
            # the missing nodes above and all the extra nodes below (or vice versa)
            pass
    return changed, test
def extra_crossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
    '''Extra, then if there is a crossing bracket that starts here, and no
    crossing bracket that ends at the same spot, the other thing under this
    bracket has something that should have attached to it, but attached too
    high.  Consider what would happen if it had attached here and see what
    other errors it fixes (ie this extra may now match with a missing bracket
    above).

    Arguments:
      error - the extra bracket error being explained
      test_tree - the tree being repaired; modified in place
      to_group - list extended with the errors this group accounts for
      starting - dict from a start position to the list of crossing missing
        errors starting there; only its first key is used
      ungrouped - errors not yet grouped
      ctree - the node of the extra bracket in test_tree

    Returns (group_fields, test_tree).
    '''
    # find the longest crossing missing bracket that starts here
    # (list(...)[0] behaves the same on Python 2 and 3, unlike keys()[0])
    start = list(starting)[0]
    cend = ctree.span[1]
    crossing_errors = starting[start]
    longest_error = None
    # take the text now, before any nodes are moved
    text = error.node.word_yield()
    for merror in crossing_errors:
        if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
            longest_error = merror
    mspan = (cend, longest_error.node.span[1])

    # find all the parts that start in the missing bracket to be here
    moving = []
    while cend < mspan[1]:
        brac = test_tree
        done = False
        while not done:
            for subtree in brac.subtrees:
                if cend == subtree.span[0]:
                    moving.append(subtree)
                    done = True
                    cend = subtree.span[1]
                    break
                if subtree.span[0] < cend < subtree.span[1]:
                    # cend falls inside this subtree, so search within it
                    brac = subtree
                    break

    # move them across
    group_fields = {}
    group_fields['type'] = 'attachment'
    group_fields['height'] = 'too high'
    group_fields['from parent'] = moving[0].parent.label
    group_fields['to parent'] = longest_error.node.label
    group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, longest_error.node.label)
    addendum = []

    # Attach under the last child instead of ctree itself when that child is
    # an extra bracket with the label and start of the missing bracket.
    target = ctree
    last_child = ctree.subtrees[-1]
    if last_child.extra:
        if last_child.label == longest_error.node.label:
            if last_child.span[0] == longest_error.node.span[0]:
                target = last_child

    single_child_parents = []
    for node in moving:
        parent = node.parent
        parent.subtrees.remove(node)
        # if the parent now has only one child, look into whether it should be deleted
        if len(parent.subtrees) == 1 and parent.label == parent.subtrees[0].label:
            single_child_parents.append(parent)
        target.subtrees.append(node)
        node.parent = target
        addendum.append(node.label)
    group_desc += ' ' + '_'.join(addendum)
    group_fields['nodes moving'] = ' '.join(addendum)
    test_tree.update_span()

    for parent in single_child_parents:
        if len(parent.subtrees) != 1:
            continue
        only_child = parent.subtrees[0]
        if only_child.extra and parent.label == only_child.label:
            eerror = bracket_errors.get_extra_error(ungrouped, only_child)
            # get_extra_error may find nothing; do not repair or group None
            if eerror is not None:
                repair_tree.repair_extra_node(eerror, test_tree)
                to_group.append(eerror)

    # attempt to repair the longest crossing error
    if target == ctree:
        if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
            to_group.append(longest_error)
    # NOTE(review): the source indentation was lost; the next two statements
    # are taken to run unconditionally - confirm against the original layout.
    if error not in to_group:
        to_group.append(error)
    target.extra = False

    group_desc += ' ' + text + ' |ecs1'
    group_fields['ID'] = 'ecs1'
    group_fields['old desc'] = group_desc
    return group_fields, test_tree
def extra_multicrossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
    '''Extra, then if there are crossing brackets that start here, and no
    crossing bracket that ends at the same spot, the other thing under this
    bracket has something that should have attached to it, but attached too
    high.  Consider what would happen if it had attached here and see what
    other errors it fixes (ie this extra may now match with a missing bracket
    above).

    Arguments:
      error - the extra bracket error that triggered the check (not read here)
      test_tree - the tree being repaired; modified in place
      to_group - list extended with the errors this group accounts for
      starting - dict from a start position to the list of crossing missing
        errors starting there; only its first key is used
      ungrouped - errors not yet grouped
      ctree - the node of the extra bracket in test_tree

    Returns (group_fields, test_tree) when a group is formed.  Returns
    (None, test_tree), leaving the tree untouched, when no extra bracket
    ending with ctree pairs up with a missing bracket ending with the longest
    crossing error, or when there is nothing to move.
    '''
    # find the longest crossing missing bracket that starts here
    # (list(...)[0] behaves the same on Python 2 and 3, unlike keys()[0])
    start = list(starting)[0]
    cend = ctree.span[1]
    crossing_errors = starting[start]
    longest_error = None
    for merror in crossing_errors:
        if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
            longest_error = merror
    mspan = (cend, longest_error.node.span[1])

    # Sort by span only.  Sorting the bare (span, error) tuples compares the
    # error objects whenever two spans tie, which is arbitrary on Python 2
    # and a TypeError on Python 3.
    def by_span(pair):
        return pair[0]

    # find the set of missing brackets that end where that one ends
    related_missing = []
    for merror in ungrouped:
        if merror.missing:
            if merror.node.span[1] == longest_error.node.span[1]:
                related_missing.append((merror.node.span, merror))
    related_missing.sort(key=by_span)

    # find the set of extra brackets that end where this one ends
    related_extra = []
    for eerror in ungrouped:
        if eerror.extra:
            current_node = bracket_errors.get_extra_tree(eerror, test_tree)
            if current_node.span[1] == ctree.span[1]:
                related_extra.append((current_node.span, eerror))
    related_extra.sort(key=by_span)

    # find the lowest pairing: every extra is scanned, so the last extra with
    # a missing bracket of the same label and start is the one kept
    lowest = None
    for pair in related_extra:
        for mpair in related_missing:
            if mpair[1].node.label == pair[1].node.label:
                if mpair[1].node.span[0] == pair[1].node.span[0]:
                    lowest = pair[1]
                    break
    if lowest is None:
        return None, test_tree

    # find all the parts that start in the missing bracket to be here
    moving = []
    while cend < mspan[1]:
        brac = test_tree
        done = False
        while not done:
            for subtree in brac.subtrees:
                if cend == subtree.span[0]:
                    moving.append(subtree)
                    done = True
                    cend = subtree.span[1]
                    break
                if subtree.span[0] < cend < subtree.span[1]:
                    # cend falls inside this subtree, so search within it
                    brac = subtree
                    break

    # Nothing to move means no attachment to describe; moving[0] below
    # would otherwise raise IndexError.
    if not moving:
        return None, test_tree

    # move them across
    group_fields = {}
    group_fields['type'] = 'attachment'
    group_fields['height'] = 'too high'
    group_fields['from parent'] = moving[0].parent.label
    addendum = []
    target = bracket_errors.get_extra_tree(lowest, test_tree)
    group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, target.label)
    group_fields['to parent'] = target.label
    single_child_parents = []
    for node in moving:
        parent = node.parent
        parent.subtrees.remove(node)
        # if the parent now has only one child, look into whether it should be deleted
        if len(parent.subtrees) == 1 and parent.label == parent.subtrees[0].label:
            single_child_parents.append(parent)
        target.subtrees.append(node)
        node.parent = target
        addendum.append(node.label)
    group_fields['nodes moving'] = ' '.join(addendum)
    group_desc += ' ' + '_'.join(addendum)
    test_tree.update_span()

    for parent in single_child_parents:
        if len(parent.subtrees) != 1:
            continue
        only_child = parent.subtrees[0]
        if only_child.extra and parent.label == only_child.label:
            eerror = bracket_errors.get_extra_error(ungrouped, only_child)
            # get_extra_error may find nothing; do not repair or group None
            if eerror is not None:
                repair_tree.repair_extra_node(eerror, test_tree)
                to_group.append(eerror)

    # attempt to repair the longest crossing error
    if target == ctree:
        if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
            to_group.append(longest_error)

    group_desc += ' |emcs1'
    group_fields['ID'] = 'emcs1'
    group_fields['old desc'] = group_desc
    return group_fields, test_tree
def extra_crossing_ending(error, test_tree, to_group, ending, ungrouped, ctree):
    '''Extra, then if there is a crossing bracket that ends in the middle of
    here, the other thing under this bracket is attaching too low.  This
    could explain a bunch of other errors.  In particular, consider if the
    wrongly attached thing was collapsed to 0, what would that fix (note that
    the extra bracket may still be extra at this point, or may now be
    equivalent to a missing bracket).

    Two repairs are attempted:
      ece2 - there is exactly one crossing error and a missing bracket with
        the label and end of the extra bracket that starts where the crossing
        error starts: the nodes in between are moved under the extra
        bracket's node.
      ece1 - otherwise the children of ctree that follow the end of the
        crossing bracket are moved up to an ancestor of ctree.

    Arguments:
      error - the extra bracket error being explained
      test_tree - the tree being repaired; modified in place
      to_group - list extended with the errors this group accounts for
      ending - dict from an end position to the list of crossing missing
        errors ending there; only its first key is used
      ungrouped - errors not yet grouped
      ctree - the node of the extra bracket in test_tree

    Returns (group_fields, test_tree).
    '''
    # work out what needs to move
    # (list(...)[0] behaves the same on Python 2 and 3, unlike keys()[0])
    end = list(ending)[0]
    crossing_errors = ending[end]

    # Check the case of a matching missing bracket
    if len(crossing_errors) == 1:
        for merror in ungrouped:
            if not (merror.missing and merror.node.label == error.node.label):
                continue
            if merror.node.span[1] != error.node.span[1]:
                continue
            if crossing_errors[0].node.span[0] != merror.node.span[0]:
                continue

            # the other things should be moving under here!
            moving = []
            target = bracket_errors.get_extra_tree(error, test_tree)
            mspan = merror.node.span
            cend = target.span[0]
            while cend > mspan[0]:
                brac = test_tree
                done = False
                while not done:
                    for subtree in brac.subtrees:
                        if cend == subtree.span[1] and subtree.span[0] >= mspan[0]:
                            moving.append(subtree)
                            done = True
                            cend = subtree.span[0]
                            break
                        if subtree.span[0] < cend <= subtree.span[1]:
                            # cend falls inside this subtree, so search within it
                            brac = subtree
                            break

            # move them across
            group_fields = {}
            group_fields['type'] = 'attachment'
            group_fields['height'] = 'incorrect'
            group_fields['from parents'] = ''
            for node in moving:
                group_fields['from parents'] += ' ' + node.parent.label
            addendum = []
            group_desc = 'attachment incorrect %s_instead_of_%s' % (moving[0].parent.label, target.label)
            group_fields['to parent'] = target.label
            single_child_parents = []
            for node in moving:
                parent = node.parent
                parent.subtrees.remove(node)
                # if the parent now has only one child, look into whether it should be deleted
                if len(parent.subtrees) == 1 and parent.label == parent.subtrees[0].label:
                    single_child_parents.append(parent)
                target.subtrees.insert(0, node)
                node.parent = target
                addendum.insert(0, node.label)
            group_fields['nodes moving'] = ' '.join(addendum)
            group_desc += ' ' + '_'.join(addendum)
            test_tree.update_span()

            for parent in single_child_parents:
                if len(parent.subtrees) != 1:
                    continue
                only_child = parent.subtrees[0]
                if only_child.extra and parent.label == only_child.label:
                    eerror = bracket_errors.get_extra_error(ungrouped, only_child)
                    # get_extra_error may find nothing; do not repair or group None
                    if eerror is not None:
                        repair_tree.repair_extra_node(eerror, test_tree)
                        to_group.append(eerror)

            target.extra = False
            if error not in to_group:
                to_group.append(error)
            to_group.append(merror)
            group_desc += ' |ece2'
            group_fields['ID'] = 'ece2'
            group_fields['old desc'] = group_desc
            test_tree.check_consistency()
            return group_fields, test_tree

    # work out where it is going to move to
    # first find the longest crossing error (the one starting furthest left)
    longest_error = None
    for merror in crossing_errors:
        if longest_error is None or merror.node.span[0] < longest_error.node.span[0]:
            longest_error = merror
    end = longest_error.node.span[1]

    # collect the nodes covering the rest of ctree after the crossing bracket
    cend = end
    moving = []
    while cend < ctree.span[1]:
        brac = test_tree
        done = False
        while not done:
            for subtree in brac.subtrees:
                if cend == subtree.span[0] and subtree.span[0] <= ctree.span[1]:
                    moving.append(subtree)
                    done = True
                    cend = subtree.span[1]
                    break
                if subtree.span[0] <= cend < subtree.span[1]:
                    brac = subtree
                    break

    # then see how far up we can go to it: climb while the ancestors still end
    # where ctree ends, stopping at the first that reaches back to the start
    # of the crossing bracket
    parent = ctree
    while parent.span[1] == ctree.span[1]:
        if parent.span[0] <= longest_error.node.span[0]:
            break
        parent = parent.parent

    # move the things up to this level
    group_fields = {}
    group_fields['type'] = 'attachment'
    group_fields['height'] = 'too low'
    group_fields['from parent'] = ctree.label
    group_fields['to parent'] = parent.label
    moved_labels = []
    group_desc = 'attachment too_low %s_instead_of_%s' % (ctree.label, parent.label)
    for pos in range(len(parent.subtrees)):
        if parent.subtrees[pos].span[1] == ctree.span[1]:
            # insert the movers, in order, right after this child
            for subtree in moving:
                subtree.parent.subtrees.remove(subtree)
                parent.subtrees.insert(pos + 1, subtree)
                pos += 1
                subtree.parent = parent
                group_desc += ' ' + subtree.label
                moved_labels.append(subtree.label)
            break
    group_fields['nodes moving'] = ' '.join(moved_labels)

    # if only one thing is left behind, and its parent is extra, fix that
    if len(ctree.subtrees) == 1:
        for pos in range(len(ctree.parent.subtrees)):
            if ctree.parent.subtrees[pos] == ctree:
                for subtree in ctree.subtrees[::-1]:
                    ctree.parent.subtrees.insert(pos + 1, subtree)
                    subtree.parent = ctree.parent
                break
        ctree.parent.subtrees.remove(ctree)
        # NOTE(review): the source indentation was lost; the error is taken to
        # be grouped only when its bracket is removed here - confirm against
        # the original layout.
        to_group.append(error)
    test_tree.update_span()

    # if possible, fix longest_error
    left, right = -1, -1
    for pos in range(len(parent.subtrees)):
        if longest_error.node.span[0] == parent.subtrees[pos].span[0]:
            left = pos
        if longest_error.node.span[1] == parent.subtrees[pos].span[1]:
            right = pos
    if -1 < left < right:
        repair_tree.repair_missing_node(longest_error, test_tree)
        to_group.append(longest_error)

    # other errors that are fixed as a side effect will be found by the cleanup stuff
    group_desc += ' |ece1'
    group_fields['ID'] = 'ece1'
    group_fields['old desc'] = group_desc
    return group_fields, test_tree
def missing_with_matching_extra(error, test_tree, to_group, to_add, left, right, parent, ungrouped):
    '''Missing, then if there is an equivalent extra above it, then the next
    chunk of sentence is attaching too low.  This one attachment mistake
    could actually be causing a stack of errors, so we pull out the
    incorrectly attached bits and see what else is fixed.

    Three repairs are attempted, depending on where the missing bracket sits
    among the children of parent:
      mwme1 - it starts at the first child (left == 0): the children after
        index right are moved up to the parent of parent.
      mwme2 - it ends at the last child, and parent is an NP made only of
        words inside a non-extra NP: the missing brackets within parent and
        the extra bracket are repaired as NP internal structure.
      mwme3 - it ends at the last child and ungrouped holds no other error
        within parent: the missing and extra brackets are repaired.

    Arguments:
      error - the missing bracket error
      test_tree - the tree being repaired; modified in place
      to_group - list extended with the errors this group accounts for
      to_add - not used by this function
      left, right - indices of the first and last child of parent covered by
        the missing bracket
      parent - the extra node above the missing bracket
      ungrouped - errors not yet grouped

    Returns (group_fields, test_tree) when a group is formed, otherwise
    (None, test_tree).
    '''
    if left == 0:
        # our missing bracket covers nodes starting on the left
        # take the rest out, and move them up to be beneath the next layer that is
        # correct (not extra)
        to_group.append(error)
        eerror = bracket_errors.get_extra_error(ungrouped, parent)
        if eerror is None:
            # single-argument print calls behave the same on Python 2 and 3
            print("Couldn't find match!")
            for terror in ungrouped:
                print(terror)
        else:
            to_group.append(eerror)
        parent.extra = False
        clevel = parent
        parent = parent.parent
        prev = clevel

        # pull out the node(s) down the bottom on the right
        # move them up to the discovered level
        group_fields = {}
        group_fields['type'] = 'attachment'
        group_fields['height'] = 'too low'
        group_fields['from parent'] = clevel.label
        group_fields['from left siblings'] = ''
        for child in parent.subtrees:
            if child == prev:
                break
            group_fields['from left siblings'] += ' ' + child.label
        group_fields['to parent'] = parent.label
        group_desc = 'attachment too_low %s_instead_of_%s' % (clevel.label, parent.label)
        addendum = []
        for pos in range(len(parent.subtrees)):
            if clevel.span[1] <= parent.subtrees[pos].span[1]:
                if clevel.span[1] == parent.subtrees[pos].span[1]:
                    pos = pos + 1
                # Popping from the right and always inserting at the same
                # position keeps the moved nodes in their original order.
                while len(clevel.subtrees) > right + 1:
                    node = clevel.subtrees.pop()
                    parent.subtrees.insert(pos, node)
                    node.parent = parent
                    addendum.insert(0, node.label)
                break
        group_fields['nodes moving'] = ' '.join(addendum)
        group_desc += ' ' + '_'.join(addendum)
        test_tree.update_span()
        group_fields['ID'] = '|mwme1'
        group_desc += ' |mwme1'
        group_fields['old desc'] = group_desc
        test_tree.check_consistency()
        return group_fields, test_tree
    elif right == len(parent.subtrees) - 1:
        # our missing bracket is to the right
        # if the extra is an NP and everything under it is a word, NP internal structure
        if parent.label == 'NP':
            if parent.parent is not None:
                if parent.parent.label == 'NP' and not parent.parent.extra:
                    all_words = True
                    for subtree in parent.subtrees:
                        if subtree.word is None:
                            all_words = False
                            break
                    if all_words:
                        group_fields = {}
                        group_fields['type'] = 'NP structure'
                        eerror = bracket_errors.get_extra_error(ungrouped, parent)
                        for merror in ungrouped:
                            if merror.node.span[0] >= parent.span[0]:
                                if merror.node.span[1] <= parent.span[1]:
                                    if merror.missing:
                                        to_group.append(merror)
                                        repair_tree.repair_missing_node(merror, test_tree)
                        # get_extra_error may find nothing; do not repair or group None
                        if eerror is not None:
                            to_group.append(eerror)
                            repair_tree.repair_extra_node(eerror, test_tree)
                        test_tree.update_span()
                        group_fields['ID'] = '|mwme2'
                        group_fields['old desc'] = 'missing error NP structure |mwme2'
                        return group_fields, test_tree

        # no other missing or extra brackets under this extra span
        # attachment, give info
        no_others = True
        eerror = bracket_errors.get_extra_error(ungrouped, parent)
        for oerror in ungrouped:
            if oerror.node.span[0] >= parent.span[0]:
                if oerror.node.span[1] <= parent.span[1]:
                    if oerror != error and oerror != eerror:
                        no_others = False
                        break
        if no_others:
            group_fields = {}
            group_fields['type'] = 'extra under bracket on right'
            group_fields['parent'] = parent.label
            group_fields['extra nodes'] = ''
            group_fields['children'] = ''
            for subtree in parent.subtrees:
                if subtree.span[0] < error.node.span[0]:
                    group_fields['extra nodes'] += ' ' + subtree.label
                elif subtree.span[1] < error.node.span[1]:
                    group_fields['children'] += ' ' + subtree.label
            group_fields['ID'] = '|mwme3'
            group_fields['old desc'] = 'extra under bracket on right |mwme3'
            if error is not None:
                to_group.append(error)
                repair_tree.repair_missing_node(error, test_tree)
            if eerror is not None:
                to_group.append(eerror)
                repair_tree.repair_extra_node(eerror, test_tree)
            test_tree.update_span()
            return group_fields, test_tree
    else:
        # our missing bracket is somewhere in the middle
        pass
    return None, test_tree
def single_word_error(ungrouped, grouped, gold, test):
    '''An extra/missing bracket at any depth that has a span of 1.

    Errors over a single word are collected per span.  A span holding exactly
    one extra and one missing error becomes a 'wrong label, right span'
    group.  Any other single-word error becomes its own 'single word phrase'
    group, unless ungrouped holds an error of the opposite kind with the same
    label and start position, in which case it is left for later.  Every
    grouped error is removed from ungrouped and repaired in test.

    Arguments:
      ungrouped - list of errors not yet grouped; modified in place
      grouped - list that receives the new error_group.Error_Group objects
      gold - the gold tree (not read here)
      test - the test tree; repaired in place

    Returns (changed, test), where changed is True if at least one error was
    grouped and repaired.
    '''
    singles = {}
    for error in ungrouped:
        span = error.node.span
        if span[0] + 1 == span[1]:
            singles.setdefault(span, []).append(error)

    changed = False
    to_fix = []
    for span in singles:
        errors = singles[span]
        # First check for cases where there is a matching bracket (so it is in fact
        # just the wrong label)
        if len(errors) == 2 and errors[0].extra != errors[1].extra:
            group = error_group.Error_Group()
            group.errors += errors
            group.fields['type'] = 'wrong label, right span'
            group.desc = 'single_word diff '
            # the label of the extra bracket always comes first
            if errors[0].extra:
                group.desc += errors[0].node.label + '_' + errors[1].node.label
            else:
                group.desc += errors[1].node.label + '_' + errors[0].node.label
            grouped.append(group)
            to_fix += errors
        else:
            # this includes cases of multiple brackets (so we don't know which to
            # link as above), and a single bracket error
            for error in errors:
                # check to see if a matching bracket type starts here and matches type
                use = True
                for uerror in ungrouped:
                    if uerror.node.span[0] == error.node.span[0]:
                        if uerror.node.label == error.node.label:
                            if uerror.missing and error.extra:
                                use = False
                                break
                            if uerror.extra and error.missing:
                                use = False
                                break
                if not use:
                    continue
                group = error_group.Error_Group()
                group.errors.append(error)
                group.desc = 'single_word '
                if error.missing:
                    group.desc += 'miss'
                else:
                    group.desc += 'extra'
                group.desc += ' ' + error.node.label
                group.fields['type'] = 'single word phrase'
                group.fields['old desc'] = group.desc
                grouped.append(group)
                to_fix.append(error)

    # Repairs are applied only after grouping, so ungrouped is not modified
    # while it is being scanned above.
    for error in to_fix:
        ungrouped.remove(error)
        if error.extra:
            repair_tree.repair_extra_node(error, test)
        else:
            repair_tree.repair_missing_node(error, test)
        # Previously this flag was never set, so the function reported "no
        # change" even after grouping errors and repairing the tree.
        changed = True
    return changed, test
def unary_error(ungrouped, grouped, gold, test):
    '''Group errors on spans that exist in both trees as unary production or
    node labelling errors.

    Only errors whose span covers more than one word and appears in both
    test.get_spans() and gold.get_spans() are considered.  For each such
    span:
      - only missing errors: a 'unary' / 'missing' group
      - only extra errors: a 'unary' / 'extra' group, unless there is a single
        extra bracket with a same-labelled missing bracket directly above it
      - exactly one missing and one extra error: a 'wrong label, right span'
        group
    Every grouped error is removed from ungrouped and repaired in test.

    Arguments:
      ungrouped - list of errors not yet grouped; modified in place
      grouped - list that receives the new error_group.Error_Group objects
      gold - the gold tree
      test - the test tree; repaired in place

    Returns (changed, test), where changed is True if any group was created.

    NOTE(review): an earlier definition of unary_error in this file is
    shadowed by this one.
    '''
    nodes, span_set = test.get_spans()
    gold_nodes, gold_span_set = gold.get_spans()

    # span -> (labels at the span in test, labels at the span in gold, errors)
    relevant_errors = {}
    for error in ungrouped:
        span = error.node.span
        if span[1] - span[0] > 1 and span in span_set and span in gold_span_set:
            if span not in relevant_errors:
                relevant_errors[span] = (len(span_set[span]), len(gold_span_set[span]), [])
            relevant_errors[span][2].append(error)

    changed = False
    for span in relevant_errors:
        test_count, gold_count, errors = relevant_errors[span]
        missing_errors = 0
        extra_errors = 0
        for error in errors:
            if error.missing:
                missing_errors += 1
            else:
                extra_errors += 1

        if test_count > 0 and extra_errors == 0:
            # there is/are missing unary production(s) here
            group = error_group.Error_Group()
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    current_labels.append(node.label)
            current_labels.sort()
            missing_labels = sorted(err.node.label for err in errors)
            for error in errors:
                ungrouped.remove(error)
                group.errors.append(error)
                repair_tree.repair_missing_node(error, test)
            group.fields['type'] = 'unary'
            group.fields['subtype'] = 'missing'
            group.desc = 'unary miss %s over %s' % ('_'.join(missing_labels), '_'.join(current_labels))
            group.fields['nodes'] = ' '.join(missing_labels)
            group.fields['old desc'] = group.desc
            grouped.append(group)
            changed = True
        elif gold_count > 0 and missing_errors == 0:
            # there is/are extra unary production(s) here
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    if not node.extra:
                        current_labels.append(node.label)
            current_labels.sort()
            extra_labels = sorted(err.node.label for err in errors)

            # only use it if there isn't a matching missing error directly above
            skip = False
            if len(extra_labels) == 1:
                error = errors[0]
                for merror in ungrouped:
                    # Only missing errors can be the match.  Without this
                    # filter the extra error is compared with itself, which
                    # satisfies the checks below and always sets skip.
                    if not merror.missing:
                        continue
                    if merror.node.label != extra_labels[0]:
                        continue
                    if merror.node.span[0] == error.node.span[0]:
                        if error.node.parent.span[1] >= merror.node.span[1]:
                            skip = True
                            break
                    elif merror.node.span[1] == error.node.span[1]:
                        if error.node.parent.span[0] <= merror.node.span[0]:
                            skip = True
                            break

            if not skip:
                group = error_group.Error_Group()
                for error in errors:
                    ungrouped.remove(error)
                    group.errors.append(error)
                    repair_tree.repair_extra_node(error, test)
                group.fields['type'] = 'unary'
                group.fields['subtype'] = 'extra'
                group.fields['nodes'] = ' '.join(extra_labels)
                group.desc = 'unary extra %s over %s' % ('_'.join(extra_labels), '_'.join(current_labels))
                group.fields['old desc'] = group.desc
                grouped.append(group)
                changed = True
        elif missing_errors == 1 and extra_errors == 1:
            # We have a mislabelled node
            extra, missing = errors[0], errors[1]
            if not extra.extra:
                extra, missing = missing, extra
            group = error_group.Error_Group()
            group.fields['type'] = 'wrong label, right span'
            if test_count == 1 and gold_count == 1:
                group.desc = 'diff %s should_be %s' % (extra.node.label, missing.node.label)
            else:
                group.desc = 'unary diff %s should_be %s' % (extra.node.label, missing.node.label)
            group.fields['old desc'] = group.desc
            group.errors.append(extra)
            ungrouped.remove(extra)
            group.errors.append(missing)
            ungrouped.remove(missing)
            repair_tree.repair_extra_missing_pair(missing, extra, test)
            grouped.append(group)
            changed = True
        else:
            # Most of the other cases are either just an incorrect node labelling, or less clear
            # TODO: One case to consider is when there is a correct node with all
            # the missing nodes above and all the extra nodes below (or vice versa)
            pass
    return changed, test