Exemplo n.º 1
0
def extra_matching_crossing_miss(error, test_tree, shortest_error, ungrouped, to_group):
	'''Re-attach nodes so an extra bracket lines up with a crossing missing bracket.

	Applies only when the missing bracket (shortest_error) ends at the same
	position as the extra bracket (error).  The subtrees of test_tree lying
	between the start of the missing bracket and the start of the extra
	bracket are moved to the front of the node returned by
	bracket_errors.get_extra_tree(error, test_tree), and that node is no
	longer flagged as extra.

	Arguments:
	  error - the extra-bracket error being examined
	  test_tree - root of the tree being repaired (modified in place)
	  shortest_error - the missing-bracket error to match against
	  ungrouped - errors not yet grouped; searched for extra errors that the
	    move leaves behind as redundant unary brackets
	  to_group - list that receives every error explained by this repair

	Returns (group_fields, test_tree), where group_fields is a dict
	describing the attachment error, or (None, test_tree) when the pattern
	does not apply (nothing is modified in that case).
	'''
	if shortest_error.node.span[1] != error.node.span[1]:
		return None, test_tree

	# Collect the nodes to move, walking leftwards from the start of the extra
	# bracket until the start of the missing bracket is reached.  Each step
	# searches top-down from the root for the highest node that ends at the
	# current position and still starts inside the missing bracket.
	moving = []
	mspan = shortest_error.node.span
	cend = error.node.span[0]
	while cend > mspan[0]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[1] and subtree.span[0] >= mspan[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[0]
					break
				if subtree.span[0] < cend <= subtree.span[1]:
					# the node we want is somewhere below this one
					brac = subtree
					break

	# Nothing lies between the two bracket starts, so there is no attachment
	# to describe.  Bail out before touching the tree (previously this case
	# cleared target.extra and then failed on moving[0]).
	if not moving:
		return None, test_tree

	# move them across
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'incorrect'
	# must be recorded before the nodes are re-parented below
	group_fields['from parents'] = ''.join(' ' + node.parent.label for node in moving)
	target = bracket_errors.get_extra_tree(error, test_tree)
	target.extra = False
	group_desc = 'attachment incorrect %s_instead_of_%s' % (moving[0].parent.label, target.label)
	group_fields['to parent'] = target.label

	# moving is ordered right-to-left, so inserting each node at position 0
	# leaves the nodes in their original left-to-right order under target.
	# Note that the labels in addendum stay in right-to-left order.
	addendum = []
	single_child_parents = []
	for node in moving:
		old_parent = node.parent
		old_parent.subtrees.remove(node)
		# if the parent now has only one child, look into whether it should be deleted
		if len(old_parent.subtrees) == 1 and old_parent.label == old_parent.subtrees[0].label:
			single_child_parents.append(old_parent)
		target.subtrees.insert(0, node)
		node.parent = target
		addendum.append(node.label)
	group_fields['nodes moving'] = ' '.join(addendum)
	group_desc += ' ' + '_'.join(addendum)
	test_tree.update_span()

	# A parent left with a single, identically labelled extra child is a
	# redundant unary bracket: repair the matching extra error if one exists.
	for old_parent in single_child_parents:
		if len(old_parent.subtrees) != 1:
			continue
		child = old_parent.subtrees[0]
		if child.extra and old_parent.label == child.label:
			eerror = bracket_errors.get_extra_error(ungrouped, child)
			if eerror is not None:
				repair_tree.repair_extra_node(eerror, test_tree)
				to_group.append(eerror)

	to_group.append(error)
	to_group.append(shortest_error)
	group_desc += ' |emcm1'
	group_fields['ID'] = 'emcm1'
	group_fields['old desc'] = group_desc
	test_tree.check_consistency()
	return group_fields, test_tree
Exemplo n.º 2
0
def extra_multicrossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
	'''Extra, then if there are crossing brackets that start here, and no
	crossing bracket that ends at the same spot, the other thing under this
	bracket has something that should have attached to it, but attached too high.
	Consider what would happen if it had attached here and see what other errors
	it fixes (ie this extra may now match with a missing bracket above).

	Arguments:
	  error - the extra-bracket error being examined (not read by this function)
	  test_tree - root of the tree being repaired (modified in place)
	  to_group - list that receives every error explained by this repair
	  starting - dict whose values are lists of crossing missing-bracket
	    errors; only the entry for the first key is used
	  ungrouped - errors not yet grouped
	  ctree - the node in test_tree for the extra bracket

	Returns (group_fields, test_tree), where group_fields is a dict
	describing the attachment error, or (None, test_tree) when no extra
	bracket pairs up with a missing one or there is nothing to move.
	'''
	# find the longest crossing missing bracket that starts here
	# (list(...)[0] instead of .keys()[0] so this also runs where keys() is a view)
	start = list(starting)[0]
	cend = ctree.span[1]
	crossing_errors = starting[start]
	longest_error = None
	for merror in crossing_errors:
		if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
			longest_error = merror
	mspan = (cend, longest_error.node.span[1])

	# find the set of missing brackets that end where that one ends
	related_missing = []
	for merror in ungrouped:
		if merror.missing and merror.node.span[1] == longest_error.node.span[1]:
			related_missing.append((merror.node.span, merror))
	related_missing.sort()

	# find the set of extra brackets that end where this one ends
	related_extra = []
	for eerror in ungrouped:
		if eerror.extra:
			current_node = bracket_errors.get_extra_tree(eerror, test_tree)
			if current_node.span[1] == ctree.span[1]:
				related_extra.append((current_node.span, eerror))
	related_extra.sort()

	# find the lowest pairing: an extra bracket with the same label and start
	# as one of the missing brackets.  The outer loop deliberately keeps going
	# after a hit, so the last match in the sorted list is the one kept.
	lowest = None
	for _, eerror in related_extra:
		for _, merror in related_missing:
			if merror.node.label == eerror.node.label and merror.node.span[0] == eerror.node.span[0]:
				lowest = eerror
				break
	if lowest is None:
		return None, test_tree

	# find all the parts that start in the missing bracket to be here, walking
	# rightwards from the end of ctree; each step searches top-down from the
	# root for the highest node that starts at the current position
	moving = []
	while cend < mspan[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] < cend < subtree.span[1]:
					# the node we want is somewhere below this one
					brac = subtree
					break

	# The longest crossing bracket does not reach past ctree, so there is
	# nothing to move (previously this failed on moving[0]).
	if not moving:
		return None, test_tree

	# move them across
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too high'
	group_fields['from parent'] = moving[0].parent.label
	target = bracket_errors.get_extra_tree(lowest, test_tree)
	group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, target.label)
	group_fields['to parent'] = target.label

	addendum = []
	single_child_parents = []
	for node in moving:
		old_parent = node.parent
		old_parent.subtrees.remove(node)
		# if the parent now has only one child, look into whether it should be deleted
		if len(old_parent.subtrees) == 1 and old_parent.label == old_parent.subtrees[0].label:
			single_child_parents.append(old_parent)
		target.subtrees.append(node)
		node.parent = target
		addendum.append(node.label)
	group_fields['nodes moving'] = ' '.join(addendum)
	group_desc += ' ' + '_'.join(addendum)
	test_tree.update_span()

	# A parent left with a single, identically labelled extra child is a
	# redundant unary bracket: repair the matching extra error if one exists.
	for old_parent in single_child_parents:
		if len(old_parent.subtrees) != 1:
			continue
		child = old_parent.subtrees[0]
		if child.extra and old_parent.label == child.label:
			eerror = bracket_errors.get_extra_error(ungrouped, child)
			# the lookup can come back empty; do not repair or group None
			if eerror is not None:
				repair_tree.repair_extra_node(eerror, test_tree)
				to_group.append(eerror)

	# attempt to repair the longest crossing error
	if target == ctree:
		if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
			to_group.append(longest_error)
	group_desc += ' |emcs1'
	group_fields['ID'] = 'emcs1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Exemplo n.º 3
0
def extra_crossing_ending(error, test_tree, to_group, ending, ungrouped, ctree):
	'''Extra, then if there is a crossing bracket that ends in the middle of
	here, the other thing under this bracket is attaching too low.  This could
	explain a bunch of other errors.  In particular, consider if the wrongly
	attached thing was collapsed to 0, what would that fix (note that the extra
	bracket may still be extra at this point, or may now be equivalent to a
	missing bracket).

	Two repairs are attempted, in this order:
	  ece2 - there is exactly one crossing error and an ungrouped missing
	    bracket with the same label and end as the extra bracket that starts
	    where the crossing error starts: the nodes in between are moved under
	    the extra bracket, which then stops being extra.
	  ece1 - otherwise the nodes of ctree that follow the end of the longest
	    crossing bracket are moved up to an ancestor of ctree.

	Arguments:
	  error - the extra-bracket error being examined
	  test_tree - root of the tree being repaired (modified in place)
	  to_group - list that receives every error explained by this repair
	  ending - dict whose values are lists of crossing missing-bracket
	    errors; only the entry for the first key is used
	  ungrouped - errors not yet grouped
	  ctree - the node in test_tree for the extra bracket

	Returns (group_fields, test_tree), where group_fields is a dict
	describing the attachment error.
	'''
	# work out what needs to move
	# (list(...)[0] instead of .keys()[0] so this also runs where keys() is a view)
	end = list(ending)[0]
	crossing_errors = ending[end]

	# Check the case of a matching missing bracket
	if len(crossing_errors) == 1:
		crossing_start = crossing_errors[0].node.span[0]
		for merror in ungrouped:
			if not (merror.missing and merror.node.label == error.node.label):
				continue
			if merror.node.span[1] != error.node.span[1]:
				continue
			if merror.node.span[0] != crossing_start:
				continue

			# the other things should be moving under here!
			# Collect them walking leftwards from the start of the extra
			# bracket to the start of the missing one; each step searches
			# top-down from the root.
			target = bracket_errors.get_extra_tree(error, test_tree)
			mspan = merror.node.span
			cend = target.span[0]
			moving = []
			while cend > mspan[0]:
				brac = test_tree
				done = False
				while not done:
					for subtree in brac.subtrees:
						if cend == subtree.span[1] and subtree.span[0] >= mspan[0]:
							moving.append(subtree)
							done = True
							cend = subtree.span[0]
							break
						if subtree.span[0] < cend <= subtree.span[1]:
							brac = subtree
							break

			# Nothing to move for this candidate (previously this failed on
			# moving[0]); try the next one, the tree is still untouched.
			if not moving:
				continue

			# move them across
			group_fields = {}
			group_fields['type'] = 'attachment'
			group_fields['height'] = 'incorrect'
			# must be recorded before the nodes are re-parented below
			group_fields['from parents'] = ''.join(' ' + node.parent.label for node in moving)
			group_desc = 'attachment incorrect %s_instead_of_%s' % (moving[0].parent.label, target.label)
			group_fields['to parent'] = target.label

			# moving is ordered right-to-left; inserting at position 0 (both
			# in the tree and in addendum) restores left-to-right order
			addendum = []
			single_child_parents = []
			for node in moving:
				old_parent = node.parent
				old_parent.subtrees.remove(node)
				# if the parent now has only one child, look into whether it should be deleted
				if len(old_parent.subtrees) == 1 and old_parent.label == old_parent.subtrees[0].label:
					single_child_parents.append(old_parent)
				target.subtrees.insert(0, node)
				node.parent = target
				addendum.insert(0, node.label)
			group_fields['nodes moving'] = ' '.join(addendum)
			group_desc += ' ' + '_'.join(addendum)
			test_tree.update_span()

			# A parent left with a single, identically labelled extra child
			# is a redundant unary bracket: repair its extra error if found.
			for old_parent in single_child_parents:
				if len(old_parent.subtrees) != 1:
					continue
				child = old_parent.subtrees[0]
				if child.extra and old_parent.label == child.label:
					eerror = bracket_errors.get_extra_error(ungrouped, child)
					# the lookup can come back empty; do not repair or group None
					if eerror is not None:
						repair_tree.repair_extra_node(eerror, test_tree)
						to_group.append(eerror)

			target.extra = False
			if error not in to_group:
				to_group.append(error)
			to_group.append(merror)
			group_desc += ' |ece2'
			group_fields['ID'] = 'ece2'
			group_fields['old desc'] = group_desc
			test_tree.check_consistency()
			return group_fields, test_tree

	# work out where it is going to move to
	# first find the longest crossing error (the one starting furthest left)
	longest_error = None
	for merror in crossing_errors:
		if longest_error is None or merror.node.span[0] < longest_error.node.span[0]:
			longest_error = merror
	end = longest_error.node.span[1]

	# collect everything between the end of that bracket and the end of ctree,
	# walking rightwards; each step searches top-down from the root
	cend = end
	moving = []
	while cend < ctree.span[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] <= cend < subtree.span[1]:
					brac = subtree
					break

	# then see how far up we can go to it: climb while the ancestor still ends
	# where ctree ends, stopping at the first one that starts at or before the
	# start of the longest crossing bracket
	parent = ctree
	while parent.span[1] == ctree.span[1]:
		if parent.span[0] <= longest_error.node.span[0]:
			break
		parent = parent.parent

	# move the things up to this level, directly after the child of parent
	# that ends where ctree ends
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too low'
	group_fields['from parent'] = ctree.label
	group_fields['to parent'] = parent.label
	group_desc = 'attachment too_low %s_instead_of_%s' % (ctree.label, parent.label)
	moved_labels = []
	for pos, child in enumerate(parent.subtrees):
		if child.span[1] == ctree.span[1]:
			# the list is modified from here on, so leave the loop afterwards
			for subtree in moving:
				subtree.parent.subtrees.remove(subtree)
				parent.subtrees.insert(pos + 1, subtree)
				pos += 1
				subtree.parent = parent
				group_desc += ' ' + subtree.label
				moved_labels.append(subtree.label)
			break
	group_fields['nodes moving'] = ' '.join(moved_labels)

	# if only one thing is left behind, and its parent is extra, fix that
	# by splicing the remaining child into ctree's place
	if len(ctree.subtrees) == 1:
		siblings = ctree.parent.subtrees
		for pos in range(len(siblings)):
			if siblings[pos] == ctree:
				for subtree in ctree.subtrees[::-1]:
					siblings.insert(pos + 1, subtree)
					subtree.parent = ctree.parent
				break
		siblings.remove(ctree)
		to_group.append(error)
	test_tree.update_span()

	# if possible, fix longest_error: it can be inserted when parent has one
	# child starting where it starts and a later child ending where it ends
	left, right = -1, -1
	for pos, child in enumerate(parent.subtrees):
		if longest_error.node.span[0] == child.span[0]:
			left = pos
		if longest_error.node.span[1] == child.span[1]:
			right = pos
	if -1 < left < right:
		repair_tree.repair_missing_node(longest_error, test_tree)
		to_group.append(longest_error)

	# other errors that are fixed as a side effect will be found by the cleanup stuff

	group_desc += ' |ece1'
	group_fields['ID'] = 'ece1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Exemplo n.º 4
0
def extra_crossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
	'''Extra, then if there is a crossing bracket that starts here, and no
	crossing bracket that ends at the same spot, the other thing under this
	bracket has something that should have attached to it, but attached too high.
	Consider what would happen if it had attached here and see what other errors
	it fixes (ie this extra may now match with a missing bracket above).

	Arguments:
	  error - the extra-bracket error being examined
	  test_tree - root of the tree being repaired (modified in place)
	  to_group - list that receives every error explained by this repair
	  starting - dict whose values are lists of crossing missing-bracket
	    errors; only the entry for the first key is used
	  ungrouped - errors not yet grouped
	  ctree - the node in test_tree for the extra bracket

	Returns (group_fields, test_tree), where group_fields is a dict
	describing the attachment error, or (None, test_tree) when there is
	nothing to move.
	'''
	# find the longest crossing missing bracket that starts here
	# (list(...)[0] instead of .keys()[0] so this also runs where keys() is a view)
	start = list(starting)[0]
	cend = ctree.span[1]
	crossing_errors = starting[start]
	longest_error = None
	# captured up front; it is appended to the description at the very end
	text = error.node.word_yield()
	for merror in crossing_errors:
		if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
			longest_error = merror
	mspan = (cend, longest_error.node.span[1])

	# find all the parts that start in the missing bracket to be here, walking
	# rightwards from the end of ctree; each step searches top-down from the
	# root for the highest node that starts at the current position
	moving = []
	while cend < mspan[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] < cend < subtree.span[1]:
					# the node we want is somewhere below this one
					brac = subtree
					break

	# The longest crossing bracket does not reach past ctree, so there is
	# nothing to move (previously this failed on moving[0]).
	if not moving:
		return None, test_tree

	# move them across
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too high'
	group_fields['from parent'] = moving[0].parent.label
	group_fields['to parent'] = longest_error.node.label
	group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, longest_error.node.label)

	# Attach under ctree, unless its last child is an extra bracket with the
	# same label and start as the crossing bracket; in that case extend that
	# child instead.
	target = ctree
	last_child = ctree.subtrees[-1]
	if last_child.extra:
		if last_child.label == longest_error.node.label:
			if last_child.span[0] == longest_error.node.span[0]:
				target = last_child

	addendum = []
	single_child_parents = []
	for node in moving:
		old_parent = node.parent
		old_parent.subtrees.remove(node)
		# if the parent now has only one child, look into whether it should be deleted
		if len(old_parent.subtrees) == 1 and old_parent.label == old_parent.subtrees[0].label:
			single_child_parents.append(old_parent)
		target.subtrees.append(node)
		node.parent = target
		addendum.append(node.label)
	group_desc += ' ' + '_'.join(addendum)
	group_fields['nodes moving'] = ' '.join(addendum)
	test_tree.update_span()

	# A parent left with a single, identically labelled extra child is a
	# redundant unary bracket: repair the matching extra error if one exists.
	for old_parent in single_child_parents:
		if len(old_parent.subtrees) != 1:
			continue
		child = old_parent.subtrees[0]
		if child.extra and old_parent.label == child.label:
			eerror = bracket_errors.get_extra_error(ungrouped, child)
			# the lookup can come back empty; do not repair or group None
			if eerror is not None:
				repair_tree.repair_extra_node(eerror, test_tree)
				to_group.append(eerror)

	# attempt to repair the longest crossing error
	if target == ctree:
		if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
			to_group.append(longest_error)
	if error not in to_group:
		to_group.append(error)
	target.extra = False
	group_desc += ' ' + text + ' |ecs1'
	group_fields['ID'] = 'ecs1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Exemplo n.º 5
0
def missing_with_matching_extra(error, test_tree, to_group, to_add, left, right, parent, ungrouped):
	'''Missing, then if there is an equivalent extra above it, then the next
	chunk of sentence is attaching too low.  This one attachment mistake could
	actually be causing a stack of errors, so we pull out the incorrectly
	attached bits and see what else is fixed.

	Arguments:
	  error - the missing-bracket error being examined
	  test_tree - root of the tree being repaired (modified in place)
	  to_group - list that receives every error explained by this repair
	  to_add - not read by this function
	  left, right - positions in parent.subtrees of the first and last child
	    that the missing bracket would cover
	  parent - the node in test_tree whose children the missing bracket spans
	  ungrouped - errors not yet grouped

	Three cases are handled:
	  mwme1 - the missing bracket covers the leftmost children (left == 0):
	    the children after position right are moved up one level.
	  mwme2 - the missing bracket covers the rightmost children, and parent is
	    an NP made only of words directly under a non-extra NP: all missing
	    brackets inside parent are repaired together with the extra bracket.
	  mwme3 - the missing bracket covers the rightmost children and no other
	    ungrouped error lies inside parent: both errors are repaired.

	Returns (group_fields, test_tree), where group_fields is a dict
	describing the error, or (None, test_tree) when no case applies.
	'''
	if left == 0:
		# our missing bracket covers nodes starting on the left

		# take the rest out, and move them up to be beneath the next layer
		to_group.append(error)
		eerror = bracket_errors.get_extra_error(ungrouped, parent)
		if eerror is None:
			# single-argument print(...) behaves the same as the statement form
			print("Couldn't find match!")
			for terror in ungrouped:
				print(terror)
		else:
			to_group.append(eerror)
		parent.extra = False
		clevel = parent
		parent = parent.parent

		# pull out the node(s) down the bottom on the right
		# move them up to the discovered level
		group_fields = {}
		group_fields['type'] = 'attachment'
		group_fields['height'] = 'too low'
		group_fields['from parent'] = clevel.label
		# labels of the siblings to the left of clevel, each preceded by a space
		group_fields['from left siblings'] = ''
		for child in parent.subtrees:
			if child == clevel:
				break
			group_fields['from left siblings'] += ' ' + child.label
		group_fields['to parent'] = parent.label
		group_desc = 'attachment too_low %s_instead_of_%s' % (clevel.label, parent.label)
		addendum = []
		for pos, child in enumerate(parent.subtrees):
			if clevel.span[1] <= child.span[1]:
				# insert after a child that ends where clevel ends, otherwise
				# before the first child that ends later
				if clevel.span[1] == child.span[1]:
					pos = pos + 1
				# popping from the right and always inserting at the same
				# position keeps the moved nodes in their original order
				while len(clevel.subtrees) > right + 1:
					node = clevel.subtrees.pop()
					parent.subtrees.insert(pos, node)
					node.parent = parent
					addendum.insert(0, node.label)
				# the list was modified, so stop iterating over it
				break
		group_fields['nodes moving'] = ' '.join(addendum)
		group_desc += ' ' + '_'.join(addendum)
		test_tree.update_span()
		group_fields['ID'] = '|mwme1'
		group_desc += ' |mwme1'
		group_fields['old desc'] = group_desc
		test_tree.check_consistency()
		return group_fields, test_tree
	elif right == len(parent.subtrees) - 1:
		# our missing bracket is to the right

		# if the extra is an NP and everything under it is a word, NP internal structure
		if parent.label == 'NP':
			if parent.parent is not None:
				if parent.parent.label == 'NP' and not parent.parent.extra:
					all_words = True
					for subtree in parent.subtrees:
						if subtree.word is None:
							all_words = False
							break
					if all_words:
						group_fields = {}
						group_fields['type'] = 'NP structure'
						eerror = bracket_errors.get_extra_error(ungrouped, parent)
						for merror in ungrouped:
							if merror.node.span[0] >= parent.span[0]:
								if merror.node.span[1] <= parent.span[1]:
									if merror.missing:
										to_group.append(merror)
										repair_tree.repair_missing_node(merror, test_tree)
						# the lookup can come back empty; do not repair or
						# group None (same guard as the mwme3 case below)
						if eerror is not None:
							to_group.append(eerror)
							repair_tree.repair_extra_node(eerror, test_tree)
						test_tree.update_span()
						group_fields['ID'] = '|mwme2'
						group_fields['old desc'] = 'missing error NP structure |mwme2'
						return group_fields, test_tree

		# no other missing or extra brackets under this extra span
		# attachment, give info
		no_others = True
		eerror = bracket_errors.get_extra_error(ungrouped, parent)
		for oerror in ungrouped:
			if oerror.node.span[0] >= parent.span[0]:
				if oerror.node.span[1] <= parent.span[1]:
					if oerror != error and oerror != eerror:
						no_others = False
						break
		if no_others:
			group_fields = {}
			group_fields['type'] = 'extra under bracket on right'
			group_fields['parent'] = parent.label
			# children starting before the missing bracket are reported as
			# 'extra nodes'; of the rest, those ending before it ends are
			# reported as 'children'
			group_fields['extra nodes'] = ''
			group_fields['children'] = ''
			for subtree in parent.subtrees:
				if subtree.span[0] < error.node.span[0]:
					group_fields['extra nodes'] += ' ' + subtree.label
				elif subtree.span[1] < error.node.span[1]:
					group_fields['children'] += ' ' + subtree.label
			group_fields['ID'] = '|mwme3'
			group_fields['old desc'] = 'extra under bracket on right |mwme3'
			if error is not None:
				to_group.append(error)
				repair_tree.repair_missing_node(error, test_tree)
			if eerror is not None:
				to_group.append(eerror)
				repair_tree.repair_extra_node(eerror, test_tree)
			test_tree.update_span()
			return group_fields, test_tree
	else:
		# our missing bracket is somewhere in the middle
		pass
	return None, test_tree