示例#1
0
def detect_error_types(error_set, gold_tree, test_tree):
    """Group the bracket errors of a test tree into higher-level errors.

    All errors in error_set are pooled into one list, ordered in place by
    bracket_errors.sort_by_depth, and then handed to each aggregator in
    turn.  Passes over the aggregators repeat until one complete pass
    reports no change.

    Arguments:
        error_set: mapping from error kind to a collection of errors.  It
            must have 'miss' and 'extra' entries, and the assertion below
            fails if any other entry contributes errors.
        gold_tree: reference tree, passed through to the aggregators.
        test_tree: tree being evaluated.  The aggregators are given the
            result of test_tree.copy(), not test_tree itself.

    Returns:
        A 4-tuple (grouped, mutable_test, remaining_errors, ungrouped):
        the groups collected by the aggregators, the working tree as
        returned by the last aggregator / matching step, the result of
        bracket_errors.get_errors(gold_tree, mutable_test), and the errors
        left in the pooled list.
    """
    init_error_count = len(error_set['miss']) + len(error_set['extra'])
    ungrouped = [error for etype in error_set for error in error_set[etype]]
    bracket_errors.sort_by_depth(ungrouped)
    # Sanity check: only 'miss' and 'extra' errors are expected in error_set.
    assert len(ungrouped) == init_error_count

    grouped = []
    mutable_test = test_tree.copy()

    # Iterate through the errors until there is no change after an iteration.
    # Note - order of these is intentional
    aggregators = [
        s_unary.unary_error,
        s_single_word.single_word_error,
        s_attachment.attachment_error,
    ]
    changed = True
    while changed:
        changed = False
        # Debugging aid: print mutable_test.colour_repr() and every entry of
        # ungrouped here to trace each pass.
        for func in aggregators:
            tchanged, mutable_test = func(ungrouped, grouped, gold_tree,
                                          mutable_test)
            if tchanged:
                # grouped[-1] is presumably the group the aggregator just
                # added (TODO confirm against the aggregator modules); the
                # matching step gets it together with the pooled errors.
                mutable_test = check_for_matching_errors(
                    ungrouped, grouped[-1], gold_tree, mutable_test)
                changed = True

    remaining_errors = bracket_errors.get_errors(gold_tree, mutable_test)
    return grouped, mutable_test, remaining_errors, ungrouped
def detect_error_types(error_set, gold_tree, test_tree):
	"""Aggregate bracket errors between test_tree and gold_tree into groups.

	The errors of every kind in error_set are flattened into a single list,
	sorted in place with bracket_errors.sort_by_depth, and offered to the
	aggregators one after another.  The aggregator sequence is re-run until
	a whole pass makes no change.

	Arguments:
		error_set: mapping from error kind to a collection of errors.  The
			'miss' and 'extra' entries are required; the assertion below
			fails if any other entry contributes errors.
		gold_tree: reference tree, forwarded to every aggregator.
		test_tree: tree under evaluation.  Work is done on the result of
			test_tree.copy() rather than on test_tree itself.

	Returns:
		A 4-tuple (grouped, mutable_test, remaining_errors, ungrouped):
		the collected groups, the working tree as last returned by an
		aggregator / matching step, whatever
		bracket_errors.get_errors(gold_tree, mutable_test) returns, and
		the errors still in the flattened list.
	"""
	init_error_count = len(error_set['miss']) + len(error_set['extra'])
	ungrouped = [error for etype in error_set for error in error_set[etype]]
	bracket_errors.sort_by_depth(ungrouped)
	# Sanity check: only 'miss' and 'extra' errors are expected in error_set.
	assert len(ungrouped) == init_error_count

	grouped = []
	mutable_test = test_tree.copy()

	# Iterate through the errors until there is no change after an iteration.
	# Note - order of these is intentional
	aggregators = [
		s_unary.unary_error,
		s_single_word.single_word_error,
		s_attachment.attachment_error,
	]
	changed = True
	while changed:
		changed = False
		# Debugging aid: print mutable_test.colour_repr() and every entry of
		# ungrouped here to trace each pass.
		for func in aggregators:
			tchanged, mutable_test = func(ungrouped, grouped, gold_tree, mutable_test)
			if tchanged:
				# grouped[-1] is presumably the group the aggregator just
				# added (TODO confirm against the aggregator modules); the
				# matching step gets it together with the flattened errors.
				mutable_test = check_for_matching_errors(ungrouped, grouped[-1], gold_tree, mutable_test)
				changed = True

	remaining_errors = bracket_errors.get_errors(gold_tree, mutable_test)
	return grouped, mutable_test, remaining_errors, ungrouped
示例#3
0
            simple_tree = ptb.apply_collins_rules(tree)
            if VERBOSE:
                print simple_tree
            test_tree = error_tree.Error_Tree()
            test_tree.set_by_ptb(simple_tree, tree)
            if VERBOSE:
                print test_tree

            gold_words = gold_tree.word_yield()
            test_words = test_tree.word_yield()
            if len(test_words.split()) != len(gold_words.split()):
                print "Sentence lengths do not maych..."
                print "Gold:", gold_words
                print "Test:", test_words

            error_set = bracket_errors.get_errors(gold_tree, test_tree)[0]
            missing = bracket_errors.get_missing_errors(error_set, test_tree)
            print test_tree.colour_repr(missing=missing).strip()
            if len(error_set['miss']) > 0 or len(error_set['extra']) > 0:
                print 'initial errors:', len(error_set['miss']), len(
                    error_set['extra'])
                aggregated_errors = detect_error_types(error_set, gold_tree,
                                                       test_tree)
                for group in aggregated_errors[0]:
                    group.determine_type()
                    print 'Class:', group.classification
                    print 'Fixes:',
                    for error in group.errors:
                        print error
                    error_groups.append(group)
                error_set = bracket_errors.get_errors(gold_tree,
			simple_tree = ptb.apply_collins_rules(tree)
			if VERBOSE:
				print simple_tree
			test_tree = error_tree.Error_Tree()
			test_tree.set_by_ptb(simple_tree, tree)
			if VERBOSE:
				print test_tree

			gold_words = gold_tree.word_yield()
			test_words = test_tree.word_yield()
			if len(test_words.split()) != len(gold_words.split()):
				print "Sentence lengths do not maych..."
				print "Gold:", gold_words
				print "Test:", test_words

			error_set = bracket_errors.get_errors(gold_tree, test_tree)[0]
			missing = bracket_errors.get_missing_errors(error_set, test_tree)
			print test_tree.colour_repr(missing=missing).strip()
			if len(error_set['miss']) > 0 or len(error_set['extra']) > 0:
				print 'initial errors:', len(error_set['miss']), len(error_set['extra'])
				aggregated_errors = detect_error_types(error_set, gold_tree, test_tree)
				for group in aggregated_errors[0]:
					group.determine_type()
					print 'Class:', group.classification
					print 'Fixes:',
					for error in group.errors:
						print error
					error_groups.append(group)
				error_set = bracket_errors.get_errors(gold_tree, aggregated_errors[1])[0]
				missing = bracket_errors.get_missing_errors(error_set, aggregated_errors[1])
				print 'remaining errors:', len(error_set['miss']), len(error_set['extra'])