def compare(gold_text, test_text, out_dict, error_counts, classify):
	""" Check two trees given as text and, if they line up, compare them.

	Both texts are stripped first.  The checks run in this order, and the
	first one that fails reports its message and ends the call:
	  - empty gold text: "No gold tree" on 'out' and 'err'
	  - empty test text: "Not parsed" on 'out' and 'err'
	  - either tree reads back as None: "Not parsed, but had output" on
	    'out', 'err' and 'init_errors'
	  - word counts of the two yields differ: a three line notice on
	    'out' and 'err'
	When every check passes the trees are handed to compare_trees.

	gold_text, test_text -- the two trees in text form
	out_dict -- maps stream names to writable streams; this function uses
	    the keys 'out', 'err' and 'init_errors'
	error_counts, classify -- passed to compare_trees untouched

	Always returns None.
	"""
	def report(message, *stream_names):
		# Streams are looked up one at a time so that the output order is
		# exactly the order of the names given.
		for stream_name in stream_names:
			print >> out_dict[stream_name], message

	gold_text = gold_text.strip()
	test_text = test_text.strip()
	if not gold_text:
		report("No gold tree", 'out', 'err')
		return
	if not test_text:
		report("Not parsed", 'out', 'err')
		return

	# Both trees are read before either result is inspected.
	gold_tree = read_tree(gold_text, out_dict, 'gold')
	test_tree = read_tree(test_text, out_dict, 'test')
	if gold_tree is None or test_tree is None:
		report("Not parsed, but had output", 'out', 'err', 'init_errors')
		return

	# The rendered test tree is logged even if the length check below fails.
	init_stream = out_dict['init_errors']
	coloured = render_tree.text_coloured_errors(test_tree, gold_tree)
	print >> init_stream, coloured.strip()

	gold_words = gold_tree.word_yield()
	test_words = test_tree.word_yield()
	test_length = len(test_words.split())
	gold_length = len(gold_words.split())
	if test_length == gold_length:
		compare_trees(gold_tree, test_tree, out_dict, error_counts, classify)
		return

	for stream in (out_dict['out'], out_dict['err']):
		print >> stream, "Sentence lengths do not match..."
		print >> stream, "Gold: " + gold_words
		print >> stream, "Test: " + test_words
def compare(gold_text, test_text, out_dict, error_counts, classify):
    """ Validate a gold/test pair of textual trees, then compare them.

    After stripping both texts, the function bails out (returning None)
    with a message when:
      - the gold text is empty ("No gold tree" to 'out' and 'err'),
      - the test text is empty ("Not parsed" to 'out' and 'err'),
      - read_tree gives None for either tree ("Not parsed, but had
        output" to 'out', 'err' and 'init_errors'),
      - the two word yields split into different numbers of words
        (a length notice plus both yields, to 'out' and 'err').
    Otherwise compare_trees is called with the two trees.

    out_dict maps stream names to writable streams.  error_counts and
    classify are only forwarded to compare_trees.
    """
    gold_text = gold_text.strip()
    test_text = test_text.strip()

    if not gold_text:
        for key in ('out', 'err'):
            print >> out_dict[key], "No gold tree"
        return
    if not test_text:
        for key in ('out', 'err'):
            print >> out_dict[key], "Not parsed"
        return

    # Read both trees up front; the None check comes only afterwards.
    gold_tree = read_tree(gold_text, out_dict, 'gold')
    test_tree = read_tree(test_text, out_dict, 'test')
    unreadable = gold_tree is None or test_tree is None
    if unreadable:
        for key in ('out', 'err', 'init_errors'):
            print >> out_dict[key], "Not parsed, but had output"
        return

    # Record the rendered test tree before checking the sentence lengths.
    init_stream = out_dict['init_errors']
    rendered = render_tree.text_coloured_errors(test_tree, gold_tree)
    print >> init_stream, rendered.strip()

    gold_words = gold_tree.word_yield()
    test_words = test_tree.word_yield()
    test_count = len(test_words.split())
    gold_count = len(gold_words.split())
    if test_count == gold_count:
        compare_trees(gold_tree, test_tree, out_dict, error_counts, classify)
        return

    streams = [out_dict['out'], out_dict['err']]
    for stream in streams:
        print >> stream, "Sentence lengths do not match..."
        print >> stream, "Gold: " + gold_words
        print >> stream, "Test: " + test_words
def compare_trees(gold_tree, test_tree, out_dict, error_counts, classify):
    """ Report the errors between two trees and the search path found.

    Writes the number of initial errors (from parse_errors.get_errors) and
    the two search statistics returned by greedy_search to 'out'.  When the
    search yields a path, the test and gold trees go to 'test_trees' and
    'gold_trees', every entry after the first is listed as an error line,
    and, if the path has more than one entry, every entry is printed as a
    step and its value at index 2 is appended to
    error_counts[<classified_type>].  When there is no path, "no path
    found" is written instead.  An empty line is always written to 'err'
    and then to 'out' at the end.

    Each path entry is indexed as [0] tree, [1] info mapping holding
    'classified_type', [2] the value recorded in error_counts.
    classify is passed straight to greedy_search.
    """
    initial_errors = parse_errors.get_errors(test_tree, gold_tree)
    out = out_dict['out']
    print >> out, "{} Initial errors".format(len(initial_errors))

    search_stats, steps = greedy_search(gold_tree, test_tree, classify)
    print >> out, "{} on fringe, {} iterations".format(*search_stats)

    if steps is None:
        print >> out, "no path found"
    else:
        print >> out_dict['test_trees'], test_tree
        print >> out_dict['gold_trees'], gold_tree

        # Summary lines skip the first entry of the path.
        for step in steps[1:]:
            summary = "{} Error:{}".format(
                str(step[2]), step[1]['classified_type'])
            print >> out, summary

        # The detailed listing covers every entry, first one included.
        if len(steps) > 1:
            for step in steps:
                info = step[1]
                label = info['classified_type']
                print >> out, "Step:{}".format(label)
                error_counts[label].append(step[2])
                print >> out, info
                rendered = render_tree.text_coloured_errors(
                    step[0], gold=gold_tree)
                print >> out, rendered.strip()

    print >> out_dict['err'], ""
    print >> out, ""
def compare_trees(gold_tree, test_tree, out_dict, error_counts, classify):
	""" Compare two trees and log the sequence of steps found by the search.

	Output written to out_dict['out']:
	  - the count of errors returned by parse_errors.get_errors
	  - the two statistics returned by greedy_search
	  - with a path: one "Error" line per entry after the first, then (only
	    if the path has more than one entry) a "Step" block for every entry
	  - without a path: "no path found"
	With a path, the test and gold trees are also written to
	out_dict['test_trees'] and out_dict['gold_trees'].  The call ends by
	writing an empty line to 'err' and then to 'out'.

	error_counts is updated in place: for each step block, the entry's
	value at index 2 is appended to error_counts[<classified_type>].
	classify is forwarded to greedy_search.
	"""
	found_errors = parse_errors.get_errors(test_tree, gold_tree)
	log = out_dict['out']
	print >> log, "{} Initial errors".format(len(found_errors))

	counts, route = greedy_search(gold_tree, test_tree, classify)
	print >> log, "{} on fringe, {} iterations".format(*counts)

	if route is None:
		print >> log, "no path found"
	else:
		print >> out_dict['test_trees'], test_tree
		print >> out_dict['gold_trees'], gold_tree

		# The first entry of the route is left out of the summary lines.
		for entry in route[1:]:
			kind = entry[1]['classified_type']
			print >> log, "{} Error:{}".format(str(entry[2]), kind)

		# The step blocks include the first entry as well.
		if len(route) > 1:
			for entry in route:
				details = entry[1]
				kind = details['classified_type']
				print >> log, "Step:{}".format(kind)
				error_counts[kind].append(entry[2])
				print >> log, details
				picture = render_tree.text_coloured_errors(entry[0], gold=gold_tree)
				print >> log, picture.strip()

	print >> out_dict['err'], ""
	print >> log, ""
示例#5
0
        if test_tree is None:
            mprint("Empty test tree", out, 'all')
            mprint(test_complete_tree.__repr__(), out, 'all')
            mprint(test_tree.__repr__(), out, 'all')
            continue

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            mprint("Sentence lengths do not match...", out, 'all')
            mprint("Gold: " + gold_words.__repr__(), out, 'all')
            mprint("Test: " + test_words.__repr__(), out, 'all')

        mprint("After applying collins rules:", out, 'out')
        mprint(
            render_tree.text_coloured_errors(test_tree, gold_tree).strip(),
            out, 'out')
        match, gold, test, crossing, POS = parse_errors.counts_for_prf(
            test_tree, gold_tree)
        stats['out'][0] += match
        stats['out'][1] += gold
        stats['out'][2] += test
        p, r, f = nlp_eval.calc_prf(match, gold, test)
        mprint("Eval: %.2f  %.2f  %.2f" % (p * 100, r * 100, f * 100), out,
               'out')

        # Work out the minimal span to show all errors
        gold_spans = set([(node.label, node.span[0], node.span[1])
                          for node in gold_tree.get_nodes()])
        test_spans = set([(node.label, node.span[0], node.span[1])
                          for node in test_tree.get_nodes()])
		test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
		if test_tree is None:
			mprint("Empty test tree", out, 'all')
			mprint(test_complete_tree.__repr__(), out, 'all')
			mprint(test_tree.__repr__(), out, 'all')
			continue

		gold_words = gold_tree.word_yield()
		test_words = test_tree.word_yield()
		if len(test_words.split()) != len(gold_words.split()):
			mprint("Sentence lengths do not match...", out, 'all')
			mprint("Gold: " + gold_words.__repr__(), out, 'all')
			mprint("Test: " + test_words.__repr__(), out, 'all')

		mprint("After applying collins rules:", out, 'out')
		mprint(render_tree.text_coloured_errors(test_tree, gold_tree).strip(), out, 'out')
		match, gold, test, crossing, POS = parse_errors.counts_for_prf(test_tree, gold_tree)
		stats['out'][0] += match
		stats['out'][1] += gold
		stats['out'][2] += test
		p, r, f = nlp_eval.calc_prf(match, gold, test)
		mprint("Eval: %.2f  %.2f  %.2f" % (p*100, r*100, f*100), out, 'out')

		# Work out the minimal span to show all errors
		gold_spans = set([(node.label, node.span[0], node.span[1]) for node in gold_tree.get_nodes()])
		test_spans = set([(node.label, node.span[0], node.span[1]) for node in test_tree.get_nodes()])
		diff = gold_spans.symmetric_difference(test_spans)
		width = [1e5, -1]
		for span in diff:
			if span[2] - span[1] == 1:
				continue
示例#7
0
		test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
		if test_tree is None:
			mprint("Empty test tree", out, 'all')
			mprint(test_complete_tree.__repr__(), out, 'all')
			mprint(test_tree.__repr__(), out, 'all')
			continue

		gold_words = gold_tree.word_yield()
		test_words = test_tree.word_yield()
		if len(test_words.split()) != len(gold_words.split()):
			mprint("Sentence lengths do not match...", out, 'all')
			mprint("Gold: " + gold_words.__repr__(), out, 'all')
			mprint("Test: " + test_words.__repr__(), out, 'all')

		mprint("After applying collins rules:", out, 'out')
		mprint(render_tree.text_coloured_errors(test_tree, gold_tree).strip(), out, 'out')
		match, gold, test, crossing, POS = parse_errors.counts_for_prf(test_tree, gold_tree)
		stats['out'][0] += match
		stats['out'][1] += gold
		stats['out'][2] += test
		p, r, f = nlp_eval.calc_prf(match, gold, test)
		mprint("Eval: %.2f  %.2f  %.2f" % (p*100, r*100, f*100), out, 'out')

		# Work out the minimal span to show all errors
		gold_spans = set([(node.label, node.span[0], node.span[1]) for node in gold_tree.get_nodes()])
		test_spans = set([(node.label, node.span[0], node.span[1]) for node in test_tree.get_nodes()])
		diff = gold_spans.symmetric_difference(test_spans)
		width = [1e5, -1]
		for span in diff:
			if span[2] - span[1] == 1:
				continue