Пример #1
0
            mprint("End of test input", out, 'err')
            break

        mprint("Sentence %d:" % sent_no, out, 'all')

        gold_text = gold_text.strip()
        test_text = test_text.strip()
        if len(gold_text) == 0:
            mprint("No gold tree", out, 'all')
            continue
        elif len(test_text) == 0:
            mprint("Not parsed", out, 'all')
            continue

        gold_complete_tree = pstree.tree_from_text(gold_text)
        treebanks.ptb_cleaning(gold_complete_tree)
        gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)
        if gold_tree is None:
            mprint("Empty gold tree", out, 'all')
            mprint(gold_complete_tree.__repr__(), out, 'all')
            mprint(gold_tree.__repr__(), out, 'all')
            continue

        if '()' in test_text:
            mprint("() test tree", out, 'all')
            continue
        test_complete_tree = pstree.tree_from_text(test_text)
        treebanks.ptb_cleaning(test_complete_tree)
        test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
        if test_tree is None:
            mprint("Empty test tree", out, 'all')
		mprint("Sentence %d:" % sent_no, out, 'all')

		gold_text = gold_text.strip()
		test_text = test_text.strip()
		if len(gold_text) == 0:
			mprint("No gold tree", out, 'all')
			continue
		elif len(test_text) == 0:
			mprint("Not parsed", out, 'all')
			continue

		gold_complete_tree = pstree.tree_from_text(gold_text)
		tree = treebanks.homogenise_tree(gold_complete_tree, False)
		treebanks.remove_traces(tree)
		treebanks.remove_function_tags(tree)
		treebanks.ptb_cleaning(tree)
		treebanks.remove_trivial_unaries(tree)
		gold_tree = tree
		if gold_tree is None:
			mprint("Empty gold tree", out, 'all')
			mprint(gold_complete_tree.__repr__(), out, 'all')
			mprint(gold_tree.__repr__(), out, 'all')
			continue

		if '()' in test_text:
			mprint("() test tree", out, 'all')
			continue
		test_complete_tree = pstree.tree_from_text(test_text)
		tree = treebanks.homogenise_tree(test_complete_tree, False)
		treebanks.remove_traces(tree)
		treebanks.remove_function_tags(tree)
			mprint("End of test input", out, 'err')
			break

		mprint("Sentence %d:" % sent_no, out, 'all')

		gold_text = gold_text.strip()
		test_text = test_text.strip()
		if len(gold_text) == 0:
			mprint("No gold tree", out, 'all')
			continue
		elif len(test_text) == 0:
			mprint("Not parsed", out, 'all')
			continue

		gold_complete_tree = pstree.tree_from_text(gold_text)
		treebanks.ptb_cleaning(gold_complete_tree)
		gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)
		if gold_tree is None:
			mprint("Empty gold tree", out, 'all')
			mprint(gold_complete_tree.__repr__(), out, 'all')
			mprint(gold_tree.__repr__(), out, 'all')
			continue

		if '()' in test_text:
			mprint("() test tree", out, 'all')
			continue
		test_complete_tree = pstree.tree_from_text(test_text)
		treebanks.ptb_cleaning(test_complete_tree)
		test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
		if test_tree is None:
			mprint("Empty test tree", out, 'all')
Пример #4
0
def compute_overall_score(gold_file, test_file):
    """Score test parses against gold parses and print precision/recall/F.

    Both files are read line by line and paired by position (line i of
    ``gold_file`` with line i of ``test_file``).  For every pair where both
    lines are non-blank, the strict counts (``counts_for_prf``) and the
    relaxed counts (``relaxed_counts_for_prf``) are computed, printed as
    percentages for that sentence, and accumulated.  After the last pair the
    accumulated totals are printed as overall scores.

    Args:
        gold_file: Path of the file holding the gold trees, one per line.
        test_file: Path of the file holding the test trees, one per line.

    Raises:
        AssertionError: If the two files do not have the same number of
            lines.
    """
    # Context managers close the handles even if reading fails.
    with open(gold_file) as gold_handle:
        gold_in = gold_handle.readlines()
    with open(test_file) as test_handle:
        test_in = test_handle.readlines()

    assert len(gold_in) == len(test_in), (
        "gold and test files differ in length: %d vs %d lines"
        % (len(gold_in), len(test_in)))

    # Running [match, gold, test] totals for each metric.
    strict_totals = [0, 0, 0]
    relaxed_totals = [0, 0, 0]

    # readlines() never yields an empty string (every line keeps at least
    # its newline or some content), so no end-of-input check is needed here.
    for i, (gold_text, test_text) in enumerate(zip(gold_in, test_in)):
        print("Sent: " + str(i))

        gold_text = gold_text.strip()
        test_text = test_text.strip()
        if not gold_text or not test_text:
            # Blank line on either side: nothing to score for this pair.
            continue

        # NOTE: treebanks.apply_collins_rules is deliberately not applied;
        # the cleaned trees are scored directly.
        gold_tree = _load_eval_tree(gold_text)
        test_tree = _load_eval_tree(test_text)

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            # Only a warning: the pair is still scored below.
            print("Sentence lengths do not match in sentence..." + str(i))
            print("Gold: " + gold_words.__repr__())
            print("Test: " + test_words.__repr__())

        match_strict, gold_strict, test_strict, _, _ = (
            relaxed_parse_errors.counts_for_prf(test_tree, gold_tree))
        match_relaxed, gold_relaxed, test_relaxed, _, _ = (
            relaxed_parse_errors.relaxed_counts_for_prf(test_tree, gold_tree))

        strict_totals[0] += match_strict
        strict_totals[1] += gold_strict
        strict_totals[2] += test_strict
        print("Eval--Strict Evalb: %.2f  %.2f  %.2f"
              % _prf_percentages(match_strict, gold_strict, test_strict))

        relaxed_totals[0] += match_relaxed
        relaxed_totals[1] += gold_relaxed
        relaxed_totals[2] += test_relaxed
        print("Eval--Relaxed Edit: %.2f  %.2f  %.2f"
              % _prf_percentages(match_relaxed, gold_relaxed, test_relaxed))

    print("Overall--Standard EVALB %s: %.2f  %.2f  %.2f"
          % (('out',) + _prf_percentages(*strict_totals)))
    print("Overall--Relaxed EDIT %s: %.2f  %.2f  %.2f"
          % (('out',) + _prf_percentages(*relaxed_totals)))


def _load_eval_tree(text):
    """Parse one stripped tree line and prepare it for scoring.

    Applies, in order, ``pstree.tree_from_text`` (empty labels allowed),
    ``treebanks.homogenise_tree`` and ``treebanks.ptb_cleaning``.
    """
    tree = pstree.tree_from_text(text, allow_empty_labels=True)
    tree = treebanks.homogenise_tree(tree)
    treebanks.ptb_cleaning(tree)
    return tree


def _prf_percentages(match, gold, test):
    """Return ``nlp_eval.calc_prf`` precision, recall and F scaled by 100."""
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    return (p * 100, r * 100, f * 100)