mprint("End of test input", out, 'err') break mprint("Sentence %d:" % sent_no, out, 'all') gold_text = gold_text.strip() test_text = test_text.strip() if len(gold_text) == 0: mprint("No gold tree", out, 'all') continue elif len(test_text) == 0: mprint("Not parsed", out, 'all') continue gold_complete_tree = pstree.tree_from_text(gold_text) treebanks.ptb_cleaning(gold_complete_tree) gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False) if gold_tree is None: mprint("Empty gold tree", out, 'all') mprint(gold_complete_tree.__repr__(), out, 'all') mprint(gold_tree.__repr__(), out, 'all') continue if '()' in test_text: mprint("() test tree", out, 'all') continue test_complete_tree = pstree.tree_from_text(test_text) treebanks.ptb_cleaning(test_complete_tree) test_tree = treebanks.apply_collins_rules(test_complete_tree, False) if test_tree is None: mprint("Empty test tree", out, 'all')
mprint("Sentence %d:" % sent_no, out, 'all') gold_text = gold_text.strip() test_text = test_text.strip() if len(gold_text) == 0: mprint("No gold tree", out, 'all') continue elif len(test_text) == 0: mprint("Not parsed", out, 'all') continue gold_complete_tree = pstree.tree_from_text(gold_text) tree = treebanks.homogenise_tree(gold_complete_tree, False) treebanks.remove_traces(tree) treebanks.remove_function_tags(tree) treebanks.ptb_cleaning(tree) treebanks.remove_trivial_unaries(tree) gold_tree = tree if gold_tree is None: mprint("Empty gold tree", out, 'all') mprint(gold_complete_tree.__repr__(), out, 'all') mprint(gold_tree.__repr__(), out, 'all') continue if '()' in test_text: mprint("() test tree", out, 'all') continue test_complete_tree = pstree.tree_from_text(test_text) tree = treebanks.homogenise_tree(test_complete_tree, False) treebanks.remove_traces(tree) treebanks.remove_function_tags(tree)
def _load_clean_tree(text):
    """Parse one bracketed tree and apply the shared clean-up steps.

    The text is parsed with empty labels allowed, passed through
    ``treebanks.homogenise_tree`` and then cleaned in place with
    ``treebanks.ptb_cleaning``.  ``treebanks.apply_collins_rules`` is not
    applied here.
    """
    tree = pstree.tree_from_text(text, allow_empty_labels=True)
    tree = treebanks.homogenise_tree(tree)
    treebanks.ptb_cleaning(tree)
    return tree


def compute_overall_score(gold_file, test_file):
    """Print per-sentence and overall strict / relaxed P, R, F scores.

    Line ``i`` of ``gold_file`` is compared against line ``i`` of
    ``test_file``; each line is expected to hold one bracketed tree.  Pairs
    in which either side is blank after stripping are skipped (their
    "Sent:" line is still printed).

    Args:
        gold_file: path to the file of gold trees, one per line.
        test_file: path to the file of test trees, one per line.

    Returns:
        None.  All results are written to standard output.

    Raises:
        AssertionError: if the two files do not have the same number of
            lines.
    """
    # Context managers make sure both handles are closed as soon as the
    # lines have been read, instead of relying on garbage collection.
    with open(gold_file) as gold_handle:
        gold_in = gold_handle.readlines()
    with open(test_file) as test_handle:
        test_in = test_handle.readlines()

    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if len(gold_in) != len(test_in):
        raise AssertionError(
            "gold and test files differ in length: %d vs %d lines"
            % (len(gold_in), len(test_in)))

    # Running totals, each stored as [match, gold, test].
    stats = {'out_evalb': [0, 0, 0], 'out_relaxed': [0, 0, 0]}

    for i, (gold_line, test_line) in enumerate(zip(gold_in, test_in)):
        print("Sent: " + str(i))

        # readlines() never yields an empty string (every line keeps at
        # least its newline), so only the post-strip emptiness test is
        # needed to skip blank lines.
        gold_text = gold_line.strip()
        test_text = test_line.strip()
        if not gold_text or not test_text:
            continue

        gold_tree = _load_clean_tree(gold_text)
        test_tree = _load_clean_tree(test_text)

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            # NOTE(review): a token-count mismatch is only reported; the
            # sentence is still scored below -- confirm this is intended.
            print("Sentence lengths do not match in sentence..." + str(i))
            print("Gold: " + repr(gold_words))
            print("Test: " + repr(test_words))

        match_strict, gold_strict, test_strict, _, _ = \
            relaxed_parse_errors.counts_for_prf(test_tree, gold_tree)
        match_relaxed, gold_relaxed, test_relaxed, _, _ = \
            relaxed_parse_errors.relaxed_counts_for_prf(test_tree, gold_tree)

        stats['out_evalb'][0] += match_strict
        stats['out_evalb'][1] += gold_strict
        stats['out_evalb'][2] += test_strict
        p, r, f = nlp_eval.calc_prf(match_strict, gold_strict, test_strict)
        print("Eval--Strict Evalb: %.2f %.2f %.2f"
              % (p * 100, r * 100, f * 100))

        stats['out_relaxed'][0] += match_relaxed
        stats['out_relaxed'][1] += gold_relaxed
        stats['out_relaxed'][2] += test_relaxed
        p, r, f = nlp_eval.calc_prf(match_relaxed, gold_relaxed, test_relaxed)
        print("Eval--Relaxed Edit: %.2f %.2f %.2f"
              % (p * 100, r * 100, f * 100))

    # Corpus-level scores computed from the accumulated counts.
    match, gold, test = stats['out_evalb']
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print("Overall--Standard EVALB %s: %.2f %.2f %.2f"
          % ('out', p * 100, r * 100, f * 100))

    match, gold, test = stats['out_relaxed']
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print("Overall--Relaxed EDIT %s: %.2f %.2f %.2f"
          % ('out', p * 100, r * 100, f * 100))