# ---- Example 1 (scraped-example marker; non-code artifact converted to comment) ----
#!/usr/bin/python3
"""Score a parse model's predicted AMR graphs against the gold test set.

Reads the gold file (GOLD) and the model predictions (PRED) and prints
either a plain SMATCH precision/recall/F1 or the full enhanced scoring
breakdown, depending on the SMATCH_ONLY toggle below.  SMATCH errors are
redirected to a log file instead of stderr.
"""
import setup_run_dir  # this import tricks script to run from 2 levels up
import sys
from amrlib.evaluate.smatch_enhanced import get_entries, compute_smatch, compute_scores
from amrlib.evaluate.smatch_enhanced import redirect_smatch_errors

# Score "nowiki" version, meaning the generated file should not have the :wiki tags added
GOLD = 'amrlib/data/tdata_xfm/test.txt.nowiki'
PRED = 'amrlib/data/model_parse_xfm_bart_large/test-pred.txt'
# Score with the original version meaning the generated files need to have been "wikified"
#GOLD='amrlib/data/tdata_xfm/test.txt'
#PRED='amrlib/data/model_parse_xfm_bart_base/test-pred.txt.wiki'

# Toggle: True -> plain SMATCH only; False -> enhanced scoring breakdown.
# (Replaces the original unidiomatic `if 0:` constant-branch toggle.)
SMATCH_ONLY = False

redirect_smatch_errors('logs/score_smatch_errors.log')
# Run only the smatch score
if SMATCH_ONLY:
    gold_entries = get_entries(GOLD)
    test_entries = get_entries(PRED)
    precision, recall, f_score = compute_smatch(test_entries, gold_entries)
    print('SMATCH -> P: %.3f,  R: %.3f,  F: %.3f' %
          (precision, recall, f_score))
# Compute enhanced scoring
else:
    compute_scores(GOLD, PRED)
    # NOTE(review): this run of statements references names nowhere defined in
    # this file as shown -- bad_graphs, num_non_clipped, pct, os, out_dir,
    # ref_out_fn, ref_in_graphs, clip_index_set, gen_out_fn, gen_out_graphs,
    # penman, NoOpModel.  It appears to be a fragment spliced in from a
    # different script (save filtered reference/generated graphs, then
    # re-score) -- confirm against the original source before running.
    print('%d generated graphs do not deserialize out of %d = %.1f%%' % (len(bad_graphs), num_non_clipped, pct))
    print()

    # Save the reference, omitting any clipped or bad
    ref_fpath = os.path.join(out_dir, ref_out_fn)
    print('Saving', ref_fpath)
    skipped = 0
    with open(ref_fpath, 'w') as f:
        for i, graph in enumerate(ref_in_graphs):
            # Skip entries flagged as bad or clipped so the reference file
            # stays aligned with the generated file written below.
            if i in bad_graphs or i in clip_index_set:
                skipped += 1
                continue
            f.write(graph + '\n\n')  # blank line separates AMR entries
    print('Skipped writing %d as either bad or clipped' % skipped)
    print('Wrote a total of %d reference AMR graphs' % (len(ref_in_graphs) - skipped))
    print()

    # Save the generated
    # NOTE(review): NoOpModel presumably makes penman serialize the graphs
    # without re-interpreting their structure -- TODO confirm.
    gen_fpath = os.path.join(out_dir, gen_out_fn)
    print('Saving', gen_fpath)
    penman.dump(gen_out_graphs, gen_fpath, indent=6, model=NoOpModel())
    print('Wrote a total of %d generated AMR graphs' % len(gen_out_graphs))
    print()

    # Score the resultant files
    print('Scoring the above files with SMATCH')
    gold_entries = get_entries(ref_fpath)
    test_entries = get_entries(gen_fpath)
    precision, recall, f_score = compute_smatch(test_entries, gold_entries)
    print('SMATCH -> P: %.3f,  R: %.3f,  F: %.3f' % (precision, recall, f_score))
# ---- Example 3 (scraped-example marker; non-code artifact converted to comment) ----
    # NOTE(review): fragment -- the enclosing function's `def` line is not
    # visible here; inference, ref_sents, ref_serials, ref_graphs, gold_fpath
    # and pred_fpath must be defined earlier in the original source.
    print('Generating')
    gen_graphs = inference.parse_sents(ref_sents, disable_progress=False)
    assert len(gen_graphs) == len(ref_serials)

    # Save the reference and generated graphs, inserting dummy graphs for entries that are None.
    # Originally these graphs were omitted, but that makes it hard to test after wikification
    # because the graphs will no longer line up with the original file.
    # NOTE(review): files are opened without a `with` block, so they leak on
    # an exception in the loop -- consider a context manager.
    f_ref = open(gold_fpath, 'w')
    f_gen = open(pred_fpath, 'w')
    print('Saving %s and %s' % (gold_fpath, pred_fpath))
    dummies = 0
    for ref_graph, gen_graph in zip(ref_graphs, gen_graphs):
        # If I didn't get a return, form a dummy graph so the file still aligns with the original
        if gen_graph is None:
            dummies += 1
            gen_graph = '# ::snt dummy graph for deserialization failure.\n()'
        f_ref.write(ref_graph + '\n\n')  # blank line separates AMR entries
        f_gen.write(gen_graph + '\n\n')
    f_ref.close()
    f_gen.close()
    print('Out of %d graphs, %d did not deserialize properly.' %
          (len(ref_graphs), dummies))
    print()

    # Run smatch
    gold_entries = get_entries(gold_fpath)
    test_entries = get_entries(pred_fpath)
    precision, recall, f_score = compute_smatch(test_entries, gold_entries)
    print('SMATCH -> P: %.3f,  R: %.3f,  F: %.3f' %
          (precision, recall, f_score))