Example #1
import penman
from penman.models.noop import NoOpModel  # assumed import; the no-op model keeps graphs exactly as written

pgraph_cache = {}  # module-level cache of parsed graphs, keyed by file path


def load_amrs_cached(amr_fpath):
    global pgraph_cache
    pgraphs = pgraph_cache.get(amr_fpath, None)
    if pgraphs is None:
        pgraphs = penman.load(amr_fpath, model=NoOpModel())
        pgraph_cache[amr_fpath] = pgraphs
    return pgraphs
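A quick usage sketch (the corpus path is hypothetical): repeated calls with the same path parse the file only once.

# First call parses the file; the second returns the cached list
graphs = load_amrs_cached('corpus/train.txt')
graphs_again = load_amrs_cached('corpus/train.txt')
assert graphs is graphs_again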
Example #2
def wikify_file(self, infpath, outfpath):
    print('Loading', infpath)
    pgraphs = penman.load(infpath)
    winfo_list = self.find_wiki_nodes_for_graphs(pgraphs)
    print('Running BLINK to get wiki values')
    winfo_list = self.predict_blink(winfo_list)
    print('Adding and saving graphs to', outfpath)
    pgraphs = self.add_wiki_to_graphs(pgraphs, winfo_list)
    penman.dump(pgraphs, outfpath, indent=6)
Example #3
def read_from_file(filepath, graph=False):
    graphs = penman.load(filepath, cls=CODEC)
    if graph:
        return graphs
    codec = CODEC()
    amrs = []
    for g in graphs:
        amr = codec.encode(g)
        amrs.append(amr)
    return amrs
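Depending on the graph flag, read_from_file returns either the parsed penman Graph objects or their re-serialized strings: codec.encode(g) turns each Graph back into PENMAN notation (CODEC is assumed to be the project's penman codec class, e.g. penman.PENMANCodec).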
Example #4
def load(fh, model):
    """
    Deserialize PENMAN graphs from a file (handle or filename)

    Args:
        fh: filename or file object
        model: Xmrs subclass instantiated from decoded triples
    Returns:
        a list of objects (of class *model*)
    """
    graphs = penman.load(fh, cls=XMRSCodec)
    xs = [model.from_triples(g.triples()) for g in graphs]
    return xs
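Note: calling g.triples() as a method reflects penman versions before 1.0; in penman 1.x, triples is a plain attribute (compare Example #6 below, which uses g.triples).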
Example #5
def read_file(source):
    # read preprocessed amr file
    token, lemma, pos, ner, amrs = [], [], [], [], []
    graphs = penman.load(source)
    logger.info('read from %s, %d amrs' % (source, len(graphs)))
    for g in graphs:
        # Load the metadata
        token.append(json.loads(g.metadata['tokens']))
        lemma.append(json.loads(g.metadata['lemmas']))
        pos.append(json.loads(g.metadata['pos_tags']))
        ner.append(json.loads(g.metadata['ner_tags']))
        # Build the AMRGraph from the penman graph
        amr_graph = AMRGraph(g)
        amrs.append(amr_graph)
    return amrs, token, lemma, pos, ner
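penman parses the '# ::key value' comment lines above each graph into the g.metadata dict as raw strings; this corpus was preprocessed so that tokens, lemmas, pos_tags and ner_tags hold JSON-encoded lists, hence the json.loads calls.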
Example #6
def load(source):
    """
    Deserialize PENMAN graphs from a file (handle or filename)

    Args:
        source: filename or file object
    Returns:
        a list of DMRS objects
    """
    if not hasattr(source, 'read'):
        source = Path(source).expanduser()
    try:
        graphs = penman.load(source)
    except penman.PenmanError as exc:
        raise PyDelphinException('could not decode with Penman') from exc
    xs = [from_triples(g.triples) for g in graphs]
    return xs
Example #7
File: amr_rw.py    Project: bjascob/amrlib
import penman
from penman.model import Model                       # penman's default model
from penman.models.noop import model as noop_model   # assumed imports from the penman library


def load_amr_file(source, dereify=None, remove_wiki=False):
    assert remove_wiki in (False, 'replace', 'remove')
    # Select the model to use
    if dereify is None or dereify:  # None or True (odd way to do default logic)
        model = Model()  # default penman model, same as load(..., model=None)
    else:  # False
        model = noop_model
    # Load the data
    out = penman.load(source=source, model=model)
    # Remove or replace the wiki tags
    if remove_wiki == 'remove':
        for i in range(len(out)):
            out[i] = _remove_wiki(out[i])
    elif remove_wiki == 'replace':
        for i in range(len(out)):
            out[i] = _replace_wiki(out[i])
    return out
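A brief usage sketch (the corpus path is hypothetical):

# Parse a corpus with every graph's :wiki attributes stripped out
graphs = load_amr_file('data/dev.txt', remove_wiki='remove')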
Example #8
def gather_test_graphs():
    # These are for amr_annotation_3.0/data/multisentence/ms-amr-split/test/msamr_dfa_007.xml
    fn = 'data/amr_annotation_3.0/data/amrs/unsplit/amr-release-3.0-amrs-dfa.txt'
    gids = [
        "DF-200-192400-625_7046.1", "DF-200-192400-625_7046.2",
        "DF-200-192400-625_7046.3", "DF-200-192400-625_7046.4",
        "DF-200-192400-625_7046.5", "DF-200-192400-625_7046.6",
        "DF-200-192400-625_7046.7", "DF-200-192400-625_7046.8",
        "DF-200-192400-625_7046.9", "DF-200-192400-625_7046.10",
        "DF-200-192400-625_7046.11", "DF-200-192400-625_7046.12",
        "DF-200-192400-625_7046.13", "DF-200-192400-625_7046.14",
        "DF-200-192400-625_7046.15", "DF-200-192400-625_7046.16",
        "DF-200-192400-625_7046.17", "DF-200-192400-625_7046.18"
    ]
    # Load the AMR file with penman and then extract the specific ids and put them in order
    pgraphs = penman.load(fn, model=NoOpModel())
    ordered_pgraphs = [None] * len(gids)
    for pgraph in pgraphs:
        gid = pgraph.metadata['id']
        doc_idx = gids.index(gid) if gid in gids else None
        if doc_idx is not None:
            ordered_pgraphs[doc_idx] = pgraph
    assert None not in ordered_pgraphs
    return ordered_pgraphs
Example #9
	top = [(x[0], x[1], x[1] * 100 / len(sample)) for x in counter.most_common(top_k)]
	return top


if __name__ == "__main__":


	fname = sys.argv[1]

	print("Loading Verb-Brasil framesets")
	framesets = []
	with open("verbo-brasil.dic","r", encoding="utf8") as f:
		framesets = [line.strip() for line in f]
	print(f'Verb-Brasil ({len(framesets)}) loaded')

	amrs = penman.load(fname)

	nodes = []
	instance_nodes = []
	edges = []
	tokens = []

	freq_concepts = {"general concepts": 0, "named-entities": 0, "modal verbs": 0,
					"amr-unknown": 0, "Verbo-Brasil framesets": 0, "constants": 0,
					"negative": 0, "special frames": 0}

	for amr in amrs:

		if "snt" in amr.metadata:
			tokens += [token.lower() for token in amr.metadata["snt"].split()]
		else:
			pass  # snippet truncated here in the original listing
Example #10
def wikify_file(self, infn, outfn):
    new_graphs = []
    for graph in tqdm(penman.load(infn)):
        new_graph = self.wikify_graph(graph)
        new_graphs.append(new_graph)
    penman.dump(new_graphs, outfn, indent=6)
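penman.dump serializes the updated graphs back to the output file, one block per graph; the indent argument controls how deeply nested relations are indented.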
Example #11
def get_sents_from_AMR(infn):
    sents = []
    for graph in penman.load(infn):
        sents.append(graph.metadata['snt'])
    return sents
Example #12
if __name__ == '__main__':
    if 1:  # dev dataset
        gold_alignments_fn = 'amrlib/alignments/isi_hand_alignments/dev-gold.txt'
        test_amr_fn = 'amrlib/data/alignments/dev-aligned.txt'
    else:  # test dataset
        gold_alignments_fn = 'amrlib/alignments/isi_hand_alignments/test-gold.txt'
        test_amr_fn = 'amrlib/data/alignments/test-aligned.txt'

    # Load the gold alignments
    print('Loading alignments from', gold_alignments_fn)
    gold_alignments, gold_ids = load_gold_alignments(gold_alignments_fn)

    # Load the aligned corpus and extract the data
    print('Loading corpus data from', test_amr_fn)
    pgraphs = penman.load(test_amr_fn, model=NoOpModel())
    test_alignments = [
        g.metadata['rbw_alignments'].strip().split() for g in pgraphs
    ]
    test_alignments = [a for a in test_alignments if a]
    test_ids = [g.metadata['id'] for g in pgraphs]

    # Sanity check that things match up
    assert len(gold_alignments) == len(test_alignments), '%s != %s' % (
        len(gold_alignments), len(test_alignments))
    assert len(gold_alignments) == 100, len(gold_alignments)
    for gold_id, test_id in zip(gold_ids, test_ids):
        assert gold_id == test_id, '%s != %s' % (gold_id, test_id)
    print('Gold and Test alignment files match')

    # Score against isi automated alignments
Example #13
#!/usr/bin/python3
import setup_run_dir    # Set the working directory and python sys.path to 2 levels above
import os
import penman


if __name__ == '__main__':
    data_dir  = 'amrlib/data/LDC2020T02'

    for fn in ('dev.txt', 'test.txt', 'train.txt'):
        fpath = os.path.join(data_dir, fn)
        print('Loading', fpath)
        graphs = penman.load(fpath)
        print('Loaded {:,} graphs'.format(len(graphs)))
        print()
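Across all of these examples the pattern is the same: penman.load accepts either a filename or an open file handle and returns a list of penman.Graph objects, so len(graphs) is simply the number of AMR entries in the file.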