Exemplo n.º 1
0
from edge_to_irtg import edge2irtg
from convert_irtg_to_mrp import get_edges, get_id2lex, get_input, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from process_c import compress_c_edge, decompress_c

# Command-line driver: argv[1] is read as one JSON record per line, argv[2]
# receives one rebuilt JSON record per line.
infile = sys.argv[1]
outfile = sys.argv[2]

# Top-level record fields that are copied verbatim from each input record onto
# the corresponding output record.
non_deducible = ["id", "flavor", "framework", "version", "time"]

# The output file is opened once for the whole run instead of once per record;
# append mode keeps whatever the file already contains.
with open(infile, 'r') as f, open(outfile, 'a') as out:
    for line in f:
        mrp_dict = json.loads(line)
        extras = {
            category: value
            for category, value in mrp_dict.items()
            if category in non_deducible
        }
        edges = get_mrp_edges(mrp_dict)
        labels = get_id2lex(mrp_dict)
        decompressed_c = decompress_c(edges)
        raised_d = raise_edge(decompressed_c, 'D', ['P', 'S'], mark=True)
        print(labels)
        # Any edge source that has no label yet receives a placeholder label.
        for (u, _) in decompressed_c.keys():
            if u not in labels:
                labels[u] = 'Non-Terminal'
        print(labels)
        postprocessed_mrp = irtg2mrp(raised_d, labels)
        # Restore the fields saved from the input record.
        for key, value in extras.items():
            postprocessed_mrp[key] = value
        out.write(json.dumps(postprocessed_mrp))
        out.write('\n')
Exemplo n.º 2
0
 framework = mrp_dict["framework"]
 version = mrp_dict["version"]
 time = mrp_dict["time"]
 for token_file in os.listdir(tokenized_dir):
     #print(token_file)
     if token_file[:3] == filename[:3]:
         companion_data = json.load(
             open(tokenized_dir + token_file,
                  encoding='utf-8'))
         if id not in companion_data.keys():
             continue
         else:
             spans = ' '.join(
                 list(companion_data[id]["spans"].keys()))
             tokens = companion_data[id]['tokenization']
             edges = get_mrp_edges(mrp_dict,
                                   get_remote=False)
             edges = eliminate_h(edges)
             labels = get_id2lex(mrp_dict)
             compressed_edges = compress_c_edge(edges)
             compressed_labels = update_id_labels(
                 compressed_edges, labels)
             irtg_format_compressed = edge2irtg(
                 compressed_edges, labels)
             node_tokens = node_to_token_index(
                 companion_data, mrp_dict,
                 compressed_labels, id)
             aligned = percolate(compressed_edges,
                                 priority_queue,
                                 compressed_labels)
             alignments = ''
             for alignment in aligned.keys():
Exemplo n.º 3
0
import sys
import json
import collections
import os
import random
from tqdm import tqdm

from edge_to_irtg import edge2irtg
from get_edges_from_mrp import get_id2lex, get_mrp_edges
from convert_irtg_to_mrp import get_edges, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from eliminate_h_top import eliminate_h
from a_star_mrp import *

# Path of the input file (one JSON record per line), from the command line.
mrp_in = sys.argv[1]

# For every record: print its id, then the edge2irtg output built from its
# labels and edges, then a 40-character underscore separator.
with open(mrp_in) as corpus_file:
    for record in corpus_file:
        mrp = json.loads(record)
        print(mrp['id'])
        id_to_label = get_id2lex(mrp)
        mrp_edges = get_mrp_edges(mrp, get_remote=False)
        print(edge2irtg(mrp_edges, id_to_label))
        print(40 * '_')
Exemplo n.º 4
0
                label_dict[u] = label_dict[int(str(u)[:-4])]
            else: label_dict[u] = 'Non-Terminal'
    nodes_in_edge_dict = list(set([node for edge in edge_dict.keys() for node in edge]))
    label_dict_nodes = list(label_dict.keys())
    for edge in edge_dict.keys():
        for node in edge:
            if node not in label_dict.keys():
                label_dict[node] = 'Non-Terminal'
    return label_dict


# Read the corpus one JSON record per line; bytes that cannot be decoded as
# UTF-8 are silently dropped (errors='ignore').
# NOTE(review): mrp_data_path, companion_data and priority_dict are not defined
# in the lines visible here -- presumably set up earlier in the file; confirm.
with open(mrp_data_path,encoding='utf8', errors='ignore') as infile:
    # NOTE(review): counter is never updated or read in the lines visible here.
    counter = 0
    for line in infile:
        #print(line)
        mrp_dict = json.loads(line)
        # NOTE: this name shadows the builtin id() for the rest of the module.
        id = mrp_dict["id"]
        print(id)
        # Edge pipeline: extract edges, pass them through eliminate_h, then
        # through compress_c_edge.
        edges = get_mrp_edges(mrp_dict, get_remote = True)
        edges = eliminate_h(edges)
        labels = get_id2lex(mrp_dict)
        compressed_edges = compress_c_edge(edges)
        compressed_labels = update_id_labels(compressed_edges, labels)
        # NOTE(review): `labels` is passed here rather than `compressed_labels`
        # -- confirm this is intended.
        irtg_format_compressed = edge2irtg(compressed_edges, labels)
        print(irtg_format_compressed)
        node_tokens = node_to_token_index(companion_data, mrp_dict, compressed_labels, id)
        #print(companion_data)
        #print(compressed_labels)
        #print(node_tokens)
        alignments = align(compressed_edges, priority_dict, mrp_dict, node_tokens, compressed_labels)
Exemplo n.º 5
0
from edge_to_irtg import edge2irtg
from process_c import *
from move_edges import lower_edge, raise_edge
from test_head_percolation import update_id_labels
from utils import number_edges
from a_star_mrp import get_roots

# Path of the input file (one JSON record per line), from the command line.
mrp_in = sys.argv[1]

# Debug driver: for every record, print the edge2irtg rendering of the
# numbered original edges and of the compress_c_edge result, then build the
# rendering of the raise_edge('U', ...) result.
with open(mrp_in) as infile:
    for line in infile:
        mrp = json.loads(line)
        # NOTE: this name shadows the builtin id() for the rest of the module.
        id = mrp['id']
        print(id)
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=True)
        edges = number_edges(edges, 'A')
        print('original')
        irtg_original = edge2irtg(edges, labels)
        print(irtg_original)
        compressed = compress_c_edge(edges)
        labels = update_id_labels(compressed, labels)
        print('COMPRESSED')
        # Render the compressed edge set, not the pre-compression `edges`, so
        # the output under this heading reflects compress_c_edge's result and
        # matches the labels that were just updated for `compressed`.
        irtg_compressed = edge2irtg(compressed, labels)
        print(irtg_compressed)
        print('RAISED U')
        raised_u = raise_edge(compressed,
                              'U', ['L', 'H', 'P', 'S', 'A', 'D'],
                              label_dict=labels)
        # Labels are refreshed again after the edge set has changed.
        labels = update_id_labels(raised_u, labels)
        irtg_raised_u = edge2irtg(raised_u, labels)
import json
import sys
from get_edges_from_mrp import get_id2lex, get_mrp_edges


# Path of the corpus file, taken from the first command-line argument.
corpus = sys.argv[1]

# For each JSON record (one per line), collect two sets of node ids: the keys
# of the get_id2lex mapping, and the ids that occur as an endpoint of an edge
# key returned by get_mrp_edges.
with open(corpus) as infile:
    for line in infile:
        mrp = json.loads(line)
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp)
        nodes = set()
        node_mentions_in_edges = set()
        # Every key of the label mapping counts as a node.
        for node_id in labels.keys():
            nodes.add(node_id)
        # Edge keys are unpacked as (u, v) pairs; both ends are recorded.
        for (u, v) in edges.keys():
            node_mentions_in_edges.add(u)
            node_mentions_in_edges.add(v)