# Scratch script: parse one sentence with a CoreNLP dependency parser, print
# its 4-column CoNLL form, and save the dependency graph as a timestamped SVG
# in the current working directory.
from datetime import datetime

from nltk.parse.corenlp import CoreNLPDependencyParser
from nltk.parse.dependencygraph import DependencyGraph

# The parser talks to a CoreNLP server at this URL; nothing below works unless
# one is reachable there.
parser = CoreNLPDependencyParser(url='http://localhost:9000')

# Alternative test sentences; swap one in for the active assignment below.
# sentence = "The trophy would not fit in the brown suitcase because it was too big"
# sentence = "I spread the roth on the table in order to protect it"
# sentence = "On the table I've spread the roth in order to protect it"
# sentence = "The city councilmen refused the demonstrators a permit because they feared violence"
# sentence = "She said he told her their secrets"
# sentence = "The monkey said the bird told the elephant he was dangerous."
# sentence = "The women stopped taking the pills because they were carcinogenic."
sentence = "Marta has a cat, her cat is brown"

# Single-element unpacking: raises ValueError unless the parser yields exactly
# one parse for the input.
parse, = parser.raw_parse(sentence)
conll = parse.to_conll(4)
print(conll)

# Rebuild a DependencyGraph from the CoNLL text produced above.
dg = DependencyGraph(conll)

# Not used further in this script; kept so the DOT source and the networkx
# graph remain available as module-level names (e.g. for interactive use).
dotted = dg.to_dot()
G = dg.nx_graph()

# Render before opening the output file so that a rendering failure does not
# leave an empty .svg behind.
svg = dg._repr_svg_()

# NOTE(review): str(datetime.now()) contains a space and ':' characters, which
# are not valid in file names on Windows -- confirm the target platforms.
with open('hoy_' + str(datetime.now()) + '.svg', 'w') as f:
    f.write(svg)
# Scratch script: parse a fixed two-clause input with a CoreNLP dependency
# parser and save the resulting dependency graph as a timestamped SVG in the
# current working directory.
from datetime import datetime

from nltk.parse.corenlp import CoreNLPDependencyParser
from nltk.parse.dependencygraph import DependencyGraph

# The parser talks to a CoreNLP server at this URL; nothing below works unless
# one is reachable there.
parser = CoreNLPDependencyParser(url='http://localhost:9000')

# Disabled variant that read the input sentences from a fragment file:
# filename = "text6"
# f = open("../Fragments_for_testing/"+filename, "r")
# sentences = f.readlines()
# for sentence in sentences:
sentence = "Elephants are big. Monkeys are small"

# Single-element unpacking: raises ValueError unless the parser yields exactly
# one parse for the input.
parse, = parser.raw_parse(sentence)
conll = parse.to_conll(4)

# Rebuild a DependencyGraph from the CoNLL text produced above.
dp = DependencyGraph(conll)

# Not used further in this script; kept so the DOT source and the networkx
# graph remain available as module-level names (e.g. for interactive use).
dotted = dp.to_dot()
G = dp.nx_graph()

# Render before opening the output file so that a rendering failure does not
# leave an empty .svg behind.
svg = dp._repr_svg_()

# NOTE(review): str(datetime.now()) contains a space and ':' characters, which
# are not valid in file names on Windows -- confirm the target platforms.
with open('test_' + str(datetime.now()) + '.svg', 'w') as f:
    f.write(svg)
def _conllu_row(address, word, stag, pos_tag, head, rel):
    """Return one 10-column, tab-separated CoNLL row (without a newline).

    Column 2 is ``word + '_' + stag``; ``stag`` is repeated in column 4 and
    ``pos_tag`` goes in column 5.  ``address`` and ``head`` are converted with
    ``str``; every other argument must already be a string.  The remaining
    columns are filled with ``_``.
    """
    return '\t'.join([
        str(address),
        word + '_' + stag,
        '_',
        stag,
        pos_tag,
        '_',
        str(head),
        rel,
        '_',
        '_',
    ])


def _color_dot(dot_string, unbounded_dep, new_edges_sent):
    """Return ``dot_string`` with selected edge lines recoloured.

    Every line is stripped of surrounding whitespace and re-terminated with a
    newline.  A line equal to the plain edge
    ``unbounded_dep[0] -> unbounded_dep[1]`` is replaced by a red edge written
    in the opposite direction; a line equal to the plain edge
    ``edge[1] -> edge[0]`` of any entry of ``new_edges_sent`` gets
    ``color="blue"`` appended to its attribute list.
    """
    edge_fmt = '{} -> {} [label="{}"]'
    unbounded_line = edge_fmt.format(
        unbounded_dep[0], unbounded_dep[1], unbounded_dep[2])
    # NOTE(review): the red edge swaps source and target relative to the line
    # it replaces -- preserved as-is; confirm this reversal is intended.
    red_line = '{} -> {} [label="{}", color="red"]'.format(
        unbounded_dep[1], unbounded_dep[0], unbounded_dep[2])
    new_lines = {
        edge_fmt.format(edge[1], edge[0], edge[2]) for edge in new_edges_sent
    }

    colored = []
    for line in dot_string.split('\n'):
        line = line.strip()
        if line == unbounded_line:
            line = red_line
        elif line in new_lines:
            # Drop the closing ']' and re-close after the colour attribute.
            line = line[:-1] + ', color="blue"]'
        colored.append(line + '\n')
    return ''.join(colored)


def output_conllu(filename, sents, pos, stags, arcs, rels, dependencies,
                  new_edges, output_dir, result_file,
                  sent_indices=(21,), start=25, end=33):
    """Write one coloured Graphviz ``.gv`` file per dependency of each sentence.

    For every sentence index in ``sent_indices`` and every entry of
    ``dependencies[sent_idx]``, a small CoNLL fragment is assembled from:

    1. a row for token ``dep[1]`` with head ``dep[0]`` and relation ``dep[2]``;
    2. a row for every token of the sentence whose 0-based index lies in
       ``[start, end]``, using ``arcs`` / ``rels`` for head and relation;
    3. a row for every entry ``e`` of ``new_edges[sent_idx]`` whose ``e[0]``
       lies in ``[start, end]``, with head ``e[1]`` and relation ``e[2]``.

    The fragment is loaded into a ``DependencyGraph``, converted to DOT,
    recoloured (see ``_color_dot``) and written to
    ``<output_dir>/sent{S}_dep{D}_correct{score}.gv``.

    Args:
        filename: Unused; kept for interface compatibility.
        sents: Per-sentence sequences of token strings.
        pos, stags, arcs, rels: Per-sentence sequences parallel to ``sents``.
            ``pos``, ``stags`` and ``rels`` entries must be strings;
            ``arcs`` entries are converted with ``str``.
        dependencies: Per-sentence sequences of ``(head, token, relation)``
            triples; ``token`` is used as a 1-based position in the sentence.
        new_edges: Per-sentence sequences of ``(token, head, relation)``
            triples; ``token`` is used as a 1-based position in the sentence.
        output_dir: Directory for the ``.gv`` files; created if missing.
        result_file: Text file whose lines hold three whitespace-separated
            integers: sentence index, dependency index and score.
        sent_indices: Sentence indices to process.  Defaults to ``(21,)``,
            the value that used to be hard-coded.
        start, end: Inclusive window bounds.  Default to ``25`` and ``33``,
            the values that used to be hard-coded.

    Raises:
        KeyError: If ``result_file`` has no score for a processed
            ``(sentence index, dependency index)`` pair.
    """
    scores = {}
    with open(result_file) as fin:
        for line in fin:
            fields = line.split()
            scores[(int(fields[0]), int(fields[1]))] = int(fields[2])

    tree_prop_file = 'd6.treeproperties'
    # NOTE(review): neither result is used below.  The calls are kept because
    # the helpers are defined elsewhere and may have side effects; drop them
    # if they turn out to be pure.
    t2props_dict = get_t2props_dict(tree_prop_file)
    t2topsub_dict = get_t2topsub_dict(tree_prop_file)

    for sent_idx in sent_indices:
        for dep_idx, unbounded_dep in enumerate(dependencies[sent_idx]):
            sent = sents[sent_idx]
            pos_sent = pos[sent_idx]
            stags_sent = stags[sent_idx]
            arcs_sent = arcs[sent_idx]
            rels_sent = rels[sent_idx]

            # Row for the dependency under inspection (1-based token address).
            dependent = int(unbounded_dep[1])
            rows = [_conllu_row(
                dependent,
                sent[dependent - 1],
                stags_sent[dependent - 1],
                pos_sent[dependent - 1],
                unbounded_dep[0],
                unbounded_dep[2],
            )]

            # Rows for the sentence tokens inside the window.
            # NOTE(review): the window is applied to 0-based indices here but
            # to 1-based addresses for the new edges below -- confirm that
            # this one-token offset is intended.
            for token_idx in range(len(sent)):
                if start <= token_idx <= end:
                    rows.append(_conllu_row(
                        token_idx + 1,
                        sent[token_idx],
                        stags_sent[token_idx],
                        pos_sent[token_idx],
                        arcs_sent[token_idx],
                        rels_sent[token_idx],
                    ))

            # Rows for the additional edges whose token lies in the window.
            edges_sent = new_edges[sent_idx]
            for edge in edges_sent:
                if start <= edge[0] <= end:
                    address = int(edge[0])
                    rows.append(_conllu_row(
                        address,
                        sent[address - 1],
                        stags_sent[address - 1],
                        pos_sent[address - 1],
                        edge[1],
                        edge[2],
                    ))

            conllu = ''.join(row + '\n' for row in rows)
            graph = DependencyGraph(conllu)

            # exist_ok avoids the check-then-create race of isdir + makedirs.
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(
                output_dir,
                'sent{}_dep{}_correct{}.gv'.format(
                    sent_idx, dep_idx, scores[(sent_idx, dep_idx)]))

            new_dot_string = _color_dot(
                graph.to_dot(), unbounded_dep, edges_sent)
            with open(output_file, 'wt') as fout:
                fout.write(new_dot_string)