def extraction_pos1(list_srcs_dst, pos_1, pos_2):
    """Collect, per sentence, the ``pos_2`` items attached to each ``pos_1`` item.

    Args:
        list_srcs_dst: Iterable of sentences. Each sentence is an iterable of
            two-element pairs; element ``[0]`` must provide
            ``check_pos_morph(pos)`` and ``check_pos_phrase(pos)``, and
            element ``[1]`` must provide ``check_pos_morph(pos)``.
        pos_1: Part-of-speech passed to ``pair[1].check_pos_morph``.
        pos_2: Part-of-speech passed to ``pair[0].check_pos_morph`` and
            ``pair[0].check_pos_phrase``.

    Returns:
        A list of ``[key, morphs, phrases]`` lists, one per distinct key in
        each sentence, in first-seen order.  ``key`` is the value returned by
        ``pair[1].check_pos_morph(pos_1)``; ``morphs`` and ``phrases`` are the
        matching ``pos_2`` results, each one prefixed with a single space and
        concatenated (so a non-empty string always starts with a space).
        Keys that never had a non-``None`` ``pos_2`` match are omitted.
    """
    pair = []
    for line in list_srcs_dst:
        # key (e.g. a verb) -> pos_2 morphemes (e.g. particles) attached to it
        morphs_by_key = {}
        # key -> the phrases those morphemes came from
        phrases_by_key = {}
        for phrase in line:
            morph_1 = phrase[1].check_pos_morph(pos_1)
            morph_2 = phrase[0].check_pos_morph(pos_2)
            phrase_2 = phrase[0].check_pos_phrase(pos_2)
            # Only record pairs where both sides matched their part of speech.
            if morph_1 is None or morph_2 is None:
                continue
            morphs_by_key.setdefault(morph_1, []).append(morph_2)
            phrases_by_key.setdefault(morph_1, []).append(phrase_2)
        for key, morphs in morphs_by_key.items():
            pair.append([
                key,
                ''.join(' ' + morph for morph in morphs),
                ''.join(' ' + text for text in phrases_by_key[key]),
            ])
    return pair


if __name__ == '__main__':
    text = 'neko.txt.cabocha'
    sentences = chunk_list(text)
    list_ = srcs_dst_list(sentences)
    # The output contains Japanese text, so pin the encoding instead of
    # relying on the platform's locale default.
    with open('knock46.txt', 'w', encoding='utf-8') as f:
        for key, morphs, phrases in extraction_pos1(list_, '動詞', '助詞'):
            f.write(f'{key}\t{morphs}\t{phrases}\n')
# -*- coding: utf-8 -*-
"""Draw the first ten entries of the srcs/dst list as directed graphs.

The same edges are rendered twice: once through graphviz
(``binary_tree_graphviz``, PNG format) and once through pydot
(``binary_tree_pydot.png``).
"""
from itertools import islice

import pydot
from graphviz import Digraph

from knock41 import chunk_list
from knock42 import srcs_dst_list

if __name__ == '__main__':
    text = 'neko.txt.cabocha'
    sentences = chunk_list(text)

    G = Digraph(format='png')
    G.attr('node', shape='circle')

    list_srcs_dst = srcs_dst_list(sentences)
    edge = []
    # Only the first ten entries are drawn; islice stops iterating after
    # them instead of walking the whole list with an index check.
    for line in islice(list_srcs_dst, 10):
        for pair in line:
            tail = str(pair[0].phrase_surface())
            head = str(pair[1].phrase_surface())
            edge.append((tail, head))
            G.edge(tail, head)
    G.render('binary_tree_graphviz')

    g = pydot.graph_from_edges(edge)
    # The target file is named .png, so write PNG data (the previous
    # write_jpeg call stored JPEG bytes under a .png name).
    g.write_png('binary_tree_pydot.png', prog='dot')