import copy

import pydot

import NLP42
import NLP43
import NLP45
import NLP47


def write_kakariuke_tree_oneline(i, oneline):
    """Render the dependency (kakariuke) tree of sentence i as a PNG under output_dir."""
    graph = pydot.Dot(graph_type='digraph')
    # Cache of chunk index -> pydot.Node, so each chunk is added to the graph once.
    map_id_and_node_graph_includes = {}
    for i_s, src in enumerate(oneline):
        if src.dst() == -1:
            continue
        chunk_src_string = NLP42.concat_morphs(src.morphs())
        chunk_dst_string = NLP42.concat_morphs(oneline[src.dst()].morphs())
        if len(chunk_src_string) == 0 or len(chunk_dst_string) == 0:
            continue
        i_d = src.dst()
        if i_s in map_id_and_node_graph_includes:
            src_node = map_id_and_node_graph_includes[i_s]
        else:
            src_node = pydot.Node(str(i_s), label=chunk_src_string)
            map_id_and_node_graph_includes[i_s] = src_node
            graph.add_node(src_node)
        if i_d in map_id_and_node_graph_includes:
            dst_node = map_id_and_node_graph_includes[i_d]
        else:
            dst_node = pydot.Node(str(i_d), label=chunk_dst_string)
            map_id_and_node_graph_includes[i_d] = dst_node
            # Register the cached node itself so edges and node share one object.
            graph.add_node(dst_node)
        graph.add_edge(pydot.Edge(src_node, dst_node))
    graph.write_png(output_dir + '/' + str(i) + '.png')
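def write_all_kakariuke_trees(sentences):
    # Hedged driver sketch, not part of the original file: `sentences` is a
    # hypothetical name for a list of parsed sentences, each a list of Chunks
    # (see the interface sketch at the end of this file). `output_dir` is
    # assumed to already exist.
    for i, oneline in enumerate(sentences):
        write_kakariuke_tree_oneline(i, oneline)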
def write_kakariuke_oneline_2(oneline):
    """Print tab-separated pairs of noun chunks and the verb chunks they modify."""
    for src in oneline:
        if src.dst() == -1:
            continue
        if not (has_chunk_noun(src) and has_chunk_verb(oneline[src.dst()])):
            continue
        chunk_src_string = NLP42.concat_morphs(src.morphs())
        chunk_dst_string = NLP42.concat_morphs(oneline[src.dst()].morphs())
        if len(chunk_src_string) == 0 or len(chunk_dst_string) == 0:
            continue
        print(chunk_src_string, chunk_dst_string, sep='\t')
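# Example (hedged, derived from the logic above, not from an actual run): for
# a toy sentence 「犬が走る」 parsed into the noun chunk 「犬が」 depending on
# the verb chunk 「走る」, the function prints one tab-separated pair:
#
#     犬が	走る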
def write_sa_verb_and_pattern_of_particle_and_morphs_oneline(i, oneline):
    """Collect case patterns for 'sahen noun + wo + verb' predicates in one sentence."""
    map_sa_verb_and_pattern_of_particle_and_morphs = {}
    for i_d, dst in enumerate(oneline):
        if not NLP43.is_verb(dst.morphs()[0]):
            continue
        for i_s in dst.srcs():
            chunk = oneline[i_s].morphs()
            if len(chunk) < 2:
                continue
            last_morph_of_src = chunk[-1]
            second_last_morph_of_src = chunk[-2]
            if not (is_particle_wo(last_morph_of_src) and is_noun_connected_sa(second_last_morph_of_src)):
                continue
            # Predicate key: the 'sahen noun + を' chunk plus the verb's base form.
            key_base = NLP42.concat_morphs(chunk) + dst.morphs()[0].base()
            # Gather the verb's other source chunks that end with a particle.
            for i_src_of_s in dst.srcs():
                if i_src_of_s == i_s:
                    continue
                c = oneline[i_src_of_s].morphs()
                c = NLP47.omit_punctual_from_morphs(c)
                if len(c) < 1:
                    continue
                last_morph_of_i_src_of_s = c[-1]
                if NLP45.is_particle(last_morph_of_i_src_of_s):
                    if key_base in map_sa_verb_and_pattern_of_particle_and_morphs:
                        d = map_sa_verb_and_pattern_of_particle_and_morphs[key_base]
                        d['particles'].append(copy.deepcopy(last_morph_of_i_src_of_s.surface()))
                        d['morphs'].append(copy.deepcopy(c))
                    else:
                        new_dict = dict(particles=copy.deepcopy([last_morph_of_i_src_of_s.surface()]),
                                        morphs=copy.deepcopy([c]))
                        map_sa_verb_and_pattern_of_particle_and_morphs[key_base] = new_dict
    write_verb_and_pattern_of_particle_and_morphs_result(i, map_sa_verb_and_pattern_of_particle_and_morphs)
def write_verb_and_pattern_of_particle_and_morphs_result(i, map_verb_and_pattern_of_particle_and_morphs):
    """Print each predicate with its particles and the source chunks those particles close."""
    for key_base in map_verb_and_pattern_of_particle_and_morphs:
        d = map_verb_and_pattern_of_particle_and_morphs[key_base]
        particles = d['particles']
        morphs = d['morphs']
        chunk_strings = [NLP42.concat_morphs(x) for x in morphs]
        if len(particles) > 0:
            print(key_base, ' '.join(str(x) for x in particles), ' '.join(chunk_strings), sep='\t')
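# Hedged worked example for the two functions above, derived from the code's
# logic rather than from an actual run: in 「主人は手紙に返事をする」, the
# chunk 「返事を」 ends with a sahen noun + を and depends on the verb する,
# so key_base is 「返事をする」; the verb's other source chunks end with the
# particles は and に, giving the printed line:
#
#     返事をする	は に	主人は 手紙に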
def get_morph_chain(nouns, chunks):
    """Follow dst links from each noun chunk to the sentence root, returning surface strings."""
    if isinstance(nouns, list):
        return [get_morph_chain(i, chunks) for i in nouns]
    if nouns is None:
        return ''
    chunk = chunks[nouns]
    omitted = NLP47.omit_punctual_from_morphs(chunk.morphs())
    omitted_string = NLP42.concat_morphs(omitted)
    if chunk.dst() == -1:
        return [omitted_string]
    return [omitted_string] + get_morph_chain(chunk.dst(), chunks)
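# The functions above assume the Morph/Chunk interface built in the earlier
# exercises of this series. Below is a minimal, hypothetical sketch of that
# assumed shape; the class and field names are guesses from the call sites
# used here (morphs(), dst(), srcs(), surface(), base()), not the real classes.

class _SketchMorph:
    def __init__(self, surface, base):
        self._surface = surface
        self._base = base

    def surface(self):
        return self._surface

    def base(self):
        return self._base


class _SketchChunk:
    def __init__(self, morphs, dst, srcs):
        self._morphs = morphs
        self._dst = dst
        self._srcs = srcs

    def morphs(self):
        return self._morphs

    def dst(self):
        return self._dst

    def srcs(self):
        return self._srcs


if __name__ == '__main__':
    # Toy sentence 「犬が走る」: chunk 0 「犬が」 depends on chunk 1 「走る」.
    sentence = [
        _SketchChunk([_SketchMorph('犬', '犬'), _SketchMorph('が', 'が')], 1, []),
        _SketchChunk([_SketchMorph('走る', '走る')], -1, [0]),
    ]
    # Walk dst links from chunk 0, the same traversal get_morph_chain performs.
    idx, chain = 0, []
    while idx != -1:
        chain.append(''.join(m.surface() for m in sentence[idx].morphs()))
        idx = sentence[idx].dst()
    print(' -> '.join(chain))  # 犬が -> 走る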