Пример #1
0
from knock41 import Chunk, cabocha_chunk_data
import collections

# Script body: stream the items yielded by cabocha_chunk_data() for the parsed
# input file and, for each one, write a 1-based "[N行目]" header to
# result/knock46.txt while collecting, per phrase, a verb and the particles of
# the phrases listed in `phrase.srcs`.
# NOTE(review): in the lines visible here `verb_pattern` and `temp_src` are
# built but never written to `data_out`; the snippet appears to be cut off
# after the last assignment -- confirm against the full script.
# NOTE(review): get_phrase_pos(), get_phrase_pos1() and get_phrase_list() are
# used as parallel sequences (an index found in one is applied to another);
# presumably they hold per-morpheme POS, POS sub-category and surface forms --
# verify against knock41.Chunk.
with open('../data/neko.txt.cabocha', 'r') as data_in:
    with open('result/knock46.txt', 'w') as data_out:
        for i, line in enumerate(cabocha_chunk_data(data_in)):
            # Header for this item; `i + 1` makes the counter 1-based.
            print('[{}行目]'.format(i + 1), file=data_out)
            for phrase in line:
                # Both accumulators are reset for every phrase.
                verb_pattern = ''
                # Missing keys read back as '' instead of raising KeyError.
                temp_src = collections.defaultdict(lambda: '')
                # Only phrases that carry a '動詞' tag are processed further.
                if '動詞' in phrase.get_phrase_pos():
                    # list.index() returns the first match, so this takes the
                    # entry at the position of the first '動詞' tag, followed
                    # by a tab.
                    verb_pattern += phrase.get_phrase_list()[
                        phrase.get_phrase_pos().index('動詞')] + '\t'
                    # `srcs` values are used as indices into `line`.
                    for src in phrase.srcs:
                        # Skip source phrases that have no '助詞' tag at all.
                        if '助詞' in line[src].get_phrase_pos():
                            if '格助詞' in line[src].get_phrase_pos1():
                                # max() over the matching positions selects
                                # the last '格助詞' entry in the phrase.
                                temp_index = max([
                                    k for k, y in enumerate(
                                        line[src].get_phrase_pos1())
                                    if y == '格助詞'
                                ])
                                # Key: source phrase text; value: the entry at
                                # the selected position.
                                temp_src[line[src].get_phrase_txt(
                                )] = line[src].get_phrase_list()[temp_index]
                            else:
                                # No '格助詞' sub-tag: fall back to the last
                                # entry tagged '助詞'.
                                temp_index = max([
                                    k for k, y in enumerate(
                                        line[src].get_phrase_pos())
                                    if y == '助詞'
                                ])
                                temp_src[line[src].get_phrase_txt(
                                )] = line[src].get_phrase_list()[temp_index]
Пример #2
0
from knock41 import Chunk, cabocha_chunk_data

def dependency_lines(chunks):
    """Yield the output lines describing one sentence's phrase dependencies.

    Args:
        chunks: Indexable sequence of chunk objects for a single sentence.
            Each object must provide ``get_phrase_txt()``, an integer ``dst``
            used as an index into ``chunks`` (``-1`` is treated as "no
            head"), and a ``srcs`` collection.

    Yields:
        ``"<text>\\t\\t<head text>\\n"`` for every phrase that has a head,
        provided both texts are non-empty, and ``"<text>\\n"`` for a phrase
        that has neither a head nor any entries in ``srcs``. Phrases with no
        head but with entries in ``srcs`` produce nothing.
    """
    for chunk in chunks:
        text = chunk.get_phrase_txt()
        if text == '':
            continue

        # Handle the "no head" sentinel before indexing: chunks[-1] would
        # otherwise silently resolve to the last chunk of the sentence and
        # the emptiness check below would inspect the wrong phrase.
        if chunk.dst == -1:
            if not chunk.srcs:
                yield text + '\n'
            continue

        head_text = chunks[chunk.dst].get_phrase_txt()
        if head_text == '':
            continue
        yield text + '\t\t' + head_text + '\n'


if __name__ == '__main__':
    # NOTE(review): both files are opened with the platform default encoding,
    # as before -- confirm it matches the encoding of the CaboCha output.
    with open('../data/neko.txt.cabocha', 'r') as data_in:
        with open('result/knock42.txt', 'w') as data_out:
            for line in cabocha_chunk_data(data_in):
                data_out.writelines(dependency_lines(line))
                # A blank line separates consecutive sentences.
                data_out.write('\n')