# -*- coding: utf-8 -*-
# python knock47.py < neko.txt2.cabocha
from collections import OrderedDict


def particle_phrases(chunks, verb_chunk):
    """Map the last particle of each chunk feeding *verb_chunk* to its text.

    For every index in ``verb_chunk.srcs`` the corresponding chunk is scanned
    for morphemes whose ``pos`` is u"助詞" (particle); the ``base`` of the last
    one found becomes the key and ``chunk2str()`` of that chunk the value.
    Chunks without any particle are skipped.  When two chunks end up with the
    same particle, the later one wins (same as ``dict(zip(...))``).

    The original check was ``len(zyosi) != ""`` which compares an int with a
    string and is therefore always true, so particle-less chunks leaked an
    empty-string key into the result.
    """
    pairs = []
    for src in verb_chunk.srcs:
        source = chunks[src]
        particle = ""
        for morph in source.morphs:
            if morph.pos == u"助詞":
                particle = morph.base
        if particle != "":
            pairs.append((particle, source.chunk2str()))
    return dict(pairs)


if __name__ == "__main__":
    # Imported here rather than at module level so that the helper above can
    # be imported without the project-local parser being available.
    from knock41 import F41

    for i, chunks in enumerate(F41()):
        for chunk in chunks:
            meisi_flag = False
            for morph in chunk.morphs:
                if meisi_flag:
                    if morph.surface != u"を":
                        meisi_flag = False
                    else:
                        zyosi_kou_dic = {}
                        verb_chunk = chunks[chunk.dst]
                        if verb_chunk.morphs[0].pos == u"動詞":
                            dousi = verb_chunk.morphs[0].base
                            zyosi_kou_dic = particle_phrases(chunks, verb_chunk)
                        # OrderedDict keeps the dictionary form after sorting.
                        # NOTE(review): the excerpt ends here. The code that
                        # sets meisi_flag to True, sorts zyosi_kou_dic and
                        # prints the result is not visible, so it is not
                        # reconstructed; `i`, `dousi` and `zyosi_kou_dic` keep
                        # their original names for that missing part.
# -*- coding: utf-8 -*-
# python knock45.py < neko.txt2.cabocha > output.txt


def last_particle(morphs):
    """Return the ``base`` of the last particle (pos u"助詞") in *morphs*.

    Returns "" when *morphs* contains no particle.
    """
    particle = ""
    for morph in morphs:
        if morph.pos == u"助詞":
            particle = morph.base
    return particle


def case_pattern(chunks, chunk):
    """Build the "<verb><TAB><particles>" line for *chunk*.

    The verb is the ``base`` of the leftmost morpheme in ``chunk.morphs``
    whose ``pos`` is u"動詞".  The particles are the last particle of every
    chunk listed in ``chunk.srcs``, sorted and joined with single spaces.

    Returns None when the chunk has no verb or when none of its source
    chunks contains a particle.
    """
    for morph in chunk.morphs:
        if morph.pos != u"動詞":
            continue
        found = (last_particle(chunks[src].morphs) for src in chunk.srcs)
        # Sorting puts e.g. "が" before "と".
        particles = sorted(p for p in found if p != "")
        # Only the leftmost verb counts, so stop looking for further verbs
        # whether or not a line is produced.
        if not particles:
            return None
        return "{}\t{}".format(morph.base, " ".join(particles))
    return None


if __name__ == "__main__":
    # Imported here rather than at module level so that the helpers above can
    # be imported without the project-local parser being available.
    from knock41 import F41

    for chunks in F41():
        for chunk in chunks:
            line = case_pattern(chunks, chunk)
            if line is not None:
                # Single parenthesised argument: prints identically under
                # the Python 2 print statement and the Python 3 function.
                print(line)

# Sample check of the output (the listing is cut off in the original file):
#   cat output.txt | sort | uniq -c | sort -nr | head -20
#    704 云う と
#    452 する を
# -*- coding: utf-8 -*-
# cabocha -f1 < neko.txt > neko.txt2.cabocha
# python knock41.py < neko.txt2.cabocha
import sys


def chunk_text(chunk):
    """Return the surfaces of ``chunk.morphs``, each followed by one space.

    Trailing "。" is stripped from every surface.  The space after every
    morpheme (including the last one) reproduces what the original Python 2
    ``print x,`` statements wrote, so the output stays byte-for-byte the same.
    """
    return "".join(morph.surface.rstrip("。") + " " for morph in chunk.morphs)


def dependency_line(sentence, chunk):
    """Return "<chunk text><TAB><destination text>" for *chunk*.

    The destination text is taken from ``sentence[chunk.dst]`` and is left
    empty unless ``chunk.dst > 0``.  The result carries no trailing newline.
    """
    line = chunk_text(chunk) + "\t"
    if chunk.dst > 0:
        line += chunk_text(sentence[chunk.dst])
    return line


if __name__ == "__main__":
    # Imported here rather than at module level so that the helpers above can
    # be imported without the project-local parser being available.
    from knock41 import F41

    for sentence in F41():
        for chunk in sentence:
            # Single parenthesised argument: prints identically under the
            # Python 2 print statement and the Python 3 function.
            print(dependency_line(sentence, chunk))

# Sample output recorded in the original file (line breaks reconstructed
# from a collapsed listing -- TODO confirm against a real run):
#   0, 一
#   0,
#   0, 猫 で ある
#   1, 吾輩 は 猫 で ある
#   2, 猫 で ある
#   0, 名前 は 無い
#   1, まだ 無い
#   2, 無い