Пример #1
0
# -*- coding: utf-8 -*-
#python knock47.py < neko.txt2.cabocha

from knock41 import F41
from collections import OrderedDict

if __name__ == "__main__":
    # Walk every sentence (a list of chunks) yielded by F41 and, for a chunk
    # whose morpheme is the particle "を" while meisi_flag is set, collect the
    # particles and chunk texts of all chunks that modify the same head verb.
    for i, chunks in enumerate(F41()):
        for chunk in chunks:
            # NOTE(review): nothing in the visible snippet ever sets
            # meisi_flag to True, so the branch below cannot run as written;
            # the code that sets it appears to have been cut off -- confirm
            # against the full script.
            meisi_flag = False
            for morph in chunk.morphs:
                if meisi_flag:
                    if morph.surface != u"を":
                        meisi_flag = False
                    else:
                        zyosi_list = []
                        kou_list = []
                        head = chunks[chunk.dst]
                        # Only heads whose first morpheme is a verb are used.
                        if head.morphs[0].pos == u"動詞":
                            dousi = head.morphs[0].base
                            for src in head.srcs:
                                kou = ""
                                zyosi = ""
                                # Keep the last particle found in the source
                                # chunk, together with that chunk's text.
                                for morph2 in chunks[src].morphs:
                                    if morph2.pos == u"助詞":
                                        zyosi = morph2.base
                                        kou = chunks[src].chunk2str()
                                # Skip source chunks that contain no particle.
                                # (The previous test, len(zyosi) != "",
                                # compared an int with a string and was
                                # therefore always true.)
                                if zyosi != "":
                                    zyosi_list.append(zyosi)
                                    kou_list.append(kou)
                            # Map each particle to the text of its chunk.
                            zyosi_kou_dic = dict(zip(zyosi_list, kou_list))
                            # OrderedDict keeps the dictionary form after sorting.
Пример #2
0
# -*- coding: utf-8 -*-
#python knock45.py < neko.txt2.cabocha> output.txt

from knock41 import F41

if __name__ == "__main__":
    for chunks in F41():
        for chunk in chunks:
            for morph in chunk.morphs:
                if morph.pos == u"動詞":
                    dousi = morph.base
                    zyosi_list = list()
                    for src in chunk.srcs:
                        zyosi = ""
                        for morph in chunks[src].morphs:
                            if morph.pos == u"助詞":
                                zyosi = morph.base
                        if zyosi != "":
                            zyosi_list.append(zyosi)
                    zyosi_list.sort()
                    #"が"のほうが"と"より先に来る
                    zyosi = " ".join(zyosi_list)
                    if len(zyosi) != 0:
                        print "{}\t{}".format(dousi,zyosi)
                    #最左の動詞という条件があるので
                    #これ以上動詞を探すのをやめる
                    break
"""                    
cat output.txt | sort | uniq -c | sort -nr | head -20
704 云う	と
452 する	を
Пример #3
0
# -*- coding: utf-8 -*-
#cabocha -f1 < neko.txt > neko.txt2.cabocha
#python knock41.py < neko.txt2.cabocha

from knock41 import F41
import sys

if __name__ == "__main__":
    sentenses = F41()
    for sentense in sentenses:
        for chunk in sentense:
            for morph in chunk.morphs:
                print morph.surface.rstrip("。"),
            print "\t",
            if chunk.dst > 0:
                for morph in sentense[chunk.dst].morphs:
                    print morph.surface.rstrip("。"),
            print "\n",
"""
0, 一 	
0, 	
0,  	猫 で ある  
1, 吾輩 は 	猫 で ある  
2, 猫 で ある  	
0, 名前 は 	無い  
1, まだ 	無い  
2, 無い  	
"""