Пример #1
0
                verbframedict[verbbase].append(
                    (adp, chunks[i].show_only_words()))
        noun = set()
        verb = set()
    return verbframedict


if __name__ == "__main__":
    # Read the parsed corpus and wrap every raw line in a Morph.
    with open("ai.ja1.txt.parsed", "r") as parsed_file:
        raw_lines = parsed_file.readlines()
    ai_morphs = [Morph(raw_line) for raw_line in raw_lines]

    # Feed every sentence's chunks to verbframe2, which is handed the same
    # `frames` mapping on each call.
    frames = defaultdict(list)
    for sentence in morph2sents(ai_morphs):
        verbframe2(morph2chunk(sentence), frames)

    # Predicate / case-pattern combinations that occur frequently in the
    # corpus: report the ten keys that collected the most entries.
    by_entry_count = sorted(
        frames.items(), key=lambda item: len(item[1]), reverse=True
    )
    for key, entries in by_entry_count[:10]:
        # Regroup this key's entries by their first field, keeping the
        # order in which each first field was first seen.
        grouped = defaultdict(list)
        for entry in entries:
            grouped[entry[0]].append(entry[1])
        # One output row: key, the grouped first fields, then their
        # second fields — three tab-separated columns.
        print(key, end="\t")
        print(" ".join(grouped), end="\t")
        print(" ".join(" ".join(items) for items in grouped.values()))
Пример #2
0
from collections import defaultdict
from knock40 import Morph, morph2sents
from knock41 import Chunk, morph2chunk, sources

if __name__ == "__main__":
    with open("ai.ja1.txt.parsed", "r") as ai:
        ai = ai.readlines()

    ai_morphs = []
    for i in range(len(ai)):
        ai_morphs.append(Morph(ai[i]))

    sents = morph2sents(ai_morphs)
    for sent in sents:
        chunks = morph2chunk(sent)
        sources(chunks)
        for i in range(len(chunks)):
            #print(chunk.meta)
            #print(chunk.show_bunsetsu_tag())
            if "サ変接続" in chunks[i].show_morph_pos1():
                if "を" in chunks[i].show_only_listwords():
                    print(chunks[i].show_only_words(), end="")
                    goto = chunks[i].dst
                    verb = chunks[goto].show_base_for_X("動詞")
                    adpos = set()
                    dep = []
                    if verb != None:
                        if chunks[goto].srcs != []:
                            for head_id in chunks[goto].srcs:
                                adp = chunks[head_id].show_base_for_X("助詞")
                                if adp != None and head_id != i:
Пример #3
0

def noun2verb(chunks):
    """Print noun-bearing chunks alongside the verb-bearing chunk they point to.

    For every chunk that contains at least one morpheme whose ``pos`` is
    "名詞" and whose ``dst`` chunk contains at least one morpheme whose
    ``pos`` is "動詞", one line is written to stdout: the source chunk's
    words, a tab, then the destination chunk's words.

    Args:
        chunks: Sequence of chunk objects for one sentence. Each chunk must
            expose ``morphs`` (an iterable of objects with a ``pos``
            attribute), ``dst`` (an index into ``chunks``) and
            ``show_only_words()``.

    Returns:
        None. The result is only printed.

    Raises:
        IndexError: If a noun-bearing chunk has a non-negative ``dst`` that
            lies past the end of ``chunks``.
    """
    for chunk in chunks:
        if not any(morph.pos == "名詞" for morph in chunk.morphs):
            continue

        dst = chunk.dst
        # A negative index would silently wrap around to the end of the
        # list and pair this chunk with an unrelated one (often itself).
        # NOTE(review): presumably a negative dst (-1) is the parser's
        # "no head" marker for the root chunk — confirm against
        # knock41.Chunk.
        if dst is None or dst < 0:
            continue

        head = chunks[dst]
        if any(morph.pos == "動詞" for morph in head.morphs):
            print(chunk.show_only_words(), end="\t")
            print(head.show_only_words())


if __name__ == "__main__":
    # Read the parsed corpus and wrap every raw line in a Morph.
    with open("ai.ja1.txt.parsed", "r") as parsed_file:
        raw_lines = parsed_file.readlines()
    ai_morphs = [Morph(raw_line) for raw_line in raw_lines]

    # Only the sentence at index 1 is chunked and reported.
    sentences = morph2sents(ai_morphs)
    dep = morph2chunk(sentences[1])

    noun2verb(dep)