示例#1
0
def extract_verb_surface():
    """Collect the surface form of every verb and dump them to a text file.

    Reads the morpheme sequence from ``morphines_read()`` (its second return
    value is not used here) and keeps each entry whose ``"pos"`` field equals
    "動詞" (verb).

    Side effect: writes one surface form per line to ``verb_surface_31.txt``
    in the current working directory, overwriting any existing file.

    Returns:
        list: verb surface forms in the order they appear in the input.
    """
    tokens, _ = morphines_read()
    surfaces = []
    with open("verb_surface_31.txt", "w") as out:
        for token in tokens:
            # Skip everything that is not tagged as a verb.
            if token["pos"] != "動詞":
                continue
            surface = token["surface"]
            surfaces.append(surface)
            out.write(surface + "\n")
    return surfaces
示例#2
0
def extract_sahen_noun():
    """Extract nouns of the sa-hen (suru-verb forming) subtype.

    Reads the morpheme sequence from ``morphines_read()`` (its second return
    value is not used here) and keeps each entry whose ``"pos"`` is "名詞"
    (noun) and whose ``"pos1"`` is ``"sahen"``.

    Side effect: writes one surface form per line to ``sahen_noun_33.txt``
    in the current working directory, overwriting any existing file.

    Returns:
        list: matching surface forms in the order they appear in the input.
    """
    tokens, _ = morphines_read()
    matches = []
    with open("sahen_noun_33.txt", "w") as out:
        for token in tokens:
            # NOTE(review): the subtype is compared against the ASCII tag
            # "sahen" while the main tag is Japanese ("名詞"); confirm that
            # morphines_read() really emits "sahen" and not e.g. "サ変接続".
            if token["pos"] != "名詞" or token["pos1"] != "sahen":
                continue
            surface = token["surface"]
            matches.append(surface)
            out.write(surface + "\n")
    return matches
示例#3
0
def extract_A_B():
    """Extract noun phrases in which two nouns are joined by "の" ("A の B").

    Slides a three-morpheme window over the sequence returned by
    ``morphines_read()`` (its second return value is not used here) and keeps
    every window of the form noun / surface "の" / noun.

    Side effect: writes one phrase per line to ``AのB.txt`` in the current
    working directory, overwriting any existing file.

    Returns:
        list: the concatenated three-morpheme phrases, in order of appearance.
    """
    tokens, _ = morphines_read()
    phrases = []
    with open("AのB.txt", "w") as out:
        # The last valid window starts two positions before the end.
        for start in range(len(tokens) - 2):
            left = tokens[start]
            if left["pos"] != "名詞":
                continue
            middle = tokens[start + 1]
            if middle["surface"] != "の":
                continue
            right = tokens[start + 2]
            if right["pos"] != "名詞":
                continue
            phrase = left["surface"] + middle["surface"] + right["surface"]
            phrases.append(phrase)
            out.write(phrase + "\n")
    return phrases
示例#4
0
def _noun_runs(morphines):
    """Yield each maximal run of two or more consecutive nouns as one string."""
    run = []  # surfaces of the noun run currently being accumulated
    for morphine in morphines:
        if morphine["pos"] == "名詞":
            run.append(morphine["surface"])
            continue
        # A non-noun ends the current run; single nouns are not a sequence.
        if len(run) >= 2:
            yield "".join(run)
        run = []
    # Flush a run that extends to the very end of the input.
    if len(run) >= 2:
        yield "".join(run)


def noun_sequence():
    """Extract noun sequences (consecutive nouns) using longest match.

    Reads the morpheme sequence from ``morphines_read()`` (its second return
    value is not used here). Every maximal run of two or more adjacent
    entries whose ``"pos"`` is "名詞" (noun) is concatenated into one string.

    Compared with the previous implementation this:
      * concatenates the nouns of the run itself (the old loop advanced the
        index before appending, dropping the first noun and picking up the
        morpheme that followed the run);
      * reports each run once at full length instead of once per start
        position inside the run;
      * no longer raises ``IndexError`` when the input ends with a noun.

    Side effect: writes one noun sequence per line to ``noun_seq.txt`` in the
    current working directory, overwriting any existing file.

    Returns:
        list: concatenated noun sequences in order of appearance.
    """
    morphines, _ = morphines_read()
    noun_seq = []
    with open("noun_seq.txt", "w") as fp:
        for seq in _noun_runs(morphines):
            noun_seq.append(seq)
            fp.write(seq + "\n")
    return noun_seq