def extract_verb_surface():
    """Collect the surface form of every verb and save them to a file.

    Obtains the morpheme records from ``morphines_read()``, keeps the
    ``"surface"`` value of each record whose ``"pos"`` equals ``"動詞"``,
    and writes those values, one per line, to ``verb_surface_31.txt``.

    Returns:
        list: The verb surface forms in the order they were encountered.
    """
    tokens, _ = morphines_read()
    verbs = []
    with open("verb_surface_31.txt", "w") as out:
        for token in tokens:
            # Skip everything that is not tagged as a verb.
            if token["pos"] != "動詞":
                continue
            verbs.append(token["surface"])
            out.write(token["surface"] + "\n")
    return verbs
def extract_sahen_noun():
    """Extract sahen-connection nouns and save them to a file.

    Obtains the morpheme records from ``morphines_read()``, keeps the
    ``"surface"`` value of each record whose ``"pos"`` is ``"名詞"`` and
    whose ``"pos1"`` is ``"sahen"``, and writes those values, one per line,
    to ``sahen_noun_33.txt``.

    Returns:
        list: The matching surface forms in the order they were encountered.
    """
    tokens, _ = morphines_read()
    matches = []
    with open("sahen_noun_33.txt", "w") as out:
        for token in tokens:
            # NOTE(review): the sub-category is compared against the literal
            # "sahen"; confirm that morphines_read() really stores that value
            # in "pos1" (rather than the tagger's own label) -- otherwise
            # nothing will ever match.
            is_sahen_noun = token["pos"] == "名詞" and token["pos1"] == "sahen"
            if not is_sahen_noun:
                continue
            matches.append(token["surface"])
            out.write(token["surface"] + "\n")
    return matches
def extract_A_B():
    """Extract noun phrases in which two nouns are joined by "の".

    Slides a three-token window over the morpheme records returned by
    ``morphines_read()``. Whenever the window is noun / surface ``"の"`` /
    noun, the three surface forms are concatenated, collected, and written
    on their own line to ``AのB.txt``.

    Returns:
        list: The concatenated "AのB" phrases in order of occurrence.
    """
    tokens, _ = morphines_read()
    phrases = []
    with open("AのB.txt", "w") as out:
        # Pair every token with its two successors; zip stops at the last
        # position where a full three-token window still exists.
        for left, middle, right in zip(tokens, tokens[1:], tokens[2:]):
            if left["pos"] != "名詞":
                continue
            if middle["surface"] != "の":
                continue
            if right["pos"] != "名詞":
                continue
            phrase = left["surface"] + middle["surface"] + right["surface"]
            phrases.append(phrase)
            out.write(phrase + "\n")
    return phrases
def _longest_noun_runs(morphines):
    """Return the concatenated surfaces of each maximal run of 2+ nouns."""
    runs = []
    current = []  # surfaces of the noun run currently being scanned

    for morphine in morphines:
        if morphine["pos"] == "名詞":
            current.append(morphine["surface"])
            continue
        # A non-noun ends the run; keep it only if it held at least two nouns.
        if len(current) >= 2:
            runs.append("".join(current))
        current = []

    # A run that reaches the very end of the input has no terminating token.
    if len(current) >= 2:
        runs.append("".join(current))
    return runs


def noun_sequence():
    """Extract runs of consecutive nouns (longest match) and save them.

    Obtains the morpheme records from ``morphines_read()``, finds every
    maximal run of two or more consecutive records whose ``"pos"`` is
    ``"名詞"``, joins the ``"surface"`` values of each run into one string,
    and writes the strings, one per line, to ``noun_seq.txt``.

    Each run is reported exactly once, starts with its first noun, and
    contains only nouns. A run that extends to the end of the input is
    handled without indexing past the last record.

    Returns:
        list: The concatenated noun sequences in order of occurrence.
    """
    morphines, _ = morphines_read()
    noun_seq = _longest_noun_runs(morphines)
    with open("noun_seq.txt", "w") as fp:
        for seq in noun_seq:
            fp.write(seq + "\n")
    return noun_seq