示例#1
0
 def test_mecab_analysys_load(self):
     """
     Load MeCab output text and keep the parsed result in ``data``.

     The input holds two sentences, each closed by an ``EOS`` line; the
     expected ``data`` is one list of morpheme dicts per sentence.
     """
     # Adjacent literals are concatenated at compile time into one string.
     mecab_text = (
         "吾輩\t名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ\n"
         "は\t助詞,係助詞,*,*,*,*,は,ハ,ワ\n"
         "EOS\n"
         "猫\t名詞,一般,*,*,*,*,猫,ネコ,ネコ\n"
         "で\t助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ\n"
         "ある\t助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル\n"
         "EOS"
     )
     analyzer = MecabAnalysys(mecab_text)
     analyzer.load()

     first_sentence = [
         {"surface": "吾輩", "base": "吾輩", "pos": "名詞", "pos1": "代名詞"},
         {"surface": "は", "base": "は", "pos": "助詞", "pos1": "係助詞"},
     ]
     second_sentence = [
         {"surface": "猫", "base": "猫", "pos": "名詞", "pos1": "一般"},
         {"surface": "で", "base": "だ", "pos": "助動詞", "pos1": "*"},
         {"surface": "ある", "base": "ある", "pos": "助動詞", "pos1": "*"},
     ]
     expected = [first_sentence, second_sentence]
     self.assertEqual(expected, analyzer.data)
示例#2
0
 def test_mecab_analysys_to_dict(self):
     """
     Parse a single MeCab line for an ordinary verb.

     ``_to_dict`` is expected to return the surface form, base form,
     part of speech and first part-of-speech subcategory of the line.
     """
     line = "入っ\t動詞,自立,*,*,五段・ラ行,連用タ接続,入る,ハイッ,ハイッ"
     analyzer = MecabAnalysys(line)
     actual = analyzer._to_dict(line)
     expected = {
         "surface": "入っ",
         "base": "入る",
         "pos": "動詞",
         "pos1": "自立",
     }
     self.assertEqual(expected, actual)
示例#3
0
 def test_mecab_analysys_to_dict_comma(self):
     """
     Parse a MeCab line whose surface form is itself a comma.

     The expected result keeps "," as the surface and has an empty
     string as the base form.
     """
     line = ",\t名詞,サ変接続,*,*,*,*,*"
     analyzer = MecabAnalysys(line)
     actual = analyzer._to_dict(line)
     expected = {
         "surface": ",",
         "base": "",
         "pos": "名詞",
         "pos1": "サ変接続",
     }
     self.assertEqual(expected, actual)
示例#4
0
 def test_mecab_analysys_get_all_morphemes(self):
     """
     Return the morphemes of every sentence in ``data`` as one flat list.

     The two-sentence input must come back as a single list of five
     morpheme dicts, in input order, with no per-sentence nesting.
     """
     # Adjacent literals are concatenated at compile time into one string.
     mecab_text = (
         "吾輩\t名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ\n"
         "は\t助詞,係助詞,*,*,*,*,は,ハ,ワ\n"
         "EOS\n"
         "猫\t名詞,一般,*,*,*,*,猫,ネコ,ネコ\n"
         "で\t助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ\n"
         "ある\t助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル\n"
         "EOS"
     )
     analyzer = MecabAnalysys(mecab_text)
     analyzer.load()

     expected = [
         {"surface": "吾輩", "base": "吾輩", "pos": "名詞", "pos1": "代名詞"},
         {"surface": "は", "base": "は", "pos": "助詞", "pos1": "係助詞"},
         {"surface": "猫", "base": "猫", "pos": "名詞", "pos1": "一般"},
         {"surface": "で", "base": "だ", "pos": "助動詞", "pos1": "*"},
         {"surface": "ある", "base": "ある", "pos": "助動詞", "pos1": "*"},
     ]
     self.assertEqual(expected, analyzer.get_all_morphemes())
示例#5
0
# 32. 動詞の原形

# 動詞の原形をすべて抽出せよ.

import os
from mecabAnalysys import MecabAnalysys

if __name__ == "__main__":
    # Both the input and the output live under ../Output/Chapter4 relative
    # to this script, so resolve the script directory once.
    base_dir = os.path.dirname(__file__)

    src = os.path.join(base_dir, r"../Output/Chapter4/neko.txt.mecab")
    with open(src, encoding="utf-8") as f:
        # Pass the file text through unchanged. readlines() keeps each
        # line's trailing newline, so re-joining the lines with "\n" doubled
        # every line break and put a blank line between all MeCab rows.
        # NOTE(review): MecabAnalysys is defined elsewhere; confirm it does
        # not depend on those extra blank lines.
        ma = MecabAnalysys(f.read())
    ma.load()

    # Keep only the verbs ("動詞") and collect their base (dictionary) forms.
    morph = ma.get_all_morphemes()
    verbs = [m for m in morph if m["pos"] == "動詞"]
    disp = [v["base"] for v in verbs]

    # Write one base form per line.
    output = os.path.join(base_dir, r"../Output/Chapter4/q32.txt")
    with open(output, mode="w", encoding="utf-8") as f:
        f.write("\n".join(disp))
示例#6
0
# 30. 形態素解析結果の読み込み

# 形態素解析結果(neko.txt.mecab)を読み込むプログラムを実装せよ.
# ただし,各形態素は表層形(surface),基本形(base),品詞(pos),品詞細分類1(pos1)をキーとするマッピング型に格納し,
# 1文を形態素(マッピング型)のリストとして表現せよ.第4章の残りの問題では,ここで作ったプログラムを活用せよ.

import os
from mecabAnalysys import MecabAnalysys

if __name__ == "__main__":
    # The MeCab output file is located relative to this script.
    src = os.path.join(os.path.dirname(__file__),
                       r"../Output/Chapter4/neko.txt.mecab")
    with open(src, encoding="utf-8") as f:
        # Pass the file text through unchanged. readlines() keeps each
        # line's trailing newline, so re-joining the lines with "\n" doubled
        # every line break and put a blank line between all MeCab rows.
        # NOTE(review): MecabAnalysys is defined elsewhere; confirm it does
        # not depend on those extra blank lines.
        ma = MecabAnalysys(f.read())
    # Parsing works on the in-memory text, so the file can be closed first.
    ma.load()

    # Show the number of entries in ma.data and the result of sentence(2).
    print(len(ma.data))
    print(ma.sentence(2))