def test_mecab_analysys_load(self):
    """Load MeCab output and check that the parsed result is kept in ``data``.

    The expected value is a list with one entry per "EOS"-terminated
    sentence, each entry being a list of morpheme dicts.
    """
    mecab_output = (
        "吾輩\t名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ\n"
        "は\t助詞,係助詞,*,*,*,*,は,ハ,ワ\n"
        "EOS\n"
        "猫\t名詞,一般,*,*,*,*,猫,ネコ,ネコ\n"
        "で\t助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ\n"
        "ある\t助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル\n"
        "EOS"
    )
    analyzer = MecabAnalysys(mecab_output)
    analyzer.load()

    first_sentence = [
        {"surface": "吾輩", "base": "吾輩", "pos": "名詞", "pos1": "代名詞"},
        {"surface": "は", "base": "は", "pos": "助詞", "pos1": "係助詞"},
    ]
    second_sentence = [
        {"surface": "猫", "base": "猫", "pos": "名詞", "pos1": "一般"},
        {"surface": "で", "base": "だ", "pos": "助動詞", "pos1": "*"},
        {"surface": "ある", "base": "ある", "pos": "助動詞", "pos1": "*"},
    ]
    self.assertEqual([first_sentence, second_sentence], analyzer.data)
def test_mecab_analysys_to_dict(self):
    """Convert a single MeCab line for an ordinary verb into a morpheme dict."""
    line = "入っ\t動詞,自立,*,*,五段・ラ行,連用タ接続,入る,ハイッ,ハイッ"
    analyzer = MecabAnalysys(line)

    expected = {
        "surface": "入っ",
        "base": "入る",
        "pos": "動詞",
        "pos1": "自立",
    }
    self.assertEqual(expected, analyzer._to_dict(line))
def test_mecab_analysys_to_dict_comma(self):
    """Convert a MeCab line whose surface form is itself a comma."""
    line = ",\t名詞,サ変接続,*,*,*,*,*"
    analyzer = MecabAnalysys(line)

    expected = {
        "surface": ",",
        "base": "",
        "pos": "名詞",
        "pos1": "サ変接続",
    }
    self.assertEqual(expected, analyzer._to_dict(line))
def test_mecab_analysys_get_all_morphemes(self):
    """Check that the morphemes of every sentence come back as one flat list."""
    mecab_output = (
        "吾輩\t名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ\n"
        "は\t助詞,係助詞,*,*,*,*,は,ハ,ワ\n"
        "EOS\n"
        "猫\t名詞,一般,*,*,*,*,猫,ネコ,ネコ\n"
        "で\t助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ\n"
        "ある\t助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル\n"
        "EOS"
    )
    analyzer = MecabAnalysys(mecab_output)
    analyzer.load()

    # Same morphemes as in the two sentences above, without the per-sentence nesting.
    expected = [
        {"surface": "吾輩", "base": "吾輩", "pos": "名詞", "pos1": "代名詞"},
        {"surface": "は", "base": "は", "pos": "助詞", "pos1": "係助詞"},
        {"surface": "猫", "base": "猫", "pos": "名詞", "pos1": "一般"},
        {"surface": "で", "base": "だ", "pos": "助動詞", "pos1": "*"},
        {"surface": "ある", "base": "ある", "pos": "助動詞", "pos1": "*"},
    ]
    self.assertEqual(expected, analyzer.get_all_morphemes())
# 32. Base forms of verbs
# Extract the base form of every verb.
import os

from mecabAnalysys import MecabAnalysys


def main():
    """Write the base form of every verb in neko.txt.mecab to q32.txt, one per line."""
    base_dir = os.path.dirname(__file__)
    src = os.path.join(base_dir, r"../Output/Chapter4/neko.txt.mecab")
    with open(src, encoding="utf-8") as f:
        # Pass the text through unchanged. readlines() keeps each line's
        # terminator, so joining its result with "\n" would double every
        # newline and put an empty line between all MeCab records.
        ma = MecabAnalysys(f.read())
    ma.load()

    # Keep only verbs (pos == "動詞") and take their base form.
    disp = [m["base"] for m in ma.get_all_morphemes() if m["pos"] == "動詞"]

    output = os.path.join(base_dir, r"../Output/Chapter4/q32.txt")
    with open(output, mode="w", encoding="utf-8") as f:
        f.write("\n".join(disp))


if __name__ == "__main__":
    main()
# 30. Reading morphological analysis results
# Implement a program that reads the morphological analysis result (neko.txt.mecab).
# Store each morpheme in a mapping type keyed by surface form (surface), base form (base),
# part of speech (pos) and part-of-speech subcategory 1 (pos1),
# and represent one sentence as a list of morphemes (mapping type).
# Use the program written here for the remaining problems of Chapter 4.
import os

from mecabAnalysys import MecabAnalysys


def main():
    """Load neko.txt.mecab, then print ``len(ma.data)`` and ``ma.sentence(2)``."""
    src = os.path.join(os.path.dirname(__file__), r"../Output/Chapter4/neko.txt.mecab")
    with open(src, encoding="utf-8") as f:
        # Pass the text through unchanged. readlines() keeps each line's
        # terminator, so joining its result with "\n" would double every
        # newline and put an empty line between all MeCab records.
        ma = MecabAnalysys(f.read())
    ma.load()

    print(len(ma.data))
    print(ma.sentence(2))


if __name__ == "__main__":
    main()