# Plot morpheme counts against their frequency rank on log-log axes.
import mecab
import ex36
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

if __name__ == "__main__":
    # ex36.count is defined elsewhere; presumably it maps each morpheme to
    # its number of occurrences (the axis labels below call it a frequency).
    frequency_table = ex36.count(mecab.formatter("neko.txt.mecab"))

    # Largest count first, so position 0 holds rank 1.
    ranked = sorted(
        frequency_table.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    ranks = list(range(1, len(ranked) + 1))
    counts = [pair[1] for pair in ranked]

    # Both axes are switched to logarithmic scale before the data is drawn.
    plt.xscale("log")
    plt.yscale("log")
    plt.plot(ranks, counts)

    # Font loaded from a hard-coded file path and applied to the Japanese
    # title and axis labels.
    japanese_font = FontProperties(
        fname='/Users/yushi/Library/Fonts/IPAfont00303/ipag.ttf',
        size=14,
    )
    plt.title('両対数グラフ', fontproperties=japanese_font)
    plt.xlabel('出現頻度順位(log)', fontproperties=japanese_font)
    plt.ylabel('出現頻度(log)', fontproperties=japanese_font)
    plt.show()
# Print every run of two or more consecutive nouns in neko.txt.mecab,
# with the surface forms of each run joined into a single string.
import itertools

import mecab


def iter_noun_phrases(morphemes):
    """Yield the joined surface forms of each run of consecutive nouns.

    Args:
        morphemes: Iterable of mappings that provide a "pos" key and a
            "surface" key.

    Yields:
        One string per maximal run of adjacent entries whose "pos" equals
        "名詞" (noun), made by concatenating their "surface" values in
        order. Runs consisting of a single noun are skipped.
    """
    runs = itertools.groupby(
        morphemes,
        key=lambda morpheme: morpheme["pos"] == "名詞",
    )
    for is_noun, run in runs:
        if not is_noun:
            # Non-noun stretches only serve to separate noun runs.
            continue
        surfaces = [morpheme["surface"] for morpheme in run]
        if len(surfaces) >= 2:
            yield "".join(surfaces)


for phrase in iter_noun_phrases(mecab.formatter("neko.txt.mecab")):
    print(phrase)
# Print the "base" value of the first 20 morphemes in neko.txt.mecab whose
# "pos" is "名詞" (noun) and whose "pos1" is "サ変接続".
import mecab

# Maximum number of matching morphemes to print.
MAX_RESULTS = 20

# Deliberately not named `list`: that would shadow the builtin for the rest
# of the module.
sahen_nouns = [
    morpheme["base"]
    for morpheme in mecab.formatter("neko.txt.mecab")
    if morpheme["pos"] == "名詞" and morpheme["pos1"] == "サ変接続"
]

for base in sahen_nouns[:MAX_RESULTS]:
    print(base)
# Print the "surface" value of the first 20 morphemes in neko.txt.mecab
# whose "pos" is "動詞" (verb).
import mecab

verb_surfaces = []
for morpheme in mecab.formatter("neko.txt.mecab"):
    if morpheme["pos"] == "動詞":
        verb_surfaces.append(morpheme["surface"])

# Only the first 20 matches are shown.
for surface in verb_surfaces[:20]:
    print(surface)