Пример #1
0
import mecab
import ex36
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

if __name__ == "__main__":
    # Count the morphemes parsed from the MeCab output, then order the
    # (morpheme, count) pairs from the most to the least frequent.
    frequencies = ex36.count(mecab.formatter("neko.txt.mecab"))
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)

    # x axis: 1-based frequency rank; y axis: the count at that rank.
    ranks = list(range(1, len(ranked) + 1))
    counts = [count for _, count in ranked]

    # Both axes are logarithmic, giving a log-log rank/frequency plot.
    plt.xscale("log")
    plt.yscale("log")
    plt.plot(ranks, counts)

    # The title and axis labels are Japanese, so an explicit font file is
    # passed to each of them.
    japanese_font = FontProperties(
        fname='/Users/yushi/Library/Fonts/IPAfont00303/ipag.ttf',
        size=14,
    )
    plt.title('両対数グラフ', fontproperties=japanese_font)
    plt.xlabel('出現頻度順位(log)', fontproperties=japanese_font)
    plt.ylabel('出現頻度(log)', fontproperties=japanese_font)

    plt.show()
Пример #2
0
import mecab

# Group the surface forms of consecutive nouns ("名詞") into runs, then
# print every run made of two or more nouns as one concatenated string.
morphemes = mecab.formatter("neko.txt.mecab")
result = []
in_noun_run = False
for token in morphemes:
    is_noun = token["pos"] == "名詞"
    if is_noun and in_noun_run:
        # Still inside a run: extend the most recent group.
        result[-1].append(token["surface"])
    elif is_noun:
        # A noun right after a non-noun (or at the start) opens a new group.
        result.append([token["surface"]])
    in_noun_run = is_noun

for run in result:
    # Single nouns are not a "sequence"; skip them.
    if len(run) < 2:
        continue
    print("".join(run))
Пример #3
0
import mecab

# Collect the base form of every morpheme whose "pos" is "名詞" and whose
# "pos1" is "サ変接続", in the order the formatter yields them.
# The result is deliberately NOT named `list`: rebinding that name at module
# level would shadow the builtin for all code that follows.
sahen_nouns = [
    morpheme["base"] for morpheme in mecab.formatter("neko.txt.mecab")
    if morpheme["pos"] == "名詞" and morpheme["pos1"] == "サ変接続"
]

# Only the first 20 matches are printed, one per line.
sahen_nouns = sahen_nouns[:20]
for base_form in sahen_nouns:
    print(base_form)
Пример #4
0
import mecab

# Gather the surface form of every verb ("動詞") in the order the formatter
# yields the morphemes.
vList = []
for token in mecab.formatter("neko.txt.mecab"):
    if token["pos"] != "動詞":
        continue
    vList.append(token["surface"])

# Keep only the first 20 verbs and print them one per line.
del vList[20:]
for surface in vList:
    print(surface)