# Example #1
0
# <codecell>

# Deal with suffixes
# Strip suffixes: sentence-tokenize, then word-tokenize, lowercase each
# token, and print its WordNet base form when one exists.
# Fix: use print() (function form) instead of the Python-2 print
# statement — identical output for a single argument on Python 2, and
# consistent with the print() calls used later in this file.
for sent in nltk.sent_tokenize(text):
    for word in nltk.word_tokenize(sent):
        word = word.lower()
        # wordnet.morphy() returns None for unknown forms;
        # fall back to the word itself in that case.
        print(wordnet.morphy(word) or word)

# <codecell>

from metanl import english

# <codecell>

# Deal with even more suffixes
# Normalize `text` with metanl's English pipeline (handles more suffixes
# than WordNet's morphy) and print each normalized word.
# Fix: print() function form instead of the Python-2 print statement —
# same output for one argument, and Python-3 compatible.
for word in english.normalize_list(text):
    print(word)

# <codecell>



########NEW FILE########
__FILENAME__ = 2 - Interesting n-grams
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

from nltk.book import *
# Example #2
0
# Sample Japanese sentence used to demonstrate tokenization below.
text = 'この文も、言葉で構成されています'
# Translation: "This sentence is also made of words"

# <codecell>

# Run NLTK's word tokenizer over the Japanese sentence and show every
# token it produces, one per line.
for token in nltk.word_tokenize(text):
    print(token)

# <codecell>

from metanl import japanese

# <codecell>

# Normalize the same sentence with metanl's Japanese pipeline and show
# each normalized word, one per line.
for normalized in japanese.normalize_list(text):
    print(normalized)

# <codecell>

# English sample sentence for the suffix-handling demo below.
text2 = 'You might be wondering whether we can deal with suffixes in English'

# <codecell>

from metanl import english

# <codecell>

# Bare expression: in the notebook, the cell displays the returned list
# of normalized words directly (no print needed).
english.normalize_list(text2)

# <codecell>