Exemplo n.º 1
0
 def test_tagged_words(self):
     """The default Indian corpus file starts with the expected (word, tag) pairs."""
     expected = [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')]
     observed = indian.tagged_words()[:3]
     self.assertEqual(observed, expected)
 def test_tagged_words(self):
     """The corpus reader yields the known first three (word, tag) pairs."""
     self.assertEqual(
         indian.tagged_words()[:3],
         [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')],
     )
import nltk
from nltk.corpus import indian

# Frequency distribution of POS tags across the default Indian corpus file.
indian_pos = indian.tagged_words()

# NOTE(review): tagset='universal' reportedly fails here — the indian corpus
# may lack a universal-tagset mapping file; confirm before relying on it.
tag_fd = nltk.FreqDist(tag for (word, tag) in indian_pos)
# most_common(5) returns the five highest-frequency tags directly instead of
# materializing the full sorted list and slicing it.
print(tag_fd.most_common(5))

# Cumulative frequency plot of all tags (pops up a matplotlib window).
tag_fd.plot(cumulative=True)
Exemplo n.º 4
0
 def test_tagged_words(self):
     """Sanity-check the leading tagged words of the Indian corpus."""
     head = indian.tagged_words()[:3]
     want = [("মহিষের", "NN"), ("সন্তান", "NN"), (":", "SYM")]
     self.assertEqual(head, want)
Exemplo n.º 5
0
                 i,
                 format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


# Build an 80/20 train/test split of the Marathi POS corpus and featurize both
# halves with transformDataset (defined elsewhere in this file).

# Plain and tagged sentence views of the same corpus file.
marathi_sent = indian.sents('marathi_pos_rad_3NOV17.pos')
mpos = indian.tagged_sents('marathi_pos_rad_3NOV17.pos')
# presumably sklearn.utils.shuffle — randomizes sentence order; verify import
mp = shuffle(mpos)
# 80% cut point; uses len(marathi_sent), which should equal len(mpos) since
# both views read the same file — TODO confirm
size = int(len(marathi_sent) * 0.8)
# Flat list of tags over all tokens, used for the tag inventory and default tag.
tags = [
    tag for (word, tag) in indian.tagged_words('marathi_pos_rad_3NOV17.pos')
]
print(np.unique(tags))
#print("no. of tags=",len(nltk.FreqDist(tags)))
# Most frequent tag in the corpus — the natural fallback/default tag.
defaultTag = nltk.FreqDist(tags).max()

#print(defaultTag)
# Note: the split slices the SHUFFLED tagged sentences (mp), so train/test
# composition changes from run to run unless shuffle is seeded.
train_sents = mp[:size]
#print(len(train_sents))
test_sents = mp[size:]

print(marathi_sent[0])
# transformDataset is expected to return (features, labels) — defined elsewhere.
trainFeatures, trainLabels = transformDataset(train_sents)

testFeatures, testLabels = transformDataset(test_sents)
print("lengths of features")
Exemplo n.º 6
0
                i] == '':  #In case there are no matching entries between the transition tags and emission tags, we choose the most frequent emission tag
            output_li[i] = max(di_emission_probs, key=itemgetter(1))[0]

    return output_li


#tup = fn_train()
#dict2_tag_follow_tag_ = tup[0]
#dict2_word_tag = tup[1]
#dict_word_tag_baseline = tup[2]

if __name__ == "__main__":

    k = 5
    #to shuffle sentences
    mp = indian.tagged_words('marathi_pos_rad_3NOV17.pos')
    marathi_sent = shuffle(mp)
    print("length of tagged words=", len(marathi_sent))
    size = int(len(marathi_sent) * 0.8)
    print("size=", size)
    mtrain1 = marathi_sent[:size]
    print("len of mtrain=", len(mtrain1))
    test = marathi_sent[size:]
    print("len of mtrain=", len(test))
    # without shufle
    #marathi_sent= indian.tagged_words('marathi_pos_rad_3NOV17.pos')
    r = len(mtrain1) / k
    l = len(mtrain1)
    score = []
    for i in range(k):
        test_set = mtrain1[int(r * i):int(r * i + r)]
Exemplo n.º 7
0
 def test_tagged_words(self):
     """indian.tagged_words() begins with the expected Bengali tokens."""
     prefix = [("মহিষের", "NN"), ("সন্তান", "NN"), (":", "SYM")]
     self.assertEqual(indian.tagged_words()[:3], prefix)
Exemplo n.º 8
0
# Demonstration of NLTK corpus readers: plain, POS-tagged, paragraph, and
# chunked views. The commented nltk.download(...) lines show which corpora
# must be fetched once before the corresponding reader call works.
# nltk.download('state_union')
print(nltk.corpus.state_union.words())
# nltk.download('webtext')
print(nltk.corpus.webtext.words())
# tagged corpora
print(brown.words())
print(brown.tagged_words())
print(brown.sents())  # doctest: +ELLIPSIS
print(brown.tagged_sents())  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
print(brown.paras(
    categories='reviews'))  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
print(brown.tagged_paras(
    categories='reviews'))  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# nltk.download('indian')
print(indian.words())  # doctest: +SKIP
print(indian.tagged_words())  # doctest: +SKIP
# nltk.download('universal_tagset')
# tagset='universal' maps the corpus-specific tags onto the 12-tag universal set.
print(brown.tagged_sents(
    tagset='universal'))  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
print(conll2000.tagged_words(
    tagset='universal'))  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# chunked corpora
print(conll2000.sents())  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# chunked_sents() yields nltk.Tree objects; print shows the bracketed parse.
for tree in conll2000.chunked_sents()[:2]:
    print(tree)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# nltk.download('conll2002')
print(conll2002.sents())  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
for tree in conll2002.chunked_sents()[:2]:
    print(tree)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# nltk.download('semcor')
print(semcor.words())