def demo():
    """Preview the Brown corpus: pretty-print the first five raw and
    first five tagged sentences of section 'a'."""
    from nltk_lite.corpora import brown
    from itertools import islice
    from pprint import pprint

    raw_head = list(islice(brown.raw('a'), 0, 5))
    pprint(raw_head)
    tagged_head = list(islice(brown.tagged('a'), 0, 5))
    pprint(tagged_head)
def learn(self, listofsentences=None, n=2000):
    """Build self.learned from a corpus sample.

    Populates ``self.learned``, a ``defaultdict(mydict)`` mapping
    ``self.specialhash(word)`` to a per-bucket mapping of lowercased
    word -> occurrence count (presumably ``mydict`` yields a mapping
    that defaults missing counts to 0 -- confirm against its definition).

    Parameters:
        listofsentences: iterable of sentences, each a sequence of words.
            When omitted (or an empty list is passed), falls back to the
            full Brown corpus via ``brown.raw()``.
        n: learn from at most the first *n* sentences.
    """
    self.learned = defaultdict(mydict)
    # BUG FIX: the original signature used a mutable default argument
    # (listofsentences=[]), a single list object shared across all calls.
    # A None sentinel is the safe idiom; the explicit == [] test keeps
    # the original "empty list means use Brown" behavior for callers.
    if listofsentences is None or listofsentences == []:
        listofsentences = brown.raw()
    for i, sent in enumerate(listofsentences):
        if i >= n:  # limit learning to the first n sentences of the corpus
            break
        for word in sent:
            self.learned[self.specialhash(word)][word.lower()] += 1
def demo(): import tnt from nltk_lite.corpora import brown sents = list(brown.tagged()) test = list(brown.raw()) # create and train the tagger tagger = tnt.Tnt() tagger.train(sents[200:1000]) # tag some data tagged_data = tagger.tagdata(test[100:120]) # print results for j in range(len(tagged_data)): s = tagged_data[j] t = sents[j + 100] for i in range(len(s)): print s[i], '--', t[i] print
def demo(): import tnt from nltk_lite.corpora import brown sents = list(brown.tagged()) test = list(brown.raw()) # create and train the tagger tagger = tnt.Tnt() tagger.train(sents[200:1000]) # tag some data tagged_data = tagger.tagdata(test[100:120]) # print results for j in range(len(tagged_data)): s = tagged_data[j] t = sents[j+100] for i in range(len(s)): print s[i],'--', t[i] print