def demo():
    """Pretty-print the first five items from two Brown corpus readers.

    Calls ``brown.raw('a')`` and then ``brown.tagged('a')`` and, for each,
    pretty-prints a list holding at most the first five items yielded.
    """
    from nltk_lite.corpora import brown
    from itertools import islice
    from pprint import pprint

    def show_head(stream, limit=5):
        # Materialise only the leading items; the rest of the stream is
        # never consumed.
        pprint(list(islice(stream, limit)))

    show_head(brown.raw('a'))
    show_head(brown.tagged('a'))
示例#2
0
 def learn(self, listofsentences=[], n=2000):
     """Rebuild ``self.learned`` from at most ``n`` sentences.

     ``self.learned`` is replaced by a fresh ``defaultdict(mydict)``.  For
     every word of each accepted sentence, the entry found under
     ``self.specialhash(word)`` and then ``word.lower()`` is incremented
     by one.

     listofsentences -- iterable of sentences, each an iterable of words;
                        when it equals ``[]`` the sentences are taken from
                        ``brown.raw()`` instead.
     n               -- sentences at index ``n`` and beyond are ignored.

     NOTE(review): ``mydict`` is defined outside this block; presumably it
     builds a mapping whose missing keys start at 0 -- confirm.
     """
     self.learned = defaultdict(mydict)
     sentences = brown.raw() if listofsentences == [] else listofsentences
     for position, sentence in enumerate(sentences):
         if position >= n:
             # Stop pulling from the source once n sentences were used.
             break
         for token in sentence:
             bucket = self.learned[self.specialhash(token)]
             bucket[token.lower()] += 1
示例#3
0
def learn(self, listofsentences=[], n=2000):
	"""Rebuild ``self.learned`` from at most ``n`` sentences.

	``self.learned`` is replaced by a fresh ``defaultdict(mydict)``.  For
	every word of each accepted sentence, the entry found under
	``self.specialhash(word)`` and then ``word.lower()`` is incremented
	by one.

	listofsentences -- iterable of sentences, each an iterable of words;
	                   when it equals ``[]`` the sentences are taken from
	                   ``brown.raw()`` instead.  The default list is only
	                   compared, never mutated.
	n               -- sentences at index ``n`` and beyond are ignored.

	NOTE(review): ``mydict`` is defined outside this block; presumably it
	builds a mapping whose missing keys start at 0 -- confirm.
	"""
	self.learned = defaultdict(mydict)
	if listofsentences == []:
		listofsentences = brown.raw()
	for i, sent in enumerate(listofsentences):
		if i >= n:  # Limit to the first n sentences of the corpus
			break
		# The word loop must live inside the sentence loop: run after it,
		# it would see only the last sentence bound by enumerate (the one
		# that triggered the break) and fail with NameError on empty input.
		for word in sent:
			self.learned[self.specialhash(word)][word.lower()] += 1
示例#4
0
def demo():
    import tnt
    from nltk_lite.corpora import brown
    sents = list(brown.tagged())
    test = list(brown.raw())

    # create and train the tagger
    tagger = tnt.Tnt()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print s[i], '--', t[i]
        print
示例#5
0
文件: tnt.py 项目: DrDub/icsisumm
def demo():
   import tnt
   from nltk_lite.corpora import brown
   sents = list(brown.tagged())
   test = list(brown.raw())

   # create and train the tagger
   tagger = tnt.Tnt()
   tagger.train(sents[200:1000])

   # tag some data
   tagged_data = tagger.tagdata(test[100:120])

   # print results
   for j in range(len(tagged_data)):
      s = tagged_data[j]
      t = sents[j+100]
      for i in range(len(s)):
         print s[i],'--', t[i]
      print