示例#1
0
 def __init__(self, text):
     """Build the paragraph from raw text, one Sentence per tokenized piece.

     The stripped text is kept on ``self.text``. The raw ``text`` is split
     with ``Paragraph.sent_tokenizer``; each piece is stripped and pieces
     that end up empty are discarded. The ``pwords``, ``cwords`` and
     ``scwords`` of every sentence are then combined with ``catlist``
     into paragraph-level attributes of the same names.
     """
     self.text = text.strip()

     # Wrap each non-blank tokenizer output in a Sentence, preserving order.
     sentences = []
     for piece in Paragraph.sent_tokenizer.tokenize(text):
         trimmed = piece.strip()
         if trimmed:
             sentences.append(Sentence(trimmed))
     self.sents = sentences

     # Gather the per-sentence word lists and merge each kind via catlist.
     pwords_by_sent = [sent.pwords for sent in self.sents]
     cwords_by_sent = [sent.cwords for sent in self.sents]
     scwords_by_sent = [sent.scwords for sent in self.sents]
     self.pwords = catlist(pwords_by_sent)
     self.cwords = catlist(cwords_by_sent)
     self.scwords = catlist(scwords_by_sent)

     # Nothing has been part-of-speech tagged at construction time.
     self.pos_tagged = False
示例#2
0
 def __init__(self, text):
     """Build the object from raw text, one Paragraph per non-blank line.

     The stripped text is kept on ``self.text``. The raw ``text`` is split
     on newline characters; each line is stripped and blank lines are
     skipped. The ``sents``, ``pwords``, ``cwords`` and ``scwords`` of
     every paragraph are then combined with ``catlist`` into attributes
     of the same names on this object.
     """
     self.text = text.strip()

     # One Paragraph per line that still has content after stripping.
     paragraphs = []
     for line in text.split('\n'):
         trimmed = line.strip()
         if trimmed:
             paragraphs.append(Paragraph(trimmed))
     self.pars = paragraphs

     # Gather the per-paragraph lists and merge each kind via catlist.
     sents_by_par = [par.sents for par in self.pars]
     pwords_by_par = [par.pwords for par in self.pars]
     cwords_by_par = [par.cwords for par in self.pars]
     scwords_by_par = [par.scwords for par in self.pars]
     self.sents = catlist(sents_by_par)
     self.pwords = catlist(pwords_by_par)
     self.cwords = catlist(cwords_by_par)
     self.scwords = catlist(scwords_by_par)

     # Nothing has been part-of-speech tagged at construction time.
     self.pos_tagged = False
示例#3
0
 def pos_tag(self):
     """Tag every sentence, then collect their nouns on this object.

     Calls ``pos_tag()`` on each element of ``self.sents``. Once all
     sentences have been tagged, their ``nouns`` attributes are combined
     with ``catlist`` into ``self.nouns`` and ``self.pos_tagged`` is set
     to True.
     """
     # Tag everything first; nouns are only read after all tagging is done.
     for sentence in self.sents:
         sentence.pos_tag()

     nouns_by_sent = [sentence.nouns for sentence in self.sents]
     self.nouns = catlist(nouns_by_sent)
     self.pos_tagged = True