Example #1
def gen_content_ne(self):
    # Extract named entities from the tokenized content.
    content_tokens = self._pure_tokenize_content()
    names = TextPreprocesser.gen_ner(content_tokens)
    return names
Example #2
def gen_title_ne(self):
    # Extract named entities from the tokenized title.
    title_tokens = self._pure_tokenize_title()
    names = TextPreprocesser.gen_ner(title_tokens)
    return names
Example #3
def gen_tokenized_content(self):
    # Return the content tokens after TextPreprocesser.filter is applied.
    tokens = self._pure_tokenize_content()
    tokens = TextPreprocesser.filter(tokens)
    return tokens
Example #4
def gen_tokenized_title(self):
    # Return the title tokens after TextPreprocesser.filter is applied.
    tokens = self._pure_tokenize_title()
    tokens = TextPreprocesser.filter(tokens)
    return tokens
Example #5
def _pure_tokenize_content(self):
    # Tokenize the content once and cache the result for later calls.
    if self._tokenized_content is None:
        self._tokenized_content = TextPreprocesser.tokenizor(self.content)
    return self._tokenized_content
Example #6
def _pure_tokenize_title(self):
    # Tokenize the title once and cache the result for later calls.
    if self._tokenized_title is None:
        self._tokenized_title = TextPreprocesser.tokenizor(self.title)
    return self._tokenized_title