Example #1
 import re
 from typing import List

 def word_tokenize(text: str) -> List[str]:
     # Alternative: words = nltk.word_tokenize(sample)
     # Split the text into sentences, then split each sentence on whitespace.
     sentences = NlpProcessor.get_sentences(text)
     words = [
         item for sublist in sentences
         for item in re.split(r'\s+', sublist)
     ]
     # Drop empty strings left over from leading/trailing whitespace.
     return Helpers.remove_empty_list_items(words)
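For context, a minimal standalone sketch of the same flatten-and-split pattern, with the project helpers (NlpProcessor.get_sentences, Helpers.remove_empty_list_items) inlined as plain expressions; the function name here is illustrative, not part of the original module:

 import re
 from typing import List

 def word_tokenize_standalone(text: str) -> List[str]:
     # Inline the sentence split (see Example #2) and the empty-item filter.
     sentences = [s.strip() for s in re.split(r'[.!?]', text)]
     words = [w for s in sentences for w in re.split(r'\s+', s)]
     return [w for w in words if w]

 # word_tokenize_standalone("Hello world. How are you?")
 # -> ['Hello', 'world', 'How', 'are', 'you']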
Example #2
 import re
 from typing import List

 def get_sentences(text: str) -> List[str]:
     # Split on sentence-ending punctuation, then trim surrounding whitespace.
     sentences = re.split(r'[.!?]', text)
     sentences = [x.strip() for x in sentences]
     return Helpers.remove_empty_list_items(sentences)
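A quick illustration of why the empty-item filter matters here: re.split leaves an empty string after closing punctuation (the input below is hypothetical):

 import re

 print(re.split(r'[.!?]', "One. Two! Three?"))
 # -> ['One', ' Two', ' Three', '']
 # After strip() and the empty-item filter: ['One', 'Two', 'Three']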
Example #3
    def get_sentences(self):
        # Lazily compute the sentence list on first access and cache it.
        if self._sentences is None:
            self._sentences = Helpers.remove_empty_list_items(
                NlpProcessor.get_sentences(self._all_text))

        return self._sentences
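The same memoization pattern can also be expressed with functools.cached_property; a minimal sketch, assuming the NlpProcessor and Helpers classes from the earlier examples (the Document class name is hypothetical):

 from functools import cached_property
 from typing import List

 class Document:
     def __init__(self, all_text: str):
         self._all_text = all_text

     @cached_property
     def sentences(self) -> List[str]:
         # Computed once on first access, then stored on the instance.
         return Helpers.remove_empty_list_items(
             NlpProcessor.get_sentences(self._all_text))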