def word_tokenize(text: str) -> List[str]:
    """Split *text* into whitespace-delimited word tokens.

    The text is first broken into sentences via
    ``NlpProcessor.get_sentences``; each sentence is then split on runs of
    whitespace, and empty tokens are discarded before returning.
    """
    tokens: List[str] = []
    for sentence in NlpProcessor.get_sentences(text):
        tokens.extend(re.split(r'\s+', sentence))
    return Helpers.remove_empty_list_items(tokens)
def get_sentences(text: str) -> List[str]:
    """Split *text* into sentences at '.', '!' and '?' terminators.

    Each resulting fragment is stripped of surrounding whitespace, and
    empty fragments are dropped from the returned list.
    """
    fragments = re.split(r'[.!?]', text)
    stripped = list(map(str.strip, fragments))
    return Helpers.remove_empty_list_items(stripped)
def get_sentences(self):
    """Return the sentences of ``self._all_text``, computed lazily.

    The first call splits the text with ``NlpProcessor.get_sentences``
    (filtering out empty entries) and caches the result in
    ``self._sentences``; subsequent calls return the cached list.
    """
    if self._sentences is not None:
        return self._sentences
    raw = NlpProcessor.get_sentences(self._all_text)
    self._sentences = Helpers.remove_empty_list_items(raw)
    return self._sentences