def to_list_of_tokenized_sentences(self, text):
    """Split a text into sentences and tokenize each one.

    Each inner list holds the tokens of one non-empty sentence, in order.

    Parameters
    ----------
    text : str
        Raw message text.

    Returns
    -------
    list of list of str
        One token list per non-empty sentence.
    """
    tokenized_sentences = []
    for sentence in split_message_to_sentences(text):
        # Skip empty sentences so no empty token list is emitted.
        if sentence != "":
            tokenized_sentences.append(self.tokenizer._tokenize(sentence))
    return tokenized_sentences
def to_list_of_tokenized_sentences(self, text):
    """Create a list of token lists from a text, one per sentence.

    Parameters
    ----------
    text : str
        Raw message text.

    Returns
    -------
    list of list of str
        One token list per non-empty sentence.
    """
    # FIX: the pattern was a non-raw string containing invalid escape
    # sequences ("\w", "\?", "\-"), which emit SyntaxWarning /
    # DeprecationWarning on modern CPython and will become errors.
    # The raw string below yields the exact same regex: runs of word
    # characters optionally joined by one of ? - ' " _.
    token_pattern = r"\w+(?:[?\-'\"_]\w+)*"
    sentences_list = split_message_to_sentences(text)
    return [
        nltk.regexp_tokenize(sentence, pattern=token_pattern)
        for sentence in sentences_list
        if sentence != ""  # skip empties so no empty token list is emitted
    ]