Example #1
    def to_list_of_tokenized_sentences(self, text):
        """Create list of list of tokens from a text.
        Each list of tokens correspond to a sentence.

        Parameters
        ----------
        text : str
            The text to split into sentences and tokenize.

        Returns
        -------
        list of list of str
            One inner list of tokens per sentence.
        """
        sentences_list = split_message_to_sentences(text)
        # Tokenize each non-empty sentence with the instance's tokenizer.
        tokenized_sentences_list = [
            self.tokenizer._tokenize(sentence) for sentence in sentences_list
            if sentence != ""
        ]
        return tokenized_sentences_list
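
For context, here is a minimal harness showing how this method could be exercised. The external helper split_message_to_sentences and the tokenizer object are not part of the snippet, so both are stubbed with NLTK equivalents below; this wiring is an illustrative assumption, not the original module's code.

    # Illustrative harness (assumptions: the sentence splitter and the
    # tokenizer are NLTK-based stubs; the real module defines its own).
    import nltk

    def split_message_to_sentences(text):
        # Stub sentence splitter; run nltk.download('punkt') once beforehand.
        return nltk.sent_tokenize(text)

    class StubTokenizer:
        def _tokenize(self, sentence):
            return nltk.word_tokenize(sentence)

    class Preprocessor:
        def __init__(self):
            self.tokenizer = StubTokenizer()

        def to_list_of_tokenized_sentences(self, text):
            # Same logic as Example #1 above.
            sentences = split_message_to_sentences(text)
            return [self.tokenizer._tokenize(s) for s in sentences if s != ""]

    print(Preprocessor().to_list_of_tokenized_sentences("Hello world. Bye!"))
    # -> [['Hello', 'world', '.'], ['Bye', '!']]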
Example #2
    def to_list_of_tokenized_sentences(self, text):
        """Create list of list of tokens from a text.
        Each list of tokens correspond to a sentence.

        Parameters
        ----------
        text : str
            The text to split into sentences and tokenize.

        Returns
        -------
        list of list of str
            One inner list of tokens per sentence.
        """
        sentences_list = split_message_to_sentences(text)
        # A raw string avoids invalid escape-sequence warnings in the pattern;
        # the regexp keeps word runs joined by ?, -, ', " or _ as one token.
        tokenized_sentences_list = [
            nltk.regexp_tokenize(sentence, pattern=r"\w+(?:[?\-'\"_]\w+)*")
            for sentence in sentences_list
            if sentence != ""
        ]
        return tokenized_sentences_list
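
To see which character runs the regexp keeps together as one token, here is a small standalone check; the sample sentence is illustrative, while nltk.regexp_tokenize is the same function the snippet calls.

    # Standalone check of the pattern from Example #2 (sample input is mine).
    import nltk

    pattern = r"\w+(?:[?\-'\"_]\w+)*"
    print(nltk.regexp_tokenize("Don't split re-used tokens!", pattern))
    # -> ["Don't", 'split', 're-used', 'tokens']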