def test_remove_stop_words(self):
    """Filtering an English token list should leave only the content words."""
    manager = StopWordsManager()
    sample_tokens = ['this', 'is', 'a', 'test', 'string']

    filtered = manager.remove_stopwords('english', sample_tokens)

    # This example list of words should end up with only two elements
    self.assertEqual(len(filtered), 2)
    self.assertIn('test', list(filtered))
    self.assertIn('string', list(filtered))
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total similarity between
    each word in each sentence.

    :param statement: An object exposing a ``text`` attribute.
    :param other_statement: The statement to compare against.
    :returns: Sum over all word pairs of the best synset path similarity.
    """
    from chatterbot.utils.pos_tagger import POSTagger
    from chatterbot.utils.stop_words import StopWordsManager
    from chatterbot.utils.word_net import Wordnet
    import itertools

    wordnet = Wordnet()
    tagger = POSTagger()
    stopwords = StopWordsManager()

    def get_tokens(text, exclude_stop_words=True):
        """
        Takes a string and converts it to a tuple of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True.
        """
        lower = text.lower()
        tokens = tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = stopwords.words('english')
            tokens = set(tokens) - set(excluded_words)

        return tokens

    tokens1 = get_tokens(statement.text)
    tokens2 = get_tokens(other_statement.text)

    total_similarity = 0

    # Get the highest matching value for each possible combination of words
    for word1, word2 in itertools.product(tokens1, tokens2):

        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        # Only words known to WordNet contribute to the score
        if synsets1 and synsets2:

            max_similarity = 0

            # Get the highest similarity for each combination of synsets
            for synset_a, synset_b in itertools.product(synsets1, synsets2):
                similarity = synset_a.path_similarity(synset_b)

                # path_similarity may return None for unrelated synsets
                if similarity and similarity > max_similarity:
                    max_similarity = similarity

            # Add the most similar path value to the total
            total_similarity += max_similarity

    return total_similarity
def get_tokens(self, text, language='english', exclude_stop_words=True):
    """
    Takes a string and converts it to a tuple of each word.
    Skips common stop words such as ("is, the, a, ...")
    if 'exclude_stop_words' is True.
    """
    from chatterbot.utils.stop_words import StopWordsManager
    from nltk import word_tokenize

    manager = StopWordsManager()

    # Tokenize the lower-cased text into individual words
    word_list = word_tokenize(text.lower())

    # Remove all stop words from the list of word tokens
    if exclude_stop_words:
        word_list = manager.remove_stopwords(language, word_list)

    return word_list
def __init__(self, **kwargs): super(DeveloperAssistant, self).__init__(**kwargs) # Initializing variables self.program_data = {"name": "", "path": ""} self.stage = "" self.data_dir = "" self.data = self.read_program_file() self.stopwords = StopWordsManager() self.tagger = POSTagger() self.conversation = []
class StopWordsTestCase(TestCase):
    """Tests for the stop-word removal utility."""

    def setUp(self):
        super(StopWordsTestCase, self).setUp()
        from chatterbot.utils.stop_words import StopWordsManager
        self.stopwords_manager = StopWordsManager()

    def test_remove_stop_words(self):
        sample = ['this', 'is', 'a', 'test', 'string']

        remaining = self.stopwords_manager.remove_stopwords('english', sample)

        # This example list of words should end up with only two elements
        self.assertEqual(len(remaining), 2)
        self.assertIn('test', list(remaining))
        self.assertIn('string', list(remaining))
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Selects a response by summing the WordNet path similarity between the
    first synsets of each word pair of the input statement and each known
    statement, then returning the closest match with a confidence value.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a tuple of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                # Compare the first synset in each list of synsets
                similarity = synset1[0].path_similarity(synset2[0])

                # path_similarity may return None for unrelated synsets
                if similarity:
                    total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Takes a statement string and a list of statement strings.
        Returns the closest matching statement from the list.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)
            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # No word pairs shared any similarity at all
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement),
            None
        )
def __init__(self, **kwargs):
    """Initialize the adapter along with its language-processing helpers."""
    super(ClosestMeaningAdapter, self).__init__(**kwargs)

    # Independent utility objects used for tokenizing and comparing text
    self.stopwords = StopWordsManager()
    self.tagger = POSTagger()
    self.wordnet = Wordnet()
def setUp(self):
    """Create the StopWordsManager instance exercised by the tests."""
    from chatterbot.utils.stop_words import StopWordsManager

    super(StopWordsTestCase, self).setUp()
    self.stopwords_manager = StopWordsManager()
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product
    of the path similarity of each statement is compared. This process
    simulates an evaluation of the closeness of synonyms. The known
    statement with the greatest path similarity is then returned.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a tuple of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                max_similarity = 0

                # Get the highest similarity for each combination of synsets
                for synset_a, synset_b in itertools.product(synset1, synset2):
                    similarity = synset_a.path_similarity(synset_b)

                    # path_similarity may return None for unrelated synsets
                    if similarity and similarity > max_similarity:
                        max_similarity = similarity

                # Add the most similar path value to the total
                total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes a statement string and a list of statement strings.
        Returns the closest matching statement from the list.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)
            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # No word pairs shared any similarity at all
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement),
            None
        )
def test_stop_words(self):
    """The word "too" should appear in the English stop-word list."""
    manager = StopWordsManager()
    english_stopwords = manager.words("english")

    # Subtracting the stop-word list from {"too"} should leave nothing
    difference = set(["too"]) - set(english_stopwords)
    self.assertEqual(difference, set([]))
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Selects a response by summing the WordNet path similarity between the
    first synsets of each word pair of the input statement and each known
    statement, then returning the closest match with a confidence value.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a tuple of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                # Compare the first synset in each list of synsets
                similarity = synset1[0].path_similarity(synset2[0])

                # path_similarity may return None for unrelated synsets
                if similarity:
                    total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Takes a statement string and a list of statement strings.
        Returns the closest matching statement from the list.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)
            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # No word pairs shared any similarity at all
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement),
            None
        )
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product
    of the path similarity of each statement is compared. This process
    simulates an evaluation of the closeness of synonyms. The known
    statement with the greatest path similarity is then returned.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a tuple of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                max_similarity = 0

                # Get the highest similarity for each combination of synsets
                for synset_a, synset_b in itertools.product(synset1, synset2):
                    similarity = synset_a.path_similarity(synset_b)

                    # path_similarity may return None for unrelated synsets
                    if similarity and similarity > max_similarity:
                        max_similarity = similarity

                # Add the most similar path value to the total
                total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes a statement string and a list of statement strings.
        Returns the closest matching statement from the list.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)
            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # No word pairs shared any similarity at all
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement),
            None
        )