Python StopWordsManager.StopWordsManager示例

编程语言: Python

命名空间/包名称: chatterbot.utils.stop_words

类/类型: StopWordsManager

方法/功能: StopWordsManager

hotexamples.com的示例: 7

Python StopWordsManager.StopWordsManager - 已找到7个示例。这些是从开源项目中提取的、最受好评的 chatterbot.utils.stop_words.StopWordsManager.StopWordsManager 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示隐藏

StopWordsManager(7)

remove_stopwords(3)

words(3)

示例#1

显示文件

文件： comparisons.py 项目： xuh5156/ChatterBot

def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    The text of each statement is lower-cased, tokenized and stripped of
    English stop words. For every pair of tokens (one from each statement)
    where both tokens have synsets, the highest path similarity found
    between any two of their synsets is added to a running total.

    :param statement: Object exposing a ``text`` attribute.
    :param other_statement: Object exposing a ``text`` attribute.
    :returns: The accumulated similarity; ``0`` if no token pair
        contributed anything.
    """
    from chatterbot.utils.pos_tagger import POSTagger
    from chatterbot.utils.stop_words import StopWordsManager
    from chatterbot.utils.word_net import Wordnet
    import itertools

    wordnet = Wordnet()
    tagger = POSTagger()
    stopwords = StopWordsManager()

    def get_tokens(text, exclude_stop_words=True):
        """
        Lower-case and tokenize a string into its words.
        Common stop words such as ("is, the, a, ...") are dropped
        if 'exclude_stop_words' is True, in which case a set is returned.
        """
        lower = text.lower()
        tokens = tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = stopwords.words('english')

            tokens = set(tokens) - set(excluded_words)

        return tokens

    tokens1 = get_tokens(statement.text)
    tokens2 = get_tokens(other_statement.text)

    # Each word appears in many pairs; remember its synsets so the lookup
    # runs once per distinct word instead of once per pair.
    # NOTE(review): assumes wordnet.synsets() returns a reusable sequence
    # (e.g. a list), not a one-shot iterator -- confirm against Wordnet.
    synset_cache = {}

    def lookup_synsets(word):
        """
        Return the synsets for a word, querying wordnet only the first time.
        """
        if word not in synset_cache:
            synset_cache[word] = wordnet.synsets(word)
        return synset_cache[word]

    total_similarity = 0

    # Get the highest matching value for each possible combination of words
    for word1, word2 in itertools.product(tokens1, tokens2):

        synset1 = lookup_synsets(word1)
        synset2 = lookup_synsets(word2)

        if synset1 and synset2:

            max_similarity = 0

            # Get the highest similarity for each combination of synsets
            for first, second in itertools.product(synset1, synset2):
                similarity = first.path_similarity(second)

                # A falsy similarity (e.g. None) never replaces the maximum
                if similarity and (similarity > max_similarity):
                    max_similarity = similarity

            # Add the most similar path value to the total
            total_similarity += max_similarity

    return total_similarity

示例#2

显示文件

文件： test_utils.py 项目： zipwu/ChatterBot

    def test_remove_stop_words(self):
        """
        Removing English stop words from a short token list should
        leave exactly the words 'test' and 'string'.
        """
        manager = StopWordsManager()
        remaining = manager.remove_stopwords(
            'english',
            ['this', 'is', 'a', 'test', 'string']
        )

        # Only the two non-stop words are expected to survive
        self.assertEqual(len(remaining), 2)
        for expected in ('test', 'string'):
            self.assertIn(expected, list(remaining))

示例#3

显示文件

    def __init__(self, **kwargs):
        """
        Forward the keyword arguments to the parent initializer, then
        set up this instance's default state and helper objects.
        """
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Default state; assigned before read_program_file() is called
        self.program_data = dict(name="", path="")
        self.stage = self.data_dir = ""
        self.data = self.read_program_file()

        # Helper objects and the (initially empty) conversation list
        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []

示例#4

显示文件

文件： tokenizer.py 项目： datonli/SelfThinkingRobot

    def get_tokens(self, text, language='english', exclude_stop_words=True):
        """
        Lower-case ``text`` and split it into word tokens.

        When 'exclude_stop_words' is True, the tokens are passed through
        the stop word manager for ``language`` (dropping common words such
        as "is", "the", "a") before being returned.
        """
        from chatterbot.utils.stop_words import StopWordsManager
        from nltk import word_tokenize

        stop_word_manager = StopWordsManager()
        words = word_tokenize(text.lower())

        # Nothing to filter: hand back the raw tokens
        if not exclude_stop_words:
            return words

        return stop_word_manager.remove_stopwords(language, words)

示例#5

显示文件

    def setUp(self):
        """
        Run the parent setUp, then store a new StopWordsManager
        instance on ``self.stopwords_manager``.
        """
        super(StopWordsTestCase, self).setUp()
        # Imported locally, inside the method, rather than at module level
        from chatterbot.utils.stop_words import StopWordsManager

        self.stopwords_manager = StopWordsManager()

示例#6

显示文件

    def __init__(self, **kwargs):
        """
        Forward the keyword arguments to the parent initializer and
        create the Wordnet, POSTagger and StopWordsManager helpers.
        """
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        # One helper instance of each kind is kept on the adapter
        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

示例#7

显示文件

文件： test_utils.py 项目： johndpope/SAAI

    def test_stop_words(self):
        """
        The English stop word list should contain the word "too".
        """
        english_words = StopWordsManager().words("english")

        # Anything left after removing the stop words is missing from the list
        missing = {"too"}.difference(english_words)

        self.assertEqual(missing, set())