def test_remove_stop_words(self):
    # Filter an English token list through a fresh stop words manager.
    manager = StopWordsManager()

    remaining = manager.remove_stopwords(
        'english',
        ['this', 'is', 'a', 'test', 'string']
    )

    # Only the two words that are not stop words should be left over
    self.assertEqual(len(remaining), 2)

    for expected_word in ('test', 'string'):
        self.assertIn(expected_word, list(remaining))
class StopWordsTestCase(TestCase):
    """
    Tests for the stop word removal provided by StopWordsManager.
    """

    def setUp(self):
        super(StopWordsTestCase, self).setUp()

        # The manager is imported inside setUp rather than at module level,
        # so the import is only attempted when a test is about to run.
        from chatterbot.utils.stop_words import StopWordsManager

        self.stopwords_manager = StopWordsManager()

    def test_remove_stop_words(self):
        # Filter an English token list through the shared manager.
        remaining = self.stopwords_manager.remove_stopwords(
            'english',
            ['this', 'is', 'a', 'test', 'string']
        )

        # Only the two words that are not stop words should be left over
        self.assertEqual(len(remaining), 2)

        for expected_word in ('test', 'string'):
            self.assertIn(expected_word, list(remaining))
def get_tokens(self, text, language='english', exclude_stop_words=True):
    """
    Split a string into lowercase word tokens.

    :param text: The string to tokenize. It is lowercased before
        being passed to NLTK's ``word_tokenize``.
    :param language: The name of the language whose stop words
        should be removed. Only used if 'exclude_stop_words' is True.
    :param exclude_stop_words: If True, common stop words such as
        ("is, the, a, ...") are filtered out of the result.
    :returns: The tokens produced by ``word_tokenize``, or the result
        of ``StopWordsManager.remove_stopwords`` for those tokens
        when stop words are excluded.
    """
    from nltk import word_tokenize

    tokens = word_tokenize(text.lower())

    if not exclude_stop_words:
        return tokens

    # The stop words manager is only imported and constructed when it is
    # actually needed, so callers that keep stop words skip that work.
    from chatterbot.utils.stop_words import StopWordsManager

    return StopWordsManager().remove_stopwords(language, tokens)