class EightBitTests(unittest.TestCase): data = 'the quick brown fox jumps over the lazy dog and i am so proud' words = ('i','am','so','lazy') def setUp(self): self._sw = Stopwords('en', self.words) verifyClass(StopwordsInterface, self._sw.__class__) def testSimple(self): self.assertEqual( self._sw.getLanguage(), 'en') self.assertEqual( len(self._sw.getStopWords()), len(self.words) ) for w in self.words: self.assertEqual(self._sw.isStopWord(w.lower()), 1) self.assertEqual(self._sw.isStopWord(w.upper()), 1) self.assertEqual(self._sw.isStopWord(w.capitalize()), 1) def testNonStopwords(self): self.assertEqual(self._sw.isStopWord('foo'), 0) self.assertEqual(self._sw.isStopWord('bar'), 0) def testFilter1(self): data = '' expected = [ w for w in data.split() if w not in self.words] self.assertEqual(self._sw.process(data.split()), expected) def testFilter2(self): data = 'the quick brown fox jumps over the lazy dog and i am so proud' expected = [ w for w in data.split() if w not in self.words] self.assertEqual(self._sw.process(data.split()), expected)
def setUp(self): self._sw = Stopwords('en', self.words) verifyClass(StopwordsInterface, self._sw.__class__)