def test_RemoveUrls(self):
    """Tokenizing a string containing a URL yields unigrams without the URL."""
    # set up
    teststring = 'I like cats cats.org'
    unigrams = ['i', 'like', 'cats']
    # test
    # NOTE(review): test name implies URL removal, but no removeUrls-style flag
    # is passed (compare applyStopwords=True elsewhere) — presumably URL
    # stripping is the tokenizer's default behavior; confirm against the class.
    testtokenize = TokenizeOnWhitespacePunctuation(teststring)
    self.assertEqual(unigrams, testtokenize.getUnigrams())
def test_TokenizeOnWhitespacePunctuationUnigrams(self):
    """Plain unigram tokenization lowercases words and strips punctuation."""
    # set up
    teststring = 'I like cats and birds.'
    unigrams = ['i', 'like', 'cats', 'and', 'birds']
    # test
    testtokenize = TokenizeOnWhitespacePunctuation(teststring)
    self.assertEqual(unigrams, testtokenize.getUnigrams())
def test_UnigramsApplyStopwords(self):
    """With applyStopwords=True, stopwords ('i', 'like', 'and') are dropped."""
    # set up
    teststring = 'I like cats and birds.'
    unigrams = ['cats', 'birds']
    # test
    testtokenize = TokenizeOnWhitespacePunctuation(teststring, applyStopwords=True)
    self.assertEqual(unigrams, testtokenize.getUnigrams())