Пример #1
0
    def test_RemoveUrls(self):
        # The input ends with the URL-like token 'cats.org'; the expected
        # unigram list holds only the three lowercase words that precede it.
        expected_tokens = ['i', 'like', 'cats']
        tokenizer = TokenizeOnWhitespacePunctuation('I like cats cats.org')

        # Compare the expected list against what the tokenizer reports.
        actual_tokens = tokenizer.getUnigrams()
        self.assertEqual(expected_tokens, actual_tokens)
Пример #2
0
    def test_TokenizeOnWhitespacePunctuationUnigrams(self):
        # Arrange: a short sentence ending in a period, together with the
        # unigram list the assertion expects (lowercase words, no period).
        sentence = 'I like cats and birds.'
        expected = ['i', 'like', 'cats', 'and', 'birds']

        # Act: tokenize with default options and collect the unigrams.
        tokenizer = TokenizeOnWhitespacePunctuation(sentence)
        produced = tokenizer.getUnigrams()

        # Assert
        self.assertEqual(expected, produced)
    def test_RemoveUrls(self):
        # Expected result: the three words in lowercase, without the trailing
        # 'cats.org' token that appears in the input text.
        wanted = ['i', 'like', 'cats']

        # Build the tokenizer directly from the literal input and compare.
        got = TokenizeOnWhitespacePunctuation('I like cats cats.org').getUnigrams()
        self.assertEqual(wanted, got)
    def test_TokenizeOnWhitespacePunctuationUnigrams(self):
        # Input sentence and the unigrams expected back from the tokenizer:
        # every word lowercased, with the final period absent.
        raw_text = 'I like cats and birds.'
        expected_unigrams = ['i', 'like', 'cats', 'and', 'birds']

        # Single-expression check against a tokenizer built with defaults.
        self.assertEqual(
            expected_unigrams,
            TokenizeOnWhitespacePunctuation(raw_text).getUnigrams(),
        )
    def test_UnigramsApplyStopwords(self):
        # With applyStopwords=True the test expects only 'cats' and 'birds'
        # to remain from the sentence below.
        sentence = 'I like cats and birds.'
        remaining_words = ['cats', 'birds']

        # Tokenize with stopword handling switched on, then compare.
        tokenizer = TokenizeOnWhitespacePunctuation(sentence, applyStopwords=True)
        observed = tokenizer.getUnigrams()
        self.assertEqual(remaining_words, observed)
Пример #6
0
    def test_UnigramsApplyStopwords(self):
        # Expected unigrams once stopword handling is enabled: just the two
        # nouns from the input sentence.
        expected = ['cats', 'birds']

        # Construct the tokenizer with applyStopwords=True and read back the
        # unigrams it reports.
        stopword_tokenizer = TokenizeOnWhitespacePunctuation(
            'I like cats and birds.',
            applyStopwords=True,
        )
        actual = stopword_tokenizer.getUnigrams()

        self.assertEqual(expected, actual)