Exemplo n.º 1
0
 def test_cmp(self):
     """Two bags built from the same words are ``==`` until one changes."""
     words = ('car', 'chair', 'chicken')
     left = BagOfWords(*words)
     right = BagOfWords(*words)
     self.assertEqual(left == right, True)
     # Add one more 'car' to the left bag only; equality must now fail.
     left.add('car')
     self.assertEqual(left == right, False)
Exemplo n.º 2
0
 def test_join_sub(self):
     """Check ``-`` between bags, a single word and a list of words.

     Results are compared with ``assertEqual`` so that the remaining
     counts are verified as well: ``assertCountEqual`` on two dicts
     iterates them and therefore only compares their keys.
     """
     a = BagOfWords('car', 'chair', 'chicken')
     b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
     c = BagOfWords('plane')
     self.assertEqual(dict(a - b - c), {'car': 1, 'chair': 1})
     self.assertEqual(dict(c - b - a), {'plane': 1})
     self.assertEqual(dict(b - c - a), {
         'chicken': 1,
         'eye': 1,
         'ugly': 1
     })
     # A plain word as the left-hand operand of ``-``.
     total = b - c - a
     total = 'eye' - total
     self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})
     # Subtracting a word that is already gone leaves the bag unchanged.
     total = b - c - a
     total = 'eye' - total
     total = total - 'eye'
     self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})
     # A list of words as the right-hand operand.
     total = b - c - a
     total = total - ['chicken', 'ugly']
     self.assertEqual(dict(total), {'eye': 1})
Exemplo n.º 3
0
                             preprocessor=dummy,
                             max_features=2000)

# Every "pre__use_*" option is mapped to its own [True, False] list.
# The option spellings ("standartize", "punctation") are runtime keys and are
# kept exactly as they are.
# NOTE(review): presumably a parameter grid for the "pre" pipeline step --
# confirm against the code that consumes this dict.
preprocessor_setup = {
    "pre__use_" + option: [True, False]
    for option in (
        "standartize",
        "slang",
        "stopword",
        "lemmatization",
        "stemmer",
        "lowercase",
        "punctation",
    )
}

# Create the pipelines.
# All three start with the same ``preprocessor`` object as their "pre" step
# and are built with memory='cache'.
rnn_pipeline = Pipeline(
    [
        ("pre", preprocessor),
        ("bow", BagOfWords(max_features=2000)),
        ("rnn", RnnClassifier()),
    ],
    memory='cache',
)

# The Bayes and forest pipelines also share the same ``vectorizer`` object.
bayes_pipeline = Pipeline(
    [
        ("pre", preprocessor),
        ("vectorizer", vectorizer),
        ("bayes", MultinomialNB()),
    ],
    memory='cache',
)

forest_pipeline = Pipeline(
    [
        ("pre", preprocessor),
        ("vectorizer", vectorizer),
        ("forest", RandomForestClassifier()),
    ],
    memory='cache',
)

# Load the datasets
datasets = [
    SingleFile('data/Youtube01-Psy.csv'),
    SplittedFile('data/Youtube01-Psy.csv', 'data/Youtube02-KatyPerry.csv'),
Exemplo n.º 4
0
 def test_join_add(self):
     """Check ``+`` between bags, a single word and lists of words.

     Results are compared with ``assertEqual`` so that the summed counts
     are verified as well: ``assertCountEqual`` on two dicts iterates
     them and therefore only compares their keys.
     """
     a = BagOfWords('car', 'chair', 'chicken')
     b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
     c = BagOfWords('plane')
     expected = {
         'car': 1,
         'chair': 1,
         'eye': 1,
         'chicken': 3,
         'plane': 1,
         'ugly': 1
     }
     # The order of the operands must not change the result.
     self.assertEqual(dict(a + b + c), expected)
     self.assertEqual(dict(c + b + a), expected)
     self.assertEqual(dict(b + c + a), expected)
     # A plain word as the left-hand operand of ``+``.
     total = a + b + c
     total = 'ugly' + total
     self.assertEqual(dict(total), dict(expected, ugly=2))
     # A plain word on either side.
     total = a + b + c
     total = 'ugly' + total
     total = total + 'plane'
     self.assertEqual(dict(total), dict(expected, ugly=2, plane=2))
     # Two lists of words chained on the right-hand side.
     total = a + b + c
     total = total + ['car', 'chair', 'chicken'
                      ] + ['chicken', 'chicken', 'eye']
     self.assertEqual(dict(total), {
         'car': 2,
         'chair': 2,
         'eye': 2,
         'chicken': 6,
         'plane': 1,
         'ugly': 1
     })
Exemplo n.º 5
0
 def test_copy(self):
     """The bag returned by ``copy()`` compares equal to its source."""
     source = BagOfWords('car', 'chair', 'chicken')
     clone = source.copy()
     self.assertEqual(source == clone, True)
Exemplo n.º 6
0
 def setUp(self):
     """Store a ``BagOfWords`` built with no arguments in ``self.bow``."""
     fresh_bag = BagOfWords()
     self.bow = fresh_bag
Exemplo n.º 7
0
class BagOfWordsTest(TestCase):
    """Unit tests for ``BagOfWords``: add, delete, arithmetic, rates.

    Frequency dictionaries are compared with ``assertEqual``.
    ``assertCountEqual`` on two dicts iterates them and therefore only
    compares their keys, which would leave the expected counts unchecked.
    It is still used where a sequence is compared regardless of order.
    """

    def setUp(self):
        """Start every test with a bag built from no arguments."""
        self.bow = BagOfWords()

    def test_add_one_word(self):
        """The same word added as a string and as a dict is summed up."""
        self.bow.add('David')
        self.bow.add({'David': 2})
        self.assertCountEqual(self.bow.words(), ['David'])
        self.assertEqual(len(self.bow), 1)
        self.assertEqual(self.bow.num(), 3)
        self.assertEqual(self.bow.freq('David'), 3)
        self.assertEqual(dict(self.bow), {'David': 3})

    def test_add_two_words(self):
        """A string and a list can be mixed in a single ``add`` call."""
        self.bow.add('David', ['David', 'Álex'])
        self.assertCountEqual(self.bow.words(), ['Álex', 'David'])
        self.assertEqual(len(self.bow), 2)
        self.assertEqual(self.bow.num(), 3)
        self.assertEqual(self.bow.freq('David'), 2)
        self.assertEqual(dict(self.bow), {'Álex': 1, 'David': 2})

    def test_del_one_word(self):
        """``delete`` removes one occurrence of a word per call."""
        # Deleting from a bag that never held the word leaves it empty.
        self.bow.delete('David')
        self.assertEqual(dict(self.bow), {})
        # Deleting the only occurrence drops the word entirely.
        self.bow.add('David')
        self.bow.delete('David')
        self.assertEqual(dict(self.bow), {})
        # Deleting one of two occurrences keeps the word with count 1.
        self.bow.add('David', 'David')
        self.bow.delete('David')
        self.assertCountEqual(self.bow.words(), ['David'])
        self.assertEqual(len(self.bow), 1)
        self.assertEqual(self.bow.num(), 1)
        self.assertEqual(self.bow.freq('David'), 1)
        self.assertEqual(dict(self.bow), {'David': 1})

    def test_del_two_word(self):
        """``delete`` accepts several words in one call."""
        self.bow.delete('David', 'Álex')
        self.assertEqual(dict(self.bow), {})
        #
        self.bow.add('David', 'Álex')
        self.bow.delete('David', 'Álex')
        self.assertEqual(dict(self.bow), {})
        #
        self.bow.add({'David': 2})
        self.bow.delete('David')
        self.bow.add('Álex')
        self.assertCountEqual(self.bow.words(), ['Álex', 'David'])
        self.assertEqual(len(self.bow), 2)
        self.assertEqual(self.bow.num(), 2)
        self.assertEqual(self.bow.freq('David'), 1)
        self.assertEqual(dict(self.bow), {'Álex': 1, 'David': 1})

    def test_join_add(self):
        """Check ``+`` between bags, a single word and lists of words."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
        c = BagOfWords('plane')
        expected = {
            'car': 1,
            'chair': 1,
            'eye': 1,
            'chicken': 3,
            'plane': 1,
            'ugly': 1
        }
        # The order of the operands must not change the result.
        self.assertEqual(dict(a + b + c), expected)
        self.assertEqual(dict(c + b + a), expected)
        self.assertEqual(dict(b + c + a), expected)
        # A plain word as the left-hand operand of ``+``.
        total = a + b + c
        total = 'ugly' + total
        self.assertEqual(dict(total), dict(expected, ugly=2))
        # A plain word on either side.
        total = a + b + c
        total = 'ugly' + total
        total = total + 'plane'
        self.assertEqual(dict(total), dict(expected, ugly=2, plane=2))
        # Two lists of words chained on the right-hand side.
        total = a + b + c
        total = total + ['car', 'chair', 'chicken'
                         ] + ['chicken', 'chicken', 'eye']
        self.assertEqual(dict(total), {
            'car': 2,
            'chair': 2,
            'eye': 2,
            'chicken': 6,
            'plane': 1,
            'ugly': 1
        })

    def test_join_sub(self):
        """Check ``-`` between bags, a single word and a list of words."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
        c = BagOfWords('plane')
        self.assertEqual(dict(a - b - c), {'car': 1, 'chair': 1})
        self.assertEqual(dict(c - b - a), {'plane': 1})
        self.assertEqual(dict(b - c - a), {
            'chicken': 1,
            'eye': 1,
            'ugly': 1
        })
        # A plain word as the left-hand operand of ``-``.
        total = b - c - a
        total = 'eye' - total
        self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})
        # Subtracting a word that is already gone leaves the bag unchanged.
        total = b - c - a
        total = 'eye' - total
        total = total - 'eye'
        self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})
        # A list of words as the right-hand operand.
        total = b - c - a
        total = total - ['chicken', 'ugly']
        self.assertEqual(dict(total), {'eye': 1})

    def test_clear(self):
        """``clear`` leaves no words, no occurrences and no frequencies."""
        self.bow.add('item', 'item')
        self.bow.clear()
        self.assertEqual(len(self.bow), 0)
        self.assertEqual(self.bow.num(), 0)
        self.assertEqual(self.bow.freq('item'), 0)
        self.assertEqual(dict(self.bow), {})

    def test_item(self):
        """Indexing the bag by word returns that word's count."""
        self.bow.add('item1', 'item2', 'item2', 'item3')
        self.assertEqual(self.bow['item2'], 2)
        self.assertEqual(self.bow['item3'], 1)
        self.assertEqual(self.bow['item1'], 1)

    def test_copy(self):
        """The bag returned by ``copy()`` compares equal to its source."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = a.copy()
        self.assertTrue(a == b)

    def test_del(self):
        """``del bag[word]`` removes the word from the bag."""
        self.bow.add(['car', 'chair', 'chicken'])
        del self.bow['car']
        self.assertEqual(dict(self.bow), {'chair': 1, 'chicken': 1})

    def test_cmp(self):
        """Two bags built from the same words are ``==`` until one changes."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords('car', 'chair', 'chicken')
        self.assertTrue(a == b)
        # One extra occurrence in ``a`` only must break equality.
        a.add('car')
        self.assertFalse(a == b)

    def test_has_key(self):
        """The ``in`` operator reports whether a word is in the bag."""
        self.bow.add('car', 'chair', 'chicken')
        self.assertIn('car', self.bow)
        # The original asserted the same membership twice; check the
        # negative case instead so both outcomes of ``in`` are covered.
        self.assertNotIn('plane', self.bow)

    def test_rate(self):
        """Rates are each word's share of the total number of occurrences."""
        self.bow.add(['b', 'a', 'a', 'a'])
        # 0.75 and 0.25 are exactly representable, so ``==`` is safe here.
        self.assertEqual(dict(self.bow.rates), {'a': 0.75, 'b': 0.25})
        # Only the (word, rate) pairs are checked here, not their order.
        self.assertCountEqual(self.bow.sorted_rates, [('a', 0.75),
                                                      ('b', 0.25)])
        self.assertEqual(self.bow.rate('a'), 0.75)
        self.assertEqual(self.bow.rate('b'), 0.25)
        self.assertEqual(self.bow.rate('c'), 0)
        # With no words at all every rate is 0.
        self.bow.clear()
        self.assertEqual(self.bow.rate('a'), 0)