Exemplo n.º 1
0
 def test_trigram(self):
     """n_gram with n=3 yields 1-, 2- and 3-grams for each fixture group.

     For each of the two fixture token groups the expected output is the
     group's own tokens, then its '+'-joined bigrams, then its trigram.
     """
     fixtures = TestNGramTransformer
     first_group = fixtures.tokensA + ['a+b', 'b+c'] + ['a+b+c']
     second_group = fixtures.tokensB + ['A+B', 'B+C'] + ['A+B+C']
     transformer = NGramTransformer(Sentinel(), n=3)
     produced = list(transformer.n_gram(fixtures.segments))
     self.assertListEqual(first_group + second_group, produced)
Exemplo n.º 2
0
 def test_bigram(self):
     """n_gram with n=2 yields unigrams and bigrams for each fixture group.

     For each of the two fixture token groups the expected output is the
     group's own tokens followed by its '+'-joined bigrams.
     """
     fixtures = TestNGramTransformer
     first_group = fixtures.tokensA + ['a+b', 'b+c']
     second_group = fixtures.tokensB + ['A+B', 'B+C']
     transformer = NGramTransformer(Sentinel(), n=2)
     produced = list(transformer.n_gram(fixtures.segments))
     self.assertListEqual(first_group + second_group, produced)
Exemplo n.º 3
0
    def test_extract(self):
        """_extract rewrites each addressed column of the row in place.

        The segments column is expected to become the concatenated fixture
        tokens, and the empty column is expected to become ['~void~'].
        """
        fixtures = TestNGramTransformer
        row = [fixtures.segments, []]
        all_tokens = fixtures.tokensA + fixtures.tokensB
        wanted = [all_tokens, ['~void~']]
        transformer = NGramTransformer(Sentinel(), n=1)

        # _extract takes the row plus a column index and its return value is
        # ignored here; only the mutated row is inspected afterwards.
        column_count = len(row)
        for column in range(column_count):
            transformer._extract(row, column)

        self.assertListEqual(wanted, row)
Exemplo n.º 4
0
    def test_extract(self):
        """Apply _extract to every column and check the row afterwards.

        After the calls, column 0 (the fixture segments) must equal the
        concatenated fixture tokens and column 1 (initially an empty list)
        must equal ['~void~'].
        """
        fixtures = TestNGramTransformer
        row = [fixtures.segments, []]
        wanted = [fixtures.tokensA + fixtures.tokensB]
        wanted.append(['~void~'])
        transformer = NGramTransformer(Sentinel(), n=1)

        # The row is modified in place, so the index range is fixed up front.
        for position in range(len(row)):
            transformer._extract(row, position)

        self.assertListEqual(wanted, row)
Exemplo n.º 5
0
    def test_iter(self):
        """Iterating the transformer yields three rows, each transformed.

        Every yielded row must equal [1, <concatenated fixture tokens>], and
        the index of the last yielded row must be 2.
        """
        fixtures = TestNGramTransformer
        wanted = [1, fixtures.tokensA + fixtures.tokensB]
        # Start at -1 so that an iteration yielding nothing fails the final
        # index check instead of raising a NameError.
        last_index = -1
        source = Rows([[1, fixtures.segments]] * 3)
        transformer = NGramTransformer(source, n=1)

        for last_index, produced in enumerate(transformer):
            self.assertListEqual(wanted, produced)

        self.assertEqual(2, last_index)
Exemplo n.º 6
0
 def test_unigram(self):
     """n_gram with n=1 yields exactly the fixture tokens of both groups."""
     fixtures = TestNGramTransformer
     wanted = fixtures.tokensA + fixtures.tokensB
     transformer = NGramTransformer(Sentinel(), n=1)
     produced = list(transformer.n_gram(fixtures.segments))
     self.assertListEqual(wanted, produced)
Exemplo n.º 7
0
 def test_unigram(self):
     """With n=1, n_gram over the fixture segments adds no combined grams.

     The output must equal the first fixture token group followed by the
     second one.
     """
     fixtures = TestNGramTransformer
     wanted = fixtures.tokensA + fixtures.tokensB
     unigrams = NGramTransformer(Sentinel(), n=1).n_gram(fixtures.segments)
     self.assertListEqual(wanted, list(unigrams))
Exemplo n.º 8
0
 def test_setup(self):
     """Check the attributes set when only the rows argument is given.

     The transformer must expose the object it was given as ``rows``,
     ``N`` must be 1 and ``text_columns`` must be None.
     """
     rows = Sentinel()
     transformer = NGramTransformer(rows)
     self.assertEqual(rows, transformer.rows)
     self.assertEqual(1, transformer.N)
     # None is a singleton: assert identity rather than equality, which
     # also produces a clearer failure message.
     self.assertIsNone(transformer.text_columns)