Exemplo n.º 1
0
    def test_generate_token(self):
        """Check the tokens generated from a model built with n=2.

        The model is ``NGram(2, self.sents)``; ``self.sents`` is a fixture
        defined outside this method (presumably in ``setUp`` -- confirm).
        Every check is repeated 100 times, presumably because
        ``generate_token`` draws its result at random, so a single call
        could pass by luck.
        """
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        for _ in range(100):
            # After 'el' the only possible continuation is 'gato'.
            token = generator.generate_token(('el',))
            self.assertEqual(token, 'gato')

            # After 'come' either 'pescado' or 'salmón' may follow.
            # assertIn reports the offending token and the allowed values
            # on failure, unlike assertTrue(token in ...).
            token = generator.generate_token(('come',))
            self.assertIn(token, ['pescado', 'salmón'])
    def test_generate_token(self):
        """Verify next-token generation for an ``NGram(2, self.sents)`` model.

        ``self.sents`` comes from outside this method (presumably a
        ``setUp`` fixture -- confirm). The assertions run 100 times each;
        this looks intended to cover the different outcomes of a
        randomised ``generate_token``.
        """
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        for _ in range(100):
            # Context 'el': 'gato' is always the generated token.
            token = generator.generate_token(('el', ))
            self.assertEqual(token, 'gato')

            # Context 'come': the token may be 'pescado' or 'salmón'.
            # assertIn gives a descriptive failure message (shows the
            # unexpected token) where assertTrue would only say "False".
            token = generator.generate_token(('come', ))
            self.assertIn(token, ['pescado', 'salmón'])
Exemplo n.º 3
0
    def test_generate_token_3and4gram(self):
        """Check token generation for models built with n=3 and n=4.

        Both models are built from ``self.sents3``, a fixture defined
        outside this method (presumably in ``setUp`` -- confirm). Each
        check is repeated 100 times, presumably because ``generate_token``
        picks its result at random.
        """
        ngram3 = NGram(3, self.sents3)
        ngram4 = NGram(4, self.sents3)
        generator3 = NGramGenerator(ngram3)
        generator4 = NGramGenerator(ngram4)

        for _ in range(100):
            # After 'come pescado' the next token is always 'y'.
            token = generator3.generate_token(('come', 'pescado'))
            self.assertEqual(token, 'y')

            # After 'come pescado y' the next token is always 'duerme'.
            token = generator4.generate_token(('come', 'pescado', 'y'))
            self.assertEqual(token, 'duerme')

            # A sentence may start with 'el' or 'la'; the context is made
            # of n-1 '<s>' markers. assertIn reports the unexpected token
            # on failure, unlike assertTrue(token in ...).
            token = generator3.generate_token(('<s>', '<s>'))
            self.assertIn(token, ['el', 'la'])
            token = generator4.generate_token(('<s>', '<s>', '<s>'))
            self.assertIn(token, ['el', 'la'])