def test_set_prefix_pad(self):
    """Check that set_prefix_pad changes both the getter and the first token."""
    tokenizer = QgramTokenizer()
    # Every token after the first is expected to be the same before and
    # after the prefix pad is changed; only the leading token differs.
    remaining_tokens = ['da', 'at', 'ta', 'ab', 'ba', 'as', 'se', 'e$']

    # A freshly constructed tokenizer reports '#' as its prefix pad.
    self.assertEqual(tokenizer.get_prefix_pad(), '#')
    self.assertEqual(tokenizer.tokenize('database'),
                     ['#d'] + remaining_tokens)

    # After the setter call, the getter and the leading token use '^'.
    tokenizer.set_prefix_pad('^')
    self.assertEqual(tokenizer.get_prefix_pad(), '^')
    self.assertEqual(tokenizer.tokenize('database'),
                     ['^d'] + remaining_tokens)
def test_set_prefix_pad_invalid2(self):
    """Call set_prefix_pad with a three-character string ('###')."""
    # NOTE(review): the name suggests this input should be rejected, but no
    # assertion is visible in this body -- confirm that the expected failure
    # is checked elsewhere (e.g. by a decorator on this method).
    tokenizer = QgramTokenizer()
    tokenizer.set_prefix_pad('###')
def test_set_prefix_pad_invalid1(self):
    """Call set_prefix_pad with a non-string argument (the int 10)."""
    # NOTE(review): the name suggests this input should be rejected, but no
    # assertion is visible in this body -- confirm that the expected failure
    # is checked elsewhere (e.g. by a decorator on this method).
    tokenizer = QgramTokenizer()
    tokenizer.set_prefix_pad(10)