def setUp(self):
     r"""Build a cased and an uncased tokenizer, each with a vocabulary.

     Both tokenizers build their vocabulary from the `vocab_source`
     attribute of the instance's class. They are stored individually as
     `cased_tokenizer` / `uncased_tokenizer` and together, in that order,
     in `tokenizers`.
     """
     cased = WhitespaceDictTokenizer()
     self.cased_tokenizer = cased
     cased.build_vocab(self.__class__.vocab_source)

     uncased = WhitespaceDictTokenizer(is_uncased=True)
     self.uncased_tokenizer = uncased
     uncased.build_vocab(self.__class__.vocab_source)

     self.tokenizers = [cased, uncased]
示例#2
0
    def test_invalid_input_is_uncased(self):
        r"""Raise `TypeError` when input `is_uncased` is not a `bool`.

        Each value in `examples` is passed as `is_uncased` to the
        `WhitespaceDictTokenizer` constructor, which must raise `TypeError`
        whose first argument is exactly
        ``'`is_uncased` must be an instance of `bool`.'``.
        """
        msg1 = 'Must raise `TypeError` when input `is_uncased` is invalid.'
        msg2 = 'Inconsistent error message.'
        # Non-`bool` values of many built-in types; `0` and `1` are `int`,
        # not `bool`, so they are expected to be rejected as well.
        examples = (
            0, 1, -1, 0.0, 1.0, math.nan, -math.nan, math.inf, -math.inf, 0j,
            1j, '', b'', (), [], {}, set(), object(), lambda x: x, type, None,
            NotImplemented, ...,
        )

        for invalid_input in examples:
            # One sub-test per value: a failure names the offending input
            # and the remaining values are still checked.
            with self.subTest(invalid_input=invalid_input):
                with self.assertRaises(TypeError, msg=msg1) as ctx_man:
                    WhitespaceDictTokenizer(is_uncased=invalid_input)

                self.assertEqual(
                    ctx_man.exception.args[0],
                    '`is_uncased` must be an instance of `bool`.',
                    msg=msg2
                )
示例#3
0
 def setUp(self):
     r"""Create a default tokenizer and one with `is_uncased=True`.

     The two instances are stored as `cased_tokenizer` and
     `uncased_tokenizer`, and together, in that order, in `tokenizers`.
     """
     cased = WhitespaceDictTokenizer()
     self.cased_tokenizer = cased

     uncased = WhitespaceDictTokenizer(is_uncased=True)
     self.uncased_tokenizer = uncased

     self.tokenizers = [cased, uncased]