def setUp(self):
     r"""Create the tokenizer fixtures shared by every test.

     One default (cased) and one `is_uncased=True` tokenizer are built,
     each with a vocabulary built from the class attribute `vocab_source`.
     Both are exposed individually and together as `self.tokenizers`.
     """
     vocab_source = self.__class__.vocab_source

     cased = WhitespaceListTokenizer()
     cased.build_vocab(vocab_source)

     uncased = WhitespaceListTokenizer(is_uncased=True)
     uncased.build_vocab(vocab_source)

     self.cased_tokenizer = cased
     self.uncased_tokenizer = uncased
     self.tokenizers = [cased, uncased]
    def test_invalid_input_is_uncased(self):
        r"""Constructor must reject every non-`bool` value of `is_uncased`.

        Each invalid value must make `WhitespaceListTokenizer` raise
        `TypeError` whose first argument is the expected error message.
        """
        missing_error_msg = (
            'Must raise `TypeError` when input `is_uncased` is invalid.'
        )
        wrong_text_msg = 'Inconsistent error message.'
        expected_text = '`is_uncased` must be an instance of `bool`.'

        # Values of many non-`bool` types; none of them is a `bool` instance.
        invalid_values = (
            0, 1, -1,
            0.0, 1.0,
            math.nan, -math.nan,
            math.inf, -math.inf,
            0j, 1j,
            '', b'',
            (), [], {}, set(),
            object(),
            lambda x: x,
            type,
            None,
            NotImplemented,
            ...,
        )

        for bad_value in invalid_values:
            with self.assertRaises(TypeError, msg=missing_error_msg) as raised:
                WhitespaceListTokenizer(is_uncased=bad_value)

            actual_text = raised.exception.args[0]
            self.assertEqual(actual_text, expected_text, msg=wrong_text_msg)
Пример #3
0
 def setUp(self):
     r"""Create the tokenizer fixtures shared by every test.

     Instantiates one default (cased) tokenizer and one constructed with
     `is_uncased=True`, and collects both in `self.tokenizers`.
     """
     cased = WhitespaceListTokenizer()
     uncased = WhitespaceListTokenizer(is_uncased=True)

     self.cased_tokenizer = cased
     self.uncased_tokenizer = uncased
     self.tokenizers = [cased, uncased]