Пример #1
0
    def test_bert_tensorizer(self):
        """Check the ids produced by ``ScriptBERTTensorizer.numberize``.

        The first id must be 201 and the last 202; every id in between must
        equal the integer value of the matching mock token minus 100.
        """
        # NOTE(review): 201 / 202 are presumably the ids of the start/end
        # special tokens in the mock vocab -- confirm against _mock_vocab.
        mock_tokenizer, mock_tokens = self._mock_tokenizer()
        mock_vocab = self._mock_vocab()
        tensorizer = ScriptBERTTensorizer(
            mock_tokenizer,
            mock_vocab,
            max_seq_len=100,
        )

        # numberize is expected to return exactly four values; only the
        # first (the token ids) is inspected here.
        numberized = tensorizer.numberize(["mock test"], None)
        ids, _, _, _ = numberized

        self.assertEqual(ids[0], 201)
        self.assertEqual(ids[-1], 202)

        # Compare the ids between the two boundary ids with the mock tokens.
        inner_ids = ids[1:-1]
        for actual_id, mock_token in zip(inner_ids, mock_tokens):
            expected_id = int(mock_token[0]) - 100
            self.assertEqual(actual_id, expected_id)
Пример #2
0
    def test_bert_tensorizer(self):
        """Check ``ScriptBERTTensorizer.numberize`` with the BOS token disabled.

        The last id must be 202; every id before it must equal the integer
        value of the matching mock token minus 100.
        """
        # NOTE(review): 202 is presumably the id of the end-of-sequence
        # token in the mock vocab -- confirm against _mock_vocab.
        mock_tokenizer, mock_tokens = self._mock_tokenizer()
        mock_vocab = self._mock_vocab()

        # Keyword options: no BOS token is added, and EOS is not substituted
        # for it either.
        options = {
            "max_seq_len": 100,
            "add_bos_token": False,
            "use_eos_token_for_bos": False,
        }
        tensorizer = ScriptBERTTensorizer(mock_tokenizer, mock_vocab, **options)

        # numberize is expected to return exactly three values; only the
        # first (the token ids) is inspected here.
        numberized = tensorizer.numberize("mock test")
        ids, _, _ = numberized

        self.assertEqual(ids[-1], 202)

        # Everything before the final id is compared with the mock tokens.
        leading_ids = ids[0:-1]
        for actual_id, mock_token in zip(leading_ids, mock_tokens):
            expected_id = int(mock_token[0]) - 100
            self.assertEqual(actual_id, expected_id)