def test_convert_tokens_to_ids(self):
    """Check that ``convert_by_vocab`` maps word pieces to their vocab indices."""
    # A token's id is simply its position in this list.
    pieces = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
        "##ing",
    ]
    vocab = {piece: index for index, piece in enumerate(pieces)}

    query = ["un", "##want", "##ed", "runn", "##ing"]
    # Positions of the query pieces inside ``pieces``.
    expected_ids = [7, 4, 5, 8, 9]

    self.assertAllEqual(tokenization.convert_by_vocab(vocab, query), expected_ids)
示例#2
0
 def convert_ids_to_tokens(self, ids):
     """Return ``tokenization.convert_by_vocab`` applied to ``self.inv_vocab`` and ``ids``.

     NOTE(review): presumably ``self.inv_vocab`` maps ids back to tokens;
     confirm against the code that builds it.
     """
     inverse_table = self.inv_vocab
     return tokenization.convert_by_vocab(inverse_table, ids)
示例#3
0
 def convert_tokens_to_ids(self, tokens):
     """Return ``tokenization.convert_by_vocab`` applied to ``self.vocab`` and ``tokens``.

     NOTE(review): presumably ``self.vocab`` maps tokens to integer ids;
     confirm against the code that builds it.
     """
     lookup_table = self.vocab
     return tokenization.convert_by_vocab(lookup_table, tokens)