def test_subword_decode_numpy_int32(self):
    """Decode subword ids that originate from an int32 numpy array.

    The ids are converted to a plain Python list before being handed to
    `decode`, and the result must equal "quick brown fox".
    """
    subword_encoder = text_encoder_utils.create_text_encoder(
        "subword", _SUBWORD_VOCAB)
    token_ids = np.array([9, 10, 11, 12, 1, 0], dtype=np.int32)
    # Without tolist(), the test will not pass for any np array dtype
    # other than int64.
    decoded_text = subword_encoder.decode(token_ids.tolist())
    self.assertEqual(decoded_text, "quick brown fox")
def test_py_decode(self, encoder_type):
    """Check that both encoder factories decode the same ids identically.

    Args:
      encoder_type: Encoder type name forwarded to both `create_text_encoder`
        factories.
    """
    sample = "the quick brown fox jumps \n over the lazy dog."
    utils_encoder = text_encoder_utils.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    public_encoder = public_parsing_ops.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    # The ids are produced once, by the text_encoder_utils encoder, and then
    # decoded by both implementations.
    encoded_ids = utils_encoder.encode(sample)
    utils_decoded = utils_encoder.decode(encoded_ids)
    public_decoded = public_encoder.decode(encoded_ids)
    self.assertEqual(utils_decoded, public_decoded)
def test_py_encode(self, encoder_type):
    """Check that both encoder factories encode the same text identically.

    Args:
      encoder_type: Encoder type name forwarded to both `create_text_encoder`
        factories.
    """
    sample = "the quick brown fox\n jumps over the lazy dog.\n"
    utils_encoder = text_encoder_utils.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    public_encoder = public_parsing_ops.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    utils_ids = utils_encoder.encode(sample)
    public_ids = public_encoder.encode(sample)
    self.assertEqual(utils_ids, public_ids)
def test_vocab(self, encoder_type):
    """Check that both encoder factories report the same vocabulary size.

    Args:
      encoder_type: Encoder type name forwarded to both `create_text_encoder`
        factories.
    """
    utils_encoder = text_encoder_utils.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    public_encoder = public_parsing_ops.create_text_encoder(
        encoder_type, _SPM_VOCAB)
    utils_vocab_size = utils_encoder.vocab_size
    public_vocab_size = public_encoder.vocab_size
    self.assertEqual(utils_vocab_size, public_vocab_size)
def test_subword_decode(self):
    """Decode a fixed list of subword ids and compare with the expected text."""
    subword_encoder = text_encoder_utils.create_text_encoder(
        "subword", _SUBWORD_VOCAB)
    token_ids = [9, 10, 11, 12, 1, 0]
    decoded_text = subword_encoder.decode(token_ids)
    self.assertEqual(decoded_text, "quick brown fox")
def test_sentencepiece_offset(self):
    """Round-trip text through "sentencepiece_newline" with an extra leading id.

    The encoded ids are prefixed with id 25 before decoding; the decoded
    string must still equal the original input text.
    """
    newline_encoder = text_encoder_utils.create_text_encoder(
        "sentencepiece_newline", _SPM_VOCAB)
    original_text = "the quick brown fox jumps over the lazy dog"
    # NOTE(review): the meaning of id 25 is not visible from this test --
    # confirm against the vocabulary / encoder implementation.
    prefixed_ids = [25]
    prefixed_ids = prefixed_ids + newline_encoder.encode(original_text)
    round_tripped = newline_encoder.decode(prefixed_ids)
    self.assertEqual(original_text, round_tripped)
def test_sentencepiece(self):
    """Round-trip text through the "sentencepiece" encoder (encode, then decode)."""
    spm_encoder = text_encoder_utils.create_text_encoder(
        "sentencepiece", _SPM_VOCAB)
    original_text = "the quick brown fox jumps over the lazy dog"
    encoded_ids = spm_encoder.encode(original_text)
    round_tripped = spm_encoder.decode(encoded_ids)
    self.assertEqual(original_text, round_tripped)