def test_spm_prefix(self):
    """Encode length-prefixed strings and check the prefix round trip.

    Each input begins with a number. With ``has_length_token=True`` the
    first id of every encoded row is asserted to equal that number, and
    decoding the ids is asserted to give back the text without the
    numeric prefix.
    """
    encoder_type = "sentencepiece_newline"
    texts = ["25 the quick brown fox.", "23 the quick brown"]
    token_ids = parsing_ops.encode(
        texts, 10, _SPM, encoder_type, has_length_token=True)

    # Row i must start with the number that leads texts[i].
    for row, expected_prefix in enumerate((25, 23)):
        self.assertAllEqual(expected_prefix, token_ids[row][0])

    expected_texts = ["the quick brown fox.", "the quick brown"]
    decoded = parsing_ops.decode(token_ids, _SPM, encoder_type)
    self.assertAllEqual(expected_texts, decoded)
def test_decode(self):
    """Decode a single row of "subword" ids and check the resulting string."""
    # NOTE(review): the trailing 1, 0, 0, 0 are presumably an end marker
    # followed by padding -- confirm against the vocabulary.
    token_ids = tf.constant([[8, 9, 10, 11, 12, 38, 1, 0, 0, 0]], tf.int64)
    expected = [b"the quick brown fox."]

    decoded = parsing_ops.decode(token_ids, _SUBWORDS, "subword")

    self.assertAllEqual(expected, decoded)
def test_tf_decode(self, encoder_type):
    """Check that both decode entry points agree for ``encoder_type``.

    The same encoded ids are decoded through ``parsing_ops.decode`` and
    ``public_parsing_ops.decode``; the two results must be equal.

    Args:
        encoder_type: Encoder name passed unchanged to encode and decode.
    """
    texts = tf.constant(["the quick brown fox.", "the quick brown\n"])
    token_ids = parsing_ops.encode(texts, 10, _SPM_VOCAB, encoder_type)

    from_parsing_ops = parsing_ops.decode(
        token_ids, _SPM_VOCAB, encoder_type)
    from_public_ops = public_parsing_ops.decode(
        token_ids, _SPM_VOCAB, encoder_type)

    self.assertAllEqual(from_parsing_ops, from_public_ops)