示例#1
0
 def test_spm_prefix(self):
     """Encodes number-prefixed text with has_length_token and decodes it back.

     Asserts that the first id of each encoded row equals the number that
     leads the corresponding input string (25 and 23), and that decoding
     the ids yields the text without that leading number.
     """
     inputs = ["25 the quick brown fox.", "23 the quick brown"]
     vocab_type = "sentencepiece_newline"
     token_ids = parsing_ops.encode(
         inputs, 10, _SPM, vocab_type, has_length_token=True)

     # The leading number of each input is expected as the row's first id.
     for row, expected_prefix in enumerate((25, 23)):
         self.assertAllEqual(expected_prefix, token_ids[row][0])

     decoded = parsing_ops.decode(token_ids, _SPM, vocab_type)
     expected_text = ["the quick brown fox.", "the quick brown"]
     self.assertAllEqual(expected_text, decoded)
示例#2
0
 def test_decode(self):
     """Decodes a fixed row of subword ids and checks the resulting bytes.

     NOTE(review): the trailing zeros in the id row are presumably padding;
     confirm against the vocabulary behind _SUBWORDS.
     """
     token_ids = tf.constant(
         [[8, 9, 10, 11, 12, 38, 1, 0, 0, 0]], dtype=tf.int64)
     decoded = parsing_ops.decode(token_ids, _SUBWORDS, "subword")
     expected = [b"the quick brown fox."]
     self.assertAllEqual(expected, decoded)
示例#3
0
 def test_tf_decode(self, encoder_type):
   """Checks that both decode implementations agree for `encoder_type`.

   Encodes two strings (one ending in a newline) with `parsing_ops.encode`,
   then asserts that `parsing_ops.decode` and `public_parsing_ops.decode`
   return equal results for the same ids.

   Args:
     encoder_type: Encoder type forwarded unchanged to encode and decode.
   """
   text = tf.constant(["the quick brown fox.", "the quick brown\n"])
   token_ids = parsing_ops.encode(text, 10, _SPM_VOCAB, encoder_type)

   # Both decoders receive exactly the same arguments.
   decode_args = (token_ids, _SPM_VOCAB, encoder_type)
   internal_decoded = parsing_ops.decode(*decode_args)
   public_decoded = public_parsing_ops.decode(*decode_args)
   self.assertAllEqual(internal_decoded, public_decoded)