def encode_tf(self, s):
  """Encode a tf.Scalar string to a tf.Tensor.

  This will be necessary for on-the-fly tokenization.

  Args:
    s: a tf.Scalar with dtype tf.string
  Returns:
    a 1d tf.Tensor with dtype tf.int32
  """
  ids = subword_text_encoder_ops.subword_text_encoder_encode(
      s, self._filepath)
  # The C++ op appends 1=EOS - drop it.
  return ids[:-1]
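# A minimal sketch (not part of the original code) of the on-the-fly
# tokenization the docstring refers to: mapping encode_tf over a
# tf.data.Dataset of raw strings. `vocab` is a hypothetical instance of the
# encoder class that owns encode_tf.
import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices(
    ["the quick brown fox", "the lazy dog"])
# Each element is a scalar tf.string, which is exactly what encode_tf
# expects, so it can be applied inside map() in the input pipeline.
dataset = dataset.map(vocab.encode_tf)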
def test_subword_text_encoder_encode(self):
  s = "the quick brown fox jumps over the lazy dog"
  encoded = subword_text_encoder_ops.subword_text_encoder_encode(
      s, vocab_file)
  # The raw op output ends with the appended 1=EOS id.
  self.assertAllEqual(encoded, [2, 3, 4, 5, 6, 7, 8, 9, 2, 11, 12, 1])