Example #1
    def encode_tf(self, s):
        """Encode a tf.Scalar string to a tf.Tensor.

        This will be necessary for on-the-fly tokenization.

        Args:
          s: a tf.Scalar with dtype tf.string

        Returns:
          a 1d tf.Tensor with dtype tf.int32
        """
        ids = subword_text_encoder_ops.subword_text_encoder_encode(
            s, self._filepath)
        # The C++ op appends 1=EOS - drop it.
        return ids[:-1]
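The docstring notes that encode_tf is meant for on-the-fly tokenization. A minimal sketch of that usage, assuming a tf.data.Dataset of scalar tf.string examples and an encoder object exposing the encode_tf method above (the helper name tokenize_dataset is hypothetical, not part of the original example):

import tensorflow as tf

def tokenize_dataset(dataset, encoder):
    # `dataset` is assumed to yield scalar tf.string examples; each one is
    # mapped to a 1-d tf.int32 tensor of subword ids via encode_tf.
    return dataset.map(lambda s: encoder.encode_tf(s))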
    def test_subword_text_encoder_encode(self):
        s = "the quick brown fox jumps over the lazy dog"
        encoded = subword_text_encoder_ops.subword_text_encoder_encode(
            s, vocab_file)
        self.assertAllEqual(encoded, [2, 3, 4, 5, 6, 7, 8, 9, 2, 11, 12, 1])