def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
  """Converts strings to token ids by delegating to `ops.ascii_to_token_id`.

  Args:
    strs: The strings to tokenize; forwarded unchanged to the op.
    max_length: Forwarded to the op as `maxlen`.
    append_eos: Forwarded to the op as `append_eos`.
    languages: Not used by this implementation.

  Returns:
    Whatever `ops.ascii_to_token_id` returns for the given arguments.
  """
  del languages  # Unused here; accepted only to keep the signature intact.
  # Whether to pad up to `maxlen` is configured on the params, not per call.
  pad_to_maxlen = self.params.pad_to_max_length
  return ops.ascii_to_token_id(
      strs,
      maxlen=max_length,
      pad_to_maxlen=pad_to_maxlen,
      append_eos=append_eos)
def testLabelsToTokenIdNoPadToMaxlen(self):
  """Checks `ops.ascii_to_token_id` output when `pad_to_maxlen=False`.

  The op is called with `maxlen=20`, yet every expected matrix below has 13
  columns, so the outputs are not padded out to `maxlen`.
  """
  labels = [
      'hElLo',
      'sIr<epsilon>',
      'What a <unk> day',
      'america\'s',
      '<noise> early',
      '1:00 AM',
      '<text_only>morning',
  ]

  # Expected values, one row per entry of `labels`.
  expected_token_ids = [
      [1, 12, 9, 16, 16, 19, 2, 2, 2, 2, 2, 2, 2],
      [1, 23, 13, 22, 73, 2, 2, 2, 2, 2, 2, 2, 2],
      [1, 27, 12, 5, 24, 3, 5, 3, 0, 3, 8, 5, 29],
      [1, 5, 17, 9, 22, 13, 7, 5, 32, 23, 2, 2, 2],
      [1, 4, 3, 9, 5, 22, 16, 29, 2, 2, 2, 2, 2],
      [1, 40, 34, 39, 39, 3, 5, 17, 2, 2, 2, 2, 2],
      [1, 74, 17, 19, 22, 18, 13, 18, 11, 2, 2, 2, 2],
  ]
  expected_target_ids = [
      [12, 9, 16, 16, 19, 2, 2, 2, 2, 2, 2, 2, 2],
      [23, 13, 22, 73, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      [27, 12, 5, 24, 3, 5, 3, 0, 3, 8, 5, 29, 2],
      [5, 17, 9, 22, 13, 7, 5, 32, 23, 2, 2, 2, 2],
      [4, 3, 9, 5, 22, 16, 29, 2, 2, 2, 2, 2, 2],
      [40, 34, 39, 39, 3, 5, 17, 2, 2, 2, 2, 2, 2],
      [74, 17, 19, 22, 18, 13, 18, 11, 2, 2, 2, 2, 2],
  ]
  expected_paddings = [
      [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
      [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
      [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
      [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
      [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
  ]

  with self.session(use_gpu=False):
    # Build the op inside the session context, then evaluate its outputs.
    outputs = ops.ascii_to_token_id(
        labels, append_eos=True, maxlen=20, pad_to_maxlen=False)
    token_ids, target_ids, paddings = self.evaluate(outputs)

    self.assertAllEqual(token_ids, expected_token_ids)
    self.assertAllEqual(target_ids, expected_target_ids)
    self.assertAllEqual(paddings, expected_paddings)