Пример #1
0
 def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
   """Converts `strs` to token ids by delegating to `ops.ascii_to_token_id`.

   Args:
     strs: The strings to convert; forwarded unchanged to the op.
     max_length: Forwarded to the op as `maxlen`.
     append_eos: Forwarded to the op as `append_eos`.
     languages: Not used by this implementation.

   Returns:
     Whatever `ops.ascii_to_token_id` returns for the given arguments.
   """
   del languages  # Unused here.
   hparams = self.params
   return ops.ascii_to_token_id(
       strs,
       maxlen=max_length,
       append_eos=append_eos,
       pad_to_maxlen=hparams.pad_to_max_length)
Пример #2
0

        
Пример #3
0
 def testLabelsToTokenIdNoPadToMaxlen(self):
     """Checks `ops.ascii_to_token_id` outputs when `pad_to_maxlen=False`.

     Every expected row below is 13 entries wide even though `maxlen=20` is
     passed to the op.
     """
     labels = [
         'hElLo', 'sIr<epsilon>', 'What a <unk> day', 'america\'s',
         '<noise> early', '1:00 AM', '<text_only>morning'
     ]
     # One row per entry of `labels`, in the same order.
     expected_token_ids = [
         [1, 12, 9, 16, 16, 19, 2, 2, 2, 2, 2, 2, 2],
         [1, 23, 13, 22, 73, 2, 2, 2, 2, 2, 2, 2, 2],
         [1, 27, 12, 5, 24, 3, 5, 3, 0, 3, 8, 5, 29],
         [1, 5, 17, 9, 22, 13, 7, 5, 32, 23, 2, 2, 2],
         [1, 4, 3, 9, 5, 22, 16, 29, 2, 2, 2, 2, 2],
         [1, 40, 34, 39, 39, 3, 5, 17, 2, 2, 2, 2, 2],
         [1, 74, 17, 19, 22, 18, 13, 18, 11, 2, 2, 2, 2],
     ]
     # Same rows as above without the leading 1 and with a trailing 2.
     expected_target_ids = [
         [12, 9, 16, 16, 19, 2, 2, 2, 2, 2, 2, 2, 2],
         [23, 13, 22, 73, 2, 2, 2, 2, 2, 2, 2, 2, 2],
         [27, 12, 5, 24, 3, 5, 3, 0, 3, 8, 5, 29, 2],
         [5, 17, 9, 22, 13, 7, 5, 32, 23, 2, 2, 2, 2],
         [4, 3, 9, 5, 22, 16, 29, 2, 2, 2, 2, 2, 2],
         [40, 34, 39, 39, 3, 5, 17, 2, 2, 2, 2, 2, 2],
         [74, 17, 19, 22, 18, 13, 18, 11, 2, 2, 2, 2, 2],
     ]
     expected_paddings = [
         [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
     ]

     with self.session(use_gpu=False):
         op_outputs = ops.ascii_to_token_id(
             labels, append_eos=True, maxlen=20, pad_to_maxlen=False)
         token_ids, target_ids, paddings = self.evaluate(op_outputs)

     self.assertAllEqual(token_ids, expected_token_ids)
     self.assertAllEqual(target_ids, expected_target_ids)
     self.assertAllEqual(paddings, expected_paddings)