Example #1
0
 def Proc(record):
     """Deserializes one tf.Example into speech-input tensors.

     Args:
       record: A serialized tf.Example proto.

     Returns:
       A ([uttid, tgt_ids, tgt_labels, tgt_paddings, frames, src_paddings],
       bucket_key) tuple.
     """
     feature_spec = dict([
         ('uttid', tf.VarLenFeature(tf.string)),
         ('transcript', tf.VarLenFeature(tf.string)),
         ('frames', tf.VarLenFeature(tf.float32)),
     ])
     parsed = tf.parse_single_example(record, feature_spec)
     fval = {name: sparse.values for name, sparse in six.iteritems(parsed)}
     # Restore the time-major [time, frame_size] layout of the flat vector.
     fval['frames'] = tf.reshape(
         fval['frames'], shape=[-1, self.params.frame_size])
     num_frames = tf.shape(fval['frames'])[0]
     # Bucket by input duration, counting the appended EOS frame if enabled.
     bucket_key = tf.cast(num_frames, tf.int32)
     if self.params.append_eos_frame:
         bucket_key += 1
     tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(fval['transcript'])
     src_paddings = tf.zeros([num_frames], dtype=tf.float32)
     return [
         fval['uttid'], tgt_ids, tgt_labels, tgt_paddings,
         fval['frames'], src_paddings
     ], bucket_key
Example #2
0
    def Proc(record):
      """Turns a serialized tf.Example into NMT training tensors.

      Args:
        record: A serialized tf.Example proto with 'inputs' and 'targets'.

      Returns:
        A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
        tgt_weights], bucket_key) tuple.
      """
      feature_spec = {
          'inputs': tf.VarLenFeature(tf.int64),
          'targets': tf.VarLenFeature(tf.int64),
      }
      parsed = tf.parse_single_example(record, feature_spec)
      dense = {k: v.values for k, v in six.iteritems(parsed)}

      src_ids = dense['inputs']
      tgt_labels = dense['targets']

      # tgt_ids is tgt_labels shifted right by one, with a SOS id prepended.
      tgt_ids = tf.concat([[p.sos_id], tgt_labels[:-1]], axis=0)
      # Nothing is padded at this point, so paddings are all-zero and
      # per-token weights all-one.
      src_paddings = tf.zeros(tf.shape(src_ids), dtype=tf.float32)
      tgt_paddings = tf.zeros(tf.shape(tgt_ids), dtype=tf.float32)
      tgt_weights = tf.ones(tf.shape(tgt_ids), dtype=tf.float32)
      # Bucket on the longer of the two unpadded sequence lengths.
      bucket_key = tf.cast(
          tf.maximum(
              tf.reduce_sum(1.0 - src_paddings),
              tf.reduce_sum(1.0 - tgt_paddings)), tf.int32)

      return [
          src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels, tgt_weights
      ], bucket_key
Example #3
0
 def Proc(record):
     """Extracts pre-padded NMT tensors from a serialized tf.Example.

     Args:
       record: A serialized tf.Example proto carrying all six fields below.

     Returns:
       A list [source_id, source_padding, target_id, target_padding,
       target_label, target_weight, bucket_key].
     """
     field_names = [
         'source_id', 'source_padding', 'target_id', 'target_padding',
         'target_label', 'target_weight',
     ]
     # Id/label fields are int64 vectors; padding/weight fields are float32.
     field_types = [
         tf.int64, tf.float32, tf.int64, tf.float32, tf.int64, tf.float32
     ]
     spec = {n: tf.VarLenFeature(t) for n, t in zip(field_names, field_types)}
     parsed = tf.parse_single_example(record, spec)
     features = {k: v.values for k, v in six.iteritems(parsed)}
     # Bucket on the longer of the two unpadded lengths.
     bucket_key = tf.to_int32(
         tf.maximum(tf.reduce_sum(1.0 - features['source_padding']),
                    tf.reduce_sum(1.0 - features['target_padding'])))
     return [features[n] for n in field_names] + [bucket_key]
Example #4
0
        def _Proc(record):
            """Converts a serialized tf.Example into NMT training tensors.

            Args:
              record: A serialized tf.Example proto with 'inputs' and
                'targets'.

            Returns:
              A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
              tgt_weights], bucket_key) tuple.
            """
            spec = {
                'inputs': tf.VarLenFeature(tf.int64),
                'targets': tf.VarLenFeature(tf.int64),
            }
            parsed = tf.parse_single_example(record, spec)
            dense = {k: v.values for k, v in six.iteritems(parsed)}

            src_ids, tgt_labels = dense['inputs'], dense['targets']

            # Shared helper derives paddings, the shifted target ids, the
            # per-token weights, and the bucketing key.
            (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
             bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)
            return [
                src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
                tgt_weights
            ], bucket_key
Example #5
0
    def _ProcPacked(record):
      """Parses a packed tf.Example into tensors plus segment metadata.

      Args:
        record: A serialized tf.Example proto with packed-input fields.

      Returns:
        A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
        tgt_weights, src_pos, src_seg, tgt_pos, tgt_seg], bucket_key) tuple.
      """
      packed_fields = [
          'inputs',
          'targets',
          'inputs_segmentation',
          'inputs_position',
          'targets_segmentation',
          'targets_position',
      ]
      # Every packed field is a variable-length int64 vector.
      spec = {name: tf.VarLenFeature(tf.int64) for name in packed_fields}
      parsed = tf.parse_single_example(record, spec)
      dense = {k: v.values for k, v in six.iteritems(parsed)}

      src_ids = dense['inputs']
      tgt_labels = dense['targets']
      src_pos = dense['inputs_position']
      src_seg = dense['inputs_segmentation']
      tgt_pos = dense['targets_position']
      tgt_seg = dense['targets_segmentation']

      # Shared helper derives paddings, the shifted target ids, the per-token
      # weights, and the bucketing key.
      (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
       bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)
      return [
          src_ids,
          src_paddings,
          tgt_ids,
          tgt_paddings,
          tgt_labels,
          tgt_weights,
          src_pos,
          src_seg,
          tgt_pos,
          tgt_seg,
      ], bucket_key
Example #6
0
    def _Proc(record):
      """Parses an unpacked tf.Example, adding trivial segment metadata.

      Args:
        record: A serialized tf.Example proto with 'inputs' and 'targets'.

      Returns:
        A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
        tgt_weights, src_pos, src_seg, tgt_pos, tgt_seg], bucket_key) tuple.
      """
      spec = {
          'inputs': tf.VarLenFeature(tf.int64),
          'targets': tf.VarLenFeature(tf.int64),
      }
      parsed = tf.parse_single_example(record, spec)
      dense = {k: v.values for k, v in six.iteritems(parsed)}

      src_ids, tgt_labels = dense['inputs'], dense['targets']

      # Shared helper derives paddings, the shifted target ids, the per-token
      # weights, and the bucketing key.
      (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
       bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)

      # Sequential positions and all-zero segment ids stand in for the packing
      # metadata that packed inputs carry explicitly.
      src_pos = tf.range(tf.shape(src_ids)[0], dtype=tf.int32)
      src_seg = tf.zeros_like(src_paddings)
      tgt_pos = tf.range(tf.shape(tgt_ids)[0], dtype=tf.int32)
      tgt_seg = tf.zeros_like(tgt_paddings)

      return [
          src_ids,
          src_paddings,
          tgt_ids,
          tgt_paddings,
          tgt_labels,
          tgt_weights,
          src_pos,
          src_seg,
          tgt_pos,
          tgt_seg,
      ], bucket_key