Example #1
    def testDecoderSampleTargetSequences(self):
        p = self._DecoderParams(vn_config=py_utils.VariationalNoiseParams(
            None, False, False),
                                num_classes=8)
        p.target_seq_len = 5
        p.random_seed = 1
        config = tf.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(do_function_inlining=False)))
        with self.session(use_gpu=False, config=config) as sess:
            tf.set_random_seed(8372740)
            np.random.seed(35315)
            dec = p.Instantiate()
            source_sequence_length = 5
            batch_size = 4
            source_encodings = tf.constant(np.random.normal(
                size=[source_sequence_length, batch_size, p.source_dim]),
                                           dtype=tf.float32)
            source_encoding_padding = tf.constant(
                [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0],
                 [0.0, 1.0, 1.0, 1.0], [0.0, 1.0, 1.0, 1.0],
                 [0.0, 1.0, 1.0, 1.0]],
                dtype=tf.float32)
            encoder_outputs = py_utils.NestedMap(
                encoded=source_encodings, padding=source_encoding_padding)
            sampled_sequences = dec.SampleTargetSequences(
                dec.theta, encoder_outputs, random_seed=tf.to_int32(123))
            self.assertAllEqual([batch_size, p.target_seq_len],
                                sampled_sequences.ids.shape)
            tf.global_variables_initializer().run()
            decoder_output = sess.run(sampled_sequences)
            print('ids=%s' % np.array_repr(decoder_output.ids))
            lens = np.sum(1 - decoder_output.paddings, axis=1)
            print('lens=%s' % lens)
            # pyformat: disable
            # pylint: disable=bad-whitespace,bad-continuation
            expected_ids = [[6, 2, 2, 2, 2], [0, 0, 7, 5, 1], [6, 1, 5, 1, 5],
                            [6, 7, 7, 4, 4]]
            # pylint: enable=bad-whitespace,bad-continuation
            # pyformat: enable
            expected_lens = [2, 5, 5, 5]
            self.assertAllEqual(expected_lens, lens)
            self.assertAllEqual(expected_ids, decoder_output.ids)

            # Sample again with the same random seed.
            decoder_output2 = sess.run(
                dec.SampleTargetSequences(dec.theta,
                                          encoder_outputs,
                                          random_seed=tf.to_int32(123)))
            # Get the same output.
            self.assertAllEqual(decoder_output.ids, decoder_output2.ids)
            self.assertAllEqual(decoder_output.paddings,
                                decoder_output2.paddings)

            # Sample again with a different random seed.
            decoder_output3 = sess.run(
                dec.SampleTargetSequences(dec.theta,
                                          encoder_outputs,
                                          random_seed=tf.to_int32(123456)))
            # Get different sequences.
            self.assertNotAllClose(expected_ids, decoder_output3.ids)
Example #2
    def _Extract(self, features):
        p = self.params
        # Label values match the proto enum car.open_dataset.Label.Type. The value
        # range is [1..4] for non-background labels.
        labels = tf.to_int32(_Dense(features['labels']))
        labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
        label_ids = tf.reshape(_Dense(features['label_ids'], ''), [-1])
        label_ids = py_utils.PadOrTrimTo(label_ids, [p.max_num_objects], '')
        bboxes_3d = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
        bboxes_3d_mask = tf.ones([tf.shape(bboxes_3d)[0]])
        bboxes_3d_num_points = tf.to_int32(
            _Dense(features['bboxes_3d_num_points']))
        bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])
        bboxes_3d_mask = py_utils.PadOrTrimTo(bboxes_3d_mask,
                                              [p.max_num_objects])
        bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                    [p.max_num_objects])
        label_metadata = tf.reshape(_Dense(features['label_metadata']),
                                    [-1, 4])
        label_metadata = py_utils.PadOrTrimTo(label_metadata,
                                              [p.max_num_objects, 4])

        detection_difficulties = py_utils.PadOrTrimTo(
            tf.to_int32(_Dense(features['detection_difficulties'])),
            [p.max_num_objects])
        tracking_difficulties = py_utils.PadOrTrimTo(
            tf.to_int32(_Dense(features['tracking_difficulties'])),
            [p.max_num_objects])
        unfiltered_bboxes_3d_mask = bboxes_3d_mask

        if p.filter_labels:
            valid_labels = tf.constant([p.filter_labels])
            bbox_mask = tf.reduce_any(tf.equal(tf.expand_dims(labels, 1),
                                               valid_labels),
                                      axis=1)
            bboxes_3d_mask *= tf.to_float(bbox_mask)

        outputs = {
            'labels': labels,
            'label_ids': label_ids,
            'detection_difficulties': detection_difficulties,
            'tracking_difficulties': tracking_difficulties,
            'bboxes_3d': bboxes_3d,
            'bboxes_3d_mask': bboxes_3d_mask,
            'bboxes_3d_num_points': bboxes_3d_num_points,
            'unfiltered_bboxes_3d_mask': unfiltered_bboxes_3d_mask,
            'speed': label_metadata[:, :2],
            'acceleration': label_metadata[:, 2:],
        }

        return py_utils.NestedMap(outputs)
Example #3
def SequenceConcat(x, x_paddings, y, y_paddings, pad=0):
    """Concats sequence `x` with sequence `y`.

  This function is length-aware (based on the paddings).

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    y: A sequence of tokens of shape [batch_size, y_len_max].
    y_paddings: The paddings of `y`.
    pad: The <pad> token to fill the concatenated sequence (of type integer).

  Returns:
    A tuple.
      - Concatenation of `x` and `y` of shape
        [batch_size, x_len_max + y_len_max].
      - Paddings of the concatenation of shape
        [batch_size, x_len_max + y_len_max].
  """
    # Get the length (w/ eos).
    x_len = tf.to_int32(tf.round(tf.reduce_sum(1 - x_paddings, 1)))
    y_len = tf.to_int32(tf.round(tf.reduce_sum(1 - y_paddings, 1)))

    batch_size = py_utils.GetShape(x)[0]
    y_len_max = py_utils.GetShape(y)[1]

    # Pad `x` with necessary <pad>.
    x = tf.concat([x, tf.fill(py_utils.GetShape(y), pad)], 1)
    # Replace all <pad> with 0.
    x = tf.where(tf.not_equal(x, pad), x, tf.fill(py_utils.GetShape(x), 0))

    # Compute the write indices of `y` in `xy`.
    indices = tf.stack([
        tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, y_len_max]),
        (tf.tile(tf.expand_dims(tf.range(y_len_max), 0), [batch_size, 1]) +
         tf.expand_dims(x_len, 1)),
    ], 2)

    xy = x + tf.scatter_nd(indices, y, py_utils.GetShape(x))

    # We need to remap all <pad> to `pad`.
    xy = tf.where(
        tf.less(tf.expand_dims(tf.range(py_utils.GetShape(xy)[1]), 0),
                tf.expand_dims(x_len + y_len, 1)), xy,
        tf.fill(py_utils.GetShape(xy), pad))
    xy_paddings = 1 - tf.sequence_mask(x_len + y_len,
                                       py_utils.GetShape(xy)[1],
                                       x_paddings.dtype)
    return xy, xy_paddings
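
A minimal usage sketch with hypothetical values (evaluated under a TF1 session): the valid tokens of `y` are written directly after the valid tokens of `x`, and every position beyond the combined length is filled with `pad`.

x = tf.constant([[1, 2, 0], [3, 0, 0]], dtype=tf.int32)
x_paddings = tf.constant([[0., 0., 1.], [0., 1., 1.]])
y = tf.constant([[4, 5], [6, 0]], dtype=tf.int32)
y_paddings = tf.constant([[0., 0.], [0., 1.]])
# Lengths are [2, 1] for `x` and [2, 1] for `y`.
xy, xy_paddings = SequenceConcat(x, x_paddings, y, y_paddings, pad=0)
# xy          -> [[1, 2, 4, 5, 0], [3, 6, 0, 0, 0]]
# xy_paddings -> [[0., 0., 0., 0., 1.], [0., 0., 1., 1., 1.]]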
Example #4
  def CreateDenseCoordinates(self, ranges):
    """Create a matrix of coordinate locations corresponding to a dense grid.

    Example: To create (x, y) coordinates corresponding over a 10x10 grid with
      step sizes 1, call CreateDenseCoordinates([(1, 10, 10), (1, 10, 10)]).

    Args:
      ranges: A list of 3-tuples, each tuple is expected to contain (min, max,
        num_steps). Each list element corresponds to one dimension. Each tuple
        will be passed into np.linspace to create the values for a single
        dimension.

    Returns:
      tf.float32 tensor of shape [total_points, len(ranges)], where
      total_points = product of all num_steps.

    """
    total_points = int(np.prod([r_steps for _, _, r_steps in ranges]))
    cycle_steps = total_points
    stack_coordinates = []

    for r_start, r_stop, r_steps in ranges:
      values = tf.lin_space(
          tf.to_float(r_start), tf.to_float(r_stop), tf.to_int32(r_steps))
      cycle_steps //= r_steps
      gather_idx = (tf.range(total_points) // cycle_steps) % r_steps
      stack_coordinates.append(tf.gather(values, gather_idx))

    return tf.stack(stack_coordinates, axis=1)
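
A small sketch of the enumeration order (`obj` is a hypothetical instance of the enclosing class; `self` is not used by the method): earlier dimensions cycle slowest, so the rows enumerate the grid in row-major order.

coords = obj.CreateDenseCoordinates([(0, 1, 2), (0, 2, 3)])
# coords -> [[0., 0.], [0., 1.], [0., 2.],
#            [1., 0.], [1., 1.], [1., 2.]]   # shape [6, 2]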
Example #5
def SequenceAppendToken(x, x_paddings, token, extend=False):
    """Appends <token> to sequence `x`.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    token: The token to append (of type integer).
    extend: Whether to extend `x` along the length dimension; this must be
      True if any sequence in `x` has length `x_len_max`, or else an invalid
      sequence will be emitted.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max].
      - The new paddings, Tensor of shape [batch_size, x_len_max].
  """
    batch_size = py_utils.GetShape(x)[0]
    x_len = tf.to_int32(tf.round(tf.reduce_sum(1 - x_paddings, 1)))
    if extend:
        x = tf.pad(x, [[0, 0], [0, 1]])
    # Mask all invalid entries of `x` to 0.
    x *= tf.sequence_mask(x_len, py_utils.GetShape(x)[1], x.dtype)
    # Append the <token> based on `x_len`.
    x += tf.scatter_nd(tf.stack([tf.range(batch_size), x_len], axis=1),
                       tf.cast(tf.fill([batch_size], token), x.dtype),
                       py_utils.GetShape(x))
    x_paddings = 1 - tf.sequence_mask(x_len + 1,
                                      py_utils.GetShape(x)[1],
                                      x_paddings.dtype)
    return x, x_paddings
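
A usage sketch with hypothetical values: with `extend=True` the length dimension grows by one, so even the full-length second row receives its appended token (here an <eos> id of 2).

x = tf.constant([[1, 2, 0], [3, 4, 5]], dtype=tf.int32)
x_paddings = tf.constant([[0., 0., 1.], [0., 0., 0.]])
new_x, new_paddings = SequenceAppendToken(x, x_paddings, token=2, extend=True)
# new_x        -> [[1, 2, 2, 0], [3, 4, 5, 2]]
# new_paddings -> [[0., 0., 0., 1.], [0., 0., 0., 0.]]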
Example #6
 def _FlatOutputProcessor(inputs):
     """Returns a flattened list of 'processor(inputs)'."""
     output, bucketing_key = processor(inputs)
     if isinstance(output, list):
         assert output
         assert all(isinstance(x, tf.Tensor)
                    for x in output), '{}'.format(output)
     else:
         assert isinstance(output, py_utils.NestedMap), '{}'.format(output)
         assert output
         assert all(isinstance(x, tf.Tensor)
                    for x in output.Flatten()), '{}'.format(
                        output.DebugString())
     bucketing_key = tf.to_int32(bucketing_key)
     tf.logging.debug('Processor outputs=%s bucketing_key=%s', output,
                      bucketing_key)
     output_tmpl.values = output
     flat_output_tmpl = output_tmpl.Flatten()
     tf.logging.debug('Processor flat outputs=%s', flat_output_tmpl)
     tf.logging.debug('extra_inputs=%s extra_args=%s extra_vars=%s',
                      function.get_extra_inputs(),
                      function.get_extra_args(), function.get_extra_vars())
     assert not function.get_extra_args(), (
         'fns {} is not pure: extra_args={}'.format(
             processor, function.get_extra_args()))
     return flat_output_tmpl + [bucketing_key]
Example #7
def ComputeKITTIDifficulties(box_image_height, occlusion, truncation):
  """Compute difficulties from box height, occlusion, and truncation."""
  # Easy: No occlusion, max truncation 15%
  easy_level = tf.to_int32((box_image_height >= 40.) & (occlusion <= 0.)
                           & (truncation <= 0.15)) * 3
  # Moderate: max occlusion: partly occluded, max truncation 30%
  moderate_level = tf.to_int32((occlusion <= 1.) & (truncation <= 0.3)
                               & (box_image_height >= 25.)) * 2
  # Hard: Difficult to see, max truncation 50%
  hard_level = tf.to_int32((occlusion <= 2.) & (truncation <= 0.5)
                           & (box_image_height >= 25.)) * 1

  # Occlusion = 3 or truncation above 0.5 is "super hard", and
  # will map to 0 (ignored).
  difficulties = tf.maximum(tf.maximum(hard_level, moderate_level), easy_level)

  return difficulties
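
A worked sketch with hypothetical boxes; the highest satisfied level wins, and a box that fails even the hard criteria maps to 0.

box_image_height = tf.constant([45., 30., 20.])
occlusion = tf.constant([0., 1., 2.])
truncation = tf.constant([0.1, 0.2, 0.6])
difficulties = ComputeKITTIDifficulties(box_image_height, occlusion, truncation)
# -> [3, 2, 0]  (easy, moderate, ignored)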
Example #8
  def add_labels(self, feature, labels, points_xyz):
    """Add 3d bounding box labels into the output feature map.

    Args:
      feature: A tf.Example feature map.
      labels: A repeated car.open_dataset.Label proto.
      points_xyz: A numpy array of shape [-1, 3] with the pointcloud. This is
        used to calculate the number of points in each 3D bounding box.
    """
    label_classes = []
    label_ids = []
    detection_difficulty_levels = []
    tracking_difficulty_levels = []
    bboxes = []
    label_md = []

    for label in labels:
      box = label.box
      bbox_3d = [
          box.center_x, box.center_y, box.center_z, box.length, box.width,
          box.height, box.heading
      ]
      md = [
          label.metadata.speed_x, label.metadata.speed_y,
          label.metadata.accel_x, label.metadata.accel_y
      ]
      label_md += md
      bboxes += bbox_3d
      label_classes += [label.type]
      label_ids += [tf.compat.as_bytes(label.id)]
      detection_difficulty_levels += [label.detection_difficulty_level]
      tracking_difficulty_levels += [label.tracking_difficulty_level]

    # Calculate the number of points in each ground truth box which are needed
    # to fill in difficulty levels for each ground truth and to filter boxes
    # with less points than a configurable minimum.
    points_xyz = tf.convert_to_tensor(points_xyz, dtype=tf.float32)
    bboxes_3d = tf.convert_to_tensor(
        np.array(bboxes).reshape(-1, 7), dtype=tf.float32)
    points_in_bboxes_mask = geometry.IsWithinBBox3D(points_xyz, bboxes_3d)
    bboxes_3d_num_points = tf.reduce_sum(
        tf.to_int32(points_in_bboxes_mask), axis=0, keepdims=False)
    bboxes_3d_num_points = bboxes_3d_num_points.numpy().reshape([-1])

    bboxes = np.array(bboxes).reshape(-1)
    label_md = np.array(label_md).reshape(-1)
    feature['labels'].int64_list.value[:] = label_classes
    feature['label_ids'].bytes_list.value[:] = label_ids
    feature['detection_difficulties'].int64_list.value[:] = (
        detection_difficulty_levels)
    feature['tracking_difficulties'].int64_list.value[:] = (
        tracking_difficulty_levels)
    feature['bboxes_3d'].float_list.value[:] = list(bboxes)
    feature['label_metadata'].float_list.value[:] = list(label_md)
    feature['bboxes_3d_num_points'].int64_list.value[:] = list(
        bboxes_3d_num_points)
Example #9
 def _ConcatOnehotFn(input_data):
     """Concat the input features with a onehot version of the label ids."""
     features = input_data.features
     label = input_data.label
     num_pts = tf.shape(features)[1]
     label_one_hot = tf.one_hot(tf.to_int32(label), depth=16)
     label_one_hot = tf.tile(tf.expand_dims(label_one_hot, 1),
                             [1, num_pts, 1])
     input_data.features = tf.concat([features, label_one_hot], axis=-1)
     return input_data
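
A shape-level sketch with hypothetical inputs (the depth of 16 label classes is hard-coded in the function above): every point inherits its example's one-hot label, widening the feature dimension by 16.

input_data = py_utils.NestedMap(
    features=tf.random.uniform([2, 5, 3]),  # [batch, num_points, dims]
    label=tf.constant([3, 7]))              # [batch]
out = _ConcatOnehotFn(input_data)
# out.features now has shape [2, 5, 3 + 16].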
Example #10
  def _ComputeDecoderMetrics(self, decoder_outs, input_batch):
    """Computes metrics on output from decoder.

    Args:
      decoder_outs: A `BeamSearchDecodeOutput`, a namedtuple containing the
        decode results.
      input_batch:  A `NestedMap` of tensors representing the source, target,
        and other components of the input batch.

    Returns:
      A dict of Tensors containing decoder output and metrics.
    """
    p = self.params
    topk = self._GetTopK(decoder_outs)
    tgt = self._GetTargetForDecoderMetrics(input_batch)
    transcripts = self.input_generator.IdsToStrings(
        tgt.labels,
        tf.to_int32(tf.round(tf.reduce_sum(1.0 - tgt.paddings, 1) - 1.0)))

    # Filter out all isolated '<noise>' tokens.
    noise_pattern = ' <noise> |^<noise> | <noise>$|^<noise>$'
    filtered_refs = tf.regex_replace(transcripts, noise_pattern, ' ')
    filtered_hyps = tf.regex_replace(topk.decoded, noise_pattern, ' ')
    # Compute translation quality scores for all hyps.
    filtered_refs = tf.tile(
        tf.reshape(filtered_refs, [-1, 1]),
        [1, p.decoder.beam_search.num_hyps_per_beam])
    filtered_hyps = tf.reshape(filtered_hyps, [-1])
    filtered_refs = tf.reshape(filtered_refs, [-1])
    norm_wer_errors, norm_wer_words = self._ComputeNormalizedWER(
        filtered_hyps, filtered_refs)

    ret_dict = {
        'target_ids': tgt.ids,
        'target_labels': tgt.labels,
        'target_weights': tgt.weights,
        'target_paddings': tgt.paddings,
        'transcripts': transcripts,
        'topk_decoded': topk.decoded,
        'topk_ids': topk.ids,
        'topk_lens': topk.lens,
        'topk_scores': topk.scores,
        'norm_wer_errors': norm_wer_errors,
        'norm_wer_words': norm_wer_words,
    }

    if not py_utils.use_tpu():
      ret_dict['utt_id'] = input_batch.sample_ids

    ret_dict.update(
        self.AddAdditionalDecoderMetricsToGraph(topk, filtered_hyps,
                                                filtered_refs, input_batch,
                                                decoder_outs))
    return ret_dict
Example #11
    def _BeamSearchDecode(self, input_batch):
        p = self.params
        with tf.name_scope('fprop'), tf.name_scope(p.name):
            encoder_outputs = self.enc.FPropDefaultTheta(input_batch.src)
            encoder_outputs = self.dec.AddExtraDecodingInfo(
                encoder_outputs, input_batch.tgt)
            decoder_outs = self.dec.BeamSearchDecode(encoder_outputs)

            topk_hyps = decoder_outs.topk_hyps
            topk_ids = decoder_outs.topk_ids
            topk_lens = decoder_outs.topk_lens
            topk_scores = decoder_outs.topk_scores

            slen = tf.to_int32(
                tf.round(tf.reduce_sum(1 - input_batch.src.paddings, 1) - 1))
            srcs = self.input_generator.IdsToStrings(
                input_batch.src.ids, slen, self._GetTokenizerKeyToUse('src'))
            topk_decoded = self.input_generator.IdsToStrings(
                topk_ids, topk_lens - 1, self._GetTokenizerKeyToUse('tgt'))
            topk_decoded = tf.reshape(topk_decoded, tf.shape(topk_hyps))
            topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

            refs = self.input_generator.IdsToStrings(
                input_batch.tgt.labels,
                tf.to_int32(
                    tf.round(
                        tf.reduce_sum(1.0 - input_batch.tgt.paddings, 1) -
                        1.0)), self._GetTokenizerKeyToUse('tgt'))

            ret_dict = {
                'target_ids': input_batch.tgt.ids,
                'target_labels': input_batch.tgt.labels,
                'target_weights': input_batch.tgt.weights,
                'target_paddings': input_batch.tgt.paddings,
                'sources': srcs,
                'targets': refs,
                'topk_decoded': topk_decoded,
                'topk_lens': topk_lens,
                'topk_scores': topk_scores,
            }
            return ret_dict
Example #12
    def _ProcessLine(self, line):
        """A single-text-line processor.

    Gets a string tensor representing a line of text that has been read from
    the input file, and splits it into graphemes (characters).
    We use the original characters as the target labels, and the lowercased,
    punctuation-removed characters as the source labels.

    Args:
      line: a 1D string tensor.

    Returns:
      A list of tensors, in the expected order by __init__.
    """
        # Tokenize the input into integer ids.
        # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
        # end-of-sentence token appended.
        tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(
            tf.convert_to_tensor([line]))

        def Normalize(line):
            # Lowercase and remove punctuation.
            line = line.lower().translate(None,
                                          string.punctuation.encode('utf-8'))
            # Convert multiple consecutive spaces to a single one.
            line = b' '.join(line.split())
            return line

        normalized_line = tf.py_func(Normalize, [line],
                                     tf.string,
                                     stateful=False)
        _, src_labels, src_paddings = self.StringsToIds(tf.convert_to_tensor(
            [normalized_line]),
                                                        is_source=True)
        # The model expects the source without a start-of-sentence token.
        src_ids = src_labels

        # Compute the length for bucketing.
        bucket_key = tf.to_int32(
            tf.round(
                tf.maximum(tf.reduce_sum(1.0 - src_paddings),
                           tf.reduce_sum(1.0 - tgt_paddings))))
        tgt_weights = 1.0 - tgt_paddings

        # Return tensors in an order consistent with __init__.
        out_tensors = [
            src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
            tgt_weights
        ]
        return [tf.squeeze(t, axis=0) for t in out_tensors], bucket_key
Example #13
def SequenceLength(padding):
    """Computes the length of a sequence based on binary padding.

  Args:
    padding: A tensor of binary paddings shaped [batch, seqlen].

  Returns:
    seq_lens: A tensor of shape [batch] containing the non-padded length of
      each sequence in `padding` along the batch dimension.
  """
    seq_lens = tf.to_int32(tf.round(tf.reduce_sum(1 - padding, axis=1)))
    # Get rid of any extra dimensions.
    batch_size = tf.shape(padding)[0]
    seq_lens = tf.reshape(seq_lens, [batch_size], name='seq_lens')
    return seq_lens
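
For instance (hypothetical values), a batch whose rows have 2 and 4 valid positions:

padding = tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]])
seq_lens = SequenceLength(padding)  # -> [2, 4]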
Example #14
  def Filter(self, outputs):
    """Optionally filters the data based on context info."""
    p = self.params
    if p.equality_filters is None:
      return 1

    allowed_example = tf.convert_to_tensor(True)
    for filter_key, filter_values in p.equality_filters:
      if filter_key not in outputs:
        raise ValueError(
            'Filter key `{}` not found in extracted data.'.format(filter_key))
      has_allowed_data = tf.reduce_any(
          tf.equal(outputs[filter_key], filter_values))
      allowed_example = tf.logical_and(allowed_example, has_allowed_data)

    not_allowed_example = 1 - tf.to_int32(allowed_example)
    return 1 + (not_allowed_example * input_extractor.BUCKET_UPPER_BOUND)
Example #15
 def Proc(record):
     """Parses a serialized tf.Example record."""
     outputs = [
         ('source_id', tf.VarLenFeature(tf.int64)),
         ('source_padding', tf.VarLenFeature(tf.float32)),
         ('target_id', tf.VarLenFeature(tf.int64)),
         ('target_padding', tf.VarLenFeature(tf.float32)),
         ('target_label', tf.VarLenFeature(tf.int64)),
         ('target_weight', tf.VarLenFeature(tf.float32)),
     ]
     features = tf.parse_single_example(record, dict(outputs))
     for k, v in six.iteritems(features):
         features[k] = v.values
     bucket_key = tf.to_int32(
         tf.maximum(tf.reduce_sum(1.0 - features['source_padding']),
                    tf.reduce_sum(1.0 - features['target_padding'])))
     return [features[k] for k, _ in outputs] + [bucket_key]
Example #16
    def _MaybePadSourceInputs(self, src_inputs, src_paddings):
        p = self.params
        if not p.append_eos_frame:
            return src_inputs, src_paddings

        per_src_len = tf.reduce_sum(1 - src_paddings, 1)
        per_src_len += 1
        max_src_len = tf.reduce_max(per_src_len)
        input_shape = tf.shape(src_inputs)
        input_len = tf.maximum(input_shape[1], tf.to_int32(max_src_len))
        pad_steps = input_len - input_shape[1]
        src_inputs = tf.concat([
            src_inputs,
            tf.zeros(inplace_ops.inplace_update(input_shape, 1, pad_steps),
                     src_inputs.dtype)
        ], 1)
        src_paddings = 1 - tf.sequence_mask(
            tf.reshape(per_src_len, [input_shape[0]]), tf.reshape(
                input_len, []), src_paddings.dtype)
        return src_inputs, src_paddings
Example #17
def bleu_score(predictions, labels, **unused_kwargs):
    """BLEU score computation between labels and predictions.

  An approximate BLEU scoring method, since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use an ngram order of
  4 and apply the brevity penalty. Also, this does not use beam search.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    bleu: float32 scalar tensor, the approximate BLEU score (returned together
      with a constant 1.0 weight).
  """
    outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
    # Convert the outputs and labels to a [batch_size, input_length] tensor.
    outputs = tf.squeeze(outputs, axis=[-1, -2])
    labels = tf.squeeze(labels, axis=[-1, -2])

    bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32)
    return bleu, tf.constant(1.0)
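
A shape-level usage sketch with hypothetical tensors; it assumes a `compute_bleu` function (e.g. the tensor2tensor one) is importable in the surrounding module. The trailing [1, 1] axes mirror what the squeezes above expect.

predictions = tf.random.uniform([4, 7, 1, 1, 10])  # logits over a 10-token vocab
labels = tf.random.uniform([4, 7, 1, 1], maxval=10, dtype=tf.int32)
bleu, weight = bleu_score(predictions, labels)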
Example #18
 def Proc(record):
   """Parses a serialized tf.Example record."""
   features = [
       ('uttid', tf.VarLenFeature(tf.string)),
       ('transcript', tf.VarLenFeature(tf.string)),
       ('frames', tf.VarLenFeature(tf.float32)),
   ]
   example = tf.parse_single_example(record, dict(features))
   fval = {k: v.values for k, v in six.iteritems(example)}
   # Reshape the flattened vector into its original time-major
   # representation.
   fval['frames'] = tf.reshape(
       fval['frames'], shape=[-1, self.params.frame_size])
   # Input duration determines the bucket.
   bucket_key = tf.to_int32(tf.shape(fval['frames'])[0])
   if self.params.append_eos_frame:
     bucket_key += 1
   tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(fval['transcript'])
   src_paddings = tf.zeros([tf.shape(fval['frames'])[0]], dtype=tf.float32)
   return fval['uttid'], tgt_ids, tgt_labels, tgt_paddings, fval[
       'frames'], src_paddings, bucket_key
Example #19
 def _FlatOutputProcessor(source_id, record):
   """Returns a flattened list of 'processor(inputs)'."""
   processor_spec = tf_inspect.getargspec(processor)
   tf.logging.debug('GenericInput.processor.argspec=%s', processor_spec)
   processor_args = set(processor_spec.args) - set(['self'])
   if len(processor_args) == 1:
     output, bucketing_key = processor(record)
   elif processor_args == set(['source_id', 'record']):
     output, bucketing_key = processor(source_id=source_id, record=record)
   else:
     raise ValueError(
         'GenericInput: processor should take either a single arg '
         'or two args named as "source_id" and "record". '
         'Actual: %s' % processor_args)
   if isinstance(output, list):
     assert output
     assert all(isinstance(x, tf.Tensor) for x in output), '{}'.format(output)
   else:
     assert isinstance(output, py_utils.NestedMap), '{}'.format(output)
     assert output
     assert all(
         isinstance(x, tf.Tensor) for x in output.Flatten()), '{}'.format(
             output.DebugString())
   bucketing_key = tf.to_int32(bucketing_key)
   tf.logging.debug('Processor outputs=%s bucketing_key=%s', output,
                    bucketing_key)
   output_tmpl.out_values = output
   flat_output_tmpl = output_tmpl.Flatten()
   tf.logging.debug('Processor flat outputs=%s', flat_output_tmpl)
   tf.logging.debug('extra_inputs=%s extra_args=%s extra_vars=%s',
                    function.get_extra_inputs(), function.get_extra_args(),
                    function.get_extra_vars())
   assert not function.get_extra_args(), (
       'fns {} is not pure: extra_args={}'.format(processor,
                                                  function.get_extra_args()))
   return flat_output_tmpl + [bucketing_key]
Example #20
    def FProp(self,
              theta,
              x,
              x_paddings=None,
              eos_id=1,
              force_sample_last_token=True):
        """Applies SymbolInsertionLayer.

    We take in `x`, which represents the groundtruth sequence (i.e., English
    sequence). We return a sampled rollin (observed) canvas (i.e., random subset
    of the English sequence), as well as the target (indices) for an
    insertion-based model (i.e., the targets given the random observed subset).

    Args:
      theta: Ignored, this can be None.
      x: The symbol ids of shape `[batch_size, time_dim]`.
      x_paddings: The paddings (1 or 0) of shape `[batch_size, time_dim]` where
        0 is valid and 1 is invalid.
      eos_id: The <eos> token id to represent end-of-slot.
      force_sample_last_token: Set True to force sample the last token of `x`.

    Returns:
      A `NestedMap`.
        - canvas: The canvas (based on the `rollin_policy`) of shape
          [batch_size, c_dim]. Note that `c_dim` <= `time_dim`; the two need
          not be equal.
        - canvas_indices: The canvas indices (into `x`).
        - canvas_paddings: The paddings of `canvas_indices`.
        - target_indices: The target indices of shape [num_targets, 3].
          `num_targets` is the number of total targets in the entire batch.
          [:, 0] captures the batch, [:, 1] captures the slot, and [:, 2]
          captures the token. Each row [batch, slot, vocab] represents the
          indices of the target -- i.e., the batch, slot and vocab combination
          of the target. Typical usage of these indices is to tf.gather_nd
          the log-probs (from the softmax layer).
        - target_weights: The target weights.

    Raises:
      ValueError: If invalid params.
    """
        p = self.params

        batch_size = py_utils.GetShape(x)[0]
        time_dim = py_utils.GetShape(x)[1]

        if x_paddings is None:
            x_paddings = tf.zeros([batch_size, time_dim], tf.float32)

        oracle_policy = p.oracle_policy
        rollin_policy = (oracle_policy
                         if p.rollin_policy == 'oracle' else p.rollin_policy)

        if rollin_policy != 'uniform':
            raise ValueError('Unknown or unsupported rollin policy: %s' %
                             rollin_policy)
        if oracle_policy != 'uniform':
            raise ValueError('Unknown or unsupported oracle policy: %s' %
                             oracle_policy)

        x_len = tf.to_int32(tf.round(tf.reduce_sum(1 - x_paddings, 1)))

        # Compute the desired length per example in the batch.
        ratio = tf.random.uniform([batch_size], 0.0, 1.0, seed=p.random_seed)
        if force_sample_last_token:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len, tf.float32), tf.int32),
                x_len - 1) + 1
        else:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len + 1, tf.float32), tf.int32),
                x_len)
        # Compute the maximum length across the batch.
        c_len_max = tf.reduce_max(c_len)

        # Grab subset of random valid indices per example.
        z_logits = tf.cast(
            tf.expand_dims(tf.range(time_dim), 0) >= tf.expand_dims(x_len, 1),
            tf.float32) * -1e9
        if force_sample_last_token:
            # Force sample the last token -- i.e., as indexed by `x_len - 1`. We
            # accomplish this by adding a large positive number to the logits.
            z_logits += tf.cast(
                tf.equal(tf.expand_dims(tf.range(time_dim), 0),
                         tf.expand_dims(x_len - 1, 1)), tf.float32) * 1e9
        # Gumbel-max trick to sample (we only sample valid positions per sample in
        # the batch).
        z = -tf.math.log(-tf.math.log(
            tf.random.uniform([batch_size, time_dim], seed=p.random_seed)))
        unused_c_values, c_indices = tf.nn.top_k(z_logits + z, time_dim)

        # Trim everything > c_len_max.
        c_indices = c_indices[:, :c_len_max]

        # Invalidate any indices >= c_len; we use the last index as the default
        # invalid index.
        c_indices = tf.where(
            tf.expand_dims(tf.range(c_len_max), 0) < tf.expand_dims(c_len, 1),
            c_indices, tf.fill(py_utils.GetShape(c_indices), time_dim - 1))

        # Materialize the canvas.
        c_indices = tf.sort(c_indices)
        c = tf.gather_nd(
            x,
            tf.stack([
                tf.reshape(
                    tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                            [1, c_len_max]), [-1]),
                tf.reshape(c_indices, [-1])
            ], 1))
        c = tf.reshape(c, [batch_size, c_len_max])

        # Compute the paddings.
        c_paddings = 1 - tf.sequence_mask(
            c_len, c_len_max, dtype=x_paddings.dtype)
        c *= tf.cast(1 - c_paddings, tf.int32)

        indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, c_len_max]), [batch_size * c_len_max, 1]),
            tf.reshape(c_indices, [batch_size * c_len_max, 1])
        ], 1)
        x_token_is_observed = tf.scatter_nd(
            indices, tf.ones([batch_size * c_len_max], tf.int32),
            py_utils.GetShape(x))
        # `x_segments` captures which slot each token of `x` belongs to (both
        # observed tokens and tokens that still need to be predicted).
        x_segments = tf.cumsum(x_token_is_observed, 1, exclusive=True)

        x_token_is_observed = tf.cast(x_token_is_observed, tf.bool)
        prev_x_token_is_observed = tf.pad(x_token_is_observed[:, :-1],
                                          [[0, 0], [1, 0]],
                                          constant_values=True)
        x_token_is_observed = tf.reshape(x_token_is_observed, [-1])
        prev_x_token_is_observed = tf.reshape(prev_x_token_is_observed, [-1])
        x_is_valid = tf.cast(1 - x_paddings, tf.bool)
        x_is_valid = tf.reshape(x_is_valid, [-1])

        # Remap all the observed tokens to <eos>; note some of these need a zero
        # weight (or else there would be an <eos> and a valid token in the same
        # slot).
        target_indices = tf.cast(tf.reshape(x, [-1, 1]), tf.int32)
        target_indices = tf.where(
            x_token_is_observed,
            tf.fill(py_utils.GetShape(target_indices), eos_id), target_indices)

        # TODO(williamchan): We give uniform 1.0 weight, however, math suggests
        # we may want to weigh this term by the original sequence length.
        target_weights = tf.ones_like(target_indices, tf.float32)

        # Set the weights to zero for any <eos> whose slot actually contains
        # valid tokens.
        target_weights = tf.where(
            x_token_is_observed & ~prev_x_token_is_observed,
            tf.zeros_like(target_weights), target_weights)

        # TODO(williamchan): Consider dropping the entries w/ weight zero.

        # Add the batch and slot indices.
        target_indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, time_dim]), [batch_size * time_dim, 1]),
            tf.reshape(x_segments, [-1, 1]), target_indices
        ], 1)

        # Select only the valid indices. The selected valid ones include slots w/
        # <eos>.
        target_indices = target_indices[x_is_valid]
        target_weights = target_weights[x_is_valid]

        return py_utils.NestedMap(canvas=c,
                                  canvas_indices=c_indices,
                                  canvas_paddings=c_paddings,
                                  target_indices=target_indices,
                                  target_weights=target_weights)
Example #21
  def Sample(self, decoder_theta, encoder_outputs, random_seed,
             init_state_callback, pre_step_callback, post_step_callback):
    """Samples target sequences, one target sequence per source sequence.

    (Please see beam_search_helper.py for description of decoder callbacks.)

    Args:
      decoder_theta: A NestedMap object containing weights' values of the
        decoder layer and its children layers, to be passed to decoder
        callbacks.
      encoder_outputs: the outputs of the encoder, to be passed to callbacks.
      random_seed: a scalar int32 tensor representing the random seed.
      init_state_callback: decoder._InitBeamSearchStateCallback.
      pre_step_callback: decoder._PreBeamSearchStepCallback.
      post_step_callback: decoder._PostBeamSearchStepCallback.

    Returns:
      A NestedMap containing the following tensors:
      - 'logits': [batch, max_target_length, vocab_size], representing the
        distribution from which target sequences are sampled.
      - 'ids': [batch, max_target_length] of int32, representing the target
        sequence ids, not including target_sos_id, but possibly ending with
        target_eos_id if end-of-sequence is reached before target_seq_len.
      - 'paddings': [batch, max_target_length] of 0/1, where 1 represents
        a padded timestep.
    """
    p = self.params
    assert p.temperature > 0
    # 'recurrent_theta' represents all cross-timestep information used by the
    # recurrent loop below, including layer theta and encoder outputs.
    recurrent_theta = py_utils.NestedMap(
        theta=decoder_theta,
        random_seed=random_seed,
        encoder_outputs=encoder_outputs)
    bs_result, bs_state = init_state_callback(
        recurrent_theta.theta, encoder_outputs, num_hyps_per_beam=1)
    batch = tf.shape(bs_result.log_probs)[0]
    recurrent_state0 = py_utils.NestedMap(
        timestep=tf.zeros(shape=[], dtype=tf.int32),
        logits=bs_result.log_probs,
        # Start with target_sos_id.
        ids=tf.fill([batch], tf.to_int32(p.target_sos_id)),
        bs_state=bs_state)
    inputs = py_utils.NestedMap(dummy=tf.zeros([p.target_seq_len, batch]))

    def Step(recurrent_theta, state0, inputs):
      """Computes one decoder step."""
      del inputs
      with tf.name_scope('single_sampler_step'):
        # Compute logits and states.
        bs_result, bs_state1 = pre_step_callback(
            recurrent_theta.theta,
            recurrent_theta.encoder_outputs,
            tf.expand_dims(state0.ids, 1),  # [batch, 1].
            state0.bs_state,
            num_hyps_per_beam=1)
        batch = tf.shape(bs_result.log_probs)[0]
        state1 = py_utils.NestedMap(timestep=state0.timestep + 1)
        state1.logits = bs_result.log_probs
        # Sample ids from logits. [batch].
        state1.ids = tf.reshape(
            tf.random.stateless_multinomial(
                state1.logits / p.temperature,
                num_samples=1,
                seed=tf.stack([recurrent_theta.random_seed, state0.timestep]),
                output_dtype=state0.ids.dtype,
                name='sample_next_id'), [batch])
        if 'is_last_chunk' in bs_result and p.target_eoc_id >= 0:
          state1.ids = tf.where(
              tf.logical_and(bs_result.is_last_chunk,
                             tf.equal(state1.ids, p.target_eoc_id)),
              tf.fill(tf.shape(state1.ids), p.target_eos_id), state1.ids)
        state1.bs_state = post_step_callback(recurrent_theta.theta,
                                             recurrent_theta.encoder_outputs,
                                             state1.ids, bs_state1)
      return state1, py_utils.NestedMap()

    accumulated_states, _ = recurrent.Recurrent(recurrent_theta,
                                                recurrent_state0, inputs, Step)
    result = py_utils.NestedMap(
        logits=tf.transpose(accumulated_states.logits, [1, 0, 2]),
        ids=tf.transpose(accumulated_states.ids))
    result.paddings = tf.cast(
        _ComputePaddings(result.ids, p.target_eos_id), result.logits.dtype)
    # Force ids to be eos_id if the timestep is padded.
    result.ids = tf.where(
        tf.equal(result.paddings, 0), result.ids,
        tf.fill(tf.shape(result.ids), p.target_eos_id))
    static_batch_size = bs_result.log_probs.shape[0]
    result.ids.set_shape([static_batch_size, p.target_seq_len])
    result.paddings.set_shape([static_batch_size, p.target_seq_len])
    return result
Example #22
def _ComputePaddings(ids, eos_id):
  is_eos = tf.to_int32(tf.equal(ids, eos_id))
  # eos_in_prefix[i, j] = any(ids[i, k] == eos_id for k in range(j))
  eos_in_prefix = tf.cumsum(is_eos, axis=-1, exclusive=True)
  return tf.where(
      tf.equal(eos_in_prefix, 0), tf.zeros_like(ids), tf.ones_like(ids))
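
A worked sketch with hypothetical ids and eos_id=2: the eos position itself stays unpadded, and every position strictly after the first eos becomes padding.

ids = tf.constant([[5, 2, 7, 7], [4, 4, 4, 4]])
paddings = _ComputePaddings(ids, eos_id=2)
# -> [[0, 0, 1, 1], [0, 0, 0, 0]]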
Example #23
File: model.py Project: jairsan/lingvo-1
    def _InferenceSubgraph_Default(self):
        """Default inference subgraph.

    Returns:
      (fetches, feeds), with:

      - fetches: A dictionary of fetches, containing:

        - log_pplx_per_token: A matrix of shape [batch, time]. [i, j]
          is i-th input text's j-th token's log prob.
        - paddings: A matrix of shape [batch, time]. The padding mask.
        - log_pplx_per_sample: A vector of shape [batch]. [i]
          is i-th input text's log prob.
        - num_oovs_per_sample: A vector of shape [batch] counting the total
          number of out-of-vocabulary tokens in each input.
        - tokens_from_labels: A vector of shape [batch] returning the predicted
          tokens as a sequence after mapping them back to strings from ids using
          the vocabulary.
        - ids: A matrix of shape [batch, time]. [i, j]
          is i-th input text's j-th token's id.

      - feeds: A dictionary of feeds, containing:

        - text: A placeholder for a vector of strings.
    """
        text = tf.placeholder(tf.string, shape=[None])
        # [batch, time]
        ids, labels, paddings = self.input_generator.StringsToIds(text)
        lengths = tf.reduce_sum(tf.to_int32(1 - paddings), axis=1)
        tokens_from_labels = self.input_generator.IdsToStrings(labels, lengths)
        oovs = tf.equal(labels, self.input_generator.tokenizer.unk_id)
        num_oovs_per_sample = tf.to_int32(
            tf.reduce_sum(tf.to_float(oovs) * (1 - paddings), axis=1))
        # [time, batch]
        ids, paddings, labels, weights = self._TrimIfPossibleThenTranspose(
            ids, paddings, labels, 1.0 - paddings)
        batch_size = tf.shape(ids)[1]
        xent_output, _ = self.lm.FPropDefaultTheta(
            inputs=ids,
            paddings=paddings,
            state0=self.lm.zero_state(self.theta.lm, batch_size),
            labels=py_utils.NestedMap(class_ids=labels, class_weights=weights))

        per_example_xent = py_utils.HasShape(xent_output.per_example_xent,
                                             tf.shape(ids))
        log_pplx_per_sample = tf.reduce_sum(per_example_xent * (1 - paddings),
                                            axis=0)
        fetches = {
            'log_pplx_per_token':  # [batch, time]
            tf.transpose(per_example_xent),
            'paddings':  # [batch, time]
            tf.transpose(paddings),
            'lengths':  # [batch]
            lengths,
            'log_pplx_per_sample':  # [batch]
            log_pplx_per_sample,
            'num_oovs_per_sample':  # [batch], int32
            num_oovs_per_sample,
            'tokens_from_labels':  # [batch], string
            tokens_from_labels,
            'ids':  # [batch, time], int32
            ids
        }
        feeds = {'text': text}
        return fetches, feeds
Example #24
    def BeamSearchDecode(self,
                         theta,
                         encoder_outputs,
                         num_hyps_per_beam_override=0,
                         init_beam_search_state=None,
                         pre_beam_search_step_callback=None,
                         post_beam_search_step_callback=None,
                         max_steps=None):
        """Performs beam-search based decoding.

    Args:
      theta: A NestedMap object containing weights' values of the decoder layer
        and its children layers.
      encoder_outputs: A NestedMap containing encoder outputs to be passed to
        the callbacks.
      num_hyps_per_beam_override: If set to a value <= 0, this parameter is
        ignored. If set to a value > 0, then this value will be used to override
        `p.num_hyps_per_beam`.
      init_beam_search_state: The `InitBeamSearchState` callback. Please refer
        to the class header comments for more details.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      max_steps: maximum beam search steps. If None, use
        self.params.target_seq_len.

    Returns:
      A `BeamSearchDecodeOutput`.
    """
        p = self.params
        num_hyps_per_beam = p.num_hyps_per_beam
        if num_hyps_per_beam_override > 0:
            num_hyps_per_beam = num_hyps_per_beam_override
        if max_steps is None:
            max_steps = p.target_seq_len

        initial_results, other_states = init_beam_search_state(
            theta, encoder_outputs, num_hyps_per_beam)

        num_hyps = tf.shape(initial_results.log_probs)[0]
        num_beams = num_hyps // num_hyps_per_beam

        if 'step_ids' in initial_results:
            # [num_hyps, 1]
            step_ids = tf.ensure_shape(initial_results.step_ids, [None, 1])
        else:
            step_ids = tf.fill([num_hyps, 1],
                               tf.constant(p.target_sos_id, dtype=tf.int32))

        min_score = -1e36
        best_scores = (tf.zeros(shape=[num_beams], dtype=p.dtype) + min_score)
        cumulative_scores = tf.zeros(shape=[num_hyps], dtype=p.dtype)
        in_scores = tf.zeros([max_steps, num_hyps], dtype=p.dtype)
        in_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_prev_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_done_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.string)
        bs_atten_probs = tf.zeros(
            [max_steps, num_hyps,
             tf.shape(initial_results.atten_probs)[1]],
            dtype=p.dtype)
        cur_step = tf.constant(0, dtype=tf.int32)
        all_done = tf.constant(False, dtype=tf.bool)
        core_bs_states = (best_scores, cumulative_scores, in_scores, in_hyps,
                          in_prev_hyps, in_done_hyps, bs_atten_probs)

        def LoopContinue(cur_step, all_done, unused_step_ids,
                         unused_core_bs_states, unused_other_states_list):
            return tf.logical_and(cur_step < max_steps,
                                  tf.logical_not(all_done))

        def LoopBody(cur_step, unused_all_done, step_ids, core_bs_states,
                     other_states_list):
            (cur_step, all_done, new_step_ids, new_bs_states,
             new_other_states) = self._BeamSearchStep(
                 theta, encoder_outputs, cur_step, step_ids, core_bs_states,
                 other_states.Pack(other_states_list), num_hyps_per_beam,
                 pre_beam_search_step_callback, post_beam_search_step_callback)
            return (cur_step, all_done, new_step_ids, new_bs_states,
                    new_other_states.Flatten())

        flat_other_states = other_states.Flatten()
        _, _, _, final_bs_states, flat_final_other_states = tf.while_loop(
            LoopContinue,
            LoopBody,
            loop_vars=(cur_step, all_done, step_ids, core_bs_states,
                       flat_other_states),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            shape_invariants=(tf.TensorShape(cur_step.get_shape()),
                              tf.TensorShape(all_done.get_shape()),
                              tf.TensorShape(step_ids.get_shape()),
                              _GetShapes(core_bs_states),
                              _GetShapes(flat_other_states, none_shapes=True)))
        # [target_seq_len, num_beams * num_hyps_per_beam].
        final_done_hyps = final_bs_states[5]
        final_other_states = other_states.Pack(flat_final_other_states)

        # TODO(rpang): avoid inspecting 'encoder_outputs'.
        source_paddings = encoder_outputs.padding
        if isinstance(source_paddings, py_utils.NestedMap):
            source_seq_lengths = tf.to_int32(
                tf.reduce_sum(1.0 - tf.transpose(source_paddings.Flatten()[0]),
                              1))
        else:
            source_seq_lengths = tf.to_int32(
                tf.reduce_sum(1.0 - tf.transpose(source_paddings), 1))

        # [num_beams, num_hyps_per_beam].
        topk_hyps = ops.top_k_terminated_hyps(
            final_done_hyps,
            source_seq_lengths,
            k=num_hyps_per_beam,
            num_hyps_per_beam=num_hyps_per_beam,
            length_normalization=p.length_normalization,
            coverage_penalty=p.coverage_penalty,
            target_seq_length_ratio=p.target_seq_length_ratio,
            eoc_id=p.target_eoc_id,
            merge_paths=p.merge_paths)
        # [num_beams * num_hyps_per_beam, ...].
        max_seq_length = 0 if isinstance(max_steps, tf.Tensor) else max_steps
        topk_ids, topk_lens, topk_scores = ops.unpack_hyp(
            tf.reshape(topk_hyps, [-1]), max_seq_length=max_seq_length)
        # [num_beams, num_hyps_per_beam].
        topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

        return BeamSearchDecodeOutput(final_done_hyps, topk_hyps, topk_ids,
                                      topk_lens, topk_scores, None,
                                      final_other_states)
Example #25
  def AssignAnchors(self,
                    anchor_bboxes,
                    gt_bboxes,
                    gt_bboxes_labels,
                    gt_bboxes_mask,
                    foreground_assignment_threshold=0.5,
                    background_assignment_threshold=0.35,
                    background_class_id=0,
                    force_match=True,
                    similarity_fn=None):
    """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:
      Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      Use assigned_cls_mask for weighting the classification loss. The mask
      is set such that the loss applies to foreground and background assignments
      only - ignored anchors will be set to 0.

      Use assigned_reg_mask for weighting the regression loss. The mask is set
      such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying if force matching is enabled. If
        force matching is enabled, then matched anchors which are also the
        highest scoring with a ground-truth box are considered foreground
        matches as long as their similarity score > 0.
      similarity_fn: Function that computes a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes. The
        score must be non-negative, with greater scores indicating more similar
        boxes. The fore/background_assignment_thresholds will be applied to
        this score to determine if an anchor is foreground, background or
        ignored. If set to None, the function will default to IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys:

        assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.

        assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.

        assigned_gt_labels: shape [A] label assigned to bbox.

        assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.

        assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.

    """
    if similarity_fn is None:
      similarity_fn = self.IOU2DRotatedBoxes

    # Shape validation.
    anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
    num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
    gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
    num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

    # Compute similarity score and reduce max by anchors and by ground-truth.
    similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
    similarity_score = py_utils.HasShape(similarity_score,
                                         [num_anchor_bboxes, num_gt_bboxes])

    # Reduce over ground-truth boxes, so we have the max score per anchor.
    anchor_max_score = tf.reduce_max(similarity_score, axis=1)
    anchor_max_idx = tf.argmax(similarity_score, axis=1)

    if force_match:
      # Reduce over anchors, so we have the max score per ground truth box.
      gt_max_score = tf.reduce_max(similarity_score, axis=0, keep_dims=True)

      # Force matches occur when the top matching gt bbox for an anchor is the
      # top matching anchor for the gt bbox. When force matching, we match
      # these boxes as long as their similarity score exceeds 0.
      force_matches = (
          tf.equal(similarity_score, gt_max_score)
          & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
          & tf.greater(similarity_score, 0.)
          & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
      force_match_indicator = tf.reduce_any(force_matches, axis=1)
      force_match_idx = tf.argmax(tf.to_int32(force_matches), axis=1)

      # In assigning foreground/background anchors later, force_match_indicator
      # is used to determine which anchors are force foreground, and the index
      # assigned will be taken from anchor_max_idx.

      # Force matches must also be the max scoring gt bbox per anchor.
      # We overwrite anchor_max_idx to ensure that the right match is made.
      anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                anchor_max_idx)

    # Ensure that max score boxes are not padded boxes by setting score to 0
    # for boxes that are padded.
    gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
    anchor_max_score = tf.where(
        tf.equal(gathered_mask, 1), anchor_max_score,
        tf.zeros_like(anchor_max_score))

    # Boolean tensors corresponding to whether an anchor is background or
    # foreground based on thresholding.
    background_anchors = tf.less_equal(anchor_max_score,
                                       background_assignment_threshold)
    foreground_anchors = tf.greater_equal(anchor_max_score,
                                          foreground_assignment_threshold)
    if force_match:
      # Background anchors are below threshold and not force matches.
      background_anchors &= ~force_match_indicator
      # Foreground anchors are above thresholds or force matches.
      foreground_anchors |= force_match_indicator

    # Add a dummy background bbox to gt_bboxes to facilitate batch gather.
    dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

    # Since we are concatenating the dummy bbox, the index corresponds to the
    # number of boxes.
    dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

    gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
    gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                 axis=0)

    # Gather indices so that all foreground boxes are gathered from gt_bboxes,
    # while all background and ignore boxes gather the dummy_bbox.
    anchor_gather_idx = tf.where(
        foreground_anchors, anchor_max_idx,
        tf.constant(
            dummy_bbox_idx,
            shape=py_utils.GetShape(anchor_max_idx),
            dtype=anchor_max_idx.dtype))

    # Gather the bboxes and weights.
    assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
    assigned_gt_labels = tf.batch_gather(gt_bboxes_labels, anchor_gather_idx)

    # Set masks for classification and regression losses.
    assigned_cls_mask = tf.to_float(background_anchors | foreground_anchors)
    assigned_reg_mask = tf.to_float(foreground_anchors)

    return py_utils.NestedMap(
        assigned_gt_bbox=assigned_gt_bbox,
        assigned_gt_similarity_score=anchor_max_score,
        assigned_gt_labels=assigned_gt_labels,
        assigned_cls_mask=assigned_cls_mask,
        assigned_reg_mask=assigned_reg_mask)
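
To make the threshold and force-match rules above concrete, here is a small NumPy sketch of the same argmax logic on a toy 3-anchor-by-2-gt similarity matrix. This is illustrative only: the scores and thresholds are made up, and gt_bboxes_mask is omitted (all gt boxes assumed real).

import numpy as np

# Toy similarity matrix: rows are anchors, columns are ground-truth boxes.
sim = np.array([[0.7, 0.1],
                [0.2, 0.3],
                [0.0, 0.05]])
fg_thresh, bg_thresh = 0.6, 0.45

anchor_max_score = sim.max(axis=1)   # best gt score per anchor
anchor_max_idx = sim.argmax(axis=1)  # index of that gt per anchor

# Force matches: the entry is both its column max (best anchor for the gt)
# and its row max (best gt for the anchor), and is strictly positive.
gt_max = sim.max(axis=0, keepdims=True)
force = (sim == gt_max) & (sim == anchor_max_score[:, None]) & (sim > 0)
force_indicator = force.any(axis=1)

foreground = (anchor_max_score >= fg_thresh) | force_indicator
background = (anchor_max_score <= bg_thresh) & ~force_indicator

print(foreground)  # [ True  True False]: anchor 1 is fg only via force match
print(background)  # [False False  True]

Note how anchor 1 falls below the foreground threshold (0.3 < 0.6) but is still assigned foreground because it is the best anchor for gt box 1; this is exactly the case force matching exists to handle.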
def bucket_fn(num):
    # Drops record if num[0] is odd.
    return tf.cond(tf.equal(tf.mod(num[0], 2), 0), lambda: 1,
                   lambda: -tf.to_int32(num[0]))
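
For illustration, a pure-Python mirror of the branch logic above (a sketch; bucket_fn_py is a hypothetical name, and the drop-on-negative-key behavior is what the comment above relies on):

def bucket_fn_py(num):
  # Even first element -> bucket 1 (record is kept); odd first element ->
  # negative bucket key (record is dropped, per the comment above).
  return 1 if num[0] % 2 == 0 else -int(num[0])

assert bucket_fn_py([4]) == 1   # kept, bucket 1
assert bucket_fn_py([3]) == -3  # negative key: dropped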
Example #27
  def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
    """Takes a tensor of strings and returns id/padding tensors.

    This generates `token_ids`, `target_ids`, and `paddings` in the format that
    is expected for tokenizers. This performs padding to a fixed length and
    appends the end-of-sentence token as appropriate.

    Args:
      strs: a string Tensor.
      max_length: a python integer. The second dimension of the returned arrays.
        All sequences are padded or truncated to that length.
      append_eos: a python bool. See `BaseTokenizer` for explanation.
      languages: A vector of strings with the same length as `strs`.

    Returns:
      token_ids: a tensor of sequences of WPM ids starting with SOS. Sequences
        always end with EOS unless the sequence exceeds the maximum length.
        Always padded with EOS.
      target_ids: a tensor of sequences of WPM ids not starting with SOS
        but ending with EOS. Always padded with EOS.
      paddings: a tensor of floats indicating, at each position, whether
        the position is padded (1.0) or real (0.0).
    """
    p = self.params
    if append_eos is None:
      append_eos = p.append_eos

    batch_size = py_utils.GetShape(strs)[0]
    token_ids_ta = tf.TensorArray(tf.int32, batch_size)
    target_ids_ta = tf.TensorArray(tf.int32, batch_size)
    paddings_ta = tf.TensorArray(tf.float32, batch_size)

    def _TokenizeOneSentence(i, strs, token_ids_ta, target_ids_ta, paddings_ta):
      """Tokenizes a single sentence."""
      ids, _ = self._wpm_encoder.Encode(strs[i])

      if append_eos:
        ids = tf.concat([ids, [self.eos_id]], axis=0)

      # This truncates after the eos is added, so some sentences might
      # not have </s> at the end.
      token_ids_ta = token_ids_ta.write(
          i,
          py_utils.PadOrTrimTo(
              tf.concat([[self.sos_id], ids], axis=0), [max_length],
              self.eos_id))
      target_ids_ta = target_ids_ta.write(
          i, py_utils.PadOrTrimTo(ids, [max_length], self.eos_id))
      paddings_ta = paddings_ta.write(
          i,
          py_utils.PadOrTrimTo(
              tf.zeros_like(ids, dtype=tf.float32), [max_length], 1.))

      return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta

    _, _, token_ids_ta, target_ids_ta, paddings_ta = tf.while_loop(
        lambda i, *_: i < batch_size,
        _TokenizeOneSentence,
        loop_vars=(tf.constant(0, tf.int32), strs, token_ids_ta, target_ids_ta,
                   paddings_ta),
        parallel_iterations=30,
        back_prop=False)

    token_ids = token_ids_ta.stack()
    target_ids = target_ids_ta.stack()
    paddings = paddings_ta.stack()

    if not p.pad_to_max_length:
      maxlen = tf.to_int32(
          tf.round(tf.reduce_max(tf.reduce_sum(1.0 - paddings, axis=1))))
      token_ids = token_ids[:, :maxlen]
      target_ids = target_ids[:, :maxlen]
      paddings = paddings[:, :maxlen]

    return token_ids, target_ids, paddings
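
To make the returned layout concrete, here is a pure-Python sketch of the SOS/EOS convention implemented above. _Layout, sos_id=1, and eos_id=2 are illustrative assumptions, not lingvo's API:

def _Layout(ids, max_length, sos_id=1, eos_id=2):
  """Pure-Python mirror of the SOS/EOS padding convention above."""
  ids = list(ids) + [eos_id]  # append EOS (may be truncated away below)
  token_ids = ([sos_id] + ids)[:max_length]
  token_ids += [eos_id] * (max_length - len(token_ids))
  target_ids = ids[:max_length]
  target_ids += [eos_id] * (max_length - len(target_ids))
  n = min(len(ids), max_length)
  paddings = [0.0] * n + [1.0] * (max_length - n)
  return token_ids, target_ids, paddings

# Short sentence: EOS is present and also used as padding.
print(_Layout([7, 8], 5))
# ([1, 7, 8, 2, 2], [7, 8, 2, 2, 2], [0.0, 0.0, 0.0, 1.0, 1.0])

# Long sentence: truncation happens after EOS is appended, so EOS is lost.
print(_Layout([7, 8, 9, 10, 11], 5))
# ([1, 7, 8, 9, 10], [7, 8, 9, 10, 11], [0.0, 0.0, 0.0, 0.0, 0.0])

The second case is the behavior flagged by the in-code comment: truncation after EOS is appended means over-length sentences lose their </s>.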
Example #28
File: pillars.py  Project: lbxcfx/lingvo
  def ComputeLoss(self, theta, predictions, input_batch):
    """Computes loss and other metrics for the given predictions.

    Args:
      theta: A `.NestedMap` object containing variable values of this task.
      predictions: The output of `ComputePredictions`; contains: logits -
        [b, nx, ny, nz, na, 7 + num_classes]. na is the number of anchor
        boxes per cell. [..., :7] are (dx, dy, dz, dw, dl, dh, dt).
      input_batch: The input batch from which we access the groundtruth.

    Returns:
      Two dicts as defined in `BaseTask.ComputeLoss`.
    """
    p = self.params
    predicted_residuals = py_utils.HasShape(predictions.residuals,
                                            [-1, -1, -1, -1, p.num_anchors, 7])
    predicted_class_logits = py_utils.HasShape(
        predictions.classification_logits,
        [-1, -1, -1, -1, p.num_anchors, p.num_classes])
    bs, nx, ny, nz, na, _ = py_utils.GetShape(predicted_class_logits, 6)

    # Compute class and regression weights.
    class_weights = input_batch.assigned_cls_mask
    class_weights = py_utils.HasShape(class_weights, [bs, nx, ny, nz, na])
    reg_weights = input_batch.assigned_reg_mask
    reg_weights = py_utils.HasShape(reg_weights, [bs, nx, ny, nz, na])
    reg_weights = tf.expand_dims(reg_weights, -1)

    if p.loss_norm_type == LossNormType.NORM_BY_NUM_POSITIVES:
      # Compute number of positive anchors per example.
      foreground_mask = py_utils.HasShape(input_batch.assigned_reg_mask,
                                          [bs, nx, ny, nz, na])
      # Sum to get the number of foreground anchors for each example.
      loss_normalization = tf.reduce_sum(foreground_mask, axis=[1, 2, 3, 4])
      loss_normalization = tf.maximum(loss_normalization,
                                      tf.ones_like(loss_normalization))
      # Reshape for broadcasting.
      loss_normalization = tf.reshape(loss_normalization, [bs, 1, 1, 1, 1, 1])

      class_weights /= loss_normalization
      reg_weights /= loss_normalization

    # Classification loss.
    assigned_gt_labels = py_utils.HasShape(input_batch.assigned_gt_labels,
                                           [bs, nx, ny, nz, na])
    class_loss = py_utils.SigmoidCrossEntropyFocalLoss(
        logits=predicted_class_logits,
        labels=tf.one_hot(assigned_gt_labels, p.num_classes),
        alpha=p.focal_loss_alpha,
        gamma=p.focal_loss_gamma)
    class_loss *= class_weights[..., tf.newaxis]
    class_loss_sum = tf.reduce_sum(class_loss)

    # Regression loss.
    anchor_localization_residuals = py_utils.HasShape(
        input_batch.anchor_localization_residuals, [bs, nx, ny, nz, na, 7])

    # Location and dimensions loss.
    reg_loc_and_dims_loss = self._utils.ScaledHuberLoss(
        predictions=py_utils.HasShape(predicted_residuals[..., :6],
                                      [bs, nx, ny, nz, na, 6]),
        labels=anchor_localization_residuals[..., :6],
        delta=1 / (3.**2))

    # Rotation loss with SmoothL1(sin(delta)).
    rot_delta = (
        predicted_residuals[..., 6:] -
        input_batch.anchor_localization_residuals[..., 6:])
    reg_rot_loss = self._utils.ScaledHuberLoss(
        predictions=tf.sin(rot_delta),
        labels=tf.zeros_like(rot_delta),
        delta=1 / (3.**2))

    # Direction loss
    if p.direction_classifier_weight > 0.0:
      # The target rotations are in the assigned_gt_bbox tensor,
      # which already has assigned a gt bounding box to every anchor.
      rot_target = input_batch.assigned_gt_bbox[..., 6]
      # If rotation is > 0, the class is 1, else it is 0.
      rot_dir = tf.to_int32(rot_target > 0.)

      # Compute one-hot labels as a target.
      rot_dir_onehot = tf.one_hot(rot_dir, 2)

      # Manually handle loss reduction.
      dir_loss = tf.losses.softmax_cross_entropy(
          onehot_labels=rot_dir_onehot,
          logits=predictions.predicted_dir,
          weights=tf.squeeze(reg_weights, axis=-1),
          reduction=tf.losses.Reduction.NONE)
      # Reduce across all dimensions (we'll divide by the batch size below).
      dir_loss_sum = tf.reduce_sum(dir_loss)
    else:
      dir_loss_sum = 0.0

    # Compute loss contribution from location and dimension separately.
    reg_loc_loss = reg_loc_and_dims_loss[..., :3] * reg_weights
    reg_loc_loss_sum = tf.reduce_sum(reg_loc_loss)

    reg_dim_loss = reg_loc_and_dims_loss[..., 3:6] * reg_weights
    reg_dim_loss_sum = tf.reduce_sum(reg_dim_loss)

    # Compute rotation loss contribution.
    reg_rot_loss *= reg_weights
    reg_rot_loss_sum = tf.reduce_sum(reg_rot_loss)

    # Num. predictions.
    # TODO(zhifengc): Consider other normalization factors. E.g., # of bboxes.
    preds = tf.cast(bs, class_loss_sum.dtype)

    # Normalize all of the components by batch size.
    reg_loc_loss = reg_loc_loss_sum / preds
    reg_dim_loss = reg_dim_loss_sum / preds
    reg_rot_loss = reg_rot_loss_sum / preds
    class_loss = class_loss_sum / preds
    dir_loss = dir_loss_sum / preds

    # Compute total localization regression loss.
    reg_loss = (
        p.location_loss_weight * reg_loc_loss +
        p.dimension_loss_weight * reg_dim_loss +
        p.rotation_loss_weight * reg_rot_loss)

    # Apply weights to normalized class losses.
    loss = (
        class_loss * p.classification_loss_weight +
        reg_loss * p.localization_loss_weight +
        dir_loss * p.direction_classifier_weight)

    metrics_dict = {
        'loss': (loss, preds),
        'loss/class': (class_loss, preds),
        'loss/reg': (reg_loss, preds),
        'loss/reg/rot': (reg_rot_loss, preds),
        'loss/reg/loc': (reg_loc_loss, preds),
        'loss/reg/dim': (reg_dim_loss, preds),
        'loss/dir': (dir_loss, preds),
    }

    per_example_dict = {
        'residuals': predicted_residuals,
        'classification_logits': predicted_class_logits,
    }

    return metrics_dict, per_example_dict
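
A side note on the SmoothL1(sin(delta)) rotation loss above: because sin(delta + pi) = -sin(delta), the loss magnitude is unchanged when the predicted heading is flipped by pi, which is exactly why the separate direction classifier is needed to recover the sign. A small NumPy check of that symmetry (illustrative; rot_loss stands in for the scaled Huber loss on sin(delta)):

import numpy as np

def rot_loss(pred, target):
  # Stand-in for SmoothL1(sin(delta)); |sin| exhibits the same symmetry.
  return np.abs(np.sin(pred - target))

target = 0.3
print(rot_loss(0.3, target))              # 0.0: exact heading
print(rot_loss(0.3 + np.pi, target))      # ~0.0: pi-flipped heading, same loss
print(rot_loss(0.3 + np.pi / 2, target))  # 1.0: maximal rotation error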