Example #1
  def _testBeamSearch(self,
                      expected_values,
                      dtype=tf.float32,
                      init_step_ids=False,
                      has_task_ids=False):
    tf.random.set_seed(_TF_RANDOM_SEED)
    src_batch = 4
    src_time = 5
    p = self._DecoderParams(dtype=dtype, init_step_ids=init_step_ids)
    p.beam_search.num_hyps_per_beam = 2
    p.beam_search.coverage_penalty = 0.0
    p.beam_search.length_normalization = 0
    dec = decoder.TransformerDecoder(p)
    encoder_outputs, _, _ = self._Inputs(
        dtype=dtype, has_task_ids=has_task_ids, init_step_ids=init_step_ids)
    decode = dec.BeamSearchDecode(encoder_outputs)
    # topk_decoded is None in the MT decoder; set it to a fake tensor so that
    # sess.run(decode) works.
    decode = decode._replace(topk_decoded=tf.constant(0, tf.float32))

    with self.session(use_gpu=True) as sess:
      tf.global_variables_initializer().run()
      actual_decode = sess.run(decode)

    self.assertTupleEqual(
        (src_time, src_batch * p.beam_search.num_hyps_per_beam),
        actual_decode.done_hyps.shape)
    self.assertTupleEqual(
        (src_batch, p.beam_search.num_hyps_per_beam),
        actual_decode.topk_hyps.shape)
    self.assertTupleEqual(
        (src_batch * p.beam_search.num_hyps_per_beam, src_time),
        actual_decode.topk_ids.shape)
    self.assertTupleEqual(
        (src_batch * p.beam_search.num_hyps_per_beam,),
        actual_decode.topk_lens.shape)
    self.assertTupleEqual(
        (src_batch, p.beam_search.num_hyps_per_beam),
        actual_decode.topk_scores.shape)

    # Assert expected IDs, lengths, and scores.
    self.assertAllEqual(expected_values['topk_ids'], actual_decode.topk_ids)
    self.assertAllEqual(expected_values['topk_lens'], actual_decode.topk_lens)
    self.assertAllClose(expected_values['topk_scores'],
                        actual_decode.topk_scores)

    # Assert expected attention probs.
    hypstr = actual_decode.topk_hyps.flatten()[1]
    hyp = Hypothesis()
    hyp.ParseFromString(hypstr)
    print('HYP:', hyp)

    atten_vec_0 = list(hyp.atten_vecs[0].prob)
    atten_vec_1 = list(hyp.atten_vecs[1].prob)

    self.assertAllClose(atten_vec_0, expected_values['atten_vec_0'])
    self.assertAllClose(atten_vec_1, expected_values['atten_vec_1'])

    # Test normalized scores of hypotheses.
    CompareToGoldenSingleFloat(self, expected_values['normalized_score'],
                               hyp.normalized_score)
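
# --- Supplementary sketch (not part of the original test) ---
# `normalized_score` above comes from the beam-search op's length
# normalization. A common choice is the GNMT-style penalty; whether the op
# uses exactly this formula is an assumption, shown only to illustrate the
# idea. With length_normalization = 0 (as set above), the penalty is 1 and
# the score is just the raw log-prob sum.
def length_normalized_score(log_prob_sum, hyp_len, alpha):
  lp = ((5.0 + hyp_len) / 6.0)**alpha  # GNMT length penalty
  return log_prob_sum / lp

assert length_normalized_score(-4.2, 7, 0.0) == -4.2  # alpha=0: unnormalized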
Example #2
    def testDecoderSampleTargetSequences(self):
        p = self._DecoderParams(vn_config=py_utils.VariationalNoiseParams(
            None, False, False),
                                num_classes=8)
        p.target_seq_len = 5
        p.random_seed = 1
        config = tf.config_pb2.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(do_function_inlining=False)))
        with self.session(use_gpu=False, config=config) as sess:
            tf.random.set_seed(8372740)
            np.random.seed(35315)
            dec = p.Instantiate()
            source_sequence_length = 5
            batch_size = 4
            source_encodings = tf.constant(np.random.normal(
                size=[source_sequence_length, batch_size, p.source_dim]),
                                           dtype=tf.float32)
            source_encoding_padding = tf.constant(
                [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0],
                 [0.0, 1.0, 1.0, 1.0], [0.0, 1.0, 1.0, 1.0],
                 [0.0, 1.0, 1.0, 1.0]],
                dtype=tf.float32)
            encoder_outputs = py_utils.NestedMap(
                encoded=source_encodings, padding=source_encoding_padding)
            sampled_sequences = dec.SampleTargetSequences(dec.theta,
                                                          encoder_outputs,
                                                          random_seed=tf.cast(
                                                              123, tf.int32))
            self.assertAllEqual([batch_size, p.target_seq_len],
                                sampled_sequences.ids.shape)
            self.evaluate(tf.global_variables_initializer())
            decoder_output = sess.run(sampled_sequences)
            print('ids=%s' % np.array_repr(decoder_output.ids))
            lens = np.sum(1 - decoder_output.paddings, axis=1)
            print('lens=%s' % lens)
            # pyformat: disable
            # pylint: disable=bad-whitespace,bad-continuation
            expected_ids = [[6, 2, 2, 2, 2], [0, 0, 7, 5, 1], [6, 1, 5, 1, 5],
                            [6, 7, 7, 4, 4]]
            # pylint: enable=bad-whitespace,bad-continuation
            # pyformat: enable
            expected_lens = [2, 5, 5, 5]
            self.assertAllEqual(expected_lens, lens)
            self.assertAllEqual(expected_ids, decoder_output.ids)

            # Sample again with the same random seed.
            decoder_output2 = sess.run(
                dec.SampleTargetSequences(dec.theta,
                                          encoder_outputs,
                                          random_seed=tf.cast(123, tf.int32)))
            # Get the same output.
            self.assertAllEqual(decoder_output.ids, decoder_output2.ids)
            self.assertAllEqual(decoder_output.paddings,
                                decoder_output2.paddings)

            # Sample again with a different random seed.
            decoder_output3 = sess.run(
                dec.SampleTargetSequences(dec.theta,
                                          encoder_outputs,
                                          random_seed=tf.cast(
                                              123456, tf.int32)))
            # Get different sequences.
            self.assertNotAllClose(expected_ids, decoder_output3.ids)
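
# --- Supplementary sketch (not part of the original test) ---
# The re-sampling assertions above depend on seeded sampling being a pure
# function of the seed: same seed -> same samples, new seed -> new samples.
# A minimal numpy analogue of that property:
import numpy as np

draw_a = np.random.RandomState(123).randint(0, 8, size=(4, 5))
draw_b = np.random.RandomState(123).randint(0, 8, size=(4, 5))
draw_c = np.random.RandomState(123456).randint(0, 8, size=(4, 5))
assert (draw_a == draw_b).all()      # same seed: identical draws
assert not (draw_a == draw_c).all()  # different seed: differing draws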
Example #3
    def ComputeLoss(self, theta, predictions, input_batch):
        """Compute loss for the sparse detector model v1.

    Args:
      theta: A `.NestedMap` object containing variable values of this task.
      predictions: A `.NestedMap` object containing residuals and
        classification_logits.
      input_batch: A `.NestedMap` expected to contain cell_center_xyz,
        cell_points_xyz, cell_feature, anchor_bboxes,
        anchor_localization_residuals, assigned_gt_labels, and
        assigned_cls_mask. See class doc string for details.

    Returns:
      Two dicts:
        A dict containing str keys and (metric, weight) pairs as values, where
        one of the keys is expected to be 'loss'.
        A dict containing arbitrary tensors describing something about each
        training example, where the first dimension of each tensor is the batch
        index.
    """
        p = self.params

        batch_size, num_centers = py_utils.GetShape(
            input_batch.cell_center_xyz, 2)

        # Assert shapes of inputs.
        anchor_bboxes = py_utils.HasShape(
            input_batch.anchor_bboxes,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center, 7])
        anchor_localization_residuals = py_utils.HasShape(
            input_batch.anchor_localization_residuals,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center, 7])
        predicted_residuals = py_utils.HasShape(
            predictions.residuals,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center, 7])

        assigned_gt_labels = py_utils.HasShape(
            input_batch.assigned_gt_labels,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center])
        predicted_classification_logits = py_utils.HasShape(
            predictions.classification_logits, [
                batch_size, num_centers, p.num_anchor_bboxes_per_center,
                p.num_classes
            ])

        # assigned_cls_mask is for weighting the classification loss.
        # Ignored targets will have their mask = 0; this happens when their IOU is
        # not high enough to be a foreground object and not low enough to be
        # background.
        class_weights = py_utils.HasShape(
            input_batch.assigned_cls_mask,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center])
        class_weights = tf.reshape(
            class_weights,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center, 1])

        # Broadcast per-class loss weights. For each anchor there are
        # num_classes prediction heads; we weight the outputs of these heads
        # by the per-class loss weights.
        per_class_loss_weight = tf.constant([[[p.per_class_loss_weight]]],
                                            dtype=tf.float32)
        per_class_loss_weight = py_utils.HasShape(per_class_loss_weight,
                                                  [1, 1, 1, p.num_classes])
        class_weights *= per_class_loss_weight
        class_weights = py_utils.HasShape(class_weights, [
            batch_size, num_centers, p.num_anchor_bboxes_per_center,
            p.num_classes
        ])

        # We use assigned_reg_mask for masking the regression loss.
        # Only foreground objects will have assigned_reg_mask = 1.
        reg_weights = py_utils.HasShape(
            input_batch.assigned_reg_mask,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center])
        reg_weights = tf.reshape(
            reg_weights,
            [batch_size, num_centers, p.num_anchor_bboxes_per_center, 1])

        if p.loss_norm_type == LossNormType.NORM_BY_NUM_POS_PER_CENTER:
            # Compute number of positive anchors per example.
            foreground_mask = py_utils.HasShape(
                input_batch.assigned_reg_mask,
                [batch_size, num_centers, p.num_anchor_bboxes_per_center])

            # Sum to get the number of foreground anchors for each example.
            loss_normalization = tf.reduce_sum(foreground_mask, axis=2)
            loss_normalization = tf.maximum(loss_normalization,
                                            tf.ones_like(loss_normalization))

            # Reshape for broadcasting.
            loss_normalization = tf.reshape(loss_normalization,
                                            [batch_size, num_centers, 1, 1])

            # Normalize so that the loss is independent of # centers.
            loss_normalization *= num_centers
            class_weights /= loss_normalization
            reg_weights /= loss_normalization

        classification_loss = py_utils.SigmoidCrossEntropyFocalLoss(
            logits=predicted_classification_logits,
            labels=tf.one_hot(assigned_gt_labels, p.num_classes),
            alpha=p.focal_loss_alpha,
            gamma=p.focal_loss_gamma)

        # Apply mask.
        classification_loss *= class_weights

        # TODO(jngiam): Consider normalizing by num_foreground_anchors for each
        # example instead. This would match the 1/N_positive normalization in
        # point pillars.

        # Reduce sum over centers, boxes and classes.
        classification_loss = tf.reduce_sum(classification_loss,
                                            axis=[1, 2, 3])

        # Reduce mean over batch.
        classification_loss = tf.reduce_mean(classification_loss)

        # Localization regression loss with Huber loss (SmoothL1).
        regression_loc_and_dims_loss = self._utils_3d.ScaledHuberLoss(
            labels=anchor_localization_residuals[..., :6],
            predictions=predicted_residuals[..., :6],
            delta=p.huber_loss_delta)

        # TODO(jngiam): Consider other methods for rotation loss such as softmax
        # binning.
        # For the rotation loss, we use SmoothL1(sine(delta)), this enables the
        # rotation loss to be the same independent of direction.
        rotation_delta = (predicted_residuals[..., 6:] -
                          anchor_localization_residuals[..., 6:])
        regression_rotation_loss = self._utils_3d.ScaledHuberLoss(
            labels=tf.zeros_like(rotation_delta),
            predictions=tf.sin(rotation_delta),
            delta=p.huber_loss_delta)

        reg_loc_loss = regression_loc_and_dims_loss[..., :3]
        reg_dim_loss = regression_loc_and_dims_loss[..., 3:6]

        gt_bboxes = self._utils_3d.ResidualsToBBoxes(
            anchor_bboxes, anchor_localization_residuals)
        predicted_bboxes = self._utils_3d.ResidualsToBBoxes(
            anchor_bboxes, predicted_residuals)

        # Apply mask to individual losses.
        #
        # And then reduce sum over centers, boxes, residuals, and batch
        # and divide by the batch_size.
        regression_rotation_loss *= reg_weights
        reg_rot_loss = tf.reduce_sum(regression_rotation_loss) / batch_size

        reg_loc_loss *= reg_weights
        reg_loc_loss = tf.reduce_sum(reg_loc_loss) / batch_size

        reg_dim_loss *= reg_weights
        reg_dim_loss = tf.reduce_sum(reg_dim_loss) / batch_size

        # Do not create corner loss graph if weight is 0.0
        # TODO(bcyang): Remove condition after fixing corner loss NaN issue
        if p.corner_loss_weight != 0.0:
            reg_corner_loss = self._utils_3d.CornerLoss(
                gt_bboxes=gt_bboxes, predicted_bboxes=predicted_bboxes)
            reg_corner_loss = tf.expand_dims(reg_corner_loss, axis=-1)

            reg_corner_loss *= reg_weights
            reg_corner_loss = tf.reduce_sum(reg_corner_loss) / batch_size
        else:
            reg_corner_loss = 0.0

        # Sum components of regression loss.
        regression_loss = (p.location_loss_weight * reg_loc_loss +
                           p.dimension_loss_weight * reg_dim_loss +
                           p.rotation_loss_weight * reg_rot_loss +
                           p.corner_loss_weight * reg_corner_loss)

        # Compute total loss.
        total_loss = (p.loss_weight_localization * regression_loss +
                      p.loss_weight_classification * classification_loss)

        metrics_dict = py_utils.NestedMap({
            'loss': (total_loss, batch_size),
            'loss/regression': (regression_loss, batch_size),
            'loss/regression/loc': (reg_loc_loss, batch_size),
            'loss/regression/dim': (reg_dim_loss, batch_size),
            'loss/regression/rot': (reg_rot_loss, batch_size),
            'loss/regression/corner': (reg_corner_loss, batch_size),
            'loss/classification': (classification_loss, batch_size),
        })

        # Calculate dimension errors
        dimension_errors_dict = self._BBoxDimensionErrors(
            gt_bboxes, predicted_bboxes, reg_weights)
        metrics_dict.update(dimension_errors_dict)

        per_example_dict = py_utils.NestedMap({
            'residuals': predicted_residuals,
            'classification_logits': predicted_classification_logits,
            'predicted_bboxes': predicted_bboxes,
            'gt_bboxes': gt_bboxes,
            'reg_weights': reg_weights,
        })

        return metrics_dict, per_example_dict
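
# --- Supplementary sketch (not part of the original model code) ---
# Why SmoothL1(sin(delta)) for the rotation residual: sin is odd and periodic,
# so the loss is the same for delta and -delta (direction independent) and
# (near) zero when the predicted heading matches the target modulo pi.
import numpy as np

def huber(x, delta=1.0):
  ax = np.abs(x)
  return np.where(ax <= delta, 0.5 * x**2, delta * (ax - 0.5 * delta))

delta_rot = np.linspace(-np.pi, np.pi, 9)
assert np.allclose(huber(np.sin(delta_rot)), huber(np.sin(-delta_rot)))
assert np.isclose(huber(np.sin(np.pi)), 0.0)  # a pi heading flip costs ~0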
Example #4
  def _GetMask(self,
               batch_size,
               choose_range,
               mask_size,
               max_length=None,
               masks_per_frame=0.0,
               multiplicity=1,
               dtype=tf.float32,
               max_ratio=1.0):
    """Returns fixed size multi-masks starting from random positions.

    A multi-mask is a mask obtained by applying multiple masks.

    When max_length is given, this function:
      1) Samples random mask lengths less than max_length with shape
         (batch_size, multiplicity).
      2) Truncates the lengths to a max of (choose_range * max_ratio),
         so that each mask is fully contained within the corresponding
         sequence.
      3) Randomly samples start points of shape (batch_size, multiplicity)
         within [0, choose_range - lengths].
      4) Constructs, for each batch example, multiple masks (whose number is
         given by the multiplicity).
      5) Returns a mask of shape (batch_size, mask_size) obtained by composing
         the masks constructed in step 4). If masks_per_frame > 0, the number
         of masks composed is min(masks_per_frame * choose_range,
         multiplicity); otherwise all the masks are composed. The masked
         regions are set to zero.

    When max_length is not given, this function:
      1) Samples random mask lengths less than (choose_range * max_ratio)
         with shape (batch_size, multiplicity).
      2) Proceeds to steps 3), 4) and 5) above.

    Args:
      batch_size: Batch size. Integer number.
      choose_range: Range within which the masked entries must lie. Tensor of
        shape (batch_size,).
      mask_size: Size of the mask. Integer number.
      max_length: Maximum number of allowed consecutive masked entries. Integer
        number or None.
      masks_per_frame: Number of masks per frame. Float number. If > 0, the
        multiplicity of the mask is set to be masks_per_frame * choose_range.
      multiplicity: Maximum number of total masks. Integer number.
      dtype: Data type.
      max_ratio: Maximum portion of the entire range allowed to be masked. Float
        number.

    Returns:
      mask: a fixed size multi-mask starting from a random position with shape
      (batch_size, mask_size).
    """
    p = self.params
    # Non-empty random seed values are only used for testing.
    # seed_1 and seed_2 are set separately to avoid correlation between
    # mask size and mask position.
    if p.random_seed:
      seed_1 = p.random_seed + 1
      seed_2 = 2 * p.random_seed
    else:
      seed_1 = p.random_seed
      seed_2 = p.random_seed
    # Sample lengths for multiple masks.
    if max_length and max_length > 0:
      max_length = tf.broadcast_to(tf.cast(max_length, dtype), (batch_size,))
    else:
      max_length = tf.cast(choose_range, dtype=dtype) * max_ratio
    masked_portion = tf.random.uniform((batch_size, multiplicity),
                                       minval=0.0,
                                       maxval=1.0,
                                       dtype=dtype,
                                       seed=seed_1)
    masked_frame_size = tf.einsum('b,bm->bm', max_length, masked_portion)
    masked_frame_size = tf.cast(masked_frame_size, dtype=tf.int32)
    # Make sure the sampled length is smaller than max_ratio * length_bound.
    # Note that sampling in this way is biased
    # (shorter sequences may be over-masked).
    choose_range = tf.expand_dims(choose_range, -1)
    choose_range = tf.tile(choose_range, [1, multiplicity])
    length_bound = tf.cast(choose_range, dtype=dtype)
    length_bound = tf.cast(max_ratio * length_bound, dtype=tf.int32)
    length = tf.minimum(masked_frame_size, tf.maximum(length_bound, 1))

    # Choose starting point.
    random_start = tf.random.uniform((batch_size, multiplicity),
                                     maxval=1.0,
                                     seed=seed_2)
    start_in_valid_range = random_start * tf.cast(
        (choose_range - length + 1), dtype=dtype)
    start = tf.cast(start_in_valid_range, tf.int32)
    end = start + length - 1

    # Shift start and end points by a small value.
    delta = tf.constant(0.1)
    start = tf.expand_dims(tf.cast(start, dtype) - delta, -1)
    start = tf.tile(start, [1, 1, mask_size])
    end = tf.expand_dims(tf.cast(end, dtype) + delta, -1)
    end = tf.tile(end, [1, 1, mask_size])

    # Construct pre-mask of shape (batch_size, multiplicity, mask_size).
    diagonal = tf.expand_dims(
        tf.expand_dims(tf.cast(tf.range(mask_size), dtype=dtype), 0), 0)
    diagonal = tf.tile(diagonal, [batch_size, multiplicity, 1])
    pre_mask = tf.cast(
        tf.logical_and(diagonal < end, diagonal > start), dtype=dtype)

    # Sum masks with appropriate multiplicity.
    if masks_per_frame > 0:
      multiplicity_weights = tf.tile(
          tf.expand_dims(tf.range(multiplicity, dtype=dtype), 0),
          [batch_size, 1])
      multiplicity_tensor = masks_per_frame * tf.cast(choose_range, dtype=dtype)
      multiplicity_weights = tf.cast(
          multiplicity_weights < multiplicity_tensor, dtype=dtype)
      pre_mask = tf.einsum('bmt,bm->bt', pre_mask, multiplicity_weights)
    else:
      pre_mask = tf.reduce_sum(pre_mask, 1)
    mask = tf.cast(1.0 - tf.cast(pre_mask > 0, dtype=dtype), dtype=dtype)

    if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
      mask = tf.cast(mask, p.fprop_dtype)

    return mask
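
# --- Supplementary sketch (not part of the original layer code) ---
# A numpy rendering of the start/end comparison trick used above: comparing a
# range vector against (start - 0.1) and (end + 0.1) yields a {0, 1} band per
# mask, and summing over the multiplicity axis composes the bands.
import numpy as np

mask_size = 8
start = np.array([[1, 5], [0, 4]])   # (batch_size, multiplicity)
length = np.array([[2, 2], [3, 1]])
end = start + length - 1
idx = np.arange(mask_size)[None, None, :]          # (1, 1, mask_size)
band = ((idx > start[..., None] - 0.1) &
        (idx < end[..., None] + 0.1)).astype(np.float32)
mask = 1.0 - (band.sum(axis=1) > 0)                # 1 = keep, 0 = masked
print(mask)
# [[1. 0. 0. 1. 1. 0. 0. 1.]
#  [0. 0. 0. 1. 0. 1. 1. 1.]]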
Example #5
  def __init__(self, params):
    super(TestInputGenerator, self).__init__(params)
    self._input_batch_size = tf.constant(1)
Example #6
    def AssignAnchors(self,
                      anchor_bboxes,
                      gt_bboxes,
                      gt_bboxes_labels,
                      gt_bboxes_mask,
                      foreground_assignment_threshold=0.5,
                      background_assignment_threshold=0.35,
                      background_class_id=0,
                      force_match=True,
                      similarity_fn=None):
        """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:

      - Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      - Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      - Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      - Use assigned_cls_mask for weighting the classification loss. The mask
        is set such that the loss applies to foreground and background
        assignments only - ignored anchors will be set to 0.
      - Use assigned_reg_mask for weighting the regression loss. The mask is set
        such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying whether force matching is enabled. If
        force matching is enabled, then anchors that are the highest scoring
        match for some ground-truth box are considered foreground matches as
        long as their similarity score > 0.
      similarity_fn: Function that computes a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes.
        The score must be non-negative, with greater scores representing more
        similar boxes. The fore/background_assignment_thresholds will be
        applied to this score to determine whether an anchor is foreground,
        background, or ignored. If set to None, the function defaults to
        IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys

      - assigned_gt_idx: shape [A] index corresponding to the index of the
        assigned ground truth box. Anchors not assigned to a ground truth box
        will have the index set to -1.
      - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
      - assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.
      - assigned_gt_labels: shape [A] label assigned to bbox.
      - assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.
      - assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.
    """
        if similarity_fn is None:
            similarity_fn = self.IOU2DRotatedBoxes

        # Shape validation.
        anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
        num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
        gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
        num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

        # Compute similarity score and reduce max by anchors and by ground-truth.
        similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
        similarity_score = py_utils.HasShape(
            similarity_score, [num_anchor_bboxes, num_gt_bboxes])

        # Reduce over ground-truth boxes, so we have the max score per anchor.
        anchor_max_score = tf.reduce_max(similarity_score, axis=1)
        anchor_max_idx = tf.argmax(similarity_score, axis=1)

        if force_match:
            # Reduce over anchors, so we have the max score per ground truth box.
            gt_max_score = tf.reduce_max(similarity_score,
                                         axis=0,
                                         keepdims=True)

            # Force matches occur when the top matching gt bbox for an anchor is the
            # top matching anchor for the gt bbox. When force matching, we match
            # these boxes as long as their similarity score exceeds 0.
            force_matches = (
                tf.equal(similarity_score, gt_max_score)
                & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
                & tf.greater(similarity_score, 0.)
                & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
            force_match_indicator = tf.reduce_any(force_matches, axis=1)
            force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32),
                                        axis=1)

            # In assigning foreground/background anchors later, force_match_indicator
            # is used to determine which anchors are force foreground, and the index
            # assigned will be taken from anchor_max_idx.

            # Force matches must also be the max scoring gt bbox per anchor.
            # We overwrite anchor_max_idx to ensure that the right match is done.
            anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                      anchor_max_idx)

        # Ensure that max score boxes are not padded boxes by setting score to 0
        # for boxes that are padded.
        gathered_mask = tf.array_ops.batch_gather(gt_bboxes_mask,
                                                  anchor_max_idx)
        anchor_max_score = tf.where(tf.equal(gathered_mask, 1),
                                    anchor_max_score,
                                    tf.zeros_like(anchor_max_score))

        # Boolean tensors corresponding to whether an anchor is background or
        # foreground based on thresholding.
        background_anchors = tf.less_equal(anchor_max_score,
                                           background_assignment_threshold)
        foreground_anchors = tf.greater_equal(anchor_max_score,
                                              foreground_assignment_threshold)
        if force_match:
            # Background anchors are below threshold and not force matches.
            background_anchors &= ~force_match_indicator
            # Foreground anchors are above thresholds or force matches.
            foreground_anchors |= force_match_indicator

        # Add a dummy background bbox to gt_bboxes to facilitate batch gather.
        dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

        # Since we are concatenating the dummy bbox, the index corresponds to the
        # number of boxes.
        dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]
        dummy_bbox_idx = tf.cast(dummy_bbox_idx, tf.int64)

        gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
        gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                     axis=0)

        # Gather indices so that all foreground boxes are gathered from gt_bboxes,
        # while all background and ignore boxes gather the dummy_bbox.
        anchor_gather_idx = tf.where(
            foreground_anchors, anchor_max_idx,
            tf.ones_like(anchor_max_idx) * dummy_bbox_idx)

        # Gather the bboxes and weights.
        assigned_gt_bbox = tf.array_ops.batch_gather(gt_bboxes,
                                                     anchor_gather_idx)
        assigned_gt_labels = tf.array_ops.batch_gather(gt_bboxes_labels,
                                                       anchor_gather_idx)

        # Set masks for classification and regression losses.
        assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                                    tf.float32)
        assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

        # Set assigned_gt_idx such that dummy boxes have idx = -1.
        assigned_gt_idx = tf.where(tf.equal(anchor_gather_idx, dummy_bbox_idx),
                                   tf.ones_like(anchor_gather_idx) * -1,
                                   anchor_gather_idx)
        assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

        return py_utils.NestedMap(
            assigned_gt_idx=assigned_gt_idx,
            assigned_gt_bbox=assigned_gt_bbox,
            assigned_gt_similarity_score=anchor_max_score,
            assigned_gt_labels=assigned_gt_labels,
            assigned_cls_mask=assigned_cls_mask,
            assigned_reg_mask=assigned_reg_mask)
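
# --- Supplementary sketch (not part of the original utils code) ---
# A toy numpy version of the thresholding above (force matching omitted):
# each anchor takes its best-scoring gt box, and the score decides between
# foreground, background, and ignore.
import numpy as np

similarity = np.array([[0.7, 0.1],    # anchor 0: strong match -> foreground
                       [0.2, 0.4],    # anchor 1: between thresholds -> ignore
                       [0.0, 0.1]])   # anchor 2: weak match -> background
fg_thresh, bg_thresh = 0.5, 0.35
anchor_max_score = similarity.max(axis=1)
foreground = anchor_max_score >= fg_thresh
background = anchor_max_score <= bg_thresh
cls_mask = (foreground | background).astype(np.float32)  # ignored -> 0.0
reg_mask = foreground.astype(np.float32)
print(cls_mask, reg_mask)  # [1. 0. 1.] [1. 0. 0.]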
Example #7
    def BeamSearchDecode(self,
                         theta,
                         encoder_outputs,
                         num_hyps_per_beam_override=0,
                         init_beam_search_state=None,
                         pre_beam_search_step_callback=None,
                         post_beam_search_step_callback=None,
                         max_steps=None):
        """Performs beam-search based decoding.

    Args:
      theta: A NestedMap object containing weights' values of the decoder layer
        and its children layers.
      encoder_outputs: A NestedMap containing encoder outputs to be passed to
        the callbacks. Mostly opaque to BeamSearchHelper, except that it should
        contain either a 'seq_lengths' field of shape [source_batch_size] or
        a 'padding' field of shape [source_max_lengths, source_batch_size].
      num_hyps_per_beam_override: If set to a value <= 0, this parameter is
        ignored. If set to a value > 0, then this value will be used to override
        `p.num_hyps_per_beam`.
      init_beam_search_state: The `InitBeamSearchState` callback. Please refer
        to the class header comments for more details.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      max_steps: maximum beam search steps. If None, use
        self.params.target_seq_len.

    Returns:
      A `BeamSearchDecodeOutput`.
    """
        p = self.params
        num_hyps_per_beam = p.num_hyps_per_beam
        if num_hyps_per_beam_override > 0:
            num_hyps_per_beam = num_hyps_per_beam_override
        if max_steps is None:
            max_steps = p.target_seq_len

        initial_results, other_states = init_beam_search_state(
            theta, encoder_outputs, num_hyps_per_beam)

        num_hyps = tf.shape(initial_results.log_probs)[0]
        num_beams = num_hyps // num_hyps_per_beam

        if 'step_ids' in initial_results:
            # [num_hyps, 1]
            step_ids = tf.ensure_shape(initial_results.step_ids, [None, 1])
        else:
            step_ids = tf.fill([num_hyps, 1],
                               tf.constant(p.target_sos_id, dtype=tf.int32))

        min_score = -1e36
        best_scores = (tf.zeros(shape=[num_beams], dtype=p.dtype) + min_score)
        cumulative_scores = tf.zeros(shape=[num_hyps], dtype=p.dtype)
        in_scores = tf.zeros([max_steps, num_hyps], dtype=p.dtype)
        in_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_prev_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
        in_done_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.string)
        bs_atten_probs = tf.zeros(
            [max_steps, num_hyps,
             tf.shape(initial_results.atten_probs)[1]],
            dtype=p.dtype)
        beam_done = tf.zeros([num_beams], dtype=tf.bool)
        cur_step = tf.constant(0, dtype=tf.int32)
        all_done = tf.constant(False, dtype=tf.bool)
        core_bs_states = (best_scores, cumulative_scores, in_scores, in_hyps,
                          in_prev_hyps, in_done_hyps, bs_atten_probs,
                          beam_done)

        def LoopContinue(cur_step, all_done, unused_step_ids,
                         unused_core_bs_states, unused_other_states_list):
            return tf.math.logical_and(cur_step < max_steps,
                                       tf.math.logical_not(all_done))

        def LoopBody(cur_step, unused_all_done, step_ids, core_bs_states,
                     other_states_list):
            (cur_step, all_done, new_step_ids, new_bs_states,
             new_other_states) = self._BeamSearchStep(
                 theta, encoder_outputs, cur_step, step_ids, core_bs_states,
                 other_states.Pack(other_states_list), num_hyps_per_beam,
                 pre_beam_search_step_callback, post_beam_search_step_callback)
            return (cur_step, all_done, new_step_ids, new_bs_states,
                    new_other_states.Flatten())

        flat_other_states = other_states.Flatten()
        _, _, _, final_bs_states, flat_final_other_states = tf.while_loop(
            LoopContinue,
            LoopBody,
            loop_vars=(cur_step, all_done, step_ids, core_bs_states,
                       flat_other_states),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            shape_invariants=(tf.TensorShape(cur_step.get_shape()),
                              tf.TensorShape(all_done.get_shape()),
                              tf.TensorShape(step_ids.get_shape()),
                              _GetShapes(core_bs_states),
                              _GetShapes(flat_other_states, none_shapes=True)))
        # [target_seq_len, num_beams * num_hyps_per_beam].
        final_done_hyps = final_bs_states[5]
        final_other_states = other_states.Pack(flat_final_other_states)

        # Assume that `paddings` has shape [source_max_lengths, source_batch_size]
        # by default, and compute `encoded_seq_lengths` accordingly. This can be
        # overridden by directly passing `seq_lengths` in the `encoder_outputs`
        # NestedMap.
        encoded_seq_lengths = getattr(encoder_outputs, 'seq_lengths', None)
        if encoded_seq_lengths is None:
            source_paddings = encoder_outputs.padding
            if isinstance(source_paddings, py_utils.NestedMap):
                encoded_seq_lengths = tf.cast(
                    tf.round(
                        tf.reduce_sum(
                            1.0 - tf.transpose(source_paddings.Flatten()[0]),
                            1)), tf.int32)
            else:
                encoded_seq_lengths = tf.cast(
                    tf.round(
                        tf.reduce_sum(
                            1.0 -
                            tf.cast(tf.transpose(source_paddings), tf.float32),
                            1)), tf.int32)

        # [num_beams, num_hyps_per_beam].
        topk_hyps = ops.top_k_terminated_hyps(
            final_done_hyps,
            encoded_seq_lengths,
            k=num_hyps_per_beam,
            num_hyps_per_beam=num_hyps_per_beam,
            length_normalization=p.length_normalization,
            coverage_penalty=p.coverage_penalty,
            target_seq_length_ratio=p.target_seq_length_ratio)
        # [num_beams * num_hyps_per_beam, ...].
        max_seq_length = 0 if isinstance(max_steps, tf.Tensor) else max_steps
        topk_ids, topk_lens, topk_scores = ops.unpack_hyp(
            tf.reshape(topk_hyps, [-1]), max_seq_length=max_seq_length)
        # [num_beams, num_hyps_per_beam].
        topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

        return BeamSearchDecodeOutput(topk_hyps, topk_ids, topk_lens,
                                      topk_scores, None, final_other_states)
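
# --- Supplementary sketch (not part of the original decoder code) ---
# The fallback above recovers sequence lengths from a [source_max_lengths,
# source_batch_size] padding matrix (0.0 = real token, 1.0 = pad). A numpy
# analogue of the transpose / reduce_sum / round computation:
import numpy as np

paddings = np.array([[0., 0.],
                     [0., 0.],
                     [0., 1.],
                     [1., 1.]])  # [source_max_lengths, source_batch_size]
seq_lengths = np.round(np.sum(1.0 - paddings.T, axis=1)).astype(np.int32)
print(seq_lengths)  # [3 2]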
Example #8
    def testConv2DLayerStridedWithPaddingFProp(self, seq_len):
        """Check strided convs get the same values for different length dim."""
        # TODO(isaace): THIS TEST SHOWS THAT THERE IS A BUG IN THE CODE.
        with self.session(use_gpu=True):
            batch_size = 3
            expected_seq_len = 3

            params = conv_layers.Conv2DLayerWithPadding.Params()
            params.weight_norm = False
            params.filter_stride = [2, 2]
            params.name = 'conv'
            params.filter_shape = [3, 3, 1, 1]
            params.params_init = py_utils.WeightInit.Constant(1.0)
            conv_layer = params.Instantiate()

            # Set up the padding for the sequence length (starting at 5).
            in_padding = tf.constant([
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 1],
                [0, 0, 0, 1, 1],
            ], tf.float32)
            in_padding = tf.pad(in_padding, [[0, 0], [0, seq_len - 5]],
                                constant_values=1.0)

            inputs = 1.0 + tf.tile(
                tf.reshape(tf.range(seq_len, dtype=tf.float32),
                           [1, seq_len, 1, 1]), [batch_size, 1, 3, 1])
            inputs = py_utils.ApplyPadding(
                tf.reshape(in_padding, [batch_size, seq_len, 1, 1]), inputs)

            # [[[[1], [1], [1]], [[2], [2], [2]], [[3], [3], [3]], [[4], [4], [4]],
            #   [[5], [5], [5]], [[0], [0], [0]]],
            # [[[1], [1], [1]], [[2], [2], [2]], [[3], [3], [3]], [[4], [4], [4]],
            #   [[0], [0], [0]], [[0], [0], [0]]],
            # [[[1], [1], [1]], [[2], [2], [2]], [[3], [3], [3]], [[0], [0], [0]],
            #   [[0], [0], [0]], [[0], [0], [0]]]]
            inputs = py_utils.Debug(inputs)

            output, out_padding = conv_layer.FPropDefaultTheta(
                inputs, in_padding)

            output = py_utils.Debug(output)
            out_padding = py_utils.Debug(out_padding)

            self.evaluate(tf.global_variables_initializer())
            output, out_padding = self.evaluate([output, out_padding])

            self.assertEqual((batch_size, expected_seq_len, 2, 1),
                             output.shape)
            self.assertAllClose([
                [0, 0, 1],
                [0, 0, 1],
                [0, 1, 1],
            ], out_padding)

            # This shows a bug in the implementation: the output values should
            # be the same for every seq_len. There are also bugs with the
            # output not having the correct padding.
            if seq_len == 5:
                self.assertAllClose([
                    [[[6], [6]], [[18], [18]], [[18], [18]]],
                    [[[6], [6]], [[18], [18]], [[8], [8]]],
                    [[[6], [6]], [[10], [10]], [[0], [0]]],
                ], output)
            elif seq_len == 6:
                self.assertAllClose([
                    [[[12], [12]], [[24], [24]], [[10], [10]]],
                    [[[12], [12]], [[14], [14]], [[0], [0]]],
                    [[[12], [12]], [[6], [6]], [[0], [0]]],
                ], output)
            else:
                raise ValueError(f'Test does not handle length {seq_len}')
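
# --- Supplementary sketch (not part of the original test) ---
# The nominal relation between input and output padding for a stride-2 conv:
# output step t corresponds to input step 2 * t, so one idealized rule is to
# subsample the padding by the stride (an assumption about intended behavior;
# the TODO above notes the actual layer deviates from expectations).
import numpy as np

in_padding = np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 1, 1],
], np.float32)
stride = 2
print(in_padding[:, ::stride])
# [[0. 0. 0.]
#  [0. 0. 1.]
#  [0. 0. 1.]]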
Example #9
    def testCausalConv2DLayerStridedWithPaddingFProp(self, seq_len):
        """Check strided convs get the same values for different length dim."""
        # TODO(isaace): THIS TEST SHOWS THAT THERE IS A BUG WITH PADDING
        with self.session(use_gpu=True):
            batch_size = 5
            expected_seq_len = 3

            params = conv_layers.CausalConv2DLayerWithPadding.Params()
            params.weight_norm = False
            params.filter_stride = [2, 2]
            params.name = 'conv'
            params.filter_shape = [3, 1, 1, 1]
            params.params_init = py_utils.WeightInit.Constant(1.0)
            conv_layer = params.Instantiate()

            # Set up the padding for the sequence length (starting at 5).
            in_padding = tf.constant([
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 1],
                [0, 0, 0, 1, 1],
                [0, 0, 1, 1, 1],
                [0, 1, 1, 1, 1],
            ], tf.float32)
            in_padding = tf.pad(in_padding, [[0, 0], [0, seq_len - 5]],
                                constant_values=1.0)

            inputs = 1.0 + tf.tile(
                tf.reshape(tf.range(seq_len, dtype=tf.float32),
                           [1, seq_len, 1, 1]), [batch_size, 1, 3, 1])
            inputs = py_utils.ApplyPadding(
                tf.reshape(in_padding, [batch_size, seq_len, 1, 1]), inputs)

            inputs = py_utils.Debug(inputs)

            output, out_padding = conv_layer.FPropDefaultTheta(
                inputs, in_padding)

            output = py_utils.Debug(output)
            out_padding = py_utils.Debug(out_padding)

            self.evaluate(tf.global_variables_initializer())
            output, out_padding = self.evaluate([output, out_padding])

            self.assertEqual((batch_size, expected_seq_len, 2, 1),
                             output.shape)
            self.assertAllClose([
                [0, 0, 1],
                [0, 0, 1],
                [0, 1, 1],
                [0, 1, 1],
                [1, 1, 1],
            ], out_padding)

            # NOTE: There is a bug in the output not being padded correctly.
            self.assertAllClose([
                [[[1], [1]], [[6], [6]], [[12], [12]]],
                [[[1], [1]], [[6], [6]], [[7], [7]]],
                [[[1], [1]], [[6], [6]], [[3], [3]]],
                [[[1], [1]], [[3], [3]], [[0], [0]]],
                [[[1], [1]], [[1], [1]], [[0], [0]]],
            ], output)
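
# --- Supplementary sketch (not part of the original test) ---
# Causal convs are typically implemented by left-padding the time axis with
# (filter_size - 1) zeros before a VALID conv, so output t sees only inputs
# <= t. A 1-D numpy illustration with an all-ones filter and stride 1:
import numpy as np

x = np.array([1., 2., 3., 4., 5.])
filter_size = 3
x_padded = np.concatenate([np.zeros(filter_size - 1), x])
causal_out = np.convolve(x_padded, np.ones(filter_size), mode='valid')
print(causal_out)  # [ 1.  3.  6.  9. 12.]: current plus two previous inputs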
Example #10
  def testTargetSequenceSamplerWithNumHypsPerBeam4(self, use_recurrent):
    with self.session(use_gpu=False):
      np.random.seed(9384758)
      tf.random.set_seed(8274758)
      vocab_size = 12
      src_len = 5
      tgt_len = 7
      batch_size = 2

      def InitBeamSearchCallBack(unused_theta, unused_encoder_outputs,
                                 num_hyps_per_beam):
        self.assertEqual(4, num_hyps_per_beam)
        logits = tf.zeros((batch_size * num_hyps_per_beam, vocab_size),
                          dtype=tf.float32)
        return (py_utils.NestedMap(log_probs=logits),
                py_utils.NestedMap(step=tf.constant(0)))

      def PreBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                    unused_step_ids, states, num_hyps_per_beam):
        self.assertEqual(4, num_hyps_per_beam)
        logits = tf.random.stateless_normal(
            [batch_size * num_hyps_per_beam, vocab_size], seed=[8273747, 9])
        return (py_utils.NestedMap(log_probs=logits),
                py_utils.NestedMap(step=states.step + 1))

      def PostBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                     unused_new_step_ids, states):
        return states

      src_enc = tf.random.stateless_normal([src_len, batch_size, 8],
                                           seed=[982774838, 9])
      src_enc_padding = tf.constant(
          [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
          dtype=tf.float32)
      encoder_outputs = py_utils.NestedMap(
          encoded=src_enc, padding=src_enc_padding)

      theta = py_utils.NestedMap()
      random_seed = tf.constant(123)
      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh',
          target_seq_len=tgt_len,
          num_hyps_per_beam=4,
          use_recurrent=use_recurrent)
      seq_sampler = p.Instantiate()
      decoder_output = seq_sampler.Sample(theta, encoder_outputs, random_seed,
                                          InitBeamSearchCallBack,
                                          PreBeamSearchStepCallback,
                                          PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [[9, 0, 2, 2, 2, 2, 2], [0, 0, 11, 8, 1, 0, 7],
                      [10, 4, 8, 4, 9, 3, 7], [8, 9, 10, 3, 4, 2, 2],
                      [11, 9, 7, 9, 8, 7, 11], [1, 4, 2, 2, 2, 2, 2],
                      [2, 2, 2, 2, 2, 2, 2], [9, 3, 6, 9, 6, 2, 2]]
      expected_lens = [3, 7, 7, 6, 7, 3, 1, 6]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)

      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh', target_seq_len=tgt_len, top_k=1, num_hyps_per_beam=4)
      seq_sampler = p.Instantiate()
      decoder_output = seq_sampler.Sample(theta, encoder_outputs, random_seed,
                                          InitBeamSearchCallBack,
                                          PreBeamSearchStepCallback,
                                          PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [[0, 0, 0, 0, 0, 0, 0], [7, 7, 7, 7, 7, 7, 7],
                      [7, 7, 7, 7, 7, 7, 7], [0, 0, 0, 0, 0, 0, 0],
                      [8, 8, 8, 8, 8, 8, 8], [10, 10, 10, 10, 10, 10, 10],
                      [2, 2, 2, 2, 2, 2, 2], [6, 6, 6, 6, 6, 6, 6]]
      expected_lens = [7, 7, 7, 7, 7, 7, 1, 7]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)

      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh', target_seq_len=tgt_len, top_k=5, num_hyps_per_beam=4)
      seq_sampler = p.Instantiate()
      decoder_output = seq_sampler.Sample(theta, encoder_outputs, random_seed,
                                          InitBeamSearchCallBack,
                                          PreBeamSearchStepCallback,
                                          PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [[5, 0, 0, 0, 8, 0, 6], [7, 7, 10, 0, 7, 7, 0],
                      [11, 7, 11, 7, 11, 7, 10], [3, 4, 4, 9, 1, 9, 1],
                      [10, 11, 9, 11, 9, 9, 10], [10, 2, 2, 2, 2, 2, 2],
                      [2, 2, 2, 2, 2, 2, 2], [9, 6, 1, 9, 5, 6, 10]]
      expected_lens = [7, 7, 7, 7, 7, 2, 1, 7]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)

      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh',
          target_seq_len=tgt_len,
          temperature=0.2,
          num_hyps_per_beam=4)
      seq_sampler = p.Instantiate()
      decoder_output = seq_sampler.Sample(theta, encoder_outputs, random_seed,
                                          InitBeamSearchCallBack,
                                          PreBeamSearchStepCallback,
                                          PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [[0, 0, 0, 0, 0, 0, 9], [0, 0, 11, 7, 1, 0, 7],
                      [7, 7, 7, 7, 7, 6, 7], [0, 0, 3, 0, 0, 0, 0],
                      [9, 8, 8, 8, 8, 8, 9], [2, 2, 2, 2, 2, 2, 2],
                      [2, 2, 2, 2, 2, 2, 2], [6, 5, 6, 6, 6, 1, 6]]
      expected_lens = [7, 7, 7, 7, 7, 1, 1, 7]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)
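
# --- Supplementary sketch (not part of the original test) ---
# What top_k and temperature do to the sampling distribution, in numpy form
# (the exact op behind TargetSequenceSampler is an assumption here):
import numpy as np

def sample_probs(logits, top_k=0, temperature=1.0):
  logits = np.asarray(logits, np.float64) / temperature
  if top_k > 0:
    kth = np.sort(logits)[-top_k]
    logits = np.where(logits >= kth, logits, -np.inf)  # keep only top-k
  exp = np.exp(logits - logits.max())
  return exp / exp.sum()

logits = [2.0, 1.0, 0.5, -1.0]
print(sample_probs(logits, top_k=1))          # [1. 0. 0. 0.]: greedy
print(sample_probs(logits, temperature=0.2))  # sharply peaked on the argmax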
Example #11
  def InitBeamSearchCallBack(unused_theta, unused_encoder_outputs,
                             num_hyps_per_beam):
    self.assertEqual(1, num_hyps_per_beam)
    logits = tf.zeros((batch_size, vocab_size), dtype=tf.float32)
    return (py_utils.NestedMap(log_probs=logits),
            py_utils.NestedMap(step=tf.constant(0)))
Example #12
  def testTargetSequenceSamplerWithEOC(self, use_recurrent):
    with self.session(use_gpu=False):
      np.random.seed(9384758)
      tf.random.set_seed(8274758)
      vocab_size = 4
      src_len = 5
      tgt_len = 20
      batch_size = 2
      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh',
          target_seq_len=tgt_len,
          target_eoc_id=0,
          use_recurrent=use_recurrent)
      seq_sampler = p.Instantiate()

      def InitBeamSearchCallBack(unused_theta, unused_encoder_outputs,
                                 num_hyps_per_beam):
        self.assertEqual(1, num_hyps_per_beam)
        logits = tf.zeros((batch_size, vocab_size), dtype=tf.float32)
        is_last_chunk = tf.constant(False, shape=[batch_size])
        result = py_utils.NestedMap(
            log_probs=logits, is_last_chunk=is_last_chunk)
        states = py_utils.NestedMap(
            step=tf.constant(0),
            src_step=tf.zeros([batch_size], dtype=tf.int32))
        return result, states

      def PreBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                    unused_step_ids, states, num_hyps_per_beam):
        self.assertEqual(1, num_hyps_per_beam)
        logits = tf.random.stateless_normal([batch_size, vocab_size],
                                            seed=[8273747, 9])
        # Make it never predict <eos>.
        logits -= tf.one_hot([p.target_eos_id], vocab_size, 1e30)
        is_last_chunk = tf.equal(states.src_step, src_len - 1)
        result = py_utils.NestedMap(
            log_probs=logits, is_last_chunk=is_last_chunk)
        return result, states

      def PostBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                     new_step_ids, states):
        return py_utils.NestedMap(
            step=states.step + 1,
            src_step=states.src_step + tf.cast(
                tf.equal(new_step_ids, p.target_eoc_id), dtype=tf.int32))

      src_enc = tf.random.stateless_normal([src_len, batch_size, 8],
                                           seed=[982774838, 9])
      src_enc_padding = tf.constant(
          [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
          dtype=tf.float32)
      encoder_outputs = py_utils.NestedMap(
          encoded=src_enc, padding=src_enc_padding)

      theta = py_utils.NestedMap()
      random_seed = tf.constant(123)
      decoder_output = seq_sampler.Sample(
          theta, encoder_outputs, random_seed, InitBeamSearchCallBack,
          PreBeamSearchStepCallback, PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [
          [0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
          [0, 0, 3, 3, 1, 0, 3, 0, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      ]
      expected_lens = [5, 11]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)

      # Now do the same, except with use_stop_fn=True.
      p = target_sequence_sampler.TargetSequenceSampler.Params().Set(
          name='bsh', target_seq_len=tgt_len, target_eoc_id=0, use_stop_fn=True)
      seq_sampler = p.Instantiate()
      decoder_output = seq_sampler.Sample(theta, encoder_outputs, random_seed,
                                          InitBeamSearchCallBack,
                                          PreBeamSearchStepCallback,
                                          PostBeamSearchStepCallback)

      ids, lens = self.evaluate([
          decoder_output.ids,
          tf.reduce_sum(1 - decoder_output.paddings, 1),
      ])
      print(np.array_repr(ids))
      print(np.array_repr(lens))
      expected_ids = [
          [0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
          [0, 0, 3, 3, 1, 0, 3, 0, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      ]
      expected_lens = [5, 11]
      self.assertAllEqual(expected_ids, ids)
      self.assertAllEqual(expected_lens, lens)
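
# --- Supplementary sketch (not part of the original test) ---
# The "never predict <eos>" trick above: subtracting a one-hot vector scaled
# by a huge constant pushes the eos logit far below every alternative.
import numpy as np

vocab_size, eos_id = 4, 2
logits = np.array([0.3, -0.1, 5.0, 0.8])
logits -= np.eye(vocab_size)[eos_id] * 1e30
assert logits.argmax() != eos_id  # eos can no longer win a sampling step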
Example #13
    def _runBeamSearchOpHelper(self,
                               hyp_size,
                               num_beams,
                               seq_len,
                               init_best_score,
                               probs,
                               init_atten_probs,
                               atten_probs,
                               beam_size=3.0,
                               ensure_full_beam=False,
                               force_eos_in_last_step=False,
                               local_eos_threshold=-100.0,
                               independence=True,
                               use_v2=True):
        eos_id = 2
        num_hyps_per_beam = hyp_size // num_beams

        best_scores = tf.zeros([num_beams])
        cumulative_scores = tf.zeros([hyp_size])
        scores = tf.zeros([seq_len, hyp_size])
        hyps = tf.zeros([seq_len, hyp_size], dtype=tf.int32)
        prev_hyps = tf.zeros([seq_len, hyp_size], dtype=tf.int32)
        done_hyps = tf.constant('', shape=[seq_len, hyp_size], dtype=tf.string)
        best_scores += init_best_score
        beam_done = tf.zeros([num_beams], dtype=tf.bool)

        for i, prob in enumerate(probs):
            if use_v2:
                (best_scores, cumulative_scores, scores, hyps, prev_hyps,
                 done_hyps, atten_probs, beam_done,
                 done) = ops.beam_search_step(
                     prob,
                     init_atten_probs,
                     best_scores,
                     cumulative_scores,
                     scores,
                     hyps,
                     prev_hyps,
                     done_hyps,
                     atten_probs,
                     beam_done, [],
                     i,
                     eos_id=eos_id,
                     beam_size=beam_size,
                     ensure_full_beam=ensure_full_beam,
                     num_hyps_per_beam=num_hyps_per_beam,
                     valid_eos_max_logit_delta=0.1,
                     force_eos_in_last_step=force_eos_in_last_step,
                     local_eos_threshold=local_eos_threshold,
                     beam_independence=independence)
            else:
                (best_scores, cumulative_scores, scores, hyps, prev_hyps,
                 done_hyps, atten_probs,
                 done) = ops.beam_search_step_deprecated(
                     prob,
                     init_atten_probs,
                     best_scores,
                     cumulative_scores,
                     scores,
                     hyps,
                     prev_hyps,
                     done_hyps,
                     atten_probs, [],
                     i,
                     eos_id=eos_id,
                     beam_size=beam_size,
                     ensure_full_beam=ensure_full_beam,
                     num_hyps_per_beam=num_hyps_per_beam,
                     valid_eos_max_logit_delta=0.1,
                     force_eos_in_last_step=force_eos_in_last_step,
                     local_eos_threshold=local_eos_threshold)

        with self.session(use_gpu=False):
            (best_scores, cumulative_scores, scores, hyps, prev_hyps,
             done_hyps, atten_probs, done, beam_done) = self.evaluate([
                 best_scores, cumulative_scores, scores, hyps, prev_hyps,
                 done_hyps, atten_probs, done, beam_done
             ])

        return (best_scores, cumulative_scores, scores, hyps, prev_hyps,
                done_hyps, atten_probs, done, beam_done)
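
For context, a hedged sketch of invoking this helper from a test method with toy inputs (all values are illustrative and not from the original page; the zero-width attention tensors mirror common beam-search op tests):

# Hypothetical invocation (toy values). hyp_size must be a multiple of
# num_beams so that hyp_size // num_beams is an integer.
probs = [
    np.log([[0.6, 0.4, 1e-7], [0.6, 0.4, 1e-7]]),  # step 0: [hyp_size, vocab]
    np.log([[0.1, 0.9, 1e-7], [0.9, 0.1, 1e-7]]),  # step 1
]
results = self._runBeamSearchOpHelper(
    hyp_size=2,
    num_beams=1,
    seq_len=3,
    init_best_score=-1e9,
    probs=[tf.constant(p, dtype=tf.float32) for p in probs],
    init_atten_probs=tf.zeros([2, 0]),
    atten_probs=tf.zeros([3, 2, 0]))
best_scores = results[0]  # ordering matches the return tuple above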
Example #14
    def testForwardPass(self):
        with self.session(use_gpu=False) as sess:
            bs = 2
            sl = 21
            tf.set_random_seed(8372749040)
            p = self._EncoderParams()
            mt_enc = encoder.TransformerEncoder(p)
            batch = py_utils.NestedMap()
            batch.ids = tf.constant(
                np.random.randint(low=0,
                                  high=63,
                                  size=[bs, sl],
                                  dtype=np.int32))
            batch.paddings = tf.zeros([bs, sl])
            out = mt_enc.FPropDefaultTheta(batch)
            enc_out_sum = tf.reduce_sum(out.encoded, 0)
            emb_out_sum = tf.reduce_sum(out.embedded_inputs, 0)
            enc_padding = out.padding

            tf.global_variables_initializer().run()
            actual_enc_out, actual_enc_out_sum, actual_emb_out_sum, \
                actual_padding = sess.run(
                    [out.encoded, enc_out_sum, emb_out_sum, enc_padding])

            # pyformat: disable
            # pylint: disable=bad-whitespace
            expected_enc_out = [[
                49.45291519, -31.5743885, 39.43684387, -47.67513275,
                35.39754105, 14.41970444, 29.58752823, -43.06747055,
                24.09403419, -7.62717247, 18.48112106, 20.42408371, 5.1519866,
                -19.66542244, 29.81095314, 56.90407944
            ],
                                [
                                    55.26333618, -30.39743614, 29.68314743,
                                    -37.61392975, 43.02292252, 13.88345146,
                                    15.73033905, -24.68696213, 24.70776558,
                                    -29.18026161, 15.41469955, 27.77672577,
                                    -5.36326742, -22.78984642, 22.15843391,
                                    22.7237072
                                ]]
            expected_emb_out_sum = [[
                3.11785889, 1.33086884, -1.96904886, -4.81911993, 1.25389254,
                1.52582073, 0.79906291, 4.07078457, -1.20546532, -2.97308111,
                0.22460097, 2.99702668, -2.29453254, 6.06631422, 1.68836212,
                5.35728741
            ],
                                    [
                                        1.41723049, -1.39409399, -1.49569404,
                                        -0.24654561, 1.09658146, 4.51638842,
                                        2.72023368, -0.45651400, 3.46091199,
                                        -0.43925080, 1.02091551, 3.89704037,
                                        1.87841535, -0.27947778, -0.91630745,
                                        1.34230828
                                    ]]
            # pylint: enable=bad-whitespace
            # pyformat: enable
            self.assertAllEqual(actual_enc_out.shape, [sl, bs, p.model_dim])
            self.assertAllEqual(actual_padding.shape, [sl, bs])
            self.assertAllClose(expected_enc_out,
                                actual_enc_out_sum,
                                rtol=1e-05,
                                atol=1e-05)
            self.assertAllClose(expected_emb_out_sum,
                                actual_emb_out_sum,
                                rtol=1e-05,
                                atol=1e-05)
Example #15
    def testCustomStepIds(self):
        with self.session(use_gpu=False) as sess:
            np.random.seed(9384758)
            tf.set_random_seed(8274758)
            vocab_size = 12
            src_len = 5
            tgt_len = 7
            num_hyps_per_beam = 3
            src_batch_size = 2
            tgt_batch_size = src_batch_size * num_hyps_per_beam
            p = beam_search_helper.BeamSearchHelper.Params().Set(
                name='bsh', target_seq_len=tgt_len)
            bs_helper = p.Instantiate()

            def InitBeamSearchState(unused_theta, unused_encoder_outputs,
                                    unused_num_hyps_per_beam):
                atten_probs = tf.constant(
                    np.random.normal(size=(tgt_batch_size, src_len)),
                    dtype=tf.float32)
                return (py_utils.NestedMap({
                    'log_probs':
                    tf.zeros([tgt_batch_size, vocab_size]),
                    'atten_probs':
                    atten_probs,
                    'step_ids':
                    tf.zeros([tgt_batch_size, 1], dtype=tf.int32)
                }), py_utils.NestedMap({'atten_probs': atten_probs}))

            def PreBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                          unused_step_ids, states,
                                          unused_num_hyps_per_beam):
                atten_probs = tf.identity(states.atten_probs)
                logits = tf.random_normal([tgt_batch_size, vocab_size],
                                          seed=8273747)
                return (py_utils.NestedMap({
                    'atten_probs': atten_probs,
                    'log_probs': logits
                }), states)

            def PostBeamSearchStepCallback(unused_theta,
                                           unused_encoder_outputs,
                                           unused_new_step_ids, states):
                return states

            src_enc = tf.random_normal([src_len, src_batch_size, 8],
                                       seed=982774838)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float32)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)

            theta = py_utils.NestedMap()
            decoder_output = bs_helper.BeamSearchDecode(
                theta, encoder_outputs, num_hyps_per_beam, InitBeamSearchState,
                PreBeamSearchStepCallback, PostBeamSearchStepCallback)

            topk_ids, topk_lens, topk_scores = sess.run([
                decoder_output.topk_ids, decoder_output.topk_lens,
                decoder_output.topk_scores
            ])
            print(np.array_repr(topk_ids))
            print(np.array_repr(topk_lens))
            print(np.array_repr(topk_scores))
            expected_topk_ids = [[4, 3, 4, 3, 2, 0, 0], [4, 3, 11, 2, 0, 0, 0],
                                 [4, 3, 6, 2, 0, 0, 0], [6, 0, 4, 6, 6, 11, 2],
                                 [6, 0, 4, 6, 1, 2, 0], [6, 0, 4, 6, 6, 2, 0]]
            expected_topk_lens = [5, 4, 4, 7, 6, 6]
            expected_topk_scores = [[8.27340603, 6.26949024, 5.59490776],
                                    [9.74691486, 8.46679497, 7.14809656]]
            self.assertEqual(expected_topk_ids, topk_ids.tolist())
            self.assertEqual(expected_topk_lens, topk_lens.tolist())
            self.assertAllClose(expected_topk_scores, topk_scores)
Example #16
  def Pos(x):
    return tf.maximum(tf.constant(1e-8, x.dtype), x)
Example #17
    def testGreedySearchHelper(self):
        with self.session(use_gpu=False) as sess:
            np.random.seed(9384758)
            tf.set_random_seed(8274758)
            vocab_size = 12
            src_len = 5
            tgt_len = 7
            src_batch_size = 2
            tgt_batch_size = src_batch_size
            p = beam_search_helper.GreedySearchHelper.Params().Set(
                name='gsh', target_seq_len=tgt_len)
            gs_helper = p.Instantiate()

            def InitGreedySearchState(unused_theta, unused_encoder_outputs,
                                      unused_num_hyps_per_beam):
                atten_probs = tf.constant(
                    np.random.normal(size=(tgt_batch_size, src_len)),
                    dtype=tf.float32)
                return (py_utils.NestedMap({
                    'log_probs':
                    tf.zeros([tgt_batch_size, vocab_size]),
                    'atten_probs':
                    atten_probs,
                }), py_utils.NestedMap({'atten_probs': atten_probs}))

            def PreGreedySearchStepCallback(unused_theta,
                                            unused_encoder_outputs,
                                            unused_step_ids, states,
                                            unused_num_hyps_per_beam):
                atten_probs = tf.identity(states.atten_probs)
                logits = tf.random_normal([tgt_batch_size, vocab_size],
                                          seed=8273747)
                return (py_utils.NestedMap({
                    'atten_probs': atten_probs,
                    'log_probs': logits
                }), states)

            def PostGreedySearchStepCallback(unused_theta,
                                             unused_encoder_outputs,
                                             unused_new_step_ids, states):
                return states

            src_enc = tf.random_normal([src_len, src_batch_size, 8],
                                       seed=982774838)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float32)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)

            theta = py_utils.NestedMap()
            (final_hyp_ids, final_hyp_lens,
             final_done_hyps) = gs_helper.GreedySearchDecode(
                 theta, encoder_outputs, InitGreedySearchState,
                 PreGreedySearchStepCallback, PostGreedySearchStepCallback)

            (final_hyp_ids, final_hyp_lens, final_done_hyps) = sess.run(
                [final_hyp_ids, final_hyp_lens, final_done_hyps])

            print(np.array_repr(final_hyp_ids))
            print(np.array_repr(final_hyp_lens))
            print(np.array_repr(final_done_hyps))

            expected_hyp_ids = [[2, 2, 6, 7, 1, 9, 4], [3, 9, 3, 9, 6, 5, 10]]
            expected_hyp_lens = [1, 7]
            expected_done_hyps = [True, False]
            self.assertEqual(expected_hyp_ids, final_hyp_ids.tolist())
            self.assertEqual(expected_hyp_lens, final_hyp_lens.tolist())
            self.assertEqual(expected_done_hyps, final_done_hyps.tolist())
Example #18
    def testDecoderFPropWithAdapters(self):
        """Create decoder with adapters, and verify that FProp runs."""
        with self.session(use_gpu=False):
            tf.random.set_seed(8372749040)

            params = self._DecoderParams(
                num_rnn_layers=2,
                vn_config=py_utils.VariationalNoiseParams(None,
                                                          True,
                                                          False,
                                                          seed=12345))
            params.rnn_cell_dim = 3
            params.adapter_layer_tpl.Set(
                bottleneck_dim=4,
                num_tasks=16,
                projection_params_init=py_utils.WeightInit.Gaussian(0.01))
            params.adapter_task_id_field = 'domain_ids'

            dec = params.Instantiate()
            src_seq_len = 5
            src_enc = tf.random.normal([src_seq_len, 2, 8],
                                       seed=982774838,
                                       dtype=py_utils.FPropDtype(params))
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=py_utils.FPropDtype(params))
            domain_ids = tf.constant(
                np.random.randint(low=0, high=16, size=[2]))
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding,
                                                 domain_ids=domain_ids)
            # shape=[4, 5]
            target_ids = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                             [5, 6, 7, 8], [10, 5, 2, 5]],
                            dtype=tf.int32))
            # shape=[4, 5]
            target_labels = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                             [5, 7, 8, 10], [10, 5, 2, 4]],
                            dtype=tf.int32))
            # shape=[4, 5]
            target_paddings = tf.transpose(
                tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                             [0, 1, 0, 0], [1, 1, 1, 0]],
                            dtype=py_utils.FPropDtype(params)))
            target_transcripts = tf.constant(
                ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
            target_weights = 1.0 - target_paddings
            # ids/labels/weights/paddings are all in [batch, time] shape.
            targets = py_utils.NestedMap({
                'ids': target_ids,
                'labels': target_labels,
                'weights': target_weights,
                'paddings': target_paddings,
                'transcripts': target_transcripts,
            })
            decoder_outputs = dec.FPropDefaultTheta(encoder_outputs, targets)
            metrics = decoder_outputs.metrics
            per_sequence_loss = decoder_outputs.per_sequence['loss']

            self.assertIn('fraction_of_correct_next_step_preds', metrics)
            self.evaluate(tf.global_variables_initializer())
            metrics_val, per_sequence_loss_val = self.evaluate(
                [metrics, per_sequence_loss])
            tf.logging.info('metrics=%s, per_sequence_loss=%s', metrics_val,
                            per_sequence_loss_val)

            self.assertEqual(metrics_val['loss'], metrics_val['log_pplx'])
            # Target batch size is 4. Therefore, we should expect 4 here.
            self.assertEqual(per_sequence_loss_val.shape, (4, ))
Example #19
  def Value(self):
    return tf.constant(self.params.value, self.params.dtype)
Example #20
File: car_lib.py  Project: wzhang1/lingvo
def FarthestPointSampler(points,
                         padding,
                         num_sampled_points,
                         precomputed_squared_distance=None,
                         num_seeded_points=0,
                         random_seed=None):
  """Samples num_sampled_points from points using farthest point sampling.

  Algorithm:
  1. Start by selecting a random point and adding to a selected set.
  2. For all remaining points, find the furthest point from those selected.
  3. Add furthest point to selected.
  4. Repeat 2-3 until num_sampled_points are selected.

  More details at https://en.wikipedia.org/wiki/Farthest-first_traversal

  The output of this function can be used with tf.array_ops.batch_gather to
  extract the desired points, for example:
  tf.array_ops.batch_gather(points, sampled_idx)

  Args:
    points: floating point tf.Tensor of shape [N, P1, dims]
    padding: A floating point tf.Tensor of shape [N, P1] with 0 if the point is
      real, and 1 otherwise.
    num_sampled_points: integer number of points to sample.
    precomputed_squared_distance: optional tf.Tensor of shape [N, P1, P1] of
      squared distances between each pair of points. If None, distances will
      be computed on the fly.
    num_seeded_points: If num_seeded_points > 0, then the first
      num_seeded_points in points are considered to be seeded in the FPS
      sampling. Note that we assume that these points are *not* padded, and do
      not check padding when seeding them.
    random_seed: optional integer random seed to use with all the random ops.

  Returns:
    A tuple of tf.Tensors (sampled_idx, closest_idx) of types
    (tf.int32, tf.int32).

    sampled_idx is of shape [N, num_sampled_points] representing the indices
    selected using the sampler. These indices are in the range [0, P1).

    closest_idx is of shape [N, P1] representing the indices of the closest
    sampled points for each input point. closest_idx is used in PCNN as part of
    the pooling operation: each point is assigned to the closest sampled point
    and a max is taken over them. These indices are in the range
    [0, num_sampled_points).
  """
  points = py_utils.HasRank(points, 3)
  batch_size, num_points, dims = py_utils.GetShape(points, 3)

  points = py_utils.with_dependencies(
      [py_utils.assert_greater_equal(num_points, num_sampled_points)], points)

  # Add a tiny bit of noise to the distance matrix or points so that all
  # points are unique. This also ensures that truly repeated points (such as
  # padded points) are only selected after all valid points.
  if precomputed_squared_distance is not None:
    precomputed_squared_distance = py_utils.HasShape(
        precomputed_squared_distance, [batch_size, num_points, num_points])
    precomputed_squared_distance += tf.random.uniform(
        (batch_size, num_points, 1),
        minval=1e-6,
        maxval=1e-5,
        dtype=tf.float32,
        seed=random_seed)
  else:
    points += tf.random.uniform((batch_size, num_points, dims),
                                minval=1e-6,
                                maxval=1e-5,
                                dtype=tf.float32,
                                seed=random_seed)

  # TensorArray to store the sampled indices in the loop.
  sampled_idx = tf.TensorArray(tf.int32, num_sampled_points)

  # Initialize distance_to_selected to inf for all points.
  distance_to_selected = float('inf') * tf.ones((batch_size, num_points))

  # For tracking the index to the closest selected point.
  closest_idx = tf.zeros((batch_size, num_points), dtype=tf.int32)

  # Current loop index counter.
  curr_idx = tf.constant(0, dtype=tf.int32)

  # Get number of valid points (1 is padded, so num_points - num_padded).
  num_valid_points = tf.cast(
      tf.cast(num_points, dtype=tf.float32) - tf.reduce_sum(padding, axis=1),
      dtype=tf.int32)

  def _BodyFn(curr_idx, distance_to_selected, sampled_idx, closest_idx):
    """Loop body for farthest point sampler."""

    def _GetRandomRealPoint():
      """Select the first point.

      For the first point, we want any random real (non-padded) point, so we
      create a random value per point, and then set all padded ones to some
      large value (more than the maxval). We then take the min per batch
      element to get the first point.

      Returns:
        Tensor containing the index of a random point selected for each example
        in the batch.
      """
      random_values = tf.random.uniform((batch_size, num_points),
                                        minval=0,
                                        maxval=1,
                                        dtype=tf.float32,
                                        seed=random_seed)
      random_values = tf.where(
          tf.equal(padding, 0.0), random_values, padding * 10)
      return tf.argmin(random_values, axis=1, output_type=tf.int32)

    def _GetFurthestPoint():
      """Get point that is furthest from those already selected.

      We also bias the sampling towards real points by setting the distance
      to padded points negative until we are out of real points.

      Returns:
        Tensor containing the index of the next farthest point selected for each
        example in the batch.
      """
      # Set padded points distance to negative so they aren't selected.
      padding_masked_distance_to_selected = tf.where(
          tf.equal(padding, 0.0), distance_to_selected, -1.0 * tf.ones(
              (batch_size, num_points), dtype=tf.float32))
      # But only do this when we still have valid points left.
      padding_masked_distance_to_selected = tf.where(
          tf.less(curr_idx, num_valid_points),
          padding_masked_distance_to_selected, distance_to_selected)
      return tf.argmax(
          padding_masked_distance_to_selected, axis=-1, output_type=tf.int32)

    def _GetSeededPoint():
      """Select a seeded point.

      Seeded points are assumed to be at the beginning of the original points.

      Returns:
        Tensor containing the index of the next seeded point to select for each
        example in the batch.
      """
      return tf.ones((batch_size,), dtype=tf.int32) * curr_idx

    # Select indices for this loop iteration.
    def _Seeded():
      return tf.cond(
          tf.less(curr_idx, num_seeded_points), _GetSeededPoint,
          _GetFurthestPoint)

    def _Real():
      return tf.cond(
          tf.equal(curr_idx, 0), _GetRandomRealPoint, _GetFurthestPoint)

    new_selected = tf.cond(tf.greater(num_seeded_points, 0), _Seeded, _Real)
    sampled_idx = sampled_idx.write(curr_idx, new_selected)

    # Extract the distance to the latest point selected to update
    # distance_to_selected.
    new_selected_gather_idx = tf.stack([tf.range(batch_size), new_selected],
                                       axis=1)
    if precomputed_squared_distance is not None:
      new_distance = tf.gather_nd(precomputed_squared_distance,
                                  new_selected_gather_idx)
    else:
      new_points = tf.reshape(
          tf.gather_nd(points, new_selected_gather_idx), [batch_size, 1, dims])
      new_distance = tf.reshape(
          SquaredDistanceMatrix(points, new_points), [batch_size, num_points])

    is_newly_closest = tf.less(new_distance, distance_to_selected)
    distance_to_selected = tf.minimum(distance_to_selected, new_distance)

    # Track the index to the closest selected point.
    new_selected_tiled = tf.tile([[curr_idx]], [batch_size, num_points])
    closest_idx = tf.cond(
        tf.equal(curr_idx, 0),
        # At the first loop iteration, the init points are the closest.
        lambda: new_selected_tiled,
        # Otherwise, update with the new points based on the distances.
        lambda: tf.where(is_newly_closest, new_selected_tiled, closest_idx))
    return curr_idx + 1, distance_to_selected, sampled_idx, closest_idx

  _, _, sampled_idx, closest_idx = tf.while_loop(
      lambda curr_idx, *args: tf.less(curr_idx, num_sampled_points),
      _BodyFn,
      loop_vars=(curr_idx, distance_to_selected, sampled_idx, closest_idx),
      back_prop=False,
      maximum_iterations=num_sampled_points)

  sampled_idx = sampled_idx.stack()  # num_sampled_points x n
  sampled_idx = tf.transpose(sampled_idx, [1, 0])

  if isinstance(batch_size, int) and isinstance(num_sampled_points, int):
    sampled_idx.set_shape((batch_size, num_sampled_points))

  return sampled_idx, closest_idx
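
As the docstring notes, the sampled indices pair naturally with a batched gather. A hedged usage sketch follows; the `car_lib` import path is an assumption about where this function lives:

import numpy as np
from lingvo import compat as tf
from lingvo.tasks.car import car_lib  # assumed module path

# Two point clouds of 1024 points each, none padded.
points = tf.constant(np.random.rand(2, 1024, 3), dtype=tf.float32)
padding = tf.zeros([2, 1024], dtype=tf.float32)
sampled_idx, closest_idx = car_lib.FarthestPointSampler(
    points, padding, num_sampled_points=128, random_seed=123)
# Extract the sampled coordinates: [2, 128, 3].
sampled_points = tf.array_ops.batch_gather(points, sampled_idx)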
Example #21
    def GreedySearchDecode(self,
                           theta,
                           encoder_outputs,
                           init_beam_search_state=None,
                           pre_beam_search_step_callback=None,
                           post_beam_search_step_callback=None,
                           max_steps=None):
        """Performs greedy-search based decoding.

    Args:
      theta: A NestedMap object containing weights' values of the decoder layer
        and its children layers.
      encoder_outputs: A NestedMap containing encoder outputs to be passed to
        the callbacks.
      init_beam_search_state: The `InitBeamSearchState` callback. Please refer
        to the class header comments for more details.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      max_steps: maximum beam search steps. If None, use
        self.params.target_seq_len.

    Returns:
      A tuple (hyp_ids, hyp_lens, done_hyps). Note that num_hyps is the same
      as src_batch_size.

        - hyp_ids: [num_hyps, max_steps]. Hyps end with the <eos> token if it
          is encountered during search.
        - hyp_lens: [num_hyps].
        - done_hyps: [num_hyps], whether or not an <eos> was encountered.
    """
        p = self.params
        if max_steps is None:
            max_steps = p.target_seq_len

        initial_results, other_states = init_beam_search_state(
            theta,
            encoder_outputs,
            1  # num_hyps_per_beam
        )

        num_hyps = tf.shape(initial_results.log_probs)[0]

        if 'step_ids' in initial_results:
            # [num_hyps, 1]
            step_ids = tf.ensure_shape(initial_results.step_ids, [None, 1])
        else:
            step_ids = tf.fill([num_hyps, 1],
                               tf.constant(p.target_sos_id, dtype=tf.int32))

        cur_step = tf.constant(0, dtype=tf.int32)
        done_hyps = inplace_ops.empty(shape=[num_hyps],
                                      dtype=tf.bool,
                                      init=True,
                                      name='done_hyps')
        hyp_lens = inplace_ops.empty(shape=[num_hyps],
                                     dtype=tf.int32,
                                     init=True,
                                     name='hyp_lens')
        hyp_ids = inplace_ops.empty(shape=[max_steps, num_hyps],
                                    dtype=tf.int32,
                                    init=True,
                                    name='hyp_ids')

        def LoopContinue(cur_step, unused_step_ids, unused_hyp_ids,
                         unused_hyp_lens, done_hyps, unused_other_states_list):
            return tf.math.logical_and(
                cur_step < max_steps,
                tf.math.logical_not(tf.reduce_all(done_hyps)))

        def LoopBody(cur_step, step_ids, hyp_ids, hyp_lens, done_hyps,
                     other_states_list):
            (cur_step, new_step_ids, hyp_ids, hyp_lens, done_hyps,
             new_other_states) = self._GreedySearchStep(
                 theta, encoder_outputs, cur_step, step_ids, hyp_ids, hyp_lens,
                 done_hyps, other_states.Pack(other_states_list),
                 pre_beam_search_step_callback, post_beam_search_step_callback)
            return (cur_step, new_step_ids, hyp_ids, hyp_lens, done_hyps,
                    new_other_states.Flatten())

        flat_other_states = other_states.Flatten()
        _, _, final_hyp_ids, final_hyp_lens, final_done_hyps, _ = tf.while_loop(
            LoopContinue,
            LoopBody,
            loop_vars=(cur_step, step_ids, hyp_ids, hyp_lens, done_hyps,
                       flat_other_states),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            shape_invariants=(tf.TensorShape(cur_step.get_shape()),
                              tf.TensorShape(step_ids.get_shape()),
                              tf.TensorShape(hyp_ids.get_shape()),
                              tf.TensorShape(hyp_lens.get_shape()),
                              tf.TensorShape(done_hyps.get_shape()),
                              _GetShapes(flat_other_states, none_shapes=True)))

        # transpose hyp_ids so it matches BeamSearchDecode's output
        final_hyp_ids = tf.transpose(final_hyp_ids)
        return final_hyp_ids, final_hyp_lens, final_done_hyps
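
A small hedged sketch of post-processing the returned values (tensor names taken from the Returns section above; `tf` is lingvo's compat module):

# hyp_ids: [num_hyps, max_steps], hyp_lens: [num_hyps].
# Zero out ids past each hypothesis length.
mask = tf.sequence_mask(hyp_lens, maxlen=tf.shape(hyp_ids)[1], dtype=tf.int32)
masked_ids = hyp_ids * mask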
Example #22
  def try_apply_dense(self, grad, var):
    assert grad is not None

    cond = tf.constant(True)
    is_finite_checks = []
    stats = {}

    grad_dtype = var.dtype  # TODO(lepikhin): add to params
    grad = tf.cast(grad, grad_dtype)
    factored_dims = self._factored_dims(var.shape.as_list())
    if factored_dims:
      vr = self.get_slot(var, 'vr')
      vc = self.get_slot(var, 'vc')
    else:
      v = self.get_slot(var, 'v')
    if self._beta1:
      m = self.get_slot(var, 'm')

    def _Upd(c, k, x):
      stats[k] = x
      is_finite_checks.append(tf.reduce_all(tf.math.is_finite(x)))
      return c

    with tf.variable_scope(var.name[:-2] + '/Adafactor'):
      grad_squared = tf.math.square(grad) + tf.cast(self._epsilon1, grad_dtype)
      cond = _Upd(cond, 'grad_squared', grad_squared)  # 0 (factored)
      decay_rate = tf.cast(self._decay_rate, var.dtype)
      old_val = tf.identity(var)  # TODO(lepikhin): introduce gradient dtype
      assert self._multiply_by_parameter_scale
      if self._multiply_by_parameter_scale:
        parameter_scale = self._parameter_scale(old_val)
        cond = _Upd(cond, 'parameter_scale', parameter_scale)  # 1 (factored)
        update_scale = self._parameter_scale(old_val) * tf.cast(
            self._learning_rate, grad_dtype)

      else:
        update_scale = self._learning_rate
      mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
      update_scale = tf.cast(update_scale, grad_dtype)
      if factored_dims:
        d0, d1 = factored_dims
        vr_axis, vc_axis = d0, d1
        grad_squared_row_mean = tf.reduce_mean(grad_squared, axis=vr_axis)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, axis=vc_axis)
        # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
        # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
        cond = _Upd(cond, 'new_vr', new_vr)  # 2 (factored)
        cond = _Upd(cond, 'new_vc', new_vc)  # 3 (factored)
        # vr_update = _Wrap(tf.assign, vr, new_vr)
        # vc_update = _Wrap(tf.assign, vc, new_vc)
        # updates.extend([vr_update, vc_update])
        long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
        r_factor = tf.math.rsqrt(new_vr / long_term_mean)
        c_factor = tf.math.rsqrt(new_vc)
        mult = tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
            c_factor, vc_axis)
        cond = _Upd(cond, 'mult', mult)  # 4 (factored)
        x = grad * mult
      else:
        new_v = v * decay_rate + grad_squared * mixing_rate
        cond = _Upd(cond, 'new_v', new_v)
        # v_update = _Wrap(tf.assign, v, new_v)
        # updates.append(v_update)
        x = grad * tf.math.rsqrt(new_v)

      assert self._clipping_threshold is not None

      if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(
            tf.constant(1.0, grad_dtype),
            _ReduceRms(x) / tf.constant(self._clipping_threshold, grad_dtype))
        x /= clipping_denom
      cond = _Upd(cond, 'x', x)
      subtrahend = x * update_scale
      if self._beta1:
        new_m = (
            m * tf.constant(self._beta1, dtype=grad_dtype) +
            subtrahend * tf.constant(1.0 - self._beta1, dtype=grad_dtype))
        subtrahend = new_m
        cond = _Upd(cond, 'new_m', new_m)
        # updates.append(_Wrap(tf.assign, m, new_m))

      # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
      #  for the case of bfloat16 activations, so as to avoid repeatedly
      #  rounding the slice value, which results in poor quality.
      cond = _Upd(cond, 'subtrahend', subtrahend)  # 5 (factored)

      # var_update = _Wrap(tf.assign_sub, var, subtrahend)
      # updates.append(var_update)

      return is_finite_checks, stats
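
Unlike `_resource_apply_dense` below, this variant returns the finiteness checks and the named intermediates rather than update ops. A hedged sketch of consuming them for debugging (`opt`, `grad`, and `var` are assumed to exist):

# Hypothetical debugging use: find which intermediate became non-finite.
is_finite_checks, stats = opt.try_apply_dense(grad, var)
all_finite = tf.reduce_all(tf.stack(is_finite_checks))
# stats maps names like 'grad_squared', 'new_vr', 'subtrahend' to tensors.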
Example #23
    def FProp(self, theta, input_batch):
        """Embeds source ids and transforms with TransformerStack.

    Args:
      theta: A `.NestedMap` object containing weights' values of this
        layer and its children layers.
      input_batch: A `.NestedMap` with fields:

        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].
        - task_ids: If p.task_emb is provided, must contain per-token task
            ids of shape [batch, time].

    Returns:
      A NestedMap containing

      - encoded: The encoded features, either a tensor of shape
        [time, batch, depth], or a list of tensors if is_transparent is set in
        transformer_stack.
      - padding: of shape [time, batch]
      - segment_id: [time, batch] if packed inputs are supported by the model
        (and all layers), or None otherwise.
      - embedded_inputs: [time, batch, depth] embedded inputs tokens without
        positional encodings.
    """

        p = self.params
        with tf.name_scope(p.name):
            src_segment_id = None
            src_segment_pos = None
            input_ids = py_utils.with_dependencies([
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            tf.shape(input_batch.paddings)),
                py_utils.assert_equal(tf.rank(input_batch.ids), 2)
            ], input_batch.ids)

            if (not py_utils.use_tpu()
                    and tf.flags.FLAGS.transformer_encoder_truncates_inputs):
                max_seq_length = tf.cast(
                    tf.reduce_max(tf.reduce_sum(1.0 - input_batch.paddings,
                                                1)), tf.int32)
                paddings = py_utils.with_dependencies([
                    py_utils.assert_equal(
                        tf.constant(True, tf.bool),
                        tf.reduce_all(
                            input_batch.paddings[:, max_seq_length:] > 0.5))
                ], input_batch.paddings)
                input_ids = input_ids[:, :max_seq_length]
                paddings = paddings[:, :max_seq_length]
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids[:, :max_seq_length]
                    src_segment_pos = input_batch.segment_pos[:, :max_seq_length]
            else:
                paddings = input_batch.paddings
                if p.packed_input:
                    src_segment_id = input_batch.segment_ids
                    src_segment_pos = input_batch.segment_pos

            max_time = tf.shape(input_ids)[1]

            # Input token embeddings + positional embeddings
            input_embs = self.token_emb.EmbLookup(theta.token_emb,
                                                  tf.reshape(input_ids, [-1]))
            input_embs = tf.reshape(input_embs,
                                    [-1, max_time, p.token_emb.embedding_dim])
            # [time, batch, dim]
            orig_input_embs = tf.transpose(input_embs, [1, 0, 2])

            if p.packed_input:
                position_embs = self.position_emb.FPropWithPosition(
                    theta.position_emb, src_segment_pos)
            else:
                position_embs = self.position_emb.FProp(
                    theta.position_emb, max_time)
                position_embs = tf.reshape(
                    position_embs, [1, max_time, p.token_emb.embedding_dim])
            input_embs += position_embs
            if p.task_emb:
                input_embs += self.task_emb.EmbLookup(theta.task_emb,
                                                      input_batch.task_ids)

            if p.model_dim != p.token_emb.embedding_dim:
                input_embs = self.emb_proj.FProp(theta.emb_proj, input_embs)

            paddings = tf.transpose(paddings)
            if p.packed_input:
                src_segment_id = tf.transpose(src_segment_id)
            input_embs = self.input_dropout.FProp(theta.input_dropout,
                                                  input_embs)

            # [time, batch, dim]
            transformer_input = tf.transpose(input_embs, [1, 0, 2])

        if not p.is_eval and p.apply_source_mask:
            # Augment padding for masked source word positions.
            dtype = paddings.dtype
            source_mask = tf.where(tf.equal(input_ids, p.source_mask_id),
                                   tf.ones_like(input_ids, dtype=dtype),
                                   tf.zeros_like(input_ids, dtype=dtype))
            # Make sure padding is between 0 and 1.
            paddings = tf.clip_by_value(paddings + tf.transpose(source_mask),
                                        0.0, 1.0)

        encoded, padding, segment_id = self.transformer_stack.FProp(
            theta.transformer_stack, transformer_input, paddings,
            src_segment_id)
        return py_utils.NestedMap(encoded=encoded,
                                  padding=padding,
                                  segment_id=segment_id,
                                  embedded_inputs=orig_input_embs)
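
For packed inputs (p.packed_input=True), the docstring implies the batch also carries per-token segment ids and positions. A hedged sketch of the call; the tensor names and the encoder instance `enc` are illustrative:

# All fields are [batch, time]; segment_pos restarts at 0 for each packed
# segment, and segment_ids distinguish the original examples.
input_batch = py_utils.NestedMap(
    ids=ids,
    paddings=paddings,
    segment_ids=segment_ids,
    segment_pos=segment_pos)
out = enc.FPropDefaultTheta(input_batch)
# out.encoded: [time, batch, p.model_dim]; out.segment_id: [time, batch].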
Example #24
  def _resource_apply_dense(self, grad, var):
    if grad is None:
      tf.logging.warning('Gradient is None for variable %s' % var.name)
      return []

    grad_dtype = var.dtype  # TODO(lepikhin): add to params
    grad = tf.cast(grad, grad_dtype)
    factored_dims = self._factored_dims(var.shape.as_list())
    if factored_dims:
      vr = self.get_slot(var, 'vr')
      vc = self.get_slot(var, 'vc')
    else:
      v = self.get_slot(var, 'v')
    if self._beta1:
      m = self.get_slot(var, 'm')

    cond = tf.constant(True)

    def _Upd(c, x):
      if not self._cond_is_finite:
        return c
      c = tf.math.logical_and(c, tf.reduce_all(tf.math.is_finite(x)))
      c = tf.math.logical_and(
          c, tf.reduce_all(tf.math.logical_not(tf.math.is_inf(x))))
      return c

    def _Wrap(fn, x, y):
      if not self._cond_is_finite:
        return fn(x, y)
      return tf.cond(cond, lambda: fn(x, y), lambda: x)

    with tf.variable_scope(var.name[:-2] + '/Adafactor'):
      grad_squared = tf.math.square(grad) + tf.cast(self._epsilon1, grad_dtype)
      cond = _Upd(cond, grad_squared)
      decay_rate = tf.cast(self._decay_rate, var.dtype)
      old_val = tf.identity(var)  # TODO(lepikhin): introduce gradient dtype
      if self._multiply_by_parameter_scale:
        update_scale = self._parameter_scale(old_val) * tf.cast(
            self._learning_rate, grad_dtype)
      else:
        update_scale = self._learning_rate
      mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
      update_scale = tf.cast(update_scale, grad_dtype)
      updates = []
      if factored_dims:
        d0, d1 = factored_dims
        vr_axis, vc_axis = d0, d1
        grad_squared_row_mean = tf.reduce_mean(grad_squared, axis=vr_axis)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, axis=vc_axis)
        # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
        # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
        cond = _Upd(cond, new_vr)
        cond = _Upd(cond, new_vc)
        vr_update = _Wrap(tf.assign, vr, new_vr)
        vc_update = _Wrap(tf.assign, vc, new_vc)
        updates.extend([vr_update, vc_update])
        long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
        r_factor = tf.math.rsqrt(new_vr / long_term_mean)
        c_factor = tf.math.rsqrt(new_vc)
        x = grad * tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
            c_factor, vc_axis)
      else:
        new_v = v * decay_rate + grad_squared * mixing_rate
        cond = _Upd(cond, new_v)
        v_update = _Wrap(tf.assign, v, new_v)
        updates.append(v_update)
        x = grad * tf.math.rsqrt(new_v)
      if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(
            tf.constant(1.0, grad_dtype),
            _ReduceRms(x) / tf.constant(self._clipping_threshold, grad_dtype))
        x /= clipping_denom
      subtrahend = x * update_scale
      if self._beta1:
        new_m = (
            m * tf.constant(self._beta1, dtype=grad_dtype) +
            subtrahend * tf.constant(1.0 - self._beta1, dtype=grad_dtype))
        subtrahend = new_m
        cond = _Upd(cond, new_m)
        updates.append(_Wrap(tf.assign, m, new_m))
      # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
      #  for the case of bfloat16 activations, so as to avoid repeatedly
      #  rounding the slice value, which results in poor quality.
      cond = _Upd(cond, subtrahend)
      var_update = _Wrap(tf.assign_sub, var, subtrahend)
      updates.append(var_update)
      return tf.group(*updates)
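
The factored branch maintains only row and column second moments instead of a full one. A minimal NumPy sketch of that math for a 2-D weight (names are illustrative, not the optimizer's API):

import numpy as np

def factored_rms_update(grad, vr, vc, decay_rate, epsilon=1e-30):
  """One factored second-moment step for a [rows, cols] gradient.

  Illustrative only: mirrors the new_vr/new_vc/r_factor/c_factor math in
  the factored branch above.
  """
  grad_squared = grad * grad + epsilon
  new_vr = decay_rate * vr + (1.0 - decay_rate) * grad_squared.mean(axis=1)
  new_vc = decay_rate * vc + (1.0 - decay_rate) * grad_squared.mean(axis=0)
  # Dividing new_vr by its mean removes the overall scale that new_vc
  # already carries, so it is not counted twice.
  r_factor = 1.0 / np.sqrt(new_vr / new_vr.mean())
  c_factor = 1.0 / np.sqrt(new_vc)
  x = grad * r_factor[:, None] * c_factor[None, :]
  return x, new_vr, new_vc

# Toy usage: a 4x3 weight with running row/column statistics.
grad = np.random.randn(4, 3)
x, vr, vc = factored_rms_update(grad, np.ones(4), np.ones(3), decay_rate=0.8)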
Example #25
    def _testDecoderFPropGradientCheckerHelper(self, func_inline=False):
        config = tf.config_pb2.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                do_function_inlining=func_inline)))
        with self.session(use_gpu=False, config=config) as sess:
            tf.random.set_seed(8372749040)
            np.random.seed(274854)
            vn_config = py_utils.VariationalNoiseParams(None, False, False)
            p = self._DecoderParams(vn_config)
            p.dtype = tf.float64

            dec = p.Instantiate()
            src_seq_len = 5
            src_enc = tf.constant(np.random.uniform(size=(src_seq_len, 2, 8)),
                                  tf.float64)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float64)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)
            target_ids = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                             [5, 6, 7, 8], [10, 5, 2, 5]],
                            dtype=tf.int32))
            target_labels = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                             [5, 7, 8, 10], [10, 5, 2, 4]],
                            dtype=tf.int32))
            target_paddings = tf.transpose(
                tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                             [0, 1, 0, 0], [1, 1, 1, 1]],
                            dtype=tf.float64))
            target_transcripts = tf.constant(
                ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
            target_weights = 1.0 - target_paddings

            targets = py_utils.NestedMap({
                'ids': target_ids,
                'labels': target_labels,
                'weights': target_weights,
                'paddings': target_paddings,
                'transcripts': target_transcripts,
            })
            metrics = dec.FPropDefaultTheta(encoder_outputs, targets).metrics
            loss = metrics['loss'][0]
            all_vars = tf.trainable_variables()
            grads = tf.gradients(loss, all_vars)

            def DenseGrad(var, grad):
                if isinstance(grad, tf.Tensor):
                    return grad
                elif isinstance(grad, tf.IndexedSlices):
                    return tf.math.unsorted_segment_sum(
                        grad.values, grad.indices,
                        tf.shape(var)[0])

            dense_grads = [DenseGrad(x, y) for (x, y) in zip(all_vars, grads)]

            self.evaluate(tf.global_variables_initializer())

            test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())
            # Second run to make sure the function is deterministic.
            test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())

            symbolic_grads = [x.eval() for x in dense_grads if x is not None]
            numerical_grads = []
            for v in all_vars:
                numerical_grads.append(
                    test_utils.ComputeNumericGradient(sess, loss, v))

            for x, y in zip(symbolic_grads, numerical_grads):
                self.assertAllClose(x, y)
Example #26
  def _InputBatch(self):
    return py_utils.NestedMap(
        inp=tf.constant(1.0, shape=[128, 3], dtype=tf.float32))
Example #27
    def _InputBatch(self):
        np.random.seed(1)
        bs, sl = 10, 7
        src_ids = tf.constant(
            np.random.randint(low=0,
                              high=8192 - 1,
                              size=[bs, sl],
                              dtype=np.int32))
        tgt_ids = tf.constant(
            np.random.randint(low=0,
                              high=8192 - 1,
                              size=[bs, sl],
                              dtype=np.int32))
        tgt_labels = tf.constant(
            np.random.randint(low=0,
                              high=8192 - 1,
                              size=[bs, sl],
                              dtype=np.int32))
        tgt_weights = tf.constant(np.ones(shape=[bs, sl], dtype=np.float32))

        src_paddings = tf.zeros([bs, sl])
        tgt_paddings = tf.zeros([bs, sl])

        ret = py_utils.NestedMap()
        ret.src = py_utils.NestedMap()
        ret.tgt = py_utils.NestedMap()

        if self.params.split:
            src_ids = tf.split(src_ids, 2, 0)
            src_paddings = tf.split(src_paddings, 2, 0)
            tgt_ids = tf.split(tgt_ids, 2, 0)
            tgt_labels = tf.split(tgt_labels, 2, 0)
            tgt_paddings = tf.split(tgt_paddings, 2, 0)
            tgt_weights = tf.split(tgt_weights, 2, 0)

            ret.src.ids = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: src_ids[0], lambda: src_ids[1])
            ret.src.paddings = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: src_paddings[0], lambda: src_paddings[1])
            ret.tgt.ids = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: tgt_ids[0], lambda: tgt_ids[1])
            ret.tgt.labels = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: tgt_labels[0], lambda: tgt_labels[1])
            ret.tgt.paddings = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: tgt_paddings[0], lambda: tgt_paddings[1])
            ret.tgt.weights = tf.cond(
                tf.equal(tf.mod(py_utils.GetGlobalStep(), 2), 0),
                lambda: tgt_weights[0], lambda: tgt_weights[1])
        else:
            ret.src.ids = src_ids
            ret.src.paddings = src_paddings
            ret.tgt.ids = tgt_ids
            ret.tgt.labels = tgt_labels
            ret.tgt.paddings = tgt_paddings
            ret.tgt.weights = tgt_weights

        return ret
Example #28
  def _InputBatch(self):
    return [
        py_utils.NestedMap(
            inp=tf.constant(1.0, shape=[16, 3], dtype=tf.float32))
        for _ in range(8)
    ]
Example #29
    def testOrientedNMSIndices(self):
        utils_3d = detection_3d_lib.Utils3D()

        # Assignments and IoU scores calculated offline.
        bboxes_data = tf.constant(
            [[
                [10.35, 8.429, -1.003, 3.7, 1.64, 1.49, 1.582],
                [10.35, 8.429, -1.003, 3.7, 1.64, 1.49, 0.0],  # box 0 rotated
                [11.5, 8.429, -1.003, 3.7, 1.64, 1.49, 1.0],  # Rotated to overlap
                [13.01, 8.149, -0.953, 4.02, 1.55, 1.52, 1.592],
                [13.51, 8.39, -1.0, 4.02, 1.55, 1.52, 1.592],  # Slight translation
                [13.51, 8.39, -1.0, 1.0, 1.0, 1.52, 1.592],  # Smaller box
                [13.51, 8.39, -1.0, 1.0, 1.0, 1.52, 1.9],  # Smaller box
            ]],
            dtype=tf.float32)

        # Notes on the data:
        # Let's say we have 3 classes and a threshold of 0.1.
        # Keep boxes [0, 3] for class 0.
        # Keep box [6] only for class 1.
        # Keep box [2] for class 2.
        scores_data = tf.constant([[
            [0.9, 0.1, 0.0],
            [0.89, 0.1, 0.01],
            [0.5, 0.01, 0.49],
            [0.8, 0.1, 0.1],
            [0.79, 0.11, 0.2],
            [0.2, 0.8, 0.1],
            [0.1, 0.9, 0.0],
        ]],
                                  dtype=tf.float32)

        with self.session() as sess:
            outputs = utils_3d.BatchedOrientedNMSIndices(bboxes_data,
                                                         scores_data,
                                                         nms_iou_threshold=0.1,
                                                         score_threshold=0.3,
                                                         max_boxes_per_class=5)
            indices, scores, valid_mask = sess.run(outputs)

            class_masks = [
                valid_mask[0, cls_idx, :].astype(bool)
                for cls_idx in range(3)
            ]
            # Check the correct number of valid results per class
            self.assertEqual(class_masks[0].sum(), 2)
            self.assertEqual(class_masks[1].sum(), 1)
            self.assertEqual(class_masks[2].sum(), 1)

            # Check the results for each class
            self.assertAllEqual(indices[0, 0, class_masks[0]], [0, 3])
            self.assertAllClose(scores[0, 0, class_masks[0]], [0.9, 0.8])

            self.assertAllEqual(indices[0, 1, class_masks[1]], [6])
            self.assertAllClose(scores[0, 1, class_masks[1]], [0.9])

            self.assertAllEqual(indices[0, 2, class_masks[2]], [2])
            self.assertAllClose(scores[0, 2, class_masks[2]], [0.49])

            # Use a list of score thresholds instead
            outputs = utils_3d.BatchedOrientedNMSIndices(
                bboxes_data,
                scores_data,
                nms_iou_threshold=[0.1, 0.1, 0.1],
                score_threshold=[0.899, 0.5, 0.3],
                max_boxes_per_class=5)
            indices, scores, valid_mask = sess.run(outputs)

            class_masks = [
                valid_mask[0, cls_idx, :].astype(bool)
                for cls_idx in range(3)
            ]
            # Check the correct number of valid results per class
            self.assertEqual(class_masks[0].sum(), 1)
            self.assertEqual(class_masks[1].sum(), 1)
            self.assertEqual(class_masks[2].sum(), 1)

            # Check the results for each class
            self.assertAllEqual(indices[0, 0, class_masks[0]], [0])
            self.assertAllClose(scores[0, 0, class_masks[0]], [0.9])

            self.assertAllEqual(indices[0, 1, class_masks[1]], [6])
            self.assertAllClose(scores[0, 1, class_masks[1]], [0.9])

            self.assertAllEqual(indices[0, 2, class_masks[2]], [2])
            self.assertAllClose(scores[0, 2, class_masks[2]], [0.49])
Example #30
    def ScaleGradients(self, var_grads, gradient_adjuster=None):
        """Scales gradients according to training params.

    Args:
      var_grads: a `.NestedMap` whose values are (var, grad) pairs.
      gradient_adjuster: If not None, a function that mutates the given var_grads.

    Returns:
      A `.NestedMap` containing

      - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs,
        where gradients have already been scaled.
      - grad_scale: the gradient scale. 0 if gradient updates should be skipped
        for the step. (Optional, only returned in case global norm clipping is
        used.)
    """
        p = self.params

        # Computes gradients' norm and adds their summaries. Note that all_grad_norm
        # may be nan, which may cause grad_scale to be nan.
        for name, vg in var_grads.FlattenItems():
            summary_utils.AddNormSummary(
                py_utils.SanitizeScopeKey(name) + '/' + p.name, vg)
        flatten = py_utils.Flatten(var_grads)
        all_grad_norm = tf.sqrt(py_utils.SumSquared([g for (_, g) in flatten]))
        all_var_norm = tf.sqrt(py_utils.SumSquared([v for (v, _) in flatten]))
        grad_norm_is_nan_or_inf = tf.math.logical_or(
            tf.math.is_nan(all_grad_norm), tf.math.is_inf(all_grad_norm))
        self._AddEvalMetric('grad_norm_is_nan_or_inf', grad_norm_is_nan_or_inf,
                            tf.constant(1.0))

        # Optional gradient adjustment. Note that this happens after computing
        # all_grad_norm.
        if gradient_adjuster is not None:
            tf.logging.info('gradient_adjuster=%s', gradient_adjuster)
            var_grads = gradient_adjuster(var_grads)

        # Handles NaN/Inf gradients.
        has_nan_or_inf = py_utils.HasNanOrInfGradient(var_grads)
        # Grad norm can still be inf even if none of the individual grad is inf.
        has_nan_or_inf = tf.math.logical_or(has_nan_or_inf,
                                            grad_norm_is_nan_or_inf)
        self._AddEvalMetric('has_nan_or_inf', has_nan_or_inf, tf.constant(1.0))

        return_values = py_utils.NestedMap()
        if p.clip_gradient_single_norm_to_value:
            # Currently using both types of clipping simultaneously is unsupported.
            if p.clip_gradient_norm_to_value:
                raise ValueError(
                    'Cannot use clip_gradient_single_norm_to_value=%f and '
                    'clip_gradient_norm_to_value=%f.' %
                    (p.clip_gradient_single_norm_to_value,
                     p.clip_gradient_norm_to_value))
            final_var_grads = py_utils.ApplyGradNormClipping(
                var_grads, p.clip_gradient_single_norm_to_value)

        else:
            grad_scale = self._GetGlobalGradScale(all_grad_norm,
                                                  has_nan_or_inf)
            # grad_norm/all is both an eval metric (collected by the trainer)
            # and a summary (collected by the controller).
            summary_utils.scalar(f'grad_norm/all/{p.name}', all_grad_norm)
            self._AddEvalMetric('grad_norm/all', all_grad_norm,
                                tf.constant(1.0))
            self._AddEvalMetric('var_norm/all', all_var_norm, tf.constant(1.0))
            self._AddEvalMetric('grad_scale_all', grad_scale, tf.constant(1.0))
            final_var_grads = py_utils.ApplyGradMultiplier(
                var_grads, grad_scale)
            return_values.grad_scale = grad_scale

        return_values.final_var_grads = final_var_grads
        return return_values
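
A hedged usage sketch; the learner instance and the variable/gradient names are assumptions:

# var_grads values are (var, grad) pairs, as the docstring states.
var_grads = py_utils.NestedMap(w=(w_var, w_grad), b=(b_var, b_grad))
scaled = learner.ScaleGradients(var_grads)
final_var_grads = scaled.final_var_grads
# With global-norm clipping, scaled.grad_scale is 0 when the step should
# be skipped due to NaN/Inf gradients.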