示例#1
0
    def _True(anchor, bboxes):
        """Ranks bboxes by the signed angle of their centroids w.r.t. the anchor.

        This is the branch used when at least one bbox is present.

        Args:
          anchor: A 2-vector (x, y); it is treated as a direction from the origin.
          bboxes: A tensor whose first dimension indexes the bboxes; it is handed
            to BBoxesCentroid to obtain one 2-d centroid per bbox.

        Returns:
          Indices into bboxes, ordered by decreasing angular score.
        """
        num_boxes = tf.shape(bboxes)[0]
        centers = BBoxesCentroid(bboxes)

        # Per-box dot product <centroid, anchor>, computed as a matrix-vector
        # product against the anchor laid out as a column.
        anchor_column = tf.expand_dims(anchor, 1)
        dots = tf.squeeze(tf.matmul(centers, anchor_column), axis=1)

        # cos(angle) = dot / (|anchor| * |centroid|); degenerate (zero-length)
        # vectors get a cosine of 0 instead of a division by zero.
        lengths = tf.norm(anchor) * tf.norm(centers, axis=1)
        cos_phi = tf.where(tf.greater(lengths, 0), dots / lengths,
                           tf.zeros([num_boxes], lengths.dtype))

        # The cosine alone cannot tell on which side of O-->anchor a centroid
        # lies, so the z-component of anchor x centroid supplies the sign.
        # tf.linalg.cross needs 3-vectors and does not broadcast: both operands
        # are padded with z = 0 and the anchor is tiled to [num_boxes, 3].
        z_pad = [[0, 0], [0, 1]]
        anchor_3d = tf.tile(tf.pad(tf.expand_dims(anchor, 0), z_pad),
                            [num_boxes, 1])
        centers_3d = tf.pad(centers, z_pad)
        cross_is_positive = tf.greater(
            tf.linalg.cross(anchor_3d, centers_3d), 0)[:, 2]

        # Positive cross product: score = -1 - cos, which lies in [-2, 0].
        # Otherwise:              score =  1 + cos, which lies in [0, 2].
        # Sorting the scores in descending order therefore walks once around
        # the origin, starting from the anchor direction.
        ranking = tf.where(cross_is_positive, -1 - cos_phi, 1 + cos_phi)

        _, order = tf.nn.top_k(ranking, num_boxes, sorted=True)
        return order
示例#2
0
    def _finish(self, update_ops, name_scope):
        """Finishes both child optimizers and, if enabled, applies global grafting.

        With global grafting, the per-variable step norms stored in the
        "m_step_norm" / "d_step_norm" slots are combined into two global norms.
        Every variable is then moved along its stored direction step (slot
        "scratch_copy"), rescaled by the ratio of the global magnitude norm to
        the global direction norm.

        Args:
          update_ops: Ops that must run before the child optimizers are finished.
          name_scope: Name given to the returned grouped op; also used, with the
            suffixes "_m" and "_d", for the child optimizers.

        Returns:
          A grouped op. In global-norm mode it groups the per-variable step ops;
          otherwise it groups the child finish ops together with update_ops.
        """
        with tf.control_dependencies(update_ops):
            ops1 = self.magnitude_optimizer._finish([], name_scope + "_m")  # pylint: disable=protected-access
            ops2 = self.direction_optimizer._finish([], name_scope + "_d")  # pylint: disable=protected-access

            if self.use_global_norm:  # apply global grafting
                with tf.control_dependencies([ops1, ops2]):
                    # Sum the squared per-variable norms directly as tensors.
                    # Accumulating through tf.assign_add into a fresh variable
                    # does not work in graph mode: the assign ops are never
                    # fetched nor depended upon, so the accumulator would be
                    # read without ever being updated.
                    m_global_sq = 0.0
                    d_global_sq = 0.0
                    for var in self._variables:
                        m_global_sq += tf.square(
                            self.get_slot(var, "m_step_norm"))
                        d_global_sq += tf.square(
                            self.get_slot(var, "d_step_norm"))

                    multiplier = tf.sqrt(m_global_sq /
                                         tf.maximum(d_global_sq, 1e-30))

                    step_ops = []
                    for var in self._variables:
                        d_step = self.get_slot(var, "scratch_copy")
                        # Use this variable's own direction norm for the guard,
                        # not a value left over from the loop above.
                        d_step_norm = self.get_slot(var, "d_step_norm")
                        step = tf.where(tf.greater(d_step_norm, 0),
                                        multiplier * d_step,
                                        tf.zeros_like(d_step))
                        step_op = tf.assign_add(
                            var, self._learning_rate_tensor * step)
                        step_ops.append(step_op)
                    return tf.group(*step_ops, name=name_scope)

        return tf.group(*([ops1, ops2] + update_ops), name=name_scope)
示例#3
0
  def _internal_apply_dense(self, grad, var, magnitude_optimizer_apply_fn,
                            direction_optimizer_apply_fn):  # pylint: disable=g-doc-args
    """Main optimization logic of AdaGraft, which calls the child optimizers.

    Args:
      grad: Tensor containing gradients.
      var: Tensor containing parameter values.
      magnitude_optimizer_apply_fn: Callable (grad, var) that applies the
        magnitude optimizer to var in place.
      direction_optimizer_apply_fn: Callable (grad, var) that applies the
        direction optimizer to var in place.

    Returns:
      Without global norm: the op assigning var to the original weights plus
      learning_rate times the grafted step.
      With global norm: the op that stores the direction step into the
      'scratch_copy' slot, after var has been restored to its original value.
      No step is applied to var in that mode.

    Pseudocode:
    - Copy weights into scratch space 'scratch_copy'.
    - Run magnitude_optimizer in-place.
    - Use scratch copy to figure out how far we moved ('magnitude_step').
    - Copy weights back.
    - Run direction_optimizer in-place.
    - Move weights along the line segment with scratch_copy.
    """

    # Remember the variable so that it can be iterated over later
    # (global-norm mode only).
    if self.use_global_norm:
      self._variables.append(var)

    # Snapshot the current parameter values into the scratch slot. old_var is
    # the result of that assign and is used below as "the original weights".
    scratch_slot = self.get_slot(var, "scratch_copy")
    old_var = tf.assign(scratch_slot, var)

    # Run the magnitude optimizer only after the snapshot has been taken.
    with tf.control_dependencies([old_var]):
      m_updated_var = magnitude_optimizer_apply_fn(grad, var)  # pylint: disable=protected-access

    # After the magnitude optimizer ran, measure how far var moved.
    with tf.control_dependencies([m_updated_var]):
      m_step = var - old_var
      m_step_norm = tf.norm(m_step)
      # Persist the norm in a slot when it is needed outside this function.
      if self.diagnostic or self.use_global_norm:
        m_step_norm = tf.assign(self.get_slot(var, "m_step_norm"), m_step_norm)

    # Restore the original weights (once the magnitude norm is computed), then
    # run the direction optimizer from that same starting point.
    with tf.control_dependencies([m_step_norm]):
      flushed_var = tf.assign(var, old_var)
    with tf.control_dependencies([flushed_var]):
      d_updated_var = direction_optimizer_apply_fn(grad, var)  # pylint: disable=protected-access

    # After the direction optimizer ran, measure its step and graft: keep the
    # direction of d_step but rescale it to the norm of the magnitude step.
    with tf.control_dependencies([d_updated_var]):
      d_step = var - old_var
      d_step_norm = tf.norm(d_step)
      if self.diagnostic or self.use_global_norm:
        d_step_norm = tf.assign(self.get_slot(var, "d_step_norm"), d_step_norm)
      if self.use_global_norm:
        # Global-norm mode: undo the direction optimizer's update and store the
        # raw direction step in the scratch slot (overwriting the snapshot).
        flushed_var = tf.assign(var, old_var)
        with tf.control_dependencies([d_step_norm, flushed_var]):
          return tf.assign(scratch_slot, d_step)
      # Per-variable grafting. A zero direction step yields a zero update; the
      # 1e-30 floor keeps the unselected tf.where branch free of a division by
      # zero.
      step = tf.where(
          tf.greater(d_step_norm, 0),
          (m_step_norm / tf.maximum(d_step_norm, 1e-30)) * d_step,
          tf.zeros_like(d_step))
      return tf.assign(var, old_var + self._learning_rate_tensor * step)
示例#4
0
 def KeyFunc(batch):
     """Returns the bucket upper bound selected by the batch's smallest key.

     The bucket index is the number of entries of p.bucket_upper_bound that
     the smallest key in batch.bucket_keys strictly exceeds.
     """
     smallest_key = tf.reduce_min(batch.bucket_keys)
     exceeds_bound = tf.greater(smallest_key, p.bucket_upper_bound)
     bucket_idx = tf.reduce_sum(tf.cast(exceeds_bound, tf.int32))
     upper_bounds = tf.constant(p.bucket_upper_bound, dtype=tf.int64)
     return upper_bounds[bucket_idx]
示例#5
0
    def _preconditioned_update(self, var, partitioned_grads,
                               diagonal_grad_update):
        """Computes the matrix preconditioned update.

        The direction comes from the matrix-preconditioned gradient (with
        momentum applied when enabled); its length is rescaled to the L2 norm
        of the diagonal update.

        Args:
          var: Variable for which we are computing the preconditioned gradient.
          partitioned_grads: Partitioned gradients.
          diagonal_grad_update: Update as given by diagonal adagrad.

        Returns:
          The scaled preconditioned gradient.
        """
        raw_grad = self._compute_preconditioned_raw_grad(
            var, partitioned_grads)

        direction = raw_grad
        if self._momentum > 0.0:
            # Exponential moving average of the preconditioned gradient, kept
            # in (and written back to) the momentum slot.
            momentum_slot = self.get_slot(var, "precond_grad_momentum")
            direction = state_ops.assign(
                momentum_slot,
                momentum_slot * self._momentum_tensor +
                raw_grad * (1.0 - self._momentum_tensor))

        # Direction from the matrix preconditioner, step size from the
        # diagonal update.
        direction_norm = tf.sqrt(tf.reduce_sum(tf.square(direction)))
        diagonal_norm = tf.sqrt(tf.reduce_sum(tf.square(diagonal_grad_update)))
        # A zero-norm direction is left unscaled (multiplier 1); the 1e-30
        # floors keep the ratio finite.
        scale = tf.where(
            tf.greater(direction_norm, 0.0),
            tf.maximum(diagonal_norm, 1e-30) /
            (tf.maximum(direction_norm, 1e-30)), 1.0)
        return direction * scale
示例#6
0
    def _inverse_pth_root_graph(self, epsilon):
        """Builds a stand-alone graph for the inverse p-th root and serializes it.

        The graph exposes two placeholders named "input" and "exponent" and two
        results named "output" and "diff".

        Args:
          epsilon: Forwarded unchanged to the root-computation routines.

        Returns:
          The serialized GraphDef of the constructed graph.
        """
        root_graph = tf.Graph()
        with root_graph.as_default():
            raw_exponent = tf.placeholder(dtype=tf.float32,
                                          name="exponent",
                                          shape=None)
            # Force the exponent to a scalar, then apply the multiplier.
            exponent_t = tf.reshape(raw_exponent, [])
            exponent_t = exponent_t * self._exponent_multiplier
            input_t = tf.placeholder(dtype=tf.float32,
                                     name="input",
                                     shape=None)

            # For p = 2, 4 or 8 (with p = -1 / exponent) the iterative
            # Newton-Schur method can be used.
            is_p_2_or_4 = tf.math.logical_or(
                tf.equal(-1.0 / exponent_t, 2),
                tf.equal(-1.0 / exponent_t, 4))
            is_p_2_4_8 = tf.math.logical_or(is_p_2_or_4,
                                            tf.equal(-1.0 / exponent_t, 8))
            # 4096 is the largest dimension SVD is tractable for.
            is_large = tf.greater(tf.shape(input_t)[0], 4096)
            use_specialized = tf.math.logical_and(is_large, is_p_2_4_8)

            output, diff = tf.cond(
                use_specialized,
                lambda: self._specialized_inverse_pth_root(
                    input_t, exponent_t, epsilon),
                lambda: self._generalized_inverse_pth_root(
                    input_t, exponent_t, epsilon))

            # Give the results stable names so they can be looked up in the
            # serialized graph.
            tf.identity(output, "output")
            tf.identity(tf.cast(diff, tf.float32), "diff")
        return root_graph.as_graph_def().SerializeToString()
示例#7
0
def ReorderIndicesByPhi(anchor, bboxes):
    """Sorts bboxes by their angle relative to the anchor point.

    Args:
      anchor: A vector of (x0, y0).
      bboxes: A matrix of shape [N, 4].

    Returns:
      A permutation of tf.range(n) which can be used to reshuffle bboxes to the
      sorted order (e.g., tf.gather(bboxes, indices)). When there are no
      bboxes, an empty int32 vector is returned.
    """
    @tf.Defun(anchor.dtype, bboxes.dtype)
    def _True(anchor, bboxes):
        """Branch taken when the number of bboxes is non-zero."""
        count = tf.shape(bboxes)[0]
        centroid = BBoxesCentroid(bboxes)

        # Dot product between every centroid and the anchor.
        anchor_col = tf.expand_dims(anchor, 1)
        dot = tf.squeeze(tf.matmul(centroid, anchor_col), axis=1)

        # Divide by the vector lengths to obtain the cosine of the angle; a
        # zero length yields a cosine of 0 instead of a division by zero.
        norm = tf.norm(anchor) * tf.norm(centroid, axis=1)
        cosine = tf.where(tf.greater(norm, 0), dot / norm,
                          tf.zeros([count], norm.dtype))

        # The sign of the angle anchor--O--point is taken from the z-component
        # of the cross product anchor x centroid. tf.linalg.cross only accepts
        # 3-vectors and does not broadcast, so z = 0 is appended to both
        # operands and the anchor is tiled to shape [count, 3].
        append_z = [[0, 0], [0, 1]]
        anchor_xyz = tf.tile(tf.pad(tf.expand_dims(anchor, 0), append_z),
                             [count, 1])
        centroid_xyz = tf.pad(centroid, append_z)
        positive_side = tf.greater(
            tf.linalg.cross(anchor_xyz, centroid_xyz), 0)[:, 2]

        # Positive cross product: -1 - cosine, i.e. a score in [-2, 0].
        # Otherwise:               1 + cosine, i.e. a score in [0, 2].
        # Descending order over this score visits the points in one sweep
        # around the origin that starts at the anchor direction.
        score = tf.where(positive_side, -1 - cosine, 1 + cosine)

        _, sorted_indices = tf.nn.top_k(score, count, sorted=True)
        return sorted_indices

    @tf.Defun(anchor.dtype, bboxes.dtype)
    def _False(anchor, bboxes):
        """Branch taken when there are no bboxes: nothing to reorder."""
        del anchor, bboxes
        return tf.zeros([0], dtype=tf.int32)

    has_boxes = tf.greater(tf.shape(bboxes)[0], 0)
    outputs = functional_ops.If(has_boxes, [anchor, bboxes], _True, _False)
    return outputs[0]
示例#8
0
 def PostTrainingStepUpdate(self, global_step):
     """Updates moving_mean and moving_variance after each training step.

     The batch mean and variance are derived from the sufficient statistics
     held in the accumulators. Each moving statistic is then pulled towards
     its batch value by a factor of (1 - p.decay). The moving statistics are
     left unchanged where counts is not greater than 0.5. All three
     accumulators are reset before returning.

     Args:
       global_step: Not used by this method.

     Returns:
       An op grouping the moving-mean and moving-variance updates.
     """
     p = self.params
     # Sufficient statistics accumulated over the microbatches.
     counts = self.accumulators.counts.GetValue()
     mean_ss = self.accumulators.mean_ss.GetValue()
     variance_ss = self.accumulators.variance_ss.GetValue()
     batch_mean, batch_variance = tf.nn.normalize_moments(
         counts, mean_ss, variance_ss, None)
     decay = tf.convert_to_tensor(1.0 - p.decay, p.dtype)

     def _MoveTowards(moving_var, batch_stat, op_name):
         """Builds the op moving moving_var towards batch_stat."""
         with tf.colocate_with(moving_var):
             delta = tf.where(
                 tf.greater(counts, 0.5),
                 (moving_var - tf.cast(batch_stat, p.dtype)) * decay,
                 tf.zeros_like(moving_var))
             return tf.assign_sub(moving_var, delta, name=op_name)

     with tf.name_scope(p.name) as scope:
         mean_update = _MoveTowards(self.vars.moving_mean, batch_mean,
                                    'moving_mean_update')
         var_update = _MoveTowards(self.vars.moving_variance, batch_variance,
                                   'moving_variance_update')
         py_utils.CheckNumerics(
             self.vars.moving_mean,
             'moving mean of {} failed numeric check'.format(scope))
         py_utils.CheckNumerics(
             self.vars.moving_variance,
             'moving variance of {} failed numeric check'.format(scope))

     for accumulator in (self.accumulators.counts,
                         self.accumulators.mean_ss,
                         self.accumulators.variance_ss):
         accumulator.Reset()
     return tf.group(mean_update, var_update)
示例#9
0
def IsWithinBBox(points, bbox):
    """Checks if points are within a 2-d bbox.

    The function returns true if points are strictly inside the box. It also
    returns true when the points are exactly on the box edges. Boxes with zero
    area never contain any point.

    Args:
      points: a float Tensor of shape [..., 2] of points to be tested. The last
        coordinates are (x, y).
      bbox: a float Tensor of shape [..., 4, 2] of bboxes. The last coordinates
        are the four corners of the bbox and (x, y). The corners are assumed to
        be given in counter-clockwise order; this is asserted at run time.

    Returns:
      Tensor: If ``pshape = tf.shape(points)[:-1]`` and
      ``bshape = tf.shape(bbox)[:-2]``, returns a boolean tensor of shape
      ``tf.concat(pshape, bshape)``, where each element is true if the point is
      inside the corresponding box.  If a point falls exactly on an edge of the
      bbox, it is also true.
    """
    bshape = py_utils.GetShape(bbox)[:-2]
    pshape = py_utils.GetShape(points)[:-1]
    bbox = py_utils.HasShape(bbox, tf.concat([bshape, [4, 2]], axis=0))
    points = py_utils.HasShape(points, tf.concat([pshape, [2]], axis=0))
    # The four corners, in the order in which they are stored.
    v1, v2, v3, v4 = (bbox[..., 0, :], bbox[..., 1, :], bbox[..., 2, :],
                      bbox[..., 3, :])
    # Every triple of consecutive corners must turn counter-clockwise.
    v1v2v3_check = tf.reduce_all(_IsCounterClockwiseDirection(v1, v2, v3))
    v2v3v4_check = tf.reduce_all(_IsCounterClockwiseDirection(v2, v3, v4))
    v4v1v2_check = tf.reduce_all(_IsCounterClockwiseDirection(v4, v1, v2))
    v3v4v1_check = tf.reduce_all(_IsCounterClockwiseDirection(v3, v4, v1))
    # Each assertion reports exactly the corners that its check examined.
    with tf.control_dependencies([
            py_utils.Assert(v1v2v3_check, [v1, v2, v3]),
            py_utils.Assert(v2v3v4_check, [v2, v3, v4]),
            py_utils.Assert(v4v1v2_check, [v4, v1, v2]),
            py_utils.Assert(v3v4v1_check, [v3, v4, v1])
    ]):
        # A point is inside when it is on the left of (or on) all four edges.
        is_inside = tf.math.logical_and(
            tf.math.logical_and(_IsOnLeftHandSideOrOn(points, v1, v2),
                                _IsOnLeftHandSideOrOn(points, v2, v3)),
            tf.math.logical_and(_IsOnLeftHandSideOrOn(points, v3, v4),
                                _IsOnLeftHandSideOrOn(points, v4, v1)))
    has_non_zero_area = tf.greater(_BBoxArea(bbox), 0)
    is_inside = tf.logical_and(tf.cast(is_inside, tf.bool), has_non_zero_area)
    # Swap the last two dimensions. The values are cast to int32 for the
    # einsum and back to bool afterwards.
    is_inside = tf.einsum('...ij->...ji', tf.cast(is_inside, tf.int32))
    return tf.cast(is_inside, tf.bool)
示例#10
0
    def _BodyFn(curr_idx, distance_to_selected, sampled_idx, closest_idx):
        """Loop body for farthest point sampler.

        Selects one point per batch element for iteration curr_idx, records it,
        and updates the running distance from every point to the set of
        selected points.

        Args:
          curr_idx: Scalar index of the current loop iteration.
          distance_to_selected: Per-point distance to the closest point selected
            so far; combined element-wise with a [batch_size, num_points]
            tensor below.
          sampled_idx: Container supporting write(index, value) that collects
            the selected indices (presumably a tf.TensorArray - TODO confirm).
          closest_idx: For every point, the loop iteration at which its closest
            selected point was chosen.

        Returns:
          The updated loop state
          (curr_idx + 1, distance_to_selected, sampled_idx, closest_idx).
        """
        def _GetRandomRealPoint():
            """Select the first point.

            For the first point, we want any random real (non padded) point, so
            we create a random value per point, and then set all padded ones to
            some large value (more than the maxval). We then take the min per
            batch element to get the first points.

            Returns:
              Tensor containing the index of a random point selected for each
              example in the batch.
            """
            random_values = tf.random.uniform((batch_size, num_points),
                                              minval=0,
                                              maxval=1,
                                              dtype=tf.float32,
                                              seed=random_seed)
            # Points with padding == 0 keep their random value; every other
            # point gets padding * 10.
            random_values = tf.where(tf.equal(padding, 0.0), random_values,
                                     padding * 10)
            return tf.argmin(random_values, axis=1, output_type=tf.int32)

        def _GetFurthestPoint():
            """Get point that is furthest from those already selected.

            We also bias the sampling towards real points by setting the
            distance to padded points negative until we are out of real points.

            Returns:
              Tensor containing the index of the next farthest point selected
              for each example in the batch.
            """
            # Set padded points distance to negative so they aren't selected.
            padding_masked_distance_to_selected = tf.where(
                tf.equal(padding, 0.0), distance_to_selected, -1.0 * tf.ones(
                    (batch_size, num_points), dtype=tf.float32))
            # But only do this when we still have valid points left.
            padding_masked_distance_to_selected = tf.where(
                tf.less(curr_idx, num_valid_points),
                padding_masked_distance_to_selected, distance_to_selected)
            return tf.argmax(padding_masked_distance_to_selected,
                             axis=-1,
                             output_type=tf.int32)

        def _GetSeededPoint():
            """Select a seeded point.

            Seeded points are assumed to be at the beginning of the original
            points, so the point selected at iteration curr_idx is simply the
            point with index curr_idx.

            Returns:
              Tensor containing the index of the next seeded point to select
              for each example in the batch.
            """
            return tf.ones((batch_size, ), dtype=tf.int32) * curr_idx

        # Select indices for this loop iteration.
        def _Seeded():
            # With seeding: take the seeded points first, then farthest points.
            return tf.cond(tf.less(curr_idx, num_seeded_points),
                           _GetSeededPoint, _GetFurthestPoint)

        def _Real():
            # Without seeding: a random real point first, then farthest points.
            return tf.cond(tf.equal(curr_idx, 0), _GetRandomRealPoint,
                           _GetFurthestPoint)

        new_selected = tf.cond(tf.greater(num_seeded_points, 0), _Seeded,
                               _Real)
        sampled_idx = sampled_idx.write(curr_idx, new_selected)

        # Extract the distance to the latest point selected to update
        # distance_to_selected.
        # Rows of (batch index, selected point index) for tf.gather_nd.
        new_selected_gather_idx = tf.stack(
            [tf.range(batch_size), new_selected], axis=1)
        if precomputed_squared_distance is not None:
            # Look up the distances of the selected point to all points.
            new_distance = tf.gather_nd(precomputed_squared_distance,
                                        new_selected_gather_idx)
        else:
            # Compute the distances from the selected point to all points.
            new_points = tf.reshape(
                tf.gather_nd(points, new_selected_gather_idx),
                [batch_size, 1, dims])
            new_distance = tf.reshape(
                SquaredDistanceMatrix(points, new_points),
                [batch_size, num_points])

        # Points that are strictly closer to the new selection than to any
        # earlier one.
        is_newly_closest = tf.less(new_distance, distance_to_selected)
        distance_to_selected = tf.minimum(distance_to_selected, new_distance)

        # Track the index to the closest selected point.
        # Note that the value stored is the loop iteration (curr_idx), not the
        # index of the selected point itself.
        new_selected_tiled = tf.tile([[curr_idx]], [batch_size, num_points])
        closest_idx = tf.cond(
            tf.equal(curr_idx, 0),
            # At the first loop iteration, the init points are the closest.
            lambda: new_selected_tiled,
            # Otherwise, update with the new points based on the distances.
            lambda: tf.where(is_newly_closest, new_selected_tiled, closest_idx)
        )
        return curr_idx + 1, distance_to_selected, sampled_idx, closest_idx
示例#11
0
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
    """Get indices to k-neighbors of query_points in points.

    Padding is returned along-side indices. Non-padded points are guaranteed to
    be unique (non-repeated) points from original non-padded points.

    Padded points arise due to either a lack of points (k exceeds the number
    of original non-padded points) or points are too far away (exceeds max
    distance).

    Note: Padded point indices may refer to padded points from the original, or
    may be duplicates of the closest point.

    TODO(weihan,jngiam): PointCNN implementation makes an assumption that padded
    points are repeated points from the original points. This behavior is
    maintained here, but we should update PointCNN to respect indices paddings.

    Args:
      points: tensor of shape [N, P1, dims].
      query_points: tensor of shape [N, P2, dims]
      k: Integer.
      points_padding: optional tensor of shape [N, P1] containing True/1.0 iff
        the point is a padded point. if None, then all points are considered
        real points.
      max_distance: float representing the maximum distance that each neighbor
        can be. If there are no points within the distance, then the closest
        point is returned (regardless of distance). If this is set to None,
        then no filtering by distance is performed.
      sample_neighbors_uniformly: boolean specifying whether to sample neighbors
        uniformly if they are within max distance.

    Returns:
      A pair of tensors:

      - indices: tensor of shape [N, P2, k].
      - padding: tensor of shape [N, P2, k] where 1 represents a padded point,
        and 0 represents an unpadded (real) point.

    Raises:
      ValueError: if sample_neighbors_uniformly is set without max_distance.
    """
    batch, num_points = py_utils.GetShape(points, 2)
    query_points = py_utils.HasShape(query_points, [batch, -1, -1])
    _, num_queries = py_utils.GetShape(query_points, 2)

    # Pair-wise *squared* distances: any comparison against max_distance has
    # to use the squared threshold as well.
    sq_dist = SquaredDistanceMatrix(query_points, points)
    sq_dist = py_utils.HasShape(sq_dist, [batch, num_queries, num_points])

    has_padding = points_padding is not None
    if has_padding:
        # Push padded points past every real distance by adding a scalar that
        # is larger than the largest distance in the matrix.
        padding_row = tf.cast(tf.expand_dims(points_padding, 1), tf.float32)
        padding_row = py_utils.HasShape(padding_row, [batch, 1, num_points])
        large_scalar = tf.reduce_max(sq_dist) + 1
        sq_dist += padding_row * large_scalar

    if sample_neighbors_uniformly:
        if max_distance is None:
            raise ValueError(
                'Uniform sampling requires specifying max_distance.')
        # Replace the distance of every point within range by a uniformly
        # random value below the threshold. top_k over these values then picks
        # a random subset without replacement.
        within_range = tf.less_equal(sq_dist, max_distance**2)
        random_sq_dist = (tf.square(max_distance) *
                          tf.random_uniform(tf.shape(sq_dist)))
        sq_dist = tf.where(within_range, random_sq_dist, sq_dist)

    # top_k picks the largest values, so negate to get the k smallest
    # distances. Shapes are [N, P2, k].
    neg_top_dist, indices = tf.nn.top_k(-sq_dist, k=k, sorted=True)

    # A selected neighbor is padding when its distance reaches large_scalar.
    if has_padding:
        paddings = tf.greater_equal(-neg_top_dist, large_scalar)
    else:
        paddings = tf.zeros_like(neg_top_dist, dtype=tf.bool)

    if max_distance is not None:
        # Neighbors beyond max_distance are replaced by the closest point and
        # flagged as padding.
        too_far = tf.greater(-neg_top_dist, tf.square(max_distance))
        nearest = tf.tile(indices[:, :, :1], [1, 1, k])
        indices = tf.where(too_far, nearest, indices)
        paddings = tf.logical_or(paddings, too_far)

    indices = tf.reshape(indices, [batch, num_queries, k])
    return indices, tf.cast(paddings, tf.float32)
示例#12
0
    def AssignAnchors(self,
                      anchor_bboxes,
                      gt_bboxes,
                      gt_bboxes_labels,
                      gt_bboxes_mask,
                      foreground_assignment_threshold=0.5,
                      background_assignment_threshold=0.35,
                      background_class_id=0,
                      force_match=True,
                      similarity_fn=None):
        """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:

      - Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      - Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      - Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      - Use assigned_cls_mask for weighting the classification loss. The mask
        is set such that the loss applies to foreground and background
        assignments only - ignored anchors will be set to 0.
      - Use assigned_reg_mask for weighting the regression loss. The mask is set
        such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying if force matching is enabled. If
        force matching is enabled, then matched anchors which are also the
        highest scoring with a ground-truth box are considered foreground
        matches as long as their similarity score > 0.
      similarity_fn: Function that computes the a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes. The
        score must be non-negative, with greater scores representing more
        similar. The fore/background_assignment_thresholds will be applied to
        this score to determine if the an anchor is foreground, background or
        ignored. If set to None, the function will default to IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys

      - assigned_gt_idx: shape [A] index corresponding to the index of the
        assigned ground truth box. Anchors not assigned to a ground truth box
        will have the index set to -1.
      - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
      - assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.
      - assigned_gt_labels: shape [A] label assigned to bbox.
      - assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.
      - assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.
    """
        if similarity_fn is None:
            similarity_fn = self.IOU2DRotatedBoxes

        # Shape validation.
        anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
        num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
        gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
        num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

        # Compute similarity score and reduce max by anchors and by ground-truth.
        similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
        similarity_score = py_utils.HasShape(
            similarity_score, [num_anchor_bboxes, num_gt_bboxes])

        # Reduce over ground-truth boxes, so we have the max score per anchor.
        anchor_max_score = tf.reduce_max(similarity_score, axis=1)
        anchor_max_idx = tf.argmax(similarity_score, axis=1)

        if force_match:
            # Reduce over anchors, so we have the max score per ground truth box.
            gt_max_score = tf.reduce_max(similarity_score,
                                         axis=0,
                                         keepdims=True)

            # Force matches occur when the top matching gt bbox for an anchor is the
            # top matching anchor for the gt bbox. When force matching, we match
            # these boxes as long as their similarity score exceeds 0.
            force_matches = (
                tf.equal(similarity_score, gt_max_score)
                & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
                & tf.greater(similarity_score, 0.)
                & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
            force_match_indicator = tf.reduce_any(force_matches, axis=1)
            force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32),
                                        axis=1)

            # In assigning foreground/background anchors later, force_match_indicator
            # is used to determine which anchors are force foreground, and the index
            # assigned will be taken from anchor_max_idx.

            # Force matchers must also be the max scoring gt bbox per anchor.
            # We overwrite anchor_max_idx to ensure that the right match is done.
            anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                      anchor_max_idx)

        # Ensure that max score boxes are not padded boxes by setting score to 0
        # for boxes that are padded.
        gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
        anchor_max_score = tf.where(tf.equal(gathered_mask, 1),
                                    anchor_max_score,
                                    tf.zeros_like(anchor_max_score))

        # Boolean tensors corresponding to whether an anchor is background or
        # foreground based on thresholding.
        background_anchors = tf.less_equal(anchor_max_score,
                                           background_assignment_threshold)
        foreground_anchors = tf.greater_equal(anchor_max_score,
                                              foreground_assignment_threshold)
        if force_match:
            # Background anchors are below threshold and not force matches.
            background_anchors &= ~force_match_indicator
            # Foreground anchors are above thresholds or force matches.
            foreground_anchors |= force_match_indicator

        # Add dummy background bbox to gt_boxes to facilitate batch gather.
        dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

        # Since we are concatenating the dummy bbox, the index corresponds to the
        # number of boxes.
        dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

        gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
        gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                     axis=0)

        # Gather indices so that all foreground boxes are gathered from gt_bboxes,
        # while all background and ignore boxes gather the dummy_bbox.
        anchor_gather_idx = tf.where(
            foreground_anchors, anchor_max_idx,
            tf.constant(dummy_bbox_idx,
                        shape=py_utils.GetShape(anchor_max_idx),
                        dtype=anchor_max_idx.dtype))

        # Gather the bboxes and weights.
        assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
        assigned_gt_labels = tf.batch_gather(gt_bboxes_labels,
                                             anchor_gather_idx)

        # Set masks for classification and regression losses.
        assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                                    tf.float32)
        assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

        # Set assigned_gt_idx such that dummy boxes have idx = -1.
        assigned_gt_idx = tf.where(tf.equal(anchor_gather_idx, dummy_bbox_idx),
                                   tf.ones_like(anchor_gather_idx) * -1,
                                   anchor_gather_idx)
        assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

        return py_utils.NestedMap(
            assigned_gt_idx=assigned_gt_idx,
            assigned_gt_bbox=assigned_gt_bbox,
            assigned_gt_similarity_score=anchor_max_score,
            assigned_gt_labels=assigned_gt_labels,
            assigned_cls_mask=assigned_cls_mask,
            assigned_reg_mask=assigned_reg_mask)
示例#13
0
    def _AddNoise(self, batch):
        """Corrupts the source side of `batch` with denoising-style noise.

        The noise model follows https://arxiv.org/pdf/1711.00043. Three kinds
        of noise, all configured through `self.params.denoise`, are chained:

        1) local shuffling of tokens, bounded by `shuffle_tok_range`;
        2) random token removal with probability `drop_tok_prob`;
        3) random replacement by `blank_id` with probability `blank_tok_prob`.

        An example receives the corrupted ids/paddings only when a uniform
        draw is below `noise_sent_prob` AND its task id is one of `task_ids`;
        every other example keeps its original ids/paddings.

        `batch.src` is modified in place (`ids`, `paddings`, `ids_indicator`
        and `weights` are reassigned); the method returns nothing.

        Args:
          batch: a `.NestedMap` of the input batch.
        """
        def _BelongsToTasks(task_ids, special_task_ids):
            """Tells, per example, whether its task id is in a given list.

            Args:
              task_ids: Task ids for the input batch. Tensor of shape [batch].
              special_task_ids: A list of specified task ids.

            Returns:
              A boolean tensor of size [batch], true where the example's task
              id equals one of `special_task_ids`.
            """
            num_examples = py_utils.GetShape(task_ids)[0]
            ids_column = tf.expand_dims(task_ids, -1)
            candidates = tf.broadcast_to(
                special_task_ids, [num_examples, len(special_task_ids)])
            candidates = tf.cast(candidates, tf.int32)
            return tf.reduce_any(tf.equal(ids_column, candidates), -1)

        def _PermuteRow(ids_and_order):
            """Reorders row 0 of the pair by the indices stored in row 1."""
            return tf.gather(ids_and_order[0], ids_and_order[1])

        cfg = self.params.denoise
        num_rows = tf.shape(batch.src.ids)[0]
        num_cols = tf.shape(batch.src.ids)[1]
        grid = [num_rows, num_cols]

        # --- 1) Shuffle: sort positions after adding bounded random jitter.
        jitter = tf.random.uniform(grid, 0, cfg.shuffle_tok_range + 1)

        # Positions whose successor is padding (the eos and the padding
        # positions, per the original comments) get a constant jitter equal
        # to shuffle_tok_range instead of a random one.
        fixed_jitter = tf.fill(grid, float(cfg.shuffle_tok_range))
        next_paddings = tf.pad(batch.src.paddings[:, 1:],
                               [[0, 0], [0, 1]],
                               constant_values=1)
        has_real_successor = tf.equal(next_paddings, 0)
        jitter = tf.where(has_real_successor, jitter, fixed_jitter)
        positions = tf.broadcast_to(tf.range(num_cols, dtype=tf.int32), grid)
        jittered_positions = tf.cast(positions, dtype=tf.float32) + jitter
        order = tf.argsort(jittered_positions)
        ids_with_order = tf.stack([batch.src.ids, order], axis=1)
        noisy_ids = tf.stack(tf.map_fn(_PermuteRow, ids_with_order), axis=0)

        # --- 2) Drop: keep a token if its draw exceeds drop_tok_prob or if
        # it is the eos token; survivors are packed to the left and the tail
        # is refilled (id 0 / padding 1) up to the original shape.
        drop_draw = tf.random.uniform(grid)
        survives_draw = tf.greater(drop_draw, cfg.drop_tok_prob)
        is_eos = tf.equal(noisy_ids, self._src_tokenizer.eos_id)
        keep_mask = tf.math.logical_or(survives_draw, is_eos)
        kept_ids = tf.ragged.boolean_mask(noisy_ids, keep_mask)
        noisy_ids = kept_ids.to_tensor(default_value=0,
                                       shape=tf.shape(batch.src.ids))
        kept_paddings = tf.ragged.boolean_mask(batch.src.paddings, keep_mask)
        noisy_paddings = kept_paddings.to_tensor(
            default_value=1, shape=tf.shape(batch.src.ids))

        # --- 3) Blank: replace tokens by blank_id with blank_tok_prob, but
        # only where the following position is not padding, which leaves the
        # eos token untouched.
        blank_draw = tf.random.uniform(grid)
        next_paddings = tf.pad(noisy_paddings[:, 1:],
                               [[0, 0], [0, 1]],
                               constant_values=1)
        picked_for_blank = tf.less(blank_draw, cfg.blank_tok_prob)
        has_real_successor = tf.equal(next_paddings, 0)
        blank_mask = tf.math.logical_and(picked_for_blank, has_real_successor)
        blank_ids = tf.fill(grid, cfg.blank_id)
        noisy_ids = tf.where(blank_mask, blank_ids, noisy_ids)

        # --- 4) Per-example selection with probability noise_sent_prob,
        # restricted to the tasks listed in cfg.task_ids.
        sentence_draw = tf.random.uniform([num_rows])
        picked_for_noise = tf.less(sentence_draw, cfg.noise_sent_prob)
        example_task_ids = self._GetTaskIds(batch.src.source_ids[:, 0])
        in_noised_task = _BelongsToTasks(example_task_ids, cfg.task_ids)
        apply_noise = tf.math.logical_and(picked_for_noise, in_noised_task)

        batch.src.ids = tf.where(apply_noise, noisy_ids, batch.src.ids)
        batch.src.paddings = tf.where(apply_noise, noisy_paddings,
                                      batch.src.paddings)
        # Indicator and weights are derived from the (possibly new) paddings.
        batch.src.ids_indicator = 1 - batch.src.paddings
        batch.src.weights = batch.src.ids_indicator
示例#14
0
    def StochasticBeamSearchDecodeBiased(self,
                                         encoder_outputs,
                                         biased,
                                         stochastic,
                                         num_hyps_per_beam_override=0):
        """Runs beam search decoding, optionally biased and/or stochastic.

        Target biasing (`biased` is true) requires these auxiliary entries in
        `encoder_outputs`:

        - targets.labels: An int tensor of shape [batch, seq] that represents
          target labels to bias beam search towards.
        - targets.paddings: A 0/1 float tensor of shape [batch, seq] where 1
          means that the corresponding element of targets.labels is a padding.
        - targets.weights: A float tensor of shape [batch, seq] that
          represents biasing weights. 1.0 means forced-decoding.

        Before decoding, targets.weights is multiplied by
        (1 - targets.paddings), and targets.labels / targets.weights are
        right-padded with 0 along the seq dimension up to
        `p.beam_search.target_seq_len` (nothing is added if they are already
        at least that long).

        Stochastic beam search (`stochastic` is true; see
        https://arxiv.org/pdf/1903.06059.pdf) additionally performs top-p
        filtering (i.e. sampling only from the top-p probability mass of the
        token distribution) to ensure the quality of samples. Note that there
        are slight differences from the implementation in the original paper,
        e.g., length normalization and coverage penalty are applied to the
        perturbed probabilities. It requires these auxiliary entries in
        `encoder_outputs`:

        - stochastic_beam_search.top_p_threshold: A float tensor of shape
          [batch] that represents the thresholds of top-p filtering. Must
          satisfy 0 < top_p_threshold <= 1. If the value is low, the quality
          of samples will be high but the diversity will be low. If the value
          is high, the quality of samples will be low but the diversity will
          be high. Stochastic beam search is performed only if
          top_p_threshold > 0 for some batch items.
        - stochastic_beam_search.seed: An int tensor of shape [batch] that
          represents the random seeds. If the seeds are the same, the same
          samples are drawn.
        - stochastic_beam_search.src_ids: An int tensor of shape
          [batch, src_seq] that represents source IDs. Used for turning the
          random seed into a function of source IDs.
        - stochastic_beam_search.src_paddings: A 0/1 float tensor of shape
          [batch, src_seq] where 1 means that the corresponding element of
          stochastic_beam_search.src_ids is a padding.

        This method sets stochastic_beam_search.enable to whether any
        top_p_threshold is > 0.

        Note that `encoder_outputs` is modified in place by the steps above.

        Args:
          encoder_outputs: a NestedMap computed by encoder.
          biased: If true, add the target decoding feature.
          stochastic: If true, add the stochastic beam search feature.
          num_hyps_per_beam_override: If set to a value <= 0, this parameter
            is ignored. If set to a value > 0, then this value will be used to
            override `p.num_hyps_per_beam`.

        Returns:
          BeamSearchDecodeOutput, a namedtuple containing the decode results.
        """
        params = self.params

        if biased:
            bias_targets = encoder_outputs.targets
            # Zero out the biasing weights at padded positions.
            non_padding = 1.0 - bias_targets.paddings
            bias_targets.weights *= non_padding

            def _PadSeqDim(tensor, constant):
                """Right-pads dim 1 of `tensor` up to target_seq_len."""
                seq_len = tf.shape(tensor)[1]
                # Clamp at 0 so longer-than-target inputs are left unpadded.
                deficit = tf.maximum(
                    0, params.beam_search.target_seq_len - seq_len)
                return tf.pad(tensor, [[0, 0], [0, deficit]],
                              constant_values=constant)

            bias_targets.labels = _PadSeqDim(bias_targets.labels, 0)
            bias_targets.weights = _PadSeqDim(bias_targets.weights, 0)

        if stochastic:
            # Stochastic search is switched on as soon as one batch item asks
            # for it through a positive top-p threshold.
            sbs = encoder_outputs.stochastic_beam_search
            wants_sampling = tf.greater(sbs.top_p_threshold, 0.0)
            sbs.enable = tf.reduce_any(wants_sampling)

        decode_fn = self.beam_search.BeamSearchDecode
        theta = self.theta
        init_state_callback = self._WrapInitBeamSearchStateCallback(
            biased, stochastic)
        pre_step_callback = self._WrapPreBeamSearchStepCallback(
            biased, stochastic)
        post_step_callback = self._WrapPostBeamSearchStepCallback(stochastic)
        return decode_fn(theta, encoder_outputs, num_hyps_per_beam_override,
                         init_state_callback, pre_step_callback,
                         post_step_callback)