Example #1
  def CreateDenseCoordinates(self, ranges):
    """Create a matrix of coordinate locations corresponding to a dense grid.

    Example: To create (x, y) coordinates corresponding to a 10x10 grid with
      step size 1, call CreateDenseCoordinates([(1, 10, 10), (1, 10, 10)]).

    Args:
      ranges: A list of 3-tuples, each tuple is expected to contain (min, max,
        num_steps). Each list element corresponds to one dimension. Each tuple
        will be passed into np.linspace to create the values for a single
        dimension.

    Returns:
      tf.float32 tensor of shape [total_points, len(ranges)], where
      total_points = product of all num_steps.

    """
    total_points = int(np.prod([r_steps for _, _, r_steps in ranges]))
    cycle_steps = total_points
    stack_coordinates = []

    for r_start, r_stop, r_steps in ranges:
      values = tf.lin_space(
          tf.to_float(r_start), tf.to_float(r_stop), tf.to_int32(r_steps))
      cycle_steps //= r_steps
      gather_idx = (tf.range(total_points) // cycle_steps) % r_steps
      stack_coordinates.append(tf.gather(values, gather_idx))

    return tf.stack(stack_coordinates, axis=1)
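For intuition, the gather-based construction above is equivalent to flattening a
meshgrid. A minimal NumPy sketch (a hypothetical standalone helper, not part of
the class):

import numpy as np

def dense_coordinates(ranges):
  # Build one axis per (min, max, num_steps) tuple, then enumerate the grid.
  axes = [np.linspace(lo, hi, steps) for lo, hi, steps in ranges]
  grid = np.meshgrid(*axes, indexing='ij')
  return np.stack([g.reshape(-1) for g in grid], axis=1)

coords = dense_coordinates([(1, 10, 10), (1, 10, 10)])
print(coords.shape)  # (100, 2): all (x, y) pairs over the 10x10 grid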
Example #2
 def FProp(self, theta, current_step):
   """Returns the current learning rate decay."""
   p = self.params
   current_step = tf.to_float(current_step)
   warmup_steps = tf.to_float(p.warmup_examples /
                              (p.batch_size * self._num_replicas))
   return tf.minimum((current_step + 1) * warmup_steps**-1.5,
                     (current_step + 1)**-0.5)
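The two branches of the min cross where (t + 1) * w**-1.5 equals (t + 1)**-0.5,
i.e. at t + 1 = w for w = warmup_steps:

$$\mathrm{lr}(t) = \min\big((t+1)\,w^{-3/2},\ (t+1)^{-1/2}\big), \qquad \mathrm{lr}\big|_{t+1=w} = w^{-1/2}$$

so the rate ramps up linearly to a peak of w**-0.5 and then decays with the
inverse square root of the step.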
Example #3
 def FProp(self, theta, current_step):
   """Returns the current learning rate decay."""
   p = self.params
   current_step = tf.to_float(current_step)
   warmup_steps = tf.to_float(p.warmup_steps * p.worker_replicas)
   if p.decay_end is not None:
     current_step = tf.where(current_step < p.decay_end, current_step,
                             tf.to_float(p.decay_end))
   return p.model_dim**-0.5 * tf.minimum(
       (current_step + 1) * warmup_steps**-1.5, (current_step + 1)**-0.5)
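Scaled by model_dim**-0.5, this is the warmup-then-decay schedule from
"Attention Is All You Need" (Vaswani et al., 2017):

$$\mathrm{lr}(t) = d_{\mathrm{model}}^{-1/2}\cdot\min\big((t+1)\cdot \mathrm{warmup\_steps}^{-3/2},\ (t+1)^{-1/2}\big)$$

with the optional decay_end clamp freezing the rate once current_step reaches
p.decay_end.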
Example #4
 def FProp(self, theta, current_step):
   """Returns the current learning rate decay."""
   params = self.params
   warmup_steps = tf.to_float(params.decay_start * params.worker_replicas)
   current_step = tf.to_float(current_step)
   if params.decay_end is not None:
     current_step = tf.where(current_step < params.decay_end, current_step,
                             tf.to_float(params.decay_end))
   peak_learning_rate = (warmup_steps**-0.5)
   return (params.model_dim**-0.5) * tf.minimum(
       tf.minimum((current_step + 1),
                  (current_step + 1)**-0.5), peak_learning_rate)
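Note that for current_step >= 0 we have (t + 1) >= 1 >= (t + 1)**-0.5, so the
inner min always resolves to (t + 1)**-0.5 and the schedule simplifies to

$$\mathrm{lr}(t) = d_{\mathrm{model}}^{-1/2}\cdot\min\big((t+1)^{-1/2},\ \mathrm{warmup\_steps}^{-1/2}\big)$$

i.e. it holds at the peak value until t + 1 >= warmup_steps and then decays as
an inverse square root.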
Example #5
 def _Value(self, current_step):
   """Returns the current clipping cap."""
   p = self.params
   start_step = tf.cast(p.start_step, tf.float32)
   end_step = tf.cast(p.end_step, tf.float32)
   current_step = tf.cast(current_step, tf.float32)
    steps_ratio = (
        tf.minimum(end_step - start_step, current_step - start_step) /
        (end_step - start_step))
   rmax_tensor = (
       steps_ratio * p.end_cap + (1.0 - steps_ratio) * p.start_cap)
   return tf.cond(tf.less(current_step, p.start_step),
                  lambda: tf.to_float(p.start_cap),
                  lambda: tf.to_float(rmax_tensor))
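The cap therefore ramps linearly from start_cap at start_step to end_cap at
end_step and stays clamped outside that window. A plain-Python sketch with
hypothetical parameter values:

def clip_cap(step, start_step=1000, end_step=2000, start_cap=8.0, end_cap=1.0):
  if step < start_step:
    return start_cap
  ratio = min(end_step - start_step, step - start_step) / (end_step - start_step)
  return ratio * end_cap + (1.0 - ratio) * start_cap

print(clip_cap(500))   # 8.0: before the ramp starts
print(clip_cap(1500))  # 4.5: halfway, 0.5 * 1.0 + 0.5 * 8.0
print(clip_cap(3000))  # 1.0: ratio saturates at 1.0 past end_step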
Example #6
    def testPointConvParametricConvShapes(self):
        batch_size, num_groups, points_per_group, num_in_channels = 4, 5, 6, 7
        num_out_channels = 8
        b = builder_lib.ModelBuilderBase()
        p = b._PointConvParametricConv('test', [3, 4, 9], num_in_channels,
                                       num_out_channels)
        l = p.Instantiate()
        x = py_utils.NestedMap(
            points=tf.random_uniform(
                (batch_size, num_groups, points_per_group, 3),
                dtype=tf.float32),
            features=tf.random_uniform(
                (batch_size, num_groups, points_per_group, num_in_channels),
                dtype=tf.float32),
            padding=tf.to_float(
                tf.random_uniform((batch_size, num_groups, points_per_group),
                                  minval=0,
                                  maxval=2,
                                  dtype=tf.int32)))
        y = l.FPropDefaultTheta(x)

        with self.session() as sess:
            sess.run(tf.global_variables_initializer())
            actual_y = sess.run(y)
            self.assertAllEqual(actual_y.shape,
                                (batch_size, num_groups, num_out_channels))
Example #7
    def _BBox2DImage(self, bbox_corners_image, input_images):
        """Compute [xmin, ymin, xmax, ymax] 2D bounding boxes from corners."""
        # Clip the boundaries of the bounding box to the image width/height.
        bci_x = bbox_corners_image[..., 0:1]
        image_width = tf.broadcast_to(
            input_images.width[..., tf.newaxis, tf.newaxis], tf.shape(bci_x))
        bci_x = tf.clip_by_value(bci_x, 0.0, tf.to_float(image_width))

        bci_y = bbox_corners_image[..., 1:2]
        image_height = tf.broadcast_to(
            input_images.height[..., tf.newaxis, tf.newaxis], tf.shape(bci_y))
        bci_y = tf.clip_by_value(bci_y, 0.0, tf.to_float(image_height))

        bbox_corners_image_clipped = tf.concat([bci_x, bci_y], axis=-1)

        # Compute the [xmin, ymin, xmax, ymax] bounding boxes from [batch,
        # num_boxes, 8, 2] extrema.
        min_vals = tf.math.reduce_min(bbox_corners_image_clipped, axis=2)
        max_vals = tf.math.reduce_max(bbox_corners_image_clipped, axis=2)
        bbox2d_corners_image = tf.concat([min_vals, max_vals], axis=2)
        return bbox2d_corners_image
Example #8
    def _Extract(self, features):
        p = self.params
        # Label values match the proto enum car.open_dataset.Label.Type. The value
        # range is [1..4] for non-background labels.
        labels = tf.to_int32(_Dense(features['labels']))
        labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
        label_ids = tf.reshape(_Dense(features['label_ids'], ''), [-1])
        label_ids = py_utils.PadOrTrimTo(label_ids, [p.max_num_objects], '')
        bboxes_3d = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
        bboxes_3d_mask = tf.ones([tf.shape(bboxes_3d)[0]])
        bboxes_3d_num_points = tf.to_int32(
            _Dense(features['bboxes_3d_num_points']))
        bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])
        bboxes_3d_mask = py_utils.PadOrTrimTo(bboxes_3d_mask,
                                              [p.max_num_objects])
        bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                    [p.max_num_objects])
        label_metadata = tf.reshape(_Dense(features['label_metadata']),
                                    [-1, 4])
        label_metadata = py_utils.PadOrTrimTo(label_metadata,
                                              [p.max_num_objects, 4])

        detection_difficulties = py_utils.PadOrTrimTo(
            tf.to_int32(_Dense(features['detection_difficulties'])),
            [p.max_num_objects])
        tracking_difficulties = py_utils.PadOrTrimTo(
            tf.to_int32(_Dense(features['tracking_difficulties'])),
            [p.max_num_objects])
        unfiltered_bboxes_3d_mask = bboxes_3d_mask

        if p.filter_labels:
            valid_labels = tf.constant([p.filter_labels])
            bbox_mask = tf.reduce_any(tf.equal(tf.expand_dims(labels, 1),
                                               valid_labels),
                                      axis=1)
            bboxes_3d_mask *= tf.to_float(bbox_mask)

        outputs = {
            'labels': labels,
            'label_ids': label_ids,
            'detection_difficulties': detection_difficulties,
            'tracking_difficulties': tracking_difficulties,
            'bboxes_3d': bboxes_3d,
            'bboxes_3d_mask': bboxes_3d_mask,
            'bboxes_3d_num_points': bboxes_3d_num_points,
            'unfiltered_bboxes_3d_mask': unfiltered_bboxes_3d_mask,
            'speed': label_metadata[:, :2],
            'acceleration': label_metadata[:, 2:],
        }

        return py_utils.NestedMap(outputs)
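Most fields above are normalized with py_utils.PadOrTrimTo so every example has
a static [max_num_objects, ...] shape. A rough NumPy stand-in for the behavior
this relies on (a hypothetical sketch, not Lingvo's implementation):

import numpy as np

def pad_or_trim_to(x, shape, pad_val=0):
  # Trim each axis to the target size, then zero-pad whatever is missing.
  x = x[tuple(slice(0, s) for s in shape)]
  pad = [(0, s - d) for s, d in zip(shape, x.shape)]
  return np.pad(x, pad, constant_values=pad_val)

labels = np.array([1, 3, 2])
print(pad_or_trim_to(labels, [5]))  # [1 3 2 0 0]
print(pad_or_trim_to(labels, [2]))  # [1 3]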
Example #9
  def SetMetrics(self, metric_dict, step_args):
    """Sets the metrics to evaluate and the per-step output tensors.

    Args:
      metric_dict: dict of (name -> (tensor of values, tensor of weights))
      step_args: the tensors being passed to the training loop body. These share
        the same structure of alternating value and weight scalars as the
        initial values and the output of this function.

    Returns:
      The tensors to return from the training loop body.  For entries that are
      for metrics in self._metrics, returns the value computed within the loop
      (the step_args value passed in); for all others, the value will never be
      used at the end and so the step_args value is passed through (which has
      the effect of passing the initial values through every iteration of the
      loop).
    """
    num_metrics = len(metric_dict)
    assert num_metrics <= self._max_metrics, ('Increase _max_metrics to >= %d' %
                                              num_metrics)
    self._metrics = py_utils.NestedMap(metric_dict)

    # self._metrics contains a map of (metric_value,
    # metric_weight). We convert it into [metric_value *
    # metric_weight, metric_weight] to make it easier to aggregate
    # metric values across steps and TPU replicas.
    ret = []
    for (value, weight) in self._metrics.Flatten():
      assert value.shape.is_fully_defined(), ('%s' % value)
      assert weight.shape.is_fully_defined(), ('%s' % weight)
      weight = tf.to_float(weight)
      value = tf.to_float(value) * weight
      ret += [value, weight]
    # Each metric has two tensors: value and weight.
    assert len(ret) == 2 * num_metrics
    ret += list(step_args)[len(ret):]
    return ret
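The (value * weight, weight) encoding turns aggregation across steps and
replicas into two plain sums; dividing them at the end recovers the weighted
mean. A small NumPy illustration with made-up numbers:

import numpy as np

values = np.array([0.5, 0.7])    # metric values from two replicas
weights = np.array([128., 64.])  # e.g. number of examples per replica

# What the loop carries per metric: [value * weight, weight].
total_vw = (values * weights).sum()  # 108.8
total_w = weights.sum()              # 192.0
print(total_vw / total_w)            # 0.5666...: the weighted mean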
Example #10
    def _CreateFrustumMask(self, bbox_corners_image,
                           bbox2d_corners_image_clipped, image_height,
                           image_width):
        """Creates a box mask for boxes whose projections fall outside of image."""
        p = self.params
        batch_size, num_boxes = py_utils.GetShape(bbox_corners_image, 2)
        if not p.filter_predictions_outside_frustum:
            return tf.ones(shape=(batch_size, num_boxes), dtype=tf.float32)

        def _MinMax(bbox_corners):
            """Computes the min and max over corners."""
            bbox_min = tf.reduce_min(bbox_corners, axis=-1)
            bbox_max = tf.reduce_max(bbox_corners, axis=-1)
            bbox_min = py_utils.HasShape(bbox_min, [batch_size, num_boxes])
            bbox_max = py_utils.HasShape(bbox_max, [batch_size, num_boxes])
            return bbox_min, bbox_max

        bbox_min_x, bbox_max_x = _MinMax(bbox_corners_image[:, :, :, 0])
        bbox_min_y, bbox_max_y = _MinMax(bbox_corners_image[:, :, :, 1])

        # Compute the fraction of the full 2d image projection that survives
        # clipping.  Dividing the area of each clipped box by the area of the
        # full box gives the overlap fraction.
        original_area = (bbox_max_x - bbox_min_x) * (bbox_max_y - bbox_min_y)
        bbox_clipped_x_min = bbox2d_corners_image_clipped[..., 0]
        bbox_clipped_y_min = bbox2d_corners_image_clipped[..., 1]
        bbox_clipped_x_max = bbox2d_corners_image_clipped[..., 2]
        bbox_clipped_y_max = bbox2d_corners_image_clipped[..., 3]
        clipped_area = (bbox_clipped_x_max - bbox_clipped_x_min) * (
            bbox_clipped_y_max - bbox_clipped_y_min)
        fraction = clipped_area / original_area

        frustum_mask = (fraction > p.truncation_threshold)
        frustum_mask = py_utils.HasShape(frustum_mask, [batch_size, num_boxes])
        frustum_mask = tf.to_float(frustum_mask)
        return frustum_mask
Example #11
  def _Extract(self, features):
    p = self.params

    source_id = py_utils.HasShape(features['image/source_id'], [])
    xmin = _Dense(features['object/image/bbox/xmin'])
    xmax = _Dense(features['object/image/bbox/xmax'])
    ymin = _Dense(features['object/image/bbox/ymin'])
    ymax = _Dense(features['object/image/bbox/ymax'])

    # 2d bounding box in image coordinates.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
    bboxes_count = tf.shape(bboxes)[0]
    bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])

    bboxes_padding = 1.0 - py_utils.PadOrTrimTo(
        tf.ones([bboxes_count]), [p.max_num_objects])

    dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']), [-1, 3])
    loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
    phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])
    # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
    bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

    cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
    bboxes_td = tf.stack([
        cy - dy / 2,
        cx - dx / 2,
        cy + dy / 2,
        cx + dx / 2,
    ], axis=-1)  # pyformat: disable
    bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

    has_3d_info = tf.to_float(_Dense(features['object/has_3d_info']))
    bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
    bboxes_td_mask = bboxes_3d_mask

    # Fill in difficulties from bounding box height, truncation and occlusion.
    bb_height = ymax - ymin
    box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
    box_image_height *= bboxes_3d_mask

    # 0 to 3 indicating occlusion level. 0 means fully visible, 1 means partly
    # occluded, 2 means largely occluded, and 3 means unknown.
    occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
    occlusion = tf.to_float(occlusion)
    occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
    occlusion *= bboxes_3d_mask

    # Truncation: 0 -> not truncated, 1.0 -> truncated
    truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
    truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
    truncation *= bboxes_3d_mask

    difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                            truncation)
    difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

    # Make a batch axis to call BBoxCorners, and take the first result back.
    bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

    # Project the 3D bbox to the image plane.
    velo_to_image_plane = features['transform/velo_to_image_plane']
    bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
        tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

    # Output is [num_objects, 8 corners per object, (x, y)].
    bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                              [-1, 8, 2])
    bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
        bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

    texts = features['object/label'].values
    labels = ops.static_map_string_int(x=texts, keys=self.KITTI_CLASS_NAMES)

    labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
    texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

    # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
    # bboxes_td_mask appropriately.
    if p.filter_labels is not None:
      valid_labels = tf.constant([p.filter_labels])
      bbox_mask = tf.reduce_any(
          tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
      bbox_mask = tf.to_float(bbox_mask)
      bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
      filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
      bboxes_td_mask *= bbox_mask
    else:
      filtered_bboxes_3d_mask = bboxes_3d_mask

    # Placeholder for counting the number of laser points that reside within
    # each 3-d bounding box. This must be filled in outside of this function
    # based on the loaded 3-d laser points.
    bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
    bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                [p.max_num_objects])

    # Pad bboxes_3d.
    bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

    return py_utils.NestedMap(
        source_id=source_id,
        bboxes_count=bboxes_count,
        bboxes=bboxes,
        bboxes_padding=bboxes_padding,
        bboxes_3d=bboxes_3d,
        bboxes_3d_mask=filtered_bboxes_3d_mask,
        unfiltered_bboxes_3d_mask=bboxes_3d_mask,
        bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
        bboxes_td=bboxes_td,
        bboxes_td_mask=bboxes_td_mask,
        bboxes_3d_num_points=bboxes_3d_num_points,
        labels=labels,
        texts=texts,
        box_image_height=box_image_height,
        occlusion=occlusion,
        truncation=truncation,
        difficulties=difficulties)
Example #12
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
    """Perform NMS on predicted bounding boxes / associated logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold to use when determining whether two boxes
      overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
    utils_3d = detection_3d_lib.Utils3D()
    predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
    batch_size, num_predicted_boxes, _ = py_utils.GetShape(predicted_bboxes)
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_predicted_boxes, -1])
    _, _, num_classes = py_utils.GetShape(classification_scores)

    if not isinstance(nms_iou_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`nms_iou_threshold`.')
    if not isinstance(score_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`score_threshold`.')

    if max_boxes_per_class is None:
        max_boxes_per_class = num_predicted_boxes

    # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS, and
    # per-class thresholding.
    # [batch, num_predicted_boxes]
    nms_scores = tf.reduce_max(classification_scores, axis=-1)

    # Compute the most likely label by computing the highest class score from
    # the output of the sigmoid.
    likely_labels = tf.argmax(classification_scores, axis=-1)

    # When background is the most likely class for the box, mask out the scores
    # of that box from NMS scoring so the background boxes don't dominate the
    # NMS.
    nms_scores *= tf.to_float(likely_labels > 0)

    # Compute NMS for every sample in the batch.
    nms_indices, valid_mask = utils_3d.BatchedNMSIndices(
        predicted_bboxes,
        nms_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_num_boxes=max_boxes_per_class)

    # Reorder the box data and logits according to NMS scoring.
    predicted_bboxes = tf.batch_gather(predicted_bboxes, nms_indices)
    classification_scores = tf.batch_gather(classification_scores, nms_indices)

    # Now reformat the output of NMS to match the format of
    # MultiClassOrientedDecodeWithNMS, which outputs a per-class NMS result.
    # All outputs take the leading shape of
    # [batch_size, num_classes, max_boxes_per_class]; since this NMS is not
    # class specific, we need to tile the outputs num_classes times or reorder
    # the data so that it is [batch, num_classes].
    predicted_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                               [1, num_classes, 1, 1])
    classification_scores = tf.transpose(classification_scores, (0, 2, 1))
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_classes, max_boxes_per_class])
    valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, num_classes, 1])
    return predicted_bboxes, classification_scores, valid_mask
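The reordering step relies on tf.batch_gather, which indexes along axis 1
independently for each batch element; np.take_along_axis is a close analogue
(illustrative shapes and indices only):

import numpy as np

boxes = np.arange(2 * 4 * 7, dtype=np.float32).reshape(2, 4, 7)
nms_indices = np.array([[2, 0, 1, 3],
                        [3, 2, 1, 0]])  # per-example NMS ranking

reordered = np.take_along_axis(boxes, nms_indices[..., None], axis=1)
print(reordered.shape)  # (2, 4, 7): each example's boxes in NMS order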
Example #13
  def add_point_cloud(self, feature, laser_names, range_image_pose):
    """Convert the range images in `feature` to 3D point clouds.

    Adds the point cloud data to the tf.Example feature map.

    Args:
      feature: A tf.Example feature map.
      laser_names: A list of laser names (e.g., 'TOP', 'REAR', 'SIDE_LEFT').
      range_image_pose: A range image pose Tensor for the GBR.
    """
    for laser_name in laser_names:
      beam_inclinations = np.array(feature['%s_beam_inclinations' %
                                           laser_name].float_list.value[:])
      # beam_inclinations will be populated if there is a non-uniform
      # beam configuration (e.g., for the TOP lasers).  Others that have
      # uniform beam inclinations are only parameterized by the min and max.
      # We use these min and max if the beam_inclinations are not present,
      # and turn them into a uniform inclinations array.
      if beam_inclinations.size == 0:
        beam_inclination_min = feature['%s_beam_inclination_min' %
                                       laser_name].float_list.value[:]
        beam_inclination_max = feature['%s_beam_inclination_max' %
                                       laser_name].float_list.value[:]

        laser_ri_name = '%s_ri1' % laser_name
        range_image_shape = feature[laser_ri_name +
                                    '_shape'].int64_list.value[:]
        height = tf.to_float(range_image_shape[0])

        beam_inclinations = tf.constant(
            [beam_inclination_min[0], beam_inclination_max[0]])
        beam_inclinations = range_image_utils.compute_inclination(
            beam_inclinations, height)

      beam_extrinsics = np.array(
          feature['%s_extrinsics' % laser_name].float_list.value[:]).reshape(
              4, 4)

      for ri_type in ['ri1', 'ri2']:
        laser_ri_name = '%s_%s' % (laser_name, ri_type)
        # For each of the 4 features of the lasers:
        range_image = np.array(feature[laser_ri_name].float_list.value[:])
        range_image_shape = feature[laser_ri_name +
                                    '_shape'].int64_list.value[:]
        range_image = range_image.reshape(range_image_shape)
        # Compute mask.  At the moment, invalid values in the range image
        # representation are indicated via a -1. entry.  Callers are expected
        # to create this mask when passing into the conversion function below.
        range_image_mask = range_image[..., 0] >= 0

        # Get the 'range' feature from the range images.
        range_image_range = range_image[..., 0]

        # Call utility to convert point cloud to cartesian coordinates.
        #
        # API expects a batch dimension for all inputs.
        batched_pixel_pose = None
        batched_frame_pose = None
        # At the moment, only the GBR has per-pixel pose.
        if laser_name == 'TOP':
          batched_pixel_pose = range_image_pose[tf.newaxis, ...]
          batched_frame_pose = self.frame_pose[tf.newaxis, ...]

        batched_range_image_range = tf.convert_to_tensor(
            range_image_range[np.newaxis, ...], dtype=tf.float32)
        batched_extrinsics = tf.convert_to_tensor(
            beam_extrinsics[np.newaxis, ...], dtype=tf.float32)
        batched_inclinations = tf.convert_to_tensor(
            beam_inclinations[np.newaxis, ...], dtype=tf.float32)

        batched_inclinations = tf.reverse(batched_inclinations, axis=[-1])

        range_image_cartesian = (
            range_image_utils.extract_point_cloud_from_range_image(
                batched_range_image_range,
                batched_extrinsics,
                batched_inclinations,
                pixel_pose=batched_pixel_pose,
                frame_pose=batched_frame_pose))

        points_xyz = tf.gather_nd(range_image_cartesian[0],
                                  tf.where(range_image_mask))

        # Fetch the features corresponding to each xyz coordinate and
        # concatenate them together.
        points_features = tf.to_float(
            tf.gather_nd(range_image[..., 1:], tf.where(range_image_mask)))
        points_data = tf.concat([points_xyz, points_features], axis=-1)

        # Add laser feature to output.
        #
        # Skip embedding shape since we assume that all points have six features
        # and so we can reconstruct the number of points.
        points_list = list(points_data.numpy().reshape([-1]))
        feature['laser_%s' % laser_ri_name].float_list.value[:] = points_list
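When only (min, max) inclinations are stored, they are expanded into a uniform
array. A minimal NumPy sketch of that expansion, assuming
range_image_utils.compute_inclination performs the same midpoint interpolation
used elsewhere in this codebase:

import numpy as np

def uniform_inclinations(inclination_min, inclination_max, height):
  # Midpoint of each of `height` uniform bins, mapped onto [min, max].
  ratio = (0.5 + np.arange(height)) / float(height)
  return ratio * (inclination_max - inclination_min) + inclination_min

print(uniform_inclinations(-0.3, 0.3, 4))  # [-0.225 -0.075  0.075  0.225]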
Example #14
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
    """Get indices to k-neighbors of query_points in points.

  Padding is returned alongside indices. Non-padded points are guaranteed to
  be unique (non-repeated) points from the original non-padded points.

  Padded points arise either from a lack of points (k exceeds the number
  of original non-padded points) or from points being too far away (exceeding
  max_distance).

  Note: Padded point indices may refer to padded points from the original, or
  may be duplicates of the closest point.

  TODO(weihan,jngiam): PointCNN implementation makes an assumption that padded
  points are repeated points from the original points. This behavior is
  maintained here, but we should update PointCNN to respect indices paddings.

  Args:
    points: tensor of shape [N, P1, dims].
    query_points: tensor of shape [N, P2, dims]
    k: Integer.
    points_padding: optional tensor of shape [N, P1] containing True/1.0 iff the
      point is a padded point. If None, then all points are considered real
      points.
    max_distance: float representing the maximum distance that each neighbor may
      be from its query point. If there are no points within the distance, then
      the closest point is returned (regardless of distance). If this is set to
      None, then no filtering by distance is performed.
    sample_neighbors_uniformly: boolean specifying whether to sample neighbors
      uniformly if they are within max distance.

  Returns:
    indices: tensor of shape [N, P2, k].

    padding: tensor of shape [N, P2, k] where 1 represents a padded point, and 0
    represents an unpadded (real) point.

  """
    n, p1 = py_utils.GetShape(points, 2)
    query_points = py_utils.HasShape(query_points, [n, -1, -1])
    _, p2 = py_utils.GetShape(query_points, 2)

    # Compute pair-wise squared distances.
    # Note that dist_mat contains the squared distance (without sqrt). Thus, when
    # using max_distance, we will need to square max_distance to make sure it's
    # in the same units.
    dist_mat = SquaredDistanceMatrix(query_points, points)
    dist_mat = py_utils.HasShape(dist_mat, [n, p2, p1])

    # Add a large scalar to the distances for padded points.
    # dist_mat[i, j, k] will be:
    #   if k < valid_num[i]: distance between points[i, k] and query_points[i, j]
    #   otherwise:           a large scalar added to dist_mat[i, j, k]
    if points_padding is not None:
        points_padding = tf.to_float(tf.expand_dims(points_padding, 1))
        points_padding = py_utils.HasShape(points_padding, [n, 1, p1])
        large_scalar = tf.reduce_max(dist_mat) + 1
        dist_mat += points_padding * large_scalar

    # To sample neighbors uniformly and efficiently, we set all neighbors that
    # are within the distance threshold to have distances drawn uniformly at
    # random. Applying top_k to these randomized distances selects a random
    # set quickly without replacement.
    if sample_neighbors_uniformly:
        if max_distance is not None:
            mask_by_distance = tf.less_equal(dist_mat, max_distance**2)
            dist_mat = tf.where(
                mask_by_distance,
                tf.square(max_distance) *
                tf.random_uniform(tf.shape(dist_mat)), dist_mat)
        else:
            raise ValueError(
                'Uniform sampling requires specifying max_distance.')

    top_k_dist, indices = tf.nn.top_k(-dist_mat, k=k,
                                      sorted=True)  # N x P2 x K

    # Set padding using top_k_dist; padded points will have distance exceeding
    # the large_scalar.
    if points_padding is not None:
        paddings = tf.greater_equal(-top_k_dist, large_scalar)
    else:
        paddings = tf.zeros_like(top_k_dist, dtype=tf.bool)

    # Filter by max_distance by setting all indices whose distance exceeds
    # max_distance to the closest point.
    if max_distance is not None:
        # Mask is true for points that are further than max_distance.
        mask_by_distance = tf.greater(-top_k_dist, tf.square(max_distance))
        closest_idx = tf.tile(indices[:, :, :1], [1, 1, k])
        indices = tf.where(mask_by_distance, closest_idx, indices)
        paddings |= mask_by_distance

    indices = tf.reshape(indices, [n, p2, k])
    paddings = tf.to_float(paddings)

    return indices, paddings
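A toy NumPy version of the padding trick above: adding a large scalar to the
distances of padded points means top_k never prefers them (hypothetical
coordinates):

import numpy as np

points = np.array([[0., 0.], [1., 0.], [9., 9.]])  # P1=3, dims=2
query = np.array([[0.1, 0.]])                      # P2=1
padding = np.array([0., 0., 1.])                   # third point is padding

d = ((query[:, None, :] - points[None, :, :])**2).sum(-1)  # [P2, P1]
d += padding * (d.max() + 1)         # push padded points to the back
idx = np.argsort(d, axis=-1)[:, :2]  # k=2 nearest
print(idx)  # [[0 1]]: the padded point is never selected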
Example #15
  def AssignAnchors(self,
                    anchor_bboxes,
                    gt_bboxes,
                    gt_bboxes_labels,
                    gt_bboxes_mask,
                    foreground_assignment_threshold=0.5,
                    background_assignment_threshold=0.35,
                    background_class_id=0,
                    force_match=True,
                    similarity_fn=None):
    """Assigns anchors to bboxes using a similarity function (SSD-based).

    Each anchor box is assigned to the top matching ground truth box.
    Ground truth boxes can be assigned to multiple anchor boxes.

    Assignments can result in 3 outcomes:
      Positive assignment (if score >= foreground_assignment_threshold):
        assigned_gt_labels will reflect the assigned box label and
        assigned_cls_mask will be set to 1.0
      Background assignment (if score <= background_assignment_threshold):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 1.0
      Ignore assignment (otherwise):
        assigned_gt_labels will be background_class_id and assigned_cls_mask
        will be set to 0.0

    The detection loss function would usually:

      Use assigned_cls_mask for weighting the classification loss. The mask
      is set such that the loss applies to foreground and background assignments
      only - ignored anchors will be set to 0.

      Use assigned_reg_mask for weighting the regression loss. The mask is set
      such that the loss applies to foreground assignments only.

    The thresholds (foreground_assignment_threshold and
    background_assignment_threshold) should be tuned per dataset.

    TODO(jngiam): Consider having a separate threshold for regression boxes; a
    separate threshold is used in PointRCNN.

    Args:
      anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
        parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
        box parameters (x, y, z, dx, dy, dz, r).
      gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
        bounding box.
      gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
        the gt_bbox is a real bbox.
      foreground_assignment_threshold: Similarity score threshold for assigning
        foreground bounding boxes; scores need to be >=
        foreground_assignment_threshold to be assigned to foreground.
      background_assignment_threshold: Similarity score threshold for assigning
        background bounding boxes; scores need to be <=
        background_assignment_threshold to be assigned to background.
      background_class_id: class id to be assigned to anchors_gt_class if no
        anchor boxes match.
      force_match: Boolean specifying whether force matching is enabled. If
        force matching is enabled, then anchors that are the highest scoring
        match for a ground-truth box are considered foreground matches as long
        as their similarity score > 0.
      similarity_fn: Function that computes a similarity score (e.g., IOU)
        between pairs of bounding boxes. This function should take in two
        tensors corresponding to anchor and ground-truth bboxes, and return a
        matrix [A, G] with the similarity score between each pair of bboxes. The
        score must be non-negative, with greater scores representing greater
        similarity. The fore/background_assignment_thresholds will be applied to
        this score to determine if an anchor is foreground, background, or
        ignored. If set to None, the function will default to IOU2DRotatedBoxes.

    Returns:
      NestedMap with the following keys:

        assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.

        assigned_gt_similarity_score: shape [A] (iou) score between the anchor
        and the gt bbox.

        assigned_gt_labels: shape [A] label assigned to bbox.

        assigned_cls_mask: shape [A] mask for classification loss per anchor.
        This should be 1.0 if the anchor has a foreground or background
        assignment; otherwise, it will be assigned to 0.0.

        assigned_reg_mask: shape [A] mask for regression loss per anchor.
        This should be 1.0 if the anchor has a foreground assignment;
        otherwise, it will be assigned to 0.0.
        Note: background anchors do not have regression targets.

    """
    if similarity_fn is None:
      similarity_fn = self.IOU2DRotatedBoxes

    # Shape validation.
    anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
    num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
    gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
    num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

    # Compute similarity score and reduce max by anchors and by ground-truth.
    similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
    similarity_score = py_utils.HasShape(similarity_score,
                                         [num_anchor_bboxes, num_gt_bboxes])

    # Reduce over ground-truth boxes, so we have the max score per anchor.
    anchor_max_score = tf.reduce_max(similarity_score, axis=1)
    anchor_max_idx = tf.argmax(similarity_score, axis=1)

    if force_match:
      # Reduce over anchors, so we have the max score per ground truth box.
      gt_max_score = tf.reduce_max(similarity_score, axis=0, keep_dims=True)

      # Force matches occur when the top matching gt bbox for an anchor is the
      # top matching anchor for the gt bbox. When force matching, we match
      # these boxes as long as their similarity score exceeds 0.
      force_matches = (
          tf.equal(similarity_score, gt_max_score)
          & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
          & tf.greater(similarity_score, 0.)
          & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
      force_match_indicator = tf.reduce_any(force_matches, axis=1)
      force_match_idx = tf.argmax(tf.to_int32(force_matches), axis=1)

      # In assigning foreground/background anchors later, force_match_indicator
      # is used to determine which anchors are force foreground, and the index
      # assigned will be taken from anchor_max_idx.

      # Force matches must also be the max scoring gt bbox per anchor.
      # We overwrite anchor_max_idx to ensure that the right match is made.
      anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                anchor_max_idx)

    # Ensure that max score boxes are not padded boxes by setting score to 0
    # for boxes that are padded.
    gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
    anchor_max_score = tf.where(
        tf.equal(gathered_mask, 1), anchor_max_score,
        tf.zeros_like(anchor_max_score))

    # Boolean tensors corresponding to whether an anchor is background or
    # foreground based on thresholding.
    background_anchors = tf.less_equal(anchor_max_score,
                                       background_assignment_threshold)
    foreground_anchors = tf.greater_equal(anchor_max_score,
                                          foreground_assignment_threshold)
    if force_match:
      # Background anchors are below threshold and not force matches.
      background_anchors &= ~force_match_indicator
      # Foreground anchors are above thresholds or force matches.
      foreground_anchors |= force_match_indicator

    # Add dummy background bbox to gt_boxes to facilitate batch gather.
    dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

    # Since we are concatenating the dummy bbox, the index corresponds to the
    # number of boxes.
    dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

    gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
    gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                 axis=0)

    # Gather indices so that all foreground boxes are gathered from gt_bboxes,
    # while all background and ignore boxes gather the dummy_bbox.
    anchor_gather_idx = tf.where(
        foreground_anchors, anchor_max_idx,
        tf.constant(
            dummy_bbox_idx,
            shape=py_utils.GetShape(anchor_max_idx),
            dtype=anchor_max_idx.dtype))

    # Gather the bboxes and weights.
    assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
    assigned_gt_labels = tf.batch_gather(gt_bboxes_labels, anchor_gather_idx)

    # Set masks for classification and regression losses.
    assigned_cls_mask = tf.to_float(background_anchors | foreground_anchors)
    assigned_reg_mask = tf.to_float(foreground_anchors)

    return py_utils.NestedMap(
        assigned_gt_bbox=assigned_gt_bbox,
        assigned_gt_similarity_score=anchor_max_score,
        assigned_gt_labels=assigned_gt_labels,
        assigned_cls_mask=assigned_cls_mask,
        assigned_reg_mask=assigned_reg_mask)
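A toy NumPy sketch of the threshold logic above, assuming a precomputed
similarity matrix with hypothetical values (the real code computes it with
IOU2DRotatedBoxes and additionally handles force matching and padded boxes):

import numpy as np

similarity = np.array([[0.7, 0.1],   # anchor 0 vs gt boxes 0 and 1
                       [0.2, 0.4],   # anchor 1
                       [0.0, 0.0]])  # anchor 2
fg_thresh, bg_thresh = 0.5, 0.35

anchor_max = similarity.max(axis=1)   # best score per anchor
foreground = anchor_max >= fg_thresh  # anchor 0
background = anchor_max <= bg_thresh  # anchor 2
cls_mask = (foreground | background).astype(np.float32)  # [1. 0. 1.]
reg_mask = foreground.astype(np.float32)                 # [1. 0. 0.]
print(cls_mask, reg_mask)  # anchor 1 (score 0.4) is ignored by both losses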
Example #16
  def _Extract(self, features):
    p = self.params
    ri_outputs = {}
    outputs = {}
    frame_pose = tf.reshape(_Dense(features['pose']), [4, 4])
    for laser in p.cbr_laser_names + p.gbr_laser_names:
      # Extract range images.
      for returns in p.returns:
        ri_shape = tf.reshape(
            _Dense(features['%s_%s_shape' % (laser, returns)]), [-1])
        range_image = tf.reshape(
            _Dense(features['%s_%s' % (laser, returns)]), ri_shape)

        shape_to_check = (
            p.cbr_ri_shape if laser in p.cbr_laser_names else p.gbr_ri_shape)
        range_image = py_utils.HasShape(range_image, shape_to_check)

        ri_outputs['%s_%s' % (laser, returns)] = range_image

      # Extract beam inclinations and extrinsics.
      outputs['%s_extrinsics' % laser] = tf.reshape(
          _Dense(features['%s_extrinsics' % laser]), [4, 4])

    # CBRs have uniform inclinations.
    for laser in p.cbr_laser_names:
      beam_inclination_min = tf.reshape(
          _Dense(features['%s_beam_inclination_min' % laser]), [])
      beam_inclination_max = tf.reshape(
          _Dense(features['%s_beam_inclination_max' % laser]), [])
      outputs['%s_beam_inclinations' % laser] = tf.stack(
          [beam_inclination_min, beam_inclination_max], axis=0)

    # GBRs have non-uniform inclinations defined by 64 floats.
    for laser in p.gbr_laser_names:
      outputs['%s_beam_inclinations' % laser] = tf.reshape(
          _Dense(features['%s_beam_inclinations' % laser]), [64])

    # Embed xyz onto each range image pixel.
    for laser in p.cbr_laser_names + p.gbr_laser_names:
      extrinsics = outputs['%s_extrinsics' % laser]
      inclinations = outputs['%s_beam_inclinations' % laser]
      if laser in p.cbr_laser_names:
        ri_shape = p.cbr_ri_shape

        # Convert from 2-tuple range inclination to the full range
        # via linear interpolation.
        #
        # CBR lasers currently are always uniform inclinations specified by a
        # length 2 vector.
        height = ri_shape[0]
        min_inclination = inclinations[0]
        max_inclination = inclinations[1]
        diff = max_inclination - min_inclination
        ratio = (.5 + tf.to_float(tf.range(0, height))) / tf.to_float(height)
        # Interpolate from min to max inclination.
        inclinations = (ratio * diff) + min_inclination
      else:
        ri_shape = p.gbr_ri_shape

      pixel_pose = None
      if laser in p.gbr_laser_names:
        pixel_pose = tf.reshape(
            _Dense(features['%s_pose' % laser]),
            shape=p.gbr_ri_shape[0:2] + [4, 4])

      for returns in p.returns:
        range_image = ri_outputs['%s_%s' % (laser, returns)]
        range_image = tf.reshape(range_image, ri_shape)
        range_image_mask = range_image[..., 0] >= 0
        ri_xyz = tf.to_float(
            self._XYZFromRangeImage(range_image, range_image_mask, extrinsics,
                                    inclinations, pixel_pose, frame_pose))

        # Produce the NestedMap of xyz, features, mask.
        ri_result = py_utils.NestedMap({
            'xyz': ri_xyz,
            'features': range_image,
            'mask': tf.to_float(range_image_mask),
        })

        outputs['%s_%s' % (laser, returns)] = ri_result

    return py_utils.NestedMap(outputs)
Example #17
File: model.py  Project: jairsan/lingvo-1
    def _InferenceSubgraph_Default(self):
        """Default inference subgraph.

    Returns:
      (fetches, feeds), with:

      - fetches: A dictionary of fetches, containing:

        - log_pplx_per_token: A matrix of shape [batch, time]. [i, j]
          is i-th input text's j-th token's log prob.
        - paddings: A matrix of shape [batch, time]. The padding mask.
        - lengths: A vector of shape [batch]. The number of non-padded tokens
          in each input.
        - log_pplx_per_sample: A vector of shape [batch]. [i]
          is i-th input text's log prob.
        - num_oovs_per_sample: A vector of shape [batch] counting the total
          number of out-of-vocabulary tokens in each input.
        - tokens_from_labels: A vector of shape [batch] returning the predicted
          tokens as a sequence after mapping them back to strings from ids using
          the vocabulary.
        - ids: A matrix of shape [batch, time]. [i, j]
          is i-th input text's j-th token's id.

      - feeds: A dictionary of feeds, containing:

        - text: A placeholder for a vector of strings.
    """
        text = tf.placeholder(tf.string, shape=[None])
        # [batch, time]
        ids, labels, paddings = self.input_generator.StringsToIds(text)
        lengths = tf.reduce_sum(tf.to_int32(1 - paddings), axis=1)
        tokens_from_labels = self.input_generator.IdsToStrings(labels, lengths)
        oovs = tf.equal(labels, self.input_generator.tokenizer.unk_id)
        num_oovs_per_sample = tf.to_int32(
            tf.reduce_sum(tf.to_float(oovs) * (1 - paddings), axis=1))
        # [time, batch]
        ids, paddings, labels, weights = self._TrimIfPossibleThenTranspose(
            ids, paddings, labels, 1.0 - paddings)
        batch_size = tf.shape(ids)[1]
        xent_output, _ = self.lm.FPropDefaultTheta(
            inputs=ids,
            paddings=paddings,
            state0=self.lm.zero_state(self.theta.lm, batch_size),
            labels=py_utils.NestedMap(class_ids=labels, class_weights=weights))

        per_example_xent = py_utils.HasShape(xent_output.per_example_xent,
                                             tf.shape(ids))
        log_pplx_per_sample = tf.reduce_sum(per_example_xent * (1 - paddings),
                                            axis=0)
        fetches = {
            'log_pplx_per_token':  # [batch, time]
            tf.transpose(per_example_xent),
            'paddings':  # [batch, time]
            tf.transpose(paddings),
            'lengths':  # [batch]
            lengths,
            'log_pplx_per_sample':  # [batch]
            log_pplx_per_sample,
            'num_oovs_per_sample':  # [batch], int32
            num_oovs_per_sample,
            'tokens_from_labels':  # [batch], string
            tokens_from_labels,
            'ids':  # [batch, time], int32
            ids
        }
        feeds = {'text': text}
        return fetches, feeds
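The per-sample score is just the padding-masked sum of the per-token cross
entropy over time. In toy numbers:

import numpy as np

per_example_xent = np.array([[1.2], [0.7], [2.0]])  # [time=3, batch=1]
paddings = np.array([[0.], [0.], [1.]])             # last step is padding

log_pplx_per_sample = (per_example_xent * (1 - paddings)).sum(axis=0)
print(log_pplx_per_sample)  # [1.9]: only the two real tokens contribute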
Example #18
 def ReadData():
     x, y = io_ops.restore_v2(p.ckpt, [p.data, p.label], [''] * 2,
                              [p.data_dtype, p.label_dtype])
     # Always convert to float32.
     return tf.to_float(x), tf.to_float(y)
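A self-contained sketch of the same restore pattern, with a hypothetical
checkpoint path and tensor names (written against tf.compat.v1, since
restore_v2 and tf.to_float are TF1-era APIs):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

ckpt = '/tmp/toy_ckpt'  # hypothetical checkpoint prefix

# Write a toy checkpoint containing two named tensors.
with tf.Session() as sess:
  data = tf.Variable(np.arange(4, dtype=np.int64), name='data')
  label = tf.Variable(np.ones(4, dtype=np.int64), name='label')
  sess.run(tf.global_variables_initializer())
  tf.train.Saver().save(sess, ckpt)

# Read the tensors straight back from the checkpoint, as ReadData does.
with tf.Graph().as_default(), tf.Session() as sess:
  x, y = tf.raw_ops.RestoreV2(
      prefix=ckpt,
      tensor_names=['data', 'label'],
      shape_and_slices=[''] * 2,
      dtypes=[tf.int64, tf.int64])
  print(sess.run([tf.to_float(x), tf.to_float(y)]))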