Exemplo n.º 1
0
def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

    Args:
      boxlist: A BoxList.
      default_box: A [1, 4] float32 tensor. If no boxes are present in
        `boxlist`, this default box will be returned. If None, will use a
        default box of [[-1., -1., -1., -1.]].
      seed: Random seed.
      scope: Name scope.

    Returns:
      bbox: A [1, 4] tensor with a random bounding box.
      valid: A bool tensor indicating whether a valid bounding box is returned
        (True) or whether the default box is returned (False).
    """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]
        # Compare against None explicitly. `default_box or ...` would ask for
        # the truth value of a caller-supplied tensor, which TensorFlow
        # rejects, so a non-None default box could never be used.
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            # Draw a scalar index in [0, number_of_boxes) and keep the leading
            # dimension so the result is shaped like `default_box`.
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return tf.expand_dims(bboxes[random_index],
                                  axis=0), tf.constant(True)

    # Only sample when at least one box exists; otherwise fall back to the
    # default box and flag the result as invalid.
    return tf.cond(tf.greater_equal(number_of_boxes, 1),
                   true_fn=select_box,
                   false_fn=lambda: (default_box, tf.constant(False)))
    def _match_when_rows_are_empty():
      """Builds the match result for a similarity matrix without rows.

      With no rows to match against, every column is reported as unmatched
      by filling the result with -1.

      Returns:
        matches:  int32 tensor holding one -1 per column of the similarity
          matrix.
      """
      matrix_dims = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)
      num_columns = matrix_dims[1]
      all_ones = tf.ones([num_columns], dtype=tf.int32)
      return -1 * all_ones
    def _match_when_rows_are_non_empty():
      """Computes column-to-row matches for a non-empty similarity matrix.

      Every column starts out matched to its arg-max row. If a matched
      threshold is configured, columns whose best similarity is below the
      unmatched threshold, or between the two thresholds, are relabelled with
      -1 / -2. If forced matching is enabled, each valid row additionally
      claims the column where its own similarity is highest.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
      # Best-scoring row for every column.
      col_to_row = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

      if self._matched_threshold is not None:
        # Boolean per-column indicators derived from each column's best score.
        best_scores = tf.reduce_max(similarity_matrix, 0)
        is_below_unmatched = tf.greater(self._unmatched_threshold, best_scores)
        is_between = tf.logical_and(
            tf.greater_equal(best_scores, self._unmatched_threshold),
            tf.greater(self._matched_threshold, best_scores))

        # The flag only decides which band receives -1 and which receives -2.
        if self._negatives_lower_than_unmatched:
          below_label, between_label = -1, -2
        else:
          below_label, between_label = -2, -1
        col_to_row = self._set_values_using_indicator(
            col_to_row, is_below_unmatched, below_label)
        col_to_row = self._set_values_using_indicator(
            col_to_row, is_between, between_label)

      if not self._force_match_for_each_row:
        return col_to_row

      matrix_dims = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)
      best_col_per_row = tf.argmax(similarity_matrix, 1, output_type=tf.int32)
      # One-hot row -> column indicators, zeroed out for invalid rows.
      forced_indicators = (
          tf.one_hot(best_col_per_row, depth=matrix_dims[1]) *
          tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
      forced_row_ids = tf.argmax(forced_indicators, 0, output_type=tf.int32)
      forced_col_mask = tf.cast(tf.reduce_max(forced_indicators, 0), tf.bool)
      # Columns claimed by a forced match override the threshold-based result.
      return tf.where(forced_col_mask, forced_row_ids, col_to_row)
Exemplo n.º 4
0
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
    """Splits every batched tensor in `tensor_dict` into a list of tensors.

    Each value is passed through `tf.unstack`. When
    `unpad_groundtruth_tensors` is set, the groundtruth entries present in
    the dict are additionally sliced so that their first dimension equals
    the matching entry of `num_groundtruth_boxes`.

    Args:
      tensor_dict: dict mapping keys to batched tensors.
      unpad_groundtruth_tensors: whether to slice the groundtruth tensors
        down to `num_groundtruth_boxes` entries.

    Returns:
      A dict with the same keys as `tensor_dict` whose values are lists of
      unstacked (and, if requested, unpadded) tensors.

    Raises:
      ValueError: if unpadding is requested but `num_groundtruth_boxes` is
        not a key of `tensor_dict`.
    """
    unstacked = {
        name: tf.unstack(batched) for name, batched in tensor_dict.items()
    }
    if not unpad_groundtruth_tensors:
        return unstacked

    num_boxes_key = fields.InputDataFields.num_groundtruth_boxes
    if num_boxes_key not in unstacked:
        raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                         'Keys available: {}'.format(unstacked.keys()))

    def _strip_padding(num_gt, padded):
        """Keeps the first `num_gt` entries of `padded` along dimension 0."""
        dims = shape_utils.combined_static_and_dynamic_shape(padded)
        begin = tf.zeros([len(dims)], dtype=tf.int32)
        # -1 tells tf.slice to keep the full extent of that dimension.
        trailing = [-1 if dim is None else dim for dim in dims[1:]]
        return tf.slice(padded, begin, tf.stack([num_gt] + trailing))

    paddable_keys = {
        fields.InputDataFields.groundtruth_instance_masks,
        fields.InputDataFields.groundtruth_classes,
        fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_keypoints,
        fields.InputDataFields.groundtruth_keypoint_visibilities,
        fields.InputDataFields.groundtruth_dp_num_points,
        fields.InputDataFields.groundtruth_dp_part_ids,
        fields.InputDataFields.groundtruth_dp_surface_coords,
        fields.InputDataFields.groundtruth_track_ids,
        fields.InputDataFields.groundtruth_group_of,
        fields.InputDataFields.groundtruth_difficult,
        fields.InputDataFields.groundtruth_is_crowd,
        fields.InputDataFields.groundtruth_area,
        fields.InputDataFields.groundtruth_weights,
    }
    # Only unpad the groundtruth entries that are actually present.
    keys_to_unpad = paddable_keys.intersection(unstacked)
    per_image_counts = unstacked[num_boxes_key]

    unpadded = {
        key: [
            _strip_padding(num_gt, padded)
            for num_gt, padded in zip(per_image_counts, unstacked[key])
        ]
        for key in keys_to_unpad
    }
    unstacked.update(unpadded)
    return unstacked
Exemplo n.º 5
0
    def _compute_loss(self, prediction_tensor, target_tensor, weights):
        """Computes the weighted per-anchor GIoU loss.

        Each anchor's loss is `1 - giou(predicted_box, target_box)`, scaled
        by that anchor's weight.

        Args:
          prediction_tensor: A float tensor of shape
            [batch_size, num_anchors, 4] representing the decoded predicted
            boxes.
          target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
            representing the decoded target boxes.
          weights: A float tensor of shape [batch_size, num_anchors].

        Returns:
          loss: A float tensor of shape [batch_size, num_anchors] holding the
            weighted loss value for every anchor.
        """
        prediction_shape = shape_utils.combined_static_and_dynamic_shape(
            prediction_tensor)
        batch_size, num_anchors, _ = prediction_shape

        # Flatten batch and anchor dimensions so boxes are compared pairwise.
        flat_predictions = tf.reshape(prediction_tensor, [-1, 4])
        flat_targets = tf.reshape(target_tensor, [-1, 4])
        giou_loss = 1 - ops.giou(flat_predictions, flat_targets)

        flat_weights = tf.reshape(weights, [-1])
        weighted_loss = flat_weights * giou_loss
        return tf.reshape(weighted_loss, [batch_size, num_anchors])
Exemplo n.º 6
0
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Builds the box regression target for every anchor.

        Groundtruth boxes (and keypoints, when that field is present) are
        gathered per anchor according to `match`, encoded against the anchors
        with the box coder, and replaced by the default regression target for
        anchors that are not matched.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
        """
        gathered_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        gathered_boxlist = box_list.BoxList(gathered_boxes)

        keypoints_field = fields.BoxListFields.keypoints
        if groundtruth_boxes.has_field(keypoints_field):
            gt_keypoints = groundtruth_boxes.get_field(keypoints_field)
            # Shape of one box's keypoints, used for the zero fill values.
            per_box_shape = gt_keypoints.get_shape()[1:]
            gathered_keypoints = match.gather_based_on_match(
                gt_keypoints,
                unmatched_value=tf.zeros(per_box_shape),
                ignored_value=tf.zeros(per_box_shape))
            gathered_boxlist.add_field(keypoints_field, gathered_keypoints)

        encoded_targets = self._box_coder.encode(gathered_boxlist, anchors)

        # Anchors that are unmatched or ignored receive the default target.
        num_columns = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)[0]
        default_targets = tf.tile(
            self._default_regression_target(), [num_columns, 1])
        is_matched = match.matched_column_indicator()
        return tf.where(is_matched, encoded_targets, default_targets)
def fpn_top_down_feature_maps(image_features,
                              depth,
                              use_depthwise=False,
                              use_explicit_padding=False,
                              use_bounded_activations=False,
                              scope=None,
                              use_native_resize_op=False):
  """Builds the `top-down` pathway of a Feature Pyramid Network.

  See https://arxiv.org/abs/1612.03144 for details.

  The last entry of `image_features` is projected with a 1x1 convolution.
  For every earlier entry, the running top-down map is upsampled by 2, summed
  with a 1x1 projection of that entry, and smoothed with a 3x3 convolution.

  Args:
    image_features: list of tuples of (tensor_name, image_feature_tensor).
      Spatial resolutions of successive tensors must reduce exactly by a
      factor of 2.
    depth: depth of output feature maps.
    use_depthwise: whether to use depthwise separable conv instead of regular
      conv.
    use_explicit_padding: whether to use explicit padding.
    use_bounded_activations: Whether or not to clip activations to range
      [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend
      themselves to quantized inference.
    scope: A scope name to wrap this op under.
    use_native_resize_op: If True, uses tf.image.resize_nearest_neighbor op for
      the upsampling process instead of reshape and broadcasting implementation.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
  """

  def _bound(tensor):
    """Clips `tensor` to the activation bound when bounding is enabled."""
    if not use_bounded_activations:
      return tensor
    return tf.clip_by_value(tensor, -ACTIVATION_BOUND, ACTIVATION_BOUND)

  with tf.name_scope(scope, 'top_down'):
    num_levels = len(image_features)
    kernel_size = 3
    padding = 'VALID' if use_explicit_padding else 'SAME'
    # (key, tensor) pairs, collected from the last input level to the first.
    named_outputs = []
    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
      top_down = _bound(slim.conv2d(
          image_features[-1][1],
          depth, [1, 1], activation_fn=None, normalizer_fn=None,
          scope='projection_%d' % num_levels))
      named_outputs.append(('top_down_%s' % image_features[-1][0], top_down))

      for level in range(num_levels - 2, -1, -1):
        # Upsample the running top-down map by a factor of 2.
        if use_native_resize_op:
          with tf.name_scope('nearest_neighbor_upsampling'):
            top_down_dims = shape_utils.combined_static_and_dynamic_shape(
                top_down)
            doubled_size = [top_down_dims[1] * 2, top_down_dims[2] * 2]
            top_down = tf.image.resize_nearest_neighbor(top_down, doubled_size)
        else:
          top_down = ops.nearest_neighbor_upsampling(top_down, scale=2)

        lateral = _bound(slim.conv2d(
            image_features[level][1], depth, [1, 1],
            activation_fn=None, normalizer_fn=None,
            scope='projection_%d' % (level + 1)))
        if use_explicit_padding:
          # Crop the upsampled map to the spatial size of the lateral branch.
          lateral_shape = tf.shape(lateral)
          top_down = top_down[:, :lateral_shape[1], :lateral_shape[2], :]
        top_down = _bound(top_down + lateral)

        smoothing_conv = (
            functools.partial(slim.separable_conv2d, depth_multiplier=1)
            if use_depthwise else slim.conv2d)
        smoothing_input = (
            ops.fixed_padding(top_down, kernel_size)
            if use_explicit_padding else top_down)
        smoothed = smoothing_conv(
            smoothing_input,
            depth, [kernel_size, kernel_size],
            scope='smoothing_%d' % (level + 1))
        named_outputs.append(('top_down_%s' % image_features[level][0],
                              smoothed))

      # Report the maps in the same order as `image_features`.
      return collections.OrderedDict(reversed(named_outputs))
 def resize_nearest_neighbor(image):
   """Doubles dimensions 1 and 2 of `image` via nearest-neighbor resizing."""
   dims = shape_utils.combined_static_and_dynamic_shape(image)
   doubled_size = [dims[1] * 2, dims[2] * 2]
   return tf.image.resize_nearest_neighbor(image, doubled_size)
Exemplo n.º 9
0
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               unmatched_class_label=None,
               groundtruth_weights=None):
        """Assigns classification and regression targets to anchors.

        Note that this method adds a `scores` field, computed as
        `1 - groundtruth_labels[:, 0]`, to the `groundtruth_boxes` passed in.

        Args:
          anchors: a BoxList of anchors.
          groundtruth_boxes: a BoxList of groundtruth boxes.
          groundtruth_labels: optional label tensor whose first dimension
            must equal the number of groundtruth boxes. When None, a tensor
            of ones with shape [num_gt_boxes, 1] is used.
          unmatched_class_label: optional label tensor whose shape must equal
            the shape of a single groundtruth label. When None,
            `tf.constant([0], tf.float32)` is used.
          groundtruth_weights: optional per-box weights. When None, every box
            gets weight 1. Boxes with weight <= 0 are passed to the matcher
            as invalid rows.

        Returns:
          A tuple (cls_targets, cls_weights, reg_targets, reg_weights,
          match_results), where match_results is `match.match_results` from
          the matcher.

        Raises:
          ValueError: if `anchors` or `groundtruth_boxes` is not a BoxList.
        """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be an BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be an BoxList')

        if unmatched_class_label is None:
            unmatched_class_label = tf.constant([0], tf.float32)

        if groundtruth_labels is None:
            ones_shape = tf.expand_dims(groundtruth_boxes.num_boxes(), 0)
            groundtruth_labels = tf.expand_dims(tf.ones(ones_shape), -1)

        labels_shape = shape_utils.combined_static_and_dynamic_shape(
            groundtruth_labels)
        # A single label must be shaped like the unmatched class label.
        class_shape_check = shape_utils.assert_shape_equal(
            labels_shape[1:],
            shape_utils.combined_static_and_dynamic_shape(
                unmatched_class_label))
        # There must be exactly one label per groundtruth box.
        box_count_check = shape_utils.assert_shape_equal(
            labels_shape[:1],
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_boxes.get())[:1])

        if groundtruth_weights is None:
            # Prefer the static box count; fall back to the dynamic one.
            gt_count = (groundtruth_boxes.num_boxes_static() or
                        groundtruth_boxes.num_boxes())
            groundtruth_weights = tf.ones([gt_count], dtype=tf.float32)

        # set scores on the gt boxes
        groundtruth_boxes.add_field(fields.BoxListFields.scores,
                                    1 - groundtruth_labels[:, 0])

        with tf.control_dependencies([class_shape_check, box_count_check]):
            similarity = self._similarity_calc.compare(
                groundtruth_boxes, anchors)
            match = self._matcher.match(
                similarity, valid_rows=tf.greater(groundtruth_weights, 0))
            reg_targets = self._create_regression_targets(
                anchors, groundtruth_boxes, match)
            cls_targets = self._create_classification_targets(
                groundtruth_labels, unmatched_class_label, match)
            reg_weights = self._create_regression_weights(
                match, groundtruth_weights)
            cls_weights = self._create_classification_weights(
                match, groundtruth_weights)

            # convert cls_weights from per-anchor to per-class.
            per_class_shape = tf.shape(cls_targets)[1:]
            per_anchor_shape = tf.shape(cls_weights)
            tile_multiples = tf.concat(
                [tf.ones_like(per_anchor_shape), per_class_shape], axis=0)
            num_class_dims = len(cls_targets.get_shape()[1:])
            for _ in range(num_class_dims):
                cls_weights = tf.expand_dims(cls_weights, -1)
            cls_weights = tf.tile(cls_weights, tile_multiples)

        static_num_anchors = anchors.num_boxes_static()
        if static_num_anchors is not None:
            # Restore the statically known anchor dimension on every output.
            reg_targets, cls_targets, reg_weights, cls_weights = [
                self._reset_target_shape(target, static_num_anchors)
                for target in (reg_targets, cls_targets, reg_weights,
                               cls_weights)
            ]

        return (cls_targets, cls_weights, reg_targets, reg_weights,
                match.match_results)