Example #1
    def test_fails_with_nested_input(self):
        def fn(input_tensor):
            return input_tensor

        input_tensor1 = tf.constant([1])
        input_tensor2 = tf.constant([2])
        with self.assertRaisesRegexp(
                ValueError, '`elems` must be a Tensor or list of Tensors.'):
            shape_utils.static_or_dynamic_map_fn(
                fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
Example #2
    def test_with_multiple_dynamic_shapes(self):
        def fn(elems):
            i_tensor, scalar_tensor = elems
            return tf.reshape(tf.slice(i_tensor, scalar_tensor, [1]), [])

        input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
        scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

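        # The batch dimension is unknown (None), so static_or_dynamic_map_fn
        # should fall back to tf.map_fn; the assertion below checks that a
        # 'map' op was added to the graph.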
        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[1, 2, 3], [4, 5, -1],
                                                  [0, 6, 9]],
                                   scalar_index_tensor: [[0], [2], [1]],
                               })
            result2 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[-1, 1, 0], [3, 9, 30]],
                                   scalar_index_tensor: [[1], [0]]
                               })
            self.assertAllEqual(result1, [1, -1, 6])
            self.assertAllEqual(result2, [1, 3])
Example #3
    def test_with_static_shape(self):
        def fn(i_tensor):
            return tf.reduce_sum(i_tensor)

        input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

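        # All shapes are fully static, so static_or_dynamic_map_fn can unstack
        # the input and unroll the loop instead of calling tf.map_fn; no 'map'
        # op should appear in the graph.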
        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [3, 4, 4])
Example #4
    def test_with_multiple_static_shapes(self):
        def fn(elems):
            i_tensor, scalar_tensor = elems
            return tf.reshape(tf.slice(i_tensor, scalar_tensor, [1]), [])

        input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                                   dtype=tf.float32)
        scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [1, -1, 6])
Example #5
def batch_position_sensitive_crop_regions(images,
                                          boxes,
                                          crop_size,
                                          num_spatial_bins,
                                          global_pool,
                                          parallel_iterations=64):
    """Position sensitive crop with batches of images and boxes.

  This op is exactly like `position_sensitive_crop_regions` below but operates
  on batches of images and boxes. See the `position_sensitive_crop_regions`
  function below for the operation applied per batch element.

  Args:
    images: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      that the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: See `position_sensitive_crop_regions` below.
    num_spatial_bins: See `position_sensitive_crop_regions` below.
    global_pool: See `position_sensitive_crop_regions` below.
    parallel_iterations: Number of batch items to process in parallel.

  Returns:
    A float32 tensor containing the position-sensitive cropped features for
    each batch element; see `position_sensitive_crop_regions` below for the
    per-element output shape.
  """
    def _position_sensitive_crop_fn(inputs):
        images, boxes = inputs
        return position_sensitive_crop_regions(
            images,
            boxes,
            crop_size=crop_size,
            num_spatial_bins=num_spatial_bins,
            global_pool=global_pool)

    return shape_utils.static_or_dynamic_map_fn(
        _position_sensitive_crop_fn,
        elems=[images, boxes],
        dtype=tf.float32,
        parallel_iterations=parallel_iterations)
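
# A minimal call sketch for the batched op above. Shapes are hypothetical, and
# we assume the per-element op's usual constraint that `depth` is divisible by
# num_spatial_bins[0] * num_spatial_bins[1].
images = tf.placeholder(tf.float32, shape=[2, 40, 40, 18])  # 18 = 2 * (3 * 3)
boxes = tf.placeholder(tf.float32, shape=[2, 5, 4])  # normalized [y1, x1, y2, x2]
ps_features = batch_position_sensitive_crop_regions(
    images,
    boxes,
    crop_size=[12, 12],
    num_spatial_bins=[3, 3],
    global_pool=True)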
Example #6
    def test_with_dynamic_shape(self):
        def fn(i_tensor):
            return tf.reduce_sum(i_tensor)

        input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(
                map_fn_output,
                feed_dict={input_tensor: [[1, 2], [3, 1], [0, 4]]})
            result2 = sess.run(map_fn_output,
                               feed_dict={input_tensor: [[-1, 1], [0, 9]]})
            self.assertAllEqual(result1, [3, 4, 4])
            self.assertAllEqual(result2, [0, 9])
Example #7
    def preprocess(self, inputs):
        """Feature-extractor specific preprocessing.

    SSD meta architecture uses a default clip_window of [0, 0, 1, 1] during
    post-processing. When the `preprocess` method is called, clip_window is
    updated based on the `true_image_shapes` returned by `image_resizer_fn`.

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor representing
        a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float
        tensor representing a batch of images.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Raises:
      ValueError: if inputs tensor does not have type tf.float32
    """
        if inputs.dtype is not tf.float32:
            raise ValueError('`preprocess` expects a tf.float32 tensor')
        with tf.name_scope('Preprocessor'):
            # TODO(jonathanhuang): revisit whether to always use batch size as
            # the number of parallel iterations vs allow for dynamic batching.
            outputs = shape_utils.static_or_dynamic_map_fn(
                self._image_resizer_fn,
                elems=inputs,
                dtype=[tf.float32, tf.int32])
            resized_inputs = outputs[0]
            true_image_shapes = outputs[1]

            return (self._feature_extractor.preprocess(resized_inputs),
                    true_image_shapes)
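
# A standalone sketch of the pattern used in `preprocess` above: when the
# mapped function returns multiple tensors, `dtype` is passed as a list and
# static_or_dynamic_map_fn yields one stacked output per element of that list.
# The resizer below is a hypothetical stand-in for self._image_resizer_fn.
def _identity_resizer(image):
    # "Resizes" by identity and reports the (height, width, channels) shape.
    return [image, tf.shape(image)]

images = tf.placeholder(tf.float32, shape=[None, 300, 300, 3])
resized_images, true_image_shapes = shape_utils.static_or_dynamic_map_fn(
    _identity_resizer, elems=images, dtype=[tf.float32, tf.int32])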
Example #8
def normalized_to_image_coordinates(normalized_boxes,
                                    image_shape,
                                    parallel_iterations=32):
    """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
  """
    x_scale = tf.cast(image_shape[2], tf.float32)
    y_scale = tf.cast(image_shape[1], tf.float32)

    def _to_absolute_coordinates(normalized_boxes):
        y_min, x_min, y_max, x_max = tf.split(value=normalized_boxes,
                                              num_or_size_splits=4,
                                              axis=1)
        y_min = y_scale * y_min
        y_max = y_scale * y_max
        x_min = x_scale * x_min
        x_max = x_scale * x_max
        scaled_boxes = tf.concat([y_min, x_min, y_max, x_max], 1)
        return scaled_boxes

    absolute_boxes = shape_utils.static_or_dynamic_map_fn(
        _to_absolute_coordinates,
        elems=normalized_boxes,
        dtype=tf.float32,
        parallel_iterations=parallel_iterations,
        back_prop=True)
    return absolute_boxes
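
# A small worked example for the conversion above: one image of height 128 and
# width 64 (image_shape is [batch, height, width, channels]).
normalized_boxes = tf.constant(
    [[[0.0, 0.0, 0.5, 0.5], [0.25, 0.25, 1.0, 1.0]]], dtype=tf.float32)
image_shape = tf.constant([1, 128, 64, 3], dtype=tf.float32)
absolute_boxes = normalized_to_image_coordinates(normalized_boxes, image_shape)
# Evaluates to [[[0., 0., 64., 32.], [32., 16., 128., 64.]]].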
Example #9
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         additional_fields=None,
                                         scope=None,
                                         parallel_iterations=32):
    """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1, the same boxes are used for all classes;
      otherwise, if `q` equals the number of classes, class-specific boxes
      are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
      of the form [y_min, x_min, y_max, x_max] representing the window to clip
      boxes to before performing non-max suppression. This argument can also be
      a tensor of shape [4], in which case the same clip window is applied to
      all images in the batch. If clip_window is None, all boxes are used to
      perform non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided).
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered
      for each image in the batch.  This parameter allows for ignoring zero
      paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks. `q` can be either number of classes
      or 1 depending on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose dimensions are [batch_size, num_anchors, ...].
    scope: tf scope name.
    parallel_iterations: (optional) number of batch items to process in
      parallel.

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'nmsed_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box. This is set to None if input
      `masks` is None.
    'nmsed_additional_fields': (optional) a dictionary of
      [batch_size, max_detections, ...] float32 tensors corresponding to the
      tensors specified in the input `additional_fields`. This is not returned
      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.

  Raises:
    ValueError: if `q` in boxes.shape is neither 1 nor equal to the number of
      classes as inferred from scores.shape.
  """
    q = boxes.shape[2].value
    num_classes = scores.shape[2].value
    if q != 1 and q != num_classes:
        raise ValueError('third dimension of boxes must be either 1 or equal '
                         'to the third dimension of scores')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')
    original_masks = masks
    original_additional_fields = additional_fields
    with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
        boxes_shape = boxes.shape
        batch_size = boxes_shape[0].value
        num_anchors = boxes_shape[1].value

        if batch_size is None:
            batch_size = tf.shape(boxes)[0]
        if num_anchors is None:
            num_anchors = tf.shape(boxes)[1]

        # If num_valid_boxes isn't provided, construct it and mark all boxes
        # as valid.
        if num_valid_boxes is None:
            num_valid_boxes = tf.ones([batch_size],
                                      dtype=tf.int32) * num_anchors

        # If masks aren't provided, create dummy masks so that only one copy of
        # _single_image_nms_fn is needed, and discard the dummy masks after
        # map_fn.
        if masks is None:
            masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
            masks = tf.zeros(masks_shape)

        if clip_window is None:
            clip_window = tf.stack([
                tf.reduce_min(boxes[:, :, :, 0]),
                tf.reduce_min(boxes[:, :, :, 1]),
                tf.reduce_max(boxes[:, :, :, 2]),
                tf.reduce_max(boxes[:, :, :, 3])
            ])
        if clip_window.shape.ndims == 1:
            clip_window = tf.tile(tf.expand_dims(clip_window, 0),
                                  [batch_size, 1])

        if additional_fields is None:
            additional_fields = {}

        def _single_image_nms_fn(args):
            """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors consisting of the following:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1, the same boxes are used for all classes;
            otherwise, if `q` equals the number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...].
          per_image_num_valid_boxes - A scalar tensor of type `int32`
            representing the number of valid boxes in this image; boxes beyond
            this count are zero padding and are ignored.

      Returns:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
          float32 tensor containing masks for each selected box. This is set to
          None if input `masks` is None.
        'nmsed_additional_fields':  (optional) A variable number of float32
          tensors each with size [max_detections, ...] corresponding to the
          input `per_image_additional_fields`.
        'num_detections': A scalar int32 tensor indicating the number of valid
          detections in this image. Only the top num_detections entries in
          nmsed_boxes, nmsed_scores and nmsed_classes are valid; the rest are
          zero paddings.
      """
            per_image_boxes = args[0]
            per_image_scores = args[1]
            per_image_masks = args[2]
            per_image_clip_window = args[3]
            per_image_additional_fields = {
                key: value
                for key, value in zip(additional_fields, args[4:-1])
            }
            per_image_num_valid_boxes = args[-1]
            per_image_boxes = tf.reshape(
                tf.slice(per_image_boxes, 3 * [0],
                         tf.stack([per_image_num_valid_boxes, -1, -1])),
                [-1, q, 4])
            per_image_scores = tf.reshape(
                tf.slice(per_image_scores, [0, 0],
                         tf.stack([per_image_num_valid_boxes, -1])),
                [-1, num_classes])
            per_image_masks = tf.reshape(
                tf.slice(per_image_masks, 4 * [0],
                         tf.stack([per_image_num_valid_boxes, -1, -1, -1])), [
                             -1, q, per_image_masks.shape[2].value,
                             per_image_masks.shape[3].value
                         ])
            if per_image_additional_fields is not None:
                for key, tensor in per_image_additional_fields.items():
                    additional_field_shape = tensor.get_shape()
                    additional_field_dim = len(additional_field_shape)
                    per_image_additional_fields[key] = tf.reshape(
                        tf.slice(
                            per_image_additional_fields[key],
                            additional_field_dim * [0],
                            tf.stack([per_image_num_valid_boxes] +
                                     (additional_field_dim - 1) * [-1])),
                        [-1] +
                        [dim.value for dim in additional_field_shape[1:]])
            nmsed_boxlist = multiclass_non_max_suppression(
                per_image_boxes,
                per_image_scores,
                score_thresh,
                iou_thresh,
                max_size_per_class,
                max_total_size,
                clip_window=per_image_clip_window,
                change_coordinate_frame=change_coordinate_frame,
                masks=per_image_masks,
                additional_fields=per_image_additional_fields)
            padded_boxlist = box_list_ops.pad_or_clip_box_list(
                nmsed_boxlist, max_total_size)
            num_detections = nmsed_boxlist.num_boxes()
            nmsed_boxes = padded_boxlist.get()
            nmsed_scores = padded_boxlist.get_field(
                fields.BoxListFields.scores)
            nmsed_classes = padded_boxlist.get_field(
                fields.BoxListFields.classes)
            nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
            nmsed_additional_fields = [
                padded_boxlist.get_field(key)
                for key in per_image_additional_fields
            ]
            return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
                    nmsed_additional_fields + [num_detections])

        num_additional_fields = 0
        if additional_fields is not None:
            num_additional_fields = len(additional_fields)
        num_nmsed_outputs = 4 + num_additional_fields

        batch_outputs = shape_utils.static_or_dynamic_map_fn(
            _single_image_nms_fn,
            elems=([boxes, scores, masks, clip_window] +
                   list(additional_fields.values()) + [num_valid_boxes]),
            dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
            parallel_iterations=parallel_iterations)

        batch_nmsed_boxes = batch_outputs[0]
        batch_nmsed_scores = batch_outputs[1]
        batch_nmsed_classes = batch_outputs[2]
        batch_nmsed_masks = batch_outputs[3]
        batch_nmsed_additional_fields = {
            key: value
            for key, value in zip(additional_fields, batch_outputs[4:-1])
        }
        batch_num_detections = batch_outputs[-1]

        if original_masks is None:
            batch_nmsed_masks = None

        if original_additional_fields is None:
            batch_nmsed_additional_fields = None

        return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
                batch_nmsed_masks, batch_nmsed_additional_fields,
                batch_num_detections)
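
# A minimal call sketch (hypothetical shapes): a batch of 2 images with 100
# anchors and boxes shared across 3 classes (q = 1).
boxes = tf.placeholder(tf.float32, shape=[2, 100, 1, 4])
scores = tf.placeholder(tf.float32, shape=[2, 100, 3])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
 nmsed_additional_fields, num_detections) = batch_multiclass_non_max_suppression(
     boxes,
     scores,
     score_thresh=0.5,
     iou_thresh=0.6,
     max_size_per_class=10,
     max_total_size=20)
# nmsed_masks and nmsed_additional_fields are None here because neither `masks`
# nor `additional_fields` was passed in.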
Example #10
    def loss(self, prediction_dict, true_image_shapes, scope=None):
        """Compute scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors. Note that this tensor *includes*
          background class predictions.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.
      scope: Optional scope name.

    Returns:
      a dictionary mapping loss keys (`localization_loss` and
        `classification_loss`) to scalar tensors representing corresponding loss
        values.
    """
        with tf.name_scope(scope, 'Loss', prediction_dict.values()):
            keypoints = None
            if self.groundtruth_has_field(fields.BoxListFields.keypoints):
                keypoints = self.groundtruth_lists(
                    fields.BoxListFields.keypoints)
            weights = None
            if self.groundtruth_has_field(fields.BoxListFields.weights):
                weights = self.groundtruth_lists(fields.BoxListFields.weights)
            (batch_cls_targets, batch_cls_weights, batch_reg_targets,
             batch_reg_weights, match_list) = self._assign_targets(
                 self.groundtruth_lists(fields.BoxListFields.boxes),
                 self.groundtruth_lists(fields.BoxListFields.classes),
                 keypoints, weights)
            if self._add_summaries:
                self._summarize_target_assignment(
                    self.groundtruth_lists(fields.BoxListFields.boxes),
                    match_list)

            if self._random_example_sampler:
                batch_sampled_indicator = tf.to_float(
                    shape_utils.static_or_dynamic_map_fn(
                        self._minibatch_subsample_fn,
                        [batch_cls_targets, batch_cls_weights],
                        dtype=tf.bool,
                        parallel_iterations=self._parallel_iterations,
                        back_prop=True))
                batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                                batch_reg_weights)
                batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                                batch_cls_weights)

            location_losses = self._localization_loss(
                prediction_dict['box_encodings'],
                batch_reg_targets,
                ignore_nan_targets=True,
                weights=batch_reg_weights)
            cls_losses = ops.reduce_sum_trailing_dimensions(
                self._classification_loss(
                    prediction_dict['class_predictions_with_background'],
                    batch_cls_targets,
                    weights=batch_cls_weights),
                ndims=2)

            if self._hard_example_miner:
                (localization_loss,
                 classification_loss) = self._apply_hard_mining(
                     location_losses, cls_losses, prediction_dict, match_list)
                if self._add_summaries:
                    self._hard_example_miner.summarize()
            else:
                if self._add_summaries:
                    class_ids = tf.argmax(batch_cls_targets, axis=2)
                    flattened_class_ids = tf.reshape(class_ids, [-1])
                    flattened_classification_losses = tf.reshape(
                        cls_losses, [-1])
                    self._summarize_anchor_classification_loss(
                        flattened_class_ids, flattened_classification_losses)
                localization_loss = tf.reduce_sum(location_losses)
                classification_loss = tf.reduce_sum(cls_losses)

            # Optionally normalize by number of positive matches
            normalizer = tf.constant(1.0, dtype=tf.float32)
            if self._normalize_loss_by_num_matches:
                normalizer = tf.maximum(
                    tf.to_float(tf.reduce_sum(batch_reg_weights)), 1.0)

            localization_loss_normalizer = normalizer
            if self._normalize_loc_loss_by_codesize:
                localization_loss_normalizer *= self._box_coder.code_size
            localization_loss = tf.multiply((self._localization_loss_weight /
                                             localization_loss_normalizer),
                                            localization_loss,
                                            name='localization_loss')
            classification_loss = tf.multiply(
                (self._classification_loss_weight / normalizer),
                classification_loss,
                name='classification_loss')

            loss_dict = {
                str(localization_loss.op.name): localization_loss,
                str(classification_loss.op.name): classification_loss
            }
        return loss_dict
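
# A standalone sketch of the sampling map in `loss` above, with a hypothetical
# predicate standing in for self._minibatch_subsample_fn: the mapped function
# sees one image's (targets, weights) pair and returns a per-anchor boolean
# indicator.
def _keep_weighted_anchors(args):
    per_image_targets, per_image_weights = args
    del per_image_targets  # unused by this toy predicate
    return per_image_weights > 0

batch_cls_targets = tf.placeholder(tf.float32, shape=[None, 8, 4])
batch_cls_weights = tf.placeholder(tf.float32, shape=[None, 8])
batch_sampled_indicator = tf.to_float(
    shape_utils.static_or_dynamic_map_fn(
        _keep_weighted_anchors,
        [batch_cls_targets, batch_cls_weights],
        dtype=tf.bool,
        parallel_iterations=16,
        back_prop=True))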