Example No. 1
    def test_fails_with_nested_input(self):
        def fn(input_tensor):
            return input_tensor

        input_tensor1 = tf.constant([1])
        input_tensor2 = tf.constant([2])
        with self.assertRaisesRegexp(
                ValueError, '`elems` must be a Tensor or list of Tensors.'):
            shape_utils.static_or_dynamic_map_fn(
                fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
Example No. 2
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a tensor of shape [None, num_boxes, 4] in
      normalized coordinates. The dtype of this tensor must support tf.multiply.
    image_shape: a tensor of shape [4] containing the image shape, with same
      dtype as `normalized_boxes`.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates, with same
      dtype as `normalized_boxes`.
  """
  x_scale = tf.cast(image_shape[2], normalized_boxes.dtype)
  y_scale = tf.cast(image_shape[1], normalized_boxes.dtype)
  def _to_absolute_coordinates(normalized_boxes):
    y_min, x_min, y_max, x_max = tf.split(
        value=normalized_boxes, num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxes = tf.concat([y_min, x_min, y_max, x_max], 1)
    return scaled_boxes

  absolute_boxes = shape_utils.static_or_dynamic_map_fn(
      _to_absolute_coordinates,
      elems=normalized_boxes,
      dtype=normalized_boxes.dtype,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
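
A minimal usage sketch for the function above, assuming `shape_utils` from the object detection utils is importable; the box values and image shape are made up for illustration. With a 100x200 image, the box [0, 0, 0.5, 0.5] scales to [0, 0, 50, 100].

import tensorflow as tf

# Hypothetical inputs: a batch of 2 images with 2 boxes each, in normalized
# [y_min, x_min, y_max, x_max] coordinates.
normalized_boxes = tf.constant(
    [[[0.0, 0.0, 0.5, 0.5], [0.25, 0.25, 0.75, 0.75]],
     [[0.1, 0.1, 0.9, 0.9], [0.0, 0.0, 1.0, 1.0]]], dtype=tf.float32)
# [batch, height, width, channels]; y is scaled by the height, x by the width.
image_shape = tf.constant([2, 100, 200, 3], dtype=tf.float32)

absolute_boxes = normalized_to_image_coordinates(normalized_boxes, image_shape)

with tf.Session() as sess:
  print(sess.run(absolute_boxes))  # first box -> [0., 0., 50., 100.]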
Example No. 3
    def test_with_multiple_dynamic_shapes(self):
        def fn(elems):
            input_tensor, scalar_index_tensor = elems
            return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]),
                              [])

        input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
        scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[1, 2, 3], [4, 5, -1],
                                                  [0, 6, 9]],
                                   scalar_index_tensor: [[0], [2], [1]],
                               })
            result2 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[-1, 1, 0], [3, 9, 30]],
                                   scalar_index_tensor: [[1], [0]]
                               })
            self.assertAllEqual(result1, [1, -1, 6])
            self.assertAllEqual(result2, [1, 3])
Example No. 4
    def test_with_static_shape(self):
        def fn(input_tensor):
            return tf.reduce_sum(input_tensor)

        input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [3, 4, 4])
Example No. 5
def batch_position_sensitive_crop_regions(images,
                                          boxes,
                                          crop_size,
                                          num_spatial_bins,
                                          global_pool,
                                          parallel_iterations=64):
  """Position sensitive crop with batches of images and boxes.

  This op is exactly like `position_sensitive_crop_regions` below but operates
  on batches of images and boxes. See `position_sensitive_crop_regions` function
  below for the operation applied per batch element.

  Args:
    images: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      that the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: See `position_sensitive_crop_regions` below.
    num_spatial_bins: See `position_sensitive_crop_regions` below.
    global_pool: See `position_sensitive_crop_regions` below.
    parallel_iterations: Number of batch items to process in parallel.

  Returns:
    A float32 tensor of position-sensitive cropped features, batched over the
    first dimension of `images` and `boxes`. See
    `position_sensitive_crop_regions` below for the per-element output shape.
  """
  def _position_sensitive_crop_fn(inputs):
    images, boxes = inputs
    return position_sensitive_crop_regions(
        images,
        boxes,
        crop_size=crop_size,
        num_spatial_bins=num_spatial_bins,
        global_pool=global_pool)

  return shape_utils.static_or_dynamic_map_fn(
      _position_sensitive_crop_fn,
      elems=[images, boxes],
      dtype=tf.float32,
      parallel_iterations=parallel_iterations)
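
A hedged sketch of how this batched wrapper might be invoked, assuming the per-image `position_sensitive_crop_regions` referenced in the docstring is available in the same module; the tensor sizes below are illustrative only.

# Hypothetical batch of 2 images, 24x24 with 18 channels. For a
# position-sensitive crop, the channel depth must be a multiple of
# num_spatial_bins[0] * num_spatial_bins[1] (here 3 * 3 = 9), and each
# crop_size dimension a multiple of the corresponding number of bins.
images = tf.random_uniform([2, 24, 24, 18], dtype=tf.float32)
# One box per image in normalized [y1, x1, y2, x2] coordinates.
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.2, 0.2, 0.8, 0.8]]], dtype=tf.float32)

ps_features = batch_position_sensitive_crop_regions(
    images, boxes, crop_size=[6, 6], num_spatial_bins=[3, 3],
    global_pool=True)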
Example No. 6
    def test_with_multiple_static_shapes(self):
        def fn(elems):
            input_tensor, scalar_index_tensor = elems
            return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]),
                              [])

        input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                                   dtype=tf.float32)
        scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [1, -1, 6])
Example No. 7
    def test_with_dynamic_shape(self):
        def fn(input_tensor):
            return tf.reduce_sum(input_tensor)

        input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(
                map_fn_output,
                feed_dict={input_tensor: [[1, 2], [3, 1], [0, 4]]})
            result2 = sess.run(map_fn_output,
                               feed_dict={input_tensor: [[-1, 1], [0, 9]]})
            self.assertAllEqual(result1, [3, 4, 4])
            self.assertAllEqual(result2, [0, 9])
Example No. 8
def _tf_example_input_placeholder():
    """Returns input that accepts a batch of strings with tf examples.

  Returns:
    a tuple of input placeholder and the output decoded images.
  """
    batch_tf_example_placeholder = tf.placeholder(tf.string,
                                                  shape=[None],
                                                  name='tf_example')

    def decode(tf_example_string_tensor):
        tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
            tf_example_string_tensor)
        image_tensor = tensor_dict[fields.InputDataFields.image]
        return image_tensor

    return (batch_tf_example_placeholder,
            shape_utils.static_or_dynamic_map_fn(
                decode,
                elems=batch_tf_example_placeholder,
                dtype=tf.uint8,
                parallel_iterations=32,
                back_prop=False))
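
A hypothetical driver for the placeholder above; it assumes the decoder reads the standard `image/encoded` and `image/format` tf.Example keys, and the JPEG path is a placeholder.

placeholder, decoded_images = _tf_example_input_placeholder()

with tf.Session() as sess:
  # Read an encoded JPEG from disk (illustrative path) and wrap it in a
  # serialized tf.Example, then feed a batch of one example.
  with tf.gfile.GFile('/path/to/image.jpg', 'rb') as fid:
    encoded_jpg = fid.read()
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[encoded_jpg])),
      'image/format': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'jpeg'])),
  }))
  images = sess.run(decoded_images,
                    feed_dict={placeholder: [example.SerializeToString()]})
  print(images.shape)  # (1, height, width, 3), dtype uint8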
Example No. 9
def result_dict_for_batched_example(images,
                                    keys,
                                    detections,
                                    groundtruth=None,
                                    class_agnostic=False,
                                    scale_to_absolute=False,
                                    original_image_spatial_shapes=None,
                                    true_image_shapes=None,
                                    max_gt_boxes=None):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
    keys: A [batch_size] string tensor with image identifier.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor
        of boxes, in normalized coordinates.
      'groundtruth_classes':  [batch_size, max_number_of_boxes] int64 tensor of
        1-indexed classes.
      'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of
        bbox area. (Optional)
      'groundtruth_is_crowd':[batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_difficult': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_group_of': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_instance_masks': 4D int64 tensor of instance
        masks (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.
    original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
      used to resize the image. When set to None, the image size is retained.
    true_image_shapes: A 2D int32 tensor of shape [batch_size, 3]
      containing the size of the unpadded original_image.
    max_gt_boxes: [batch_size] tensor representing the maximum number of
      groundtruth boxes to pad.

  Returns:
    A dictionary with:
    'original_image': A [batch_size, H, W, C] uint8 image tensor.
    'original_image_spatial_shape': A [batch_size, 2] tensor containing the
      original image sizes.
    'true_image_shape': A [batch_size, 3] tensor containing the size of
      the unpadded original_image.
    'key': A [batch_size] string tensor with image identifier.
    'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
      in normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
      binarized masks, reframed to full image masks.
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
      classes. (Optional)
    'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
      area. (Optional)
    'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 4D int64 tensor of instance masks
      (Optional).
    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
      of groundtruth boxes per image.

  Raises:
    ValueError: if original_image_spatial_shapes is not a 2D int32 tensor of
      shape [batch_size, 2].
    ValueError: if true_image_shapes is not a 2D int32 tensor of shape
      [batch_size, 3].
  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields
    if original_image_spatial_shapes is None:
        original_image_spatial_shapes = tf.tile(
            tf.expand_dims(tf.shape(images)[1:3], axis=0),
            multiples=[tf.shape(images)[0], 1])
    else:
        if (len(original_image_spatial_shapes.shape) != 2
                or original_image_spatial_shapes.shape[1] != 2):
            raise ValueError(
                '`original_image_spatial_shape` should be a 2D tensor of shape '
                '[batch_size, 2].')

    if true_image_shapes is None:
        true_image_shapes = tf.tile(tf.expand_dims(tf.shape(images)[1:4],
                                                   axis=0),
                                    multiples=[tf.shape(images)[0], 1])
    else:
        if (len(true_image_shapes.shape) != 2
                or true_image_shapes.shape[1] != 3):
            raise ValueError('`true_image_shapes` should be a 2D tensor of '
                             'shape [batch_size, 3].')

    output_dict = {
        input_data_fields.original_image:
        images,
        input_data_fields.key:
        keys,
        input_data_fields.original_image_spatial_shape:
        (original_image_spatial_shapes),
        input_data_fields.true_image_shape:
        true_image_shapes
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes]
    detection_scores = detections[detection_fields.detection_scores]
    num_detections = tf.cast(detections[detection_fields.num_detections],
                             dtype=tf.int32)

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes]) +
            label_id_offset)

    if scale_to_absolute:
        output_dict[detection_fields.detection_boxes] = (
            shape_utils.static_or_dynamic_map_fn(
                _scale_box_to_absolute,
                elems=[detection_boxes, original_image_spatial_shapes],
                dtype=tf.float32))
    else:
        output_dict[detection_fields.detection_boxes] = detection_boxes
    output_dict[detection_fields.detection_classes] = detection_classes
    output_dict[detection_fields.detection_scores] = detection_scores
    output_dict[detection_fields.num_detections] = num_detections

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks]
        # TODO(rathodv): This should be done in model's postprocess
        # function ideally.
        output_dict[detection_fields.detection_masks] = (
            shape_utils.static_or_dynamic_map_fn(
                _resize_detection_masks,
                elems=[
                    detection_boxes, detection_masks,
                    original_image_spatial_shapes
                ],
                dtype=tf.uint8))

    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[detection_fields.detection_keypoints]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            output_dict[detection_fields.detection_keypoints] = (
                shape_utils.static_or_dynamic_map_fn(
                    _scale_keypoint_to_absolute,
                    elems=[detection_keypoints, original_image_spatial_shapes],
                    dtype=tf.float32))

    if groundtruth:
        if max_gt_boxes is None:
            if input_data_fields.num_groundtruth_boxes in groundtruth:
                max_gt_boxes = groundtruth[
                    input_data_fields.num_groundtruth_boxes]
            else:
                raise ValueError(
                    'max_gt_boxes must be provided when processing batched examples.'
                )

        if input_data_fields.groundtruth_instance_masks in groundtruth:
            masks = groundtruth[input_data_fields.groundtruth_instance_masks]
            groundtruth[input_data_fields.groundtruth_instance_masks] = (
                shape_utils.static_or_dynamic_map_fn(
                    _resize_groundtruth_masks,
                    elems=[masks, original_image_spatial_shapes],
                    dtype=tf.uint8))

        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            output_dict[input_data_fields.groundtruth_boxes] = (
                shape_utils.static_or_dynamic_map_fn(
                    _scale_box_to_absolute,
                    elems=[groundtruth_boxes, original_image_spatial_shapes],
                    dtype=tf.float32))

        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

        output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes

    return output_dict
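
A rough usage sketch for the function above, using dummy tensors; it assumes the `fields.DetectionResultFields` key names and the surrounding eval_util helpers are available. The values are placeholders, not real model output.

batch_size, max_detections = 2, 10
images = tf.zeros([batch_size, 100, 100, 3], dtype=tf.uint8)
keys = tf.constant(['image_0', 'image_1'])
# Dummy detections in the format produced by DetectionModel.postprocess().
detections = {
    fields.DetectionResultFields.detection_boxes:
        tf.zeros([batch_size, max_detections, 4], dtype=tf.float32),
    fields.DetectionResultFields.detection_scores:
        tf.zeros([batch_size, max_detections], dtype=tf.float32),
    fields.DetectionResultFields.detection_classes:
        tf.zeros([batch_size, max_detections], dtype=tf.float32),
    fields.DetectionResultFields.num_detections:
        tf.constant([max_detections, max_detections], dtype=tf.float32),
}
eval_dict = result_dict_for_batched_example(images, keys, detections)
# eval_dict now contains 'original_image', 'key', the image shape entries and
# the detection_* tensors, with detection classes shifted to be 1-indexed.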