Example #1
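These snippets follow the TF1-era TensorFlow Object Detection API and are not self-contained. A minimal set of imports they appear to assume (the module path reflects that codebase, where position_sensitive_crop_regions lives in object_detection.utils.ops):

import numpy as np
import tensorflow as tf

from object_detection.utils import ops

slim = tf.contrib.slim  # used by Example #11
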
    def test_position_sensitive_with_global_pool_false_and_single_bin(self):
        num_spatial_bins = [1, 1]
        image_shape = [2, 3, 3, 4]
        crop_size = [1, 1]

        image = tf.random_uniform(image_shape)
        boxes = tf.random_uniform((6, 4))
        box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

        # Since a single bin is used and crop_size = [1, 1] (i.e., no crop
        # resize), the outputs are the same regardless of the global_pool value.
        ps_crop_and_pool = ops.position_sensitive_crop_regions(
            image,
            boxes,
            box_ind,
            crop_size,
            num_spatial_bins,
            global_pool=True)
        ps_crop = ops.position_sensitive_crop_regions(image,
                                                      boxes,
                                                      box_ind,
                                                      crop_size,
                                                      num_spatial_bins,
                                                      global_pool=False)

        with self.test_session() as sess:
            pooled_output, unpooled_output = sess.run(
                (ps_crop_and_pool, ps_crop))
            self.assertAllClose(pooled_output, unpooled_output)
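A quick way to see why this assertion holds: with num_spatial_bins = [1, 1] and crop_size = [1, 1], each crop is a single spatial cell, so mean-pooling over the spatial axes is an identity. A minimal NumPy sketch of that fact (hypothetical values, not the op's internals):

import numpy as np

crop = np.random.rand(6, 1, 1, 4)  # [num_boxes, 1, 1, depth], as in the test
pooled = np.mean(crop, axis=(1, 2), keepdims=True)
assert np.allclose(pooled, crop)  # pooling a 1x1 crop changes nothing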
Example #2
    def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
        num_spatial_bins = [2, 2]
        image_shape = [2, 2, 2, 4]
        crop_size = [2, 2]

        image = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2,
                            dtype=tf.float32,
                            shape=image_shape)

        # The first box covers the whole image; the second covers only the first row.
        boxes = tf.constant(np.array([[0., 0., 1., 1.], [0., 0., 0.5, 1.]]),
                            dtype=tf.float32)
        box_ind = tf.constant([0, 1], dtype=tf.int32)

        expected_output = []

        # Expected output for the box covering the whole image.
        expected_output.append(
            np.reshape(np.array([[4, 7], [10, 13]]), (1, 2, 2, 1)))

        # Expected output for the box covering only the first row.
        expected_output.append(
            np.reshape(np.array([[3, 6], [7, 10]]), (1, 2, 2, 1)))
        expected_output = np.concatenate(expected_output, axis=0)

        ps_crop = ops.position_sensitive_crop_regions(image,
                                                      boxes,
                                                      box_ind,
                                                      crop_size,
                                                      num_spatial_bins,
                                                      global_pool=False)

        with self.test_session() as sess:
            output = sess.run(ps_crop)
            self.assertAllEqual(output, expected_output)
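As a hand-check of the first expected value: channel 0 of image 0 holds [[1, 5], [9, 13]], and bin (0, 0) of the whole-image box takes a 1x1 crop of the top-left quadrant, which samples its center at pixel coordinates (0.25, 0.25). Bilinear interpolation there reproduces the 4 in expected_output (a sketch of the arithmetic, not the op's internals):

import numpy as np

ch0 = np.array([[1., 5.], [9., 13.]])  # channel 0 of the first image
y, x = 0.25, 0.25  # center of sub-box [0, 0, 0.5, 0.5], scaled by (H-1, W-1)
val = (ch0[0, 0] * (1 - y) * (1 - x) + ch0[0, 1] * (1 - y) * x +
       ch0[1, 0] * y * (1 - x) + ch0[1, 1] * y * x)
print(val)  # -> 4.0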
Example #3
    def test_position_sensitive_with_single_bin(self):
        num_spatial_bins = [1, 1]
        image_shape = [2, 3, 3, 4]
        crop_size = [2, 2]

        image = tf.random_uniform(image_shape)
        boxes = tf.random_uniform((6, 4))
        box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

        # When a single bin is used, position-sensitive crop and pool should be
        # the same as non-position-sensitive crop and pool.
        crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
        crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

        ps_crop_and_pool = ops.position_sensitive_crop_regions(
            image,
            boxes,
            box_ind,
            crop_size,
            num_spatial_bins,
            global_pool=True)

        with self.test_session() as sess:
            expected_output, output = sess.run(
                (crop_and_pool, ps_crop_and_pool))
            self.assertAllClose(output, expected_output)
Example #4
    def test_position_sensitive_with_equal_channels(self):
        num_spatial_bins = [2, 2]
        image_shape = [1, 3, 3, 4]
        crop_size = [2, 2]

        image = tf.constant(range(1, 3 * 3 + 1),
                            dtype=tf.float32,
                            shape=[1, 3, 3, 1])
        tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
        boxes = tf.random_uniform((3, 4))
        box_ind = tf.constant([0, 0, 0], dtype=tf.int32)

        # All channels are equal, so position-sensitive crop and resize should
        # behave like the usual crop and resize on a single channel.
        crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
        crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

        ps_crop_and_pool = ops.position_sensitive_crop_regions(
            tiled_image,
            boxes,
            box_ind,
            crop_size,
            num_spatial_bins,
            global_pool=True)

        with self.test_session() as sess:
            expected_output, output = sess.run(
                (crop_and_pool, ps_crop_and_pool))
            self.assertAllClose(output, expected_output)
Example #5
    def test_position_sensitive(self):
        num_spatial_bins = [3, 2]
        image_shape = [1, 3, 2, 6]

        # First channel is 1's, second channel is 2's, etc.
        image = tf.constant(list(range(1, 3 * 2 + 1)) * 6,
                            dtype=tf.float32,
                            shape=image_shape)
        boxes = tf.random_uniform((2, 4))
        box_ind = tf.constant([0, 0], dtype=tf.int32)

        # The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
        # before averaging.
        expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])

        for crop_size_mult in range(1, 3):
            crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
            ps_crop_and_pool = ops.position_sensitive_crop_regions(
                image,
                boxes,
                box_ind,
                crop_size,
                num_spatial_bins,
                global_pool=True)

            with self.test_session() as sess:
                output = sess.run(ps_crop_and_pool)
                self.assertAllClose(output, expected_output)
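The pooled value 3.5 is just the mean of the six per-bin values: with one channel per bin, bin (i, j) reads channel 2*i + j, which is constant at 2*i + j + 1, so global pooling averages [[1, 2], [3, 4], [5, 6]]. In NumPy:

import numpy as np

bins = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
print(np.mean(bins))  # -> 3.5, for either box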
Example #6
    def test_raise_value_error_on_num_bins_less_than_one(self):
        num_spatial_bins = [1, -1]
        image_shape = [1, 1, 1, 2]
        crop_size = [2, 2]

        image = tf.constant(1, dtype=tf.float32, shape=image_shape)
        boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
        box_ind = tf.constant([0], dtype=tf.int32)

        with self.assertRaisesRegexp(ValueError,
                                     'num_spatial_bins should be >= 1'):
            ops.position_sensitive_crop_regions(image,
                                                boxes,
                                                box_ind,
                                                crop_size,
                                                num_spatial_bins,
                                                global_pool=True)
Example #7
    def test_raise_value_error_on_non_square_block_size(self):
        num_spatial_bins = [3, 2]
        image_shape = [1, 3, 2, 6]
        crop_size = [6, 2]

        image = tf.constant(1, dtype=tf.float32, shape=image_shape)
        boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
        box_ind = tf.constant([0], dtype=tf.int32)

        with self.assertRaisesRegexp(
                ValueError, 'Only support square bin crop size for now.'):
            ops.position_sensitive_crop_regions(image,
                                                boxes,
                                                box_ind,
                                                crop_size,
                                                num_spatial_bins,
                                                global_pool=False)
Example #8
    def test_raise_value_error_on_non_divisible_num_channels(self):
        num_spatial_bins = [2, 2]
        image_shape = [1, 1, 1, 5]
        crop_size = [2, 2]

        image = tf.constant(1, dtype=tf.float32, shape=image_shape)
        boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
        box_ind = tf.constant([0], dtype=tf.int32)

        with self.assertRaisesRegexp(
                ValueError,
                'Dimension size must be evenly divisible by 4 but is 5'):
            ops.position_sensitive_crop_regions(image,
                                                boxes,
                                                box_ind,
                                                crop_size,
                                                num_spatial_bins,
                                                global_pool=True)
Example #9
    def test_raise_value_error_on_non_divisible_crop_size(self):
        num_spatial_bins = [2, 3]
        image_shape = [1, 1, 1, 6]
        crop_size = [3, 2]

        image = tf.constant(1, dtype=tf.float32, shape=image_shape)
        boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
        box_ind = tf.constant([0], dtype=tf.int32)

        with self.assertRaisesRegexp(
                ValueError,
                'crop_size should be divisible by num_spatial_bins'):
            ops.position_sensitive_crop_regions(image,
                                                boxes,
                                                box_ind,
                                                crop_size,
                                                num_spatial_bins,
                                                global_pool=True)
Example #10
    def test_position_sensitive_with_global_pool_false_and_do_global_pool(
            self):
        num_spatial_bins = [3, 2]
        image_shape = [1, 3, 2, 6]
        num_boxes = 2

        # First channel is 1's, second channel is 2's, etc.
        image = tf.constant(list(range(1, 3 * 2 + 1)) * 6,
                            dtype=tf.float32,
                            shape=image_shape)
        boxes = tf.random_uniform((num_boxes, 4))
        box_ind = tf.constant([0, 0], dtype=tf.int32)

        expected_output = []

        # Expected output when crop_size = [3, 2].
        expected_output.append(
            np.mean(np.expand_dims(np.tile(np.array([[1, 2], [3, 4], [5, 6]]),
                                           (num_boxes, 1, 1)),
                                   axis=-1),
                    axis=(1, 2),
                    keepdims=True))

        # Expected output when crop_size = [6, 4].
        expected_output.append(
            np.mean(
                np.expand_dims(
                    np.tile(
                        np.array([[1, 1, 2, 2], [1, 1, 2, 2],
                                  [3, 3, 4, 4], [3, 3, 4, 4],
                                  [5, 5, 6, 6], [5, 5, 6, 6]]),
                        (num_boxes, 1, 1)),
                    axis=-1),
                axis=(1, 2),
                keepdims=True))

        for crop_size_mult in range(1, 3):
            crop_size = [3 * crop_size_mult, 2 * crop_size_mult]

            # Perform global_pooling after running the function with
            # global_pool=False.
            ps_crop = ops.position_sensitive_crop_regions(image,
                                                          boxes,
                                                          box_ind,
                                                          crop_size,
                                                          num_spatial_bins,
                                                          global_pool=False)
            ps_crop_and_pool = tf.reduce_mean(ps_crop,
                                              reduction_indices=(1, 2),
                                              keep_dims=True)

            with self.test_session() as sess:
                output = sess.run(ps_crop_and_pool)

            self.assertAllEqual(output, expected_output[crop_size_mult - 1])
Example #11
  def _predict(self, image_features, num_predictions_per_location,
               proposal_boxes):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.
      num_predictions_per_location: A list of integers representing the number
        of box predictions to be made per spatial location for each feature map.
        Currently, this must be set to [1], or an error will be raised.
      proposal_boxes: A float tensor of shape [batch_size, num_proposals,
        box_code_size].

    Returns:
      A dictionary containing:
        box_encodings: A float tensor of shape
          [batch_size * num_proposals, 1, num_classes, code_size] representing
          the location of the objects.
        class_predictions_with_background: A float tensor of shape
          [batch_size * num_proposals, 1, num_classes + 1] representing the
          class predictions for the proposals.

    Raises:
      ValueError: if num_predictions_per_location is not 1 or if
        len(image_features) is not 1.
    """
    if (len(num_predictions_per_location) != 1 or
        num_predictions_per_location[0] != 1):
      raise ValueError('Currently RfcnBoxPredictor only supports '
                       'predicting a single box per class per location.')
    if len(image_features) != 1:
      raise ValueError('length of `image_features` must be 1. Found {}'.
                       format(len(image_features)))
    image_feature = image_features[0]
    num_predictions_per_location = num_predictions_per_location[0]
    batch_size = tf.shape(proposal_boxes)[0]
    num_boxes = tf.shape(proposal_boxes)[1]
    def get_box_indices(proposals):
      proposals_shape = proposals.get_shape().as_list()
      if any(dim is None for dim in proposals_shape):
        proposals_shape = tf.shape(proposals)
      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
      multiplier = tf.expand_dims(
          tf.range(start=0, limit=proposals_shape[0]), 1)
      return tf.reshape(ones_mat * multiplier, [-1])

    net = image_feature
    with slim.arg_scope(self._conv_hyperparams):
      net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
      # Location predictions.
      location_feature_map_depth = (self._num_spatial_bins[0] *
                                    self._num_spatial_bins[1] *
                                    self.num_classes *
                                    self._box_code_size)
      location_feature_map = slim.conv2d(net, location_feature_map_depth,
                                         [1, 1], activation_fn=None,
                                         scope='refined_locations')
      box_encodings = ops.position_sensitive_crop_regions(
          location_feature_map,
          boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
          box_ind=get_box_indices(proposal_boxes),
          crop_size=self._crop_size,
          num_spatial_bins=self._num_spatial_bins,
          global_pool=True)
      box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
      box_encodings = tf.reshape(box_encodings,
                                 [batch_size * num_boxes, 1, self.num_classes,
                                  self._box_code_size])

      # Class predictions.
      total_classes = self.num_classes + 1  # Account for background class.
      class_feature_map_depth = (self._num_spatial_bins[0] *
                                 self._num_spatial_bins[1] *
                                 total_classes)
      class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
                                      activation_fn=None,
                                      scope='class_predictions')
      class_predictions_with_background = ops.position_sensitive_crop_regions(
          class_feature_map,
          boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
          box_ind=get_box_indices(proposal_boxes),
          crop_size=self._crop_size,
          num_spatial_bins=self._num_spatial_bins,
          global_pool=True)
      class_predictions_with_background = tf.squeeze(
          class_predictions_with_background, squeeze_dims=[1, 2])
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size * num_boxes, 1, total_classes])

    return {BOX_ENCODINGS: box_encodings,
            CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background}
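
For reference, get_box_indices above simply tags every flattened proposal with the index of the batch element it came from, matching the hand-written box_ind constants in the tests (e.g. [0, 0, 0, 1, 1, 1]). A NumPy analogue with hypothetical sizes:

import numpy as np

batch_size, num_boxes = 2, 3
ones_mat = np.ones((batch_size, num_boxes), dtype=np.int32)
multiplier = np.arange(batch_size, dtype=np.int32)[:, None]
print(np.reshape(ones_mat * multiplier, [-1]))  # -> [0 0 0 1 1 1]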