Example #1
def test_with_invalid_scores_field(self):
  corners = tf.constant([[0, 0, 1, 1],
                         [0, 0.1, 1, 1.1],
                         [0, -0.1, 1, 0.9],
                         [0, 10, 1, 11],
                         [0, 10.1, 1, 11.1],
                         [0, 100, 1, 101]], tf.float32)
  boxes = box_list.BoxList(corners)
  boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
  iou_thresh = .5
  max_output_size = 3
  with self.assertRaisesWithPredicateMatch(ValueError,
                                           'Dimensions must be equal'):
    box_list_ops.non_max_suppression(boxes, iou_thresh, max_output_size)
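
The ValueError above fires at graph-construction time: the boxlist holds six boxes but the 'scores' field has only five entries, and box_list_ops.non_max_suppression forwards both to TensorFlow's core NMS op, which requires exactly one score per box. A minimal sketch of the valid case against that core op (assuming TF 2.x eager execution):

import tensorflow as tf

boxes = tf.constant([[0, 0, 1, 1],
                     [0, 0.1, 1, 1.1],
                     [0, 100, 1, 101]], tf.float32)
scores = tf.constant([.9, .75, .3])  # exactly one score per box
selected = tf.image.non_max_suppression(
    boxes, scores, max_output_size=3, iou_threshold=0.5)
print(selected.numpy())  # -> [0 2]: box 1 overlaps box 0 with IoU ~0.82
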
Example #2
def graph_fn():
  corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
  boxes = box_list.BoxList(corners)
  boxes.add_field('scores', tf.constant(10 * [.9]))
  iou_thresh = .5
  max_output_size = 3
  nms = box_list_ops.non_max_suppression(
      boxes, iou_thresh, max_output_size)
  return nms.get()
Example #3
def test_select_from_ten_identical_boxes(self):
    corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
    boxes = box_list.BoxList(corners)
    boxes.add_field('scores', tf.constant(10 * [.9]))
    iou_thresh = .5
    max_output_size = 3

    exp_nms = [[0, 0, 1, 1]]
    nms = box_list_ops.non_max_suppression(boxes, iou_thresh,
                                           max_output_size)
    with self.test_session() as sess:
        nms_output = sess.run(nms.get())
        self.assertAllClose(nms_output, exp_nms)
Example #4
  def test_select_from_ten_identical_boxes(self):
    corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
    boxes = box_list.BoxList(corners)
    boxes.add_field('scores', tf.constant(10 * [.9]))
    iou_thresh = .5
    max_output_size = 3

    exp_nms = [[0, 0, 1, 1]]
    nms = box_list_ops.non_max_suppression(
        boxes, iou_thresh, max_output_size)
    with self.test_session() as sess:
      nms_output = sess.run(nms.get())
      self.assertAllClose(nms_output, exp_nms)
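
Both versions of this test rely on the same geometric fact: ten identical boxes have pairwise IoU 1.0, so whichever box is visited first (all scores tie at .9) suppresses the other nine, and a single box survives regardless of max_output_size. A quick check against the core op (assuming TF 2.x eager execution):

import tensorflow as tf

boxes = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
scores = tf.constant(10 * [.9])
selected = tf.image.non_max_suppression(
    boxes, scores, max_output_size=3, iou_threshold=0.5)
print(selected.numpy())  # -> [0]: the nine duplicates are all suppressed
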
Example #5
def graph_fn():
  corners = tf.constant([[0, 0, 1, 1],
                         [0, 0.1, 1, 1.1],
                         [0, -0.1, 1, 0.9],
                         [0, 10, 1, 11],
                         [0, 10.1, 1, 11.1],
                         [0, 100, 1, 101]], tf.float32)
  boxes = box_list.BoxList(corners)
  boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
  iou_thresh = .5
  max_output_size = 30
  nms = box_list_ops.non_max_suppression(
      boxes, iou_thresh, max_output_size)
  return nms.get()
Example #6
def test_select_at_most_thirty_boxes_from_three_clusters(self):
    corners = tf.constant(
        [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 10, 1, 11],
         [0, 10.1, 1, 11.1], [0, 100, 1, 101]], tf.float32)
    boxes = box_list.BoxList(corners)
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
    iou_thresh = .5
    max_output_size = 30

    exp_nms = [[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]]
    nms = box_list_ops.non_max_suppression(boxes, iou_thresh,
                                           max_output_size)
    with self.test_session() as sess:
        nms_output = sess.run(nms.get())
        self.assertAllClose(nms_output, exp_nms)
Example #7
def test_with_invalid_scores_field(self):
    corners = tf.constant(
        [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 10, 1, 11],
         [0, 10.1, 1, 11.1], [0, 100, 1, 101]], tf.float32)
    boxes = box_list.BoxList(corners)
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
    iou_thresh = .5
    max_output_size = 3
    nms = box_list_ops.non_max_suppression(boxes, iou_thresh,
                                           max_output_size)
    with self.test_session() as sess:
        with self.assertRaisesWithPredicateMatch(
                errors.InvalidArgumentError,
                'scores has incompatible shape'):
            sess.run(nms.get())
Example #8
def test_with_invalid_scores_field(self):
  corners = tf.constant([[0, 0, 1, 1],
                         [0, 0.1, 1, 1.1],
                         [0, -0.1, 1, 0.9],
                         [0, 10, 1, 11],
                         [0, 10.1, 1, 11.1],
                         [0, 100, 1, 101]], tf.float32)
  boxes = box_list.BoxList(corners)
  boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
  iou_thresh = .5
  max_output_size = 3
  nms = box_list_ops.non_max_suppression(
      boxes, iou_thresh, max_output_size)
  with self.test_session() as sess:
    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError, 'scores has incompatible shape'):
      sess.run(nms.get())
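
Note the contrast with Example #1: all three invalid-scores tests feed five scores to six boxes, but Example #1 (a newer revision) expects a ValueError during graph construction, where static shape inference already reports 'Dimensions must be equal', while Examples #7 and #8 (older revisions) only hit the check inside the NMS kernel, so the mismatch surfaces as errors.InvalidArgumentError at sess.run() time. Under eager execution the same mismatch fails immediately; a small sketch (the caught type varies across TF versions, hence the tuple):

import tensorflow as tf

boxes = tf.constant([[0, 0, 1, 1], [0, 10, 1, 11]], tf.float32)
bad_scores = tf.constant([.9])  # two boxes, one score
try:
  tf.image.non_max_suppression(boxes, bad_scores, max_output_size=1)
except (ValueError, tf.errors.InvalidArgumentError) as e:
  print(type(e).__name__)
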
Example #9
  def test_select_at_most_two_boxes_from_three_clusters(self):
    corners = tf.constant([[0, 0, 1, 1],
                           [0, 0.1, 1, 1.1],
                           [0, -0.1, 1, 0.9],
                           [0, 10, 1, 11],
                           [0, 10.1, 1, 11.1],
                           [0, 100, 1, 101]], tf.float32)
    boxes = box_list.BoxList(corners)
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
    iou_thresh = .5
    max_output_size = 2

    exp_nms = [[0, 10, 1, 11],
               [0, 0, 1, 1]]
    nms = box_list_ops.non_max_suppression(
        boxes, iou_thresh, max_output_size)
    with self.test_session() as sess:
      nms_output = sess.run(nms.get())
      self.assertAllClose(nms_output, exp_nms)
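
The three clusters make the selection order visible: NMS walks boxes by descending score, so the .95 box from the second cluster is kept first, then the .9 box from the first cluster, and max_output_size=2 cuts the list before the .3 box from the third cluster is reached (compare Example #6, where max_output_size=30 lets all three through). The same selection against the core op (assuming TF 2.x eager execution):

import tensorflow as tf

boxes = tf.constant([[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
                     [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]],
                    tf.float32)
scores = tf.constant([.9, .75, .6, .95, .5, .3])
selected = tf.image.non_max_suppression(
    boxes, scores, max_output_size=2, iou_threshold=0.5)
print(selected.numpy())  # -> [3 0]: the top-scoring box of each kept cluster
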
Example #10
def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
                         attention_model_fn, stride_factor):
    """Extract local features for input image.

  Args:
    image: image tensor of type tf.uint8 with shape [h, w, channels].
    image_scales: 1D float tensor which contains float scales used for image
      pyramid construction.
    max_feature_num: int tensor denoting the maximum number of selected
      feature points.
    abs_thres: float tensor denoting the score threshold for feature selection.
    iou: float scalar denoting the iou threshold for NMS.
    attention_model_fn: model function. Follows the signature:
      * Args:
        * `images`: Image tensor which is re-scaled.
      * Returns:
        * `attention_prob`: attention map after the non-linearity.
        * `feature_map`: feature map after ResNet convolution.
    stride_factor: integer accounting for striding after block3.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    features: [N, depth] float tensor.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with keypoints detected with other techniques, for
      example Congas.
    scores: [N, 1] float tensor denoting the attention score.

  """
    original_image_shape_float = tf.gather(
        tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])

    image_tensor = gld.NormalizeImages(image,
                                       pixel_value_offset=128.0,
                                       pixel_value_scale=128.0)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Hard code the feature depth and receptive field parameters for now.
    # We need to revisit this once we change the architecture and selected
    # convolutional blocks to use as local features.
    rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]
    feature_depth = 1024

    def _ProcessSingleScale(scale_index, boxes, features, scales, scores):
        """Resizes the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run resize image, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.
    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.dtypes.cast(
            tf.round(original_image_shape_float * scale), tf.int32)
        resized_image = tf.image.resize(image_tensor, new_image_size)

        attention_prob, feature_map = attention_model_fn(resized_image)
        attention_prob = tf.squeeze(attention_prob, axis=[0])
        feature_map = tf.squeeze(feature_map, axis=[0])

        rf_boxes = feature_extractor.CalculateReceptiveBoxes(
            tf.shape(feature_map)[0],
            tf.shape(feature_map)[1], rf, stride, padding)

        # Re-project back to the original image space.
        rf_boxes = tf.divide(rf_boxes, scale)
        attention_prob = tf.reshape(attention_prob, [-1])
        feature_map = tf.reshape(feature_map, [-1, feature_depth])

        # Use attention score to select feature vectors.
        indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1])
        selected_boxes = tf.gather(rf_boxes, indices)
        selected_features = tf.gather(feature_map, indices)
        selected_scores = tf.gather(attention_prob, indices)
        selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

        # Concat with the previous result from different scales.
        boxes = tf.concat([boxes, selected_boxes], 0)
        features = tf.concat([features, selected_features], 0)
        scales = tf.concat([scales, selected_scales], 0)
        scores = tf.concat([scores, selected_scores], 0)

        return scale_index + 1, boxes, features, scales, scores

    output_boxes = tf.zeros([0, 4], dtype=tf.float32)
    output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
    output_scales = tf.zeros([0], dtype=tf.float32)
    output_scores = tf.zeros([0], dtype=tf.float32)

    # Process the first scale separately; the following scales will reuse the
    # graph variables.
    (_, output_boxes, output_features, output_scales,
     output_scores) = _ProcessSingleScale(0, output_boxes, output_features,
                                          output_scales, output_scores)

    i = tf.constant(1, dtype=tf.int32)
    num_scales = tf.shape(image_scales)[0]
    keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)

    (_, output_boxes, output_features, output_scales,
     output_scores) = tf.nest.map_structure(
         tf.stop_gradient,
         tf.while_loop(cond=keep_going,
                       body=_ProcessSingleScale,
                       loop_vars=[
                           i, output_boxes, output_features, output_scales,
                           output_scores
                       ],
                       shape_invariants=[
                           i.get_shape(),
                           tf.TensorShape([None, 4]),
                           tf.TensorShape([None, feature_depth]),
                           tf.TensorShape([None]),
                           tf.TensorShape([None])
                       ]))

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('features', output_features)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return final_boxes.get(), final_boxes.get_field(
        'features'), final_boxes.get_field('scales'), tf.expand_dims(
            final_boxes.get_field('scores'), 1)
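
For context, a hypothetical usage sketch of the function above. `my_attention_model`, the dummy image, and all parameter values are illustrative stand-ins (a real caller would pass a DELF attention model), not part of the original:

import tensorflow as tf

def my_attention_model(images):
  # Fake block3-like outputs: stride-32 maps with 1024 feature channels.
  # A real model_fn returns (attention_prob, feature_map) from a ResNet.
  n, m = tf.shape(images)[1] // 32, tf.shape(images)[2] // 32
  attention_prob = tf.random.uniform([1, n, m, 1])
  feature_map = tf.random.uniform([1, n, m, 1024])
  return attention_prob, feature_map

image = tf.zeros([480, 640, 3], dtype=tf.uint8)
boxes, features, feature_scales, scores = ExtractLocalFeatures(
    image,
    image_scales=tf.constant([0.5, 1.0, 2.0]),
    max_feature_num=1000,
    abs_thres=0.5,
    iou=1.0,
    attention_model_fn=my_attention_model,
    stride_factor=2.0)
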
Example #11
def ExtractLocalAndGlobalFeatures(image, image_scales, max_feature_num,
                                  abs_thres, global_scales_ind, iou, model_fn,
                                  stride_factor):
    """Extract local+global features for input image.

  Args:
    image: image tensor of type tf.uint8 with shape [h, w, channels].
    image_scales: 1D float tensor which contains float scales used for image
      pyramid construction.
    max_feature_num: int tensor denoting the maximum number of selected
      feature points.
    abs_thres: float tensor denoting the score threshold for feature selection.
    global_scales_ind: Global feature extraction happens only for a subset of
      `image_scales`, those with corresponding indices from this tensor.
    iou: float scalar denoting the iou threshold for NMS.
    model_fn: model function. Follows the signature:
      * Args:
        * `images`: Batched image tensor.
      * Returns:
        * `global_descriptors`: Global descriptors for input images.
        * `attention_prob`: Attention map after the non-linearity.
        * `feature_map`: Feature map after ResNet convolution.
    stride_factor: integer accounting for striding after block3.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    local_descriptors: [N, depth] float tensor.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with keypoints detected with other techniques, for
      example Congas.
    scores: [N, 1] float tensor denoting the attention score.
    global_descriptors: [S, D] float tensor, with the global descriptors for
      each scale; S is the number of scales, and D the global descriptor
      dimensionality.
  """
    original_image_shape_float = tf.gather(
        tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])
    image_tensor = gld.NormalizeImages(image,
                                       pixel_value_offset=128.0,
                                       pixel_value_scale=128.0)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Hard code the receptive field parameters for now.
    # We need to revisit this once we change the architecture and selected
    # convolutional blocks to use as local features.
    rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]

    def _ResizeAndExtract(scale_index):
        """Helper function to resize image then extract features.

    Args:
      scale_index: A valid index in image_scales.

    Returns:
      global_descriptor: [1,D] tensor denoting the extracted global descriptor.
      boxes: Box tensor with the shape of [K, 4].
      local_descriptors: Local descriptor tensor with the shape of [K, depth].
      scales: Scale tensor with the shape of [K].
      scores: Score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.dtypes.cast(
            tf.round(original_image_shape_float * scale), tf.int32)
        resized_image = tf.image.resize(image_tensor, new_image_size)
        global_descriptor, attention_prob, feature_map = model_fn(
            resized_image)

        attention_prob = tf.squeeze(attention_prob, axis=[0])
        feature_map = tf.squeeze(feature_map, axis=[0])

        # Compute RF boxes and re-project them to the original image space.
        rf_boxes = feature_extractor.CalculateReceptiveBoxes(
            tf.shape(feature_map)[0],
            tf.shape(feature_map)[1], rf, stride, padding)
        rf_boxes = tf.divide(rf_boxes, scale)

        attention_prob = tf.reshape(attention_prob, [-1])
        feature_map = tf.reshape(feature_map, [-1, tf.shape(feature_map)[2]])

        # Use attention score to select local features.
        indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1])
        boxes = tf.gather(rf_boxes, indices)
        local_descriptors = tf.gather(feature_map, indices)
        scores = tf.gather(attention_prob, indices)
        scales = tf.ones_like(scores, tf.float32) / scale

        return global_descriptor, boxes, local_descriptors, scales, scores

    # TODO(andrearaujo): Currently, a global feature is extracted even for scales
    # which are not using it. The obtained result is correct; however, feature
    # extraction is slower than expected. We should try to fix this in the future.

    # Run first scale.
    (output_global_descriptors, output_boxes, output_local_descriptors,
     output_scales, output_scores) = _ResizeAndExtract(0)
    if not tf.reduce_any(tf.equal(global_scales_ind, 0)):
        # If the global descriptor does not use the first scale, clear it out.
        output_global_descriptors = tf.zeros(
            [0, tf.shape(output_global_descriptors)[1]])

    # Loop over subsequent scales.
    num_scales = tf.shape(image_scales)[0]
    for scale_index in tf.range(1, num_scales):
        # Allow an undefined number of global feature scales to be extracted.
        tf.autograph.experimental.set_loop_options(
            shape_invariants=[(output_global_descriptors,
                               tf.TensorShape([None, None]))])

        (global_descriptor, boxes, local_descriptors, scales,
         scores) = _ResizeAndExtract(scale_index)
        output_boxes = tf.concat([output_boxes, boxes], 0)
        output_local_descriptors = tf.concat(
            [output_local_descriptors, local_descriptors], 0)
        output_scales = tf.concat([output_scales, scales], 0)
        output_scores = tf.concat([output_scores, scores], 0)
        if tf.reduce_any(tf.equal(global_scales_ind, scale_index)):
            output_global_descriptors = tf.concat(
                [output_global_descriptors, global_descriptor], 0)

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('local_descriptors', output_local_descriptors)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return (final_boxes.get(), final_boxes.get_field('local_descriptors'),
            final_boxes.get_field('scales'),
            tf.expand_dims(final_boxes.get_field('scores'),
                           1), output_global_descriptors)
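
A hypothetical usage sketch; `my_delg_model` and the dummy shapes below are illustrative stand-ins for a real DELG model_fn, not part of the original. Since the scale loop relies on AutoGraph (tf.autograph.experimental.set_loop_options), the call is wrapped in tf.function, as the DELF/DELG extractor does:

import tensorflow as tf

def my_delg_model(images):
  # Fake DELG outputs: a global descriptor plus attention and feature maps.
  n, m = tf.shape(images)[1] // 32, tf.shape(images)[2] // 32
  global_descriptor = tf.random.uniform([1, 2048])
  attention_prob = tf.random.uniform([1, n, m, 1])
  feature_map = tf.random.uniform([1, n, m, 128])
  return global_descriptor, attention_prob, feature_map

extract_fn = tf.function(ExtractLocalAndGlobalFeatures)
image = tf.zeros([480, 640, 3], dtype=tf.uint8)
(boxes, local_descriptors, feature_scales, scores,
 global_descriptors) = extract_fn(
     image,
     image_scales=tf.constant([0.70710677, 1.0, 1.4142135]),
     max_feature_num=1000,
     abs_thres=0.5,
     global_scales_ind=tf.constant([0, 1, 2]),  # use all scales globally
     iou=1.0,
     model_fn=my_delg_model,
     stride_factor=2.0)
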
Example #12
def ExtractKeypointDescriptor(image, layer_name, image_scales, iou,
                              max_feature_num, abs_thres, model_fn):
    """Extract keypoint descriptor for input image.

  Args:
    image: An image tensor with shape [h, w, channels].
    layer_name: The endpoint of feature extraction layer.
    image_scales: A 1D float tensor which contains the scales.
    iou: A float scalar denoting the IOU threshold for NMS.
    max_feature_num: An int tensor denoting the maximum number of selected
      feature points.
    abs_thres: A float tensor denoting the score threshold for feature
      selection.
    model_fn: Model function. Follows the signature:

      * Args:
        * `images`: Image tensor which is re-scaled.
        * `normalized_image`: Whether or not the images are normalized.
        * `reuse`: Whether or not the layer and its variables should be reused.

      * Returns:
        * `attention`: Attention score after the non-linearity.
        * `feature_map`: Feature map obtained from the ResNet model.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with scale-space keypoint detection convention.
    features: [N, depth] float tensor with feature descriptors.
    scores: [N, 1] float tensor denoting the attention score.

  Raises:
    ValueError: If the layer_name is unsupported.
  """
    original_image_shape_float = tf.gather(tf.to_float(tf.shape(image)),
                                           [0, 1])
    image_tensor = NormalizePixelValues(image)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Feature depth and receptive field parameters for each network version.
    if layer_name == 'resnet_v1_50/block3':
        feature_depth = 1024
        rf, stride, padding = [291.0, 32.0, 145.0]
    elif layer_name == 'resnet_v1_50/block4':
        feature_depth = 2048
        rf, stride, padding = [483.0, 32.0, 241.0]
    else:
        raise ValueError('Unsupported layer_name.')

    def _ProcessSingleScale(scale_index,
                            boxes,
                            features,
                            scales,
                            scores,
                            reuse=True):
        """Resize the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run image resizing, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.

    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated attention score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.to_int32(
            tf.round(original_image_shape_float * scale))
        resized_image = tf.image.resize_bilinear(image_tensor, new_image_size)

        attention, feature_map = model_fn(resized_image,
                                          normalized_image=True,
                                          reuse=reuse)

        rf_boxes = CalculateReceptiveBoxes(
            tf.shape(feature_map)[1],
            tf.shape(feature_map)[2], rf, stride, padding)
        # Re-project back to the original image space.
        rf_boxes = tf.divide(rf_boxes, scale)
        attention = tf.reshape(attention, [-1])
        feature_map = tf.reshape(feature_map, [-1, feature_depth])

        # Use attention score to select feature vectors.
        indices = tf.reshape(tf.where(attention >= abs_thres), [-1])
        selected_boxes = tf.gather(rf_boxes, indices)
        selected_features = tf.gather(feature_map, indices)
        selected_scores = tf.gather(attention, indices)
        selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

        # Concat with the previous result from different scales.
        boxes = tf.concat([boxes, selected_boxes], 0)
        features = tf.concat([features, selected_features], 0)
        scales = tf.concat([scales, selected_scales], 0)
        scores = tf.concat([scores, selected_scores], 0)

        return scale_index + 1, boxes, features, scales, scores

    output_boxes = tf.zeros([0, 4], dtype=tf.float32)
    output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
    output_scales = tf.zeros([0], dtype=tf.float32)
    output_scores = tf.zeros([0], dtype=tf.float32)

    # Process the first scale separately; the following scales will reuse the
    # graph variables.
    (_, output_boxes, output_features, output_scales,
     output_scores) = _ProcessSingleScale(0,
                                          output_boxes,
                                          output_features,
                                          output_scales,
                                          output_scores,
                                          reuse=False)
    i = tf.constant(1, dtype=tf.int32)
    num_scales = tf.shape(image_scales)[0]
    keep_going = lambda j, boxes, features, scales, scores: tf.less(
        j, num_scales)

    (_, output_boxes, output_features, output_scales,
     output_scores) = tf.while_loop(cond=keep_going,
                                    body=_ProcessSingleScale,
                                    loop_vars=[
                                        i, output_boxes, output_features,
                                        output_scales, output_scores
                                    ],
                                    shape_invariants=[
                                        i.get_shape(),
                                        tf.TensorShape([None, 4]),
                                        tf.TensorShape([None, feature_depth]),
                                        tf.TensorShape([None]),
                                        tf.TensorShape([None])
                                    ],
                                    back_prop=False)

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('features', output_features)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return (final_boxes.get(), final_boxes.get_field('scales'),
            final_boxes.get_field('features'),
            tf.expand_dims(final_boxes.get_field('scores'), 1))
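
A hypothetical TF1 graph-mode usage sketch, matching this snippet's tf.placeholder / tf.to_float era; `_my_model_fn` and every parameter value are illustrative stand-ins for a real slim-based DELF attention model, not part of the original:

import tensorflow as tf

def _my_model_fn(images, normalized_image=False, reuse=None):
  # Fake block3-like outputs; a real model_fn builds (and, with reuse=True,
  # reuses) a slim resnet_v1_50 attention tower here.
  del normalized_image, reuse  # unused in this stub
  n, m = tf.shape(images)[1] // 32, tf.shape(images)[2] // 32
  attention = tf.random_uniform([1, n, m, 1])
  feature_map = tf.random_uniform([1, n, m, 1024])
  return attention, feature_map

image = tf.placeholder(tf.uint8, shape=[None, None, 3])
boxes, feature_scales, features, scores = ExtractKeypointDescriptor(
    image,
    layer_name='resnet_v1_50/block3',
    image_scales=tf.constant([0.25, 0.5, 1.0, 2.0]),
    iou=1.0,
    max_feature_num=1000,
    abs_thres=0.5,
    model_fn=_my_model_fn)
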
Example #13
def ExtractKeypointDescriptor(image, layer_name, image_scales, iou,
                              max_feature_num, abs_thres, model_fn):
  """Extract keypoint descriptor for input image.

  Args:
    image: An image tensor with shape [h, w, channels].
    layer_name: The endpoint of feature extraction layer.
    image_scales: A 1D float tensor which contains the scales.
    iou: A float scalar denoting the IOU threshold for NMS.
    max_feature_num: An int tensor denoting the maximum number of selected
      feature points.
    abs_thres: A float tensor denoting the score threshold for feature
      selection.
    model_fn: Model function. Follows the signature:

      * Args:
        * `images`: Image tensor which is re-scaled.
        * `normalized_image`: Whether or not the images are normalized.
        * `reuse`: Whether or not the layer and its variables should be reused.

      * Returns:
        * `attention`: Attention score after the non-linearity.
        * `feature_map`: Feature map obtained from the ResNet model.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with scale-space keypoint detection convention.
    features: [N, depth] float tensor with feature descriptors.
    scores: [N, 1] float tensor denoting the attention score.

  Raises:
    ValueError: If the layer_name is unsupported.
  """
  original_image_shape_float = tf.gather(tf.to_float(tf.shape(image)), [0, 1])
  image_tensor = NormalizePixelValues(image)
  image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

  # Feature depth and receptive field parameters for each network version.
  if layer_name == 'resnet_v1_50/block3':
    feature_depth = 1024
    rf, stride, padding = [291.0, 32.0, 145.0]
  elif layer_name == 'resnet_v1_50/block4':
    feature_depth = 2048
    rf, stride, padding = [483.0, 32.0, 241.0]
  else:
    raise ValueError('Unsupported layer_name.')

  def _ProcessSingleScale(scale_index,
                          boxes,
                          features,
                          scales,
                          scores,
                          reuse=True):
    """Resize the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run image resizing, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.

    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated attention score tensor with the shape of [K].
    """
    scale = tf.gather(image_scales, scale_index)
    new_image_size = tf.to_int32(tf.round(original_image_shape_float * scale))
    resized_image = tf.image.resize_bilinear(image_tensor, new_image_size)

    attention, feature_map = model_fn(
        resized_image, normalized_image=True, reuse=reuse)

    rf_boxes = CalculateReceptiveBoxes(
        tf.shape(feature_map)[1], tf.shape(feature_map)[2], rf, stride, padding)
    # Re-project back to the original image space.
    rf_boxes = tf.divide(rf_boxes, scale)
    attention = tf.reshape(attention, [-1])
    feature_map = tf.reshape(feature_map, [-1, feature_depth])

    # Use attention score to select feature vectors.
    indices = tf.reshape(tf.where(attention >= abs_thres), [-1])
    selected_boxes = tf.gather(rf_boxes, indices)
    selected_features = tf.gather(feature_map, indices)
    selected_scores = tf.gather(attention, indices)
    selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

    # Concat with the previous result from different scales.
    boxes = tf.concat([boxes, selected_boxes], 0)
    features = tf.concat([features, selected_features], 0)
    scales = tf.concat([scales, selected_scales], 0)
    scores = tf.concat([scores, selected_scores], 0)

    return scale_index + 1, boxes, features, scales, scores

  output_boxes = tf.zeros([0, 4], dtype=tf.float32)
  output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
  output_scales = tf.zeros([0], dtype=tf.float32)
  output_scores = tf.zeros([0], dtype=tf.float32)

  # Process the first scale separately; the following scales will reuse the
  # graph variables.
  (_, output_boxes, output_features, output_scales,
   output_scores) = _ProcessSingleScale(
       0,
       output_boxes,
       output_features,
       output_scales,
       output_scores,
       reuse=False)
  i = tf.constant(1, dtype=tf.int32)
  num_scales = tf.shape(image_scales)[0]
  keep_going = lambda j, boxes, features, scales, scores: tf.less(j, num_scales)

  (_, output_boxes, output_features, output_scales,
   output_scores) = tf.while_loop(
       cond=keep_going,
       body=_ProcessSingleScale,
       loop_vars=[
           i, output_boxes, output_features, output_scales, output_scores
       ],
       shape_invariants=[
           i.get_shape(),
           tf.TensorShape([None, 4]),
           tf.TensorShape([None, feature_depth]),
           tf.TensorShape([None]),
           tf.TensorShape([None])
       ],
       back_prop=False)

  feature_boxes = box_list.BoxList(output_boxes)
  feature_boxes.add_field('features', output_features)
  feature_boxes.add_field('scales', output_scales)
  feature_boxes.add_field('scores', output_scores)

  nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
  final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                 nms_max_boxes)

  return (final_boxes.get(), final_boxes.get_field('scales'),
          final_boxes.get_field('features'), tf.expand_dims(
              final_boxes.get_field('scores'), 1))