Пример #1
0
    def _decode(self, rel_codes, anchors):
        """Turn anchor-relative square-box codes back into corner boxes.

        Every decoded box is a square: its side is the anchor's geometric-mean
        side length, sqrt(ha * wa), multiplied by exp(tl).

        Args:
          rel_codes: a tensor representing N anchor-encoded boxes; once
            transposed it must unstack into exactly three rows (ty, tx, tl).
          anchors: BoxList of anchors.

        Returns:
          boxes: BoxList holding N bounding boxes, each row ordered as
            [ymin, xmin, ymax, xmax].
        """
        anchor_cy, anchor_cx, anchor_h, anchor_w = (
            anchors.get_center_coordinates_and_sizes())
        anchor_side = tf.sqrt(anchor_h * anchor_w)

        code_y, code_x, code_side = tf.unstack(tf.transpose(rel_codes))
        if self._scale_factors:
            # Divide each code component by its configured scale factor
            # (presumably the inverse of a multiplication done when encoding).
            code_y = code_y / self._scale_factors[0]
            code_x = code_x / self._scale_factors[1]
            code_side = code_side / self._scale_factors[2]

        side = tf.exp(code_side) * anchor_side
        half_side = side / 2.
        center_y = code_y * anchor_side + anchor_cy
        center_x = code_x * anchor_side + anchor_cx

        corners = tf.stack([
            center_y - half_side,
            center_x - half_side,
            center_y + half_side,
            center_x + half_side,
        ])
        return box_list.BoxList(tf.transpose(corners))
Пример #2
0
    def _decode(self, rel_codes, anchors):
        """Turn anchor-relative (ty, tx, th, tw) codes back into corner boxes.

        Centers are offset from the anchor center in units of the anchor's
        height/width; sizes are the anchor sizes multiplied by exp(th) and
        exp(tw).

        Args:
          rel_codes: a tensor representing N anchor-encoded boxes; once
            transposed it must unstack into exactly four rows.
          anchors: BoxList of anchors.

        Returns:
          boxes: BoxList holding N bounding boxes, each row ordered as
            [ymin, xmin, ymax, xmax].
        """
        anchor_cy, anchor_cx, anchor_h, anchor_w = (
            anchors.get_center_coordinates_and_sizes())

        code_y, code_x, code_h, code_w = tf.unstack(tf.transpose(rel_codes))
        if self._scale_factors:
            # Divide each code component by its configured scale factor
            # (presumably the inverse of a multiplication done when encoding).
            code_y = code_y / self._scale_factors[0]
            code_x = code_x / self._scale_factors[1]
            code_h = code_h / self._scale_factors[2]
            code_w = code_w / self._scale_factors[3]

        box_h = tf.exp(code_h) * anchor_h
        box_w = tf.exp(code_w) * anchor_w
        half_h = box_h / 2.
        half_w = box_w / 2.
        center_y = code_y * anchor_h + anchor_cy
        center_x = code_x * anchor_w + anchor_cx

        corners = tf.stack([
            center_y - half_h,
            center_x - half_w,
            center_y + half_h,
            center_x + half_w,
        ])
        return box_list.BoxList(tf.transpose(corners))
Пример #3
0
def scale(boxlist, y_scale, x_scale, scope=None):
    """Multiply box coordinates by independent y and x factors.

    Args:
      boxlist: BoxList holding N boxes
      y_scale: (float) scalar tensor; cast to float32 before use
      x_scale: (float) scalar tensor; cast to float32 before use
      scope: name scope.

    Returns:
      boxlist: BoxList holding N boxes with scaled coordinates, passed
        through _copy_extra_fields together with the input boxlist.
    """
    with tf.name_scope(scope, 'Scale'):
        y_factor = tf.cast(y_scale, tf.float32)
        x_factor = tf.cast(x_scale, tf.float32)
        # Split the [N, 4] corner tensor into four [N, 1] columns.
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        top = y_factor * top
        bottom = y_factor * bottom
        left = x_factor * left
        right = x_factor * right
        scaled_corners = tf.concat([top, left, bottom, right], 1)
        return _copy_extra_fields(box_list.BoxList(scaled_corners), boxlist)
Пример #4
0
def change_coordinate_frame(boxlist, window, scope=None):
    """Re-express the boxes of a BoxList relative to a window.

    Given a window of the form [ymin, xmin, ymax, xmax], every box is first
    translated by the window's min corner and then divided by the window's
    height (y coordinates) and width (x coordinates), so that the window's
    min corner maps to (0, 0) and its max corner maps to (1, 1).

    A typical use is data augmentation: after randomly cropping an image to
    `window`, the groundtruth boxes must be moved into the coordinate frame
    of the crop.

    Args:
      boxlist: A BoxList object holding N boxes.
      window: A rank 1 tensor [4].
      scope: name scope.

    Returns:
      Returns a BoxList object with N boxes.
    """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        win_ymin, win_xmin = window[0], window[1]
        height = window[2] - win_ymin
        width = window[3] - win_xmin
        # Translate so that the window's min corner becomes the origin.
        shifted = box_list.BoxList(
            boxlist.get() - [win_ymin, win_xmin, win_ymin, win_xmin])
        # Normalise by the window extent.
        rescaled = scale(shifted, 1.0 / height, 1.0 / width)
        return _copy_extra_fields(rescaled, boxlist)
Пример #5
0
def concatenate(boxlists, fields=None, scope=None):
    """Join several BoxLists into one along the box dimension.

    The box tensors of all inputs are concatenated along axis 0. Each
    requested field is concatenated the same way, provided its static shape
    agrees across all inputs in every dimension except the first.

    Args:
      boxlists: list of BoxList objects
      fields: optional list of fields to also concatenate.  By default, all
        fields from the first BoxList in the list are included in the
        concatenation.
      scope: name scope.

    Returns:
      a BoxList with number of boxes equal to
        sum([boxlist.num_boxes() for boxlist in BoxList])
    Raises:
      ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
        contains non BoxList objects), if a requested field is missing from
        one of the boxlists, or if a field's shape is not fully defined
        beyond the 0th dimension or differs between boxlists.
    """

    def _shape_ignoring_first_dim(tensor):
        # Static shape as a list with the 0th entry replaced by -1, so that
        # shapes can be compared while ignoring the number of boxes.
        dims = tensor.get_shape().as_list()
        dims[0] = -1
        return dims

    with tf.name_scope(scope, 'Concatenate'):
        if not isinstance(boxlists, list):
            raise ValueError('boxlists should be a list')
        if not boxlists:
            raise ValueError('boxlists should have nonzero length')
        if not all(isinstance(item, box_list.BoxList) for item in boxlists):
            raise ValueError(
                'all elements of boxlists should be BoxList objects')

        merged = box_list.BoxList(
            tf.concat([item.get() for item in boxlists], 0))
        field_names = (
            boxlists[0].get_extra_fields() if fields is None else fields)

        for field in field_names:
            reference_shape = _shape_ignoring_first_dim(
                boxlists[0].get_field(field))
            if None in reference_shape:
                raise ValueError(
                    'field %s must have fully defined shape except for the'
                    ' 0th dimension.' % field)
            for item in boxlists:
                if not item.has_field(field):
                    raise ValueError(
                        'boxlist must contain all requested fields')
                candidate_shape = _shape_ignoring_first_dim(
                    item.get_field(field))
                if candidate_shape != reference_shape:
                    raise ValueError(
                        'field %s must have same shape for all boxlists '
                        'except for the 0th dimension.' % field)
            merged.add_field(
                field,
                tf.concat([item.get_field(field) for item in boxlists], 0))
        return merged
Пример #6
0
    def _compute_loss(self, prediction_tensor, target_tensor, weights):
        """Compute the weighted sum of (1 - matched IOU) over all anchors.

        Predictions and targets are flattened to [-1, 4] and paired row by
        row; each pair contributes `weight * (1 - IOU)` to the total.

        Args:
          prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
            representing the decoded predicted boxes
          target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
            representing the decoded target boxes
          weights: a float tensor of shape [batch_size, num_anchors]

        Returns:
          loss: a (scalar) tensor representing the value of the loss function
        """
        predicted = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
        targets = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
        iou_per_anchor = box_list_ops.matched_iou(predicted, targets)
        loss_per_anchor = 1.0 - iou_per_anchor
        flat_weights = tf.reshape(weights, [-1])
        return tf.reduce_sum(flat_weights * loss_per_anchor)
Пример #7
0
def tile_anchors(grid_height, grid_width, scales, aspect_ratios,
                 base_anchor_size, anchor_stride, anchor_offset):
    """Tile a basis set of anchor boxes over a regular grid in image space.

    A "basis" collection of boxes is defined by pairing scales[i] with
    aspect_ratios[i]; each basis box has height
    scales[i] / sqrt(aspect_ratios[i]) * base_anchor_size[0] and width
    scales[i] * sqrt(aspect_ratios[i]) * base_anchor_size[1].  For example,
    scales=[.1, .2, .2] with aspect ratios [2, 2, 1/2] yields three basis
    boxes: (scale .1, ratio 2), (scale .2, ratio 2) and (scale .2, ratio 1/2).

    The basis boxes are centered on every grid point.  Grid point (i, j) has
    center (i * anchor_stride[0] + anchor_offset[0],
            j * anchor_stride[1] + anchor_offset[1]).

    Args:
      grid_height: size of the grid in the y direction (int or int scalar tensor)
      grid_width: size of the grid in the x direction (int or int scalar tensor)
      scales: a 1-d  (float) tensor representing the scale of each box in the
        basis set.
      aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
        box in the basis set.  The length of the scales and aspect_ratios tensors
        must be equal.
      base_anchor_size: base anchor size as [height, width]
        (float tensor of shape [2])
      anchor_stride: difference in centers between base anchors for adjacent grid
                     positions (float tensor of shape [2])
      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                     upper left element of the grid, this should be zero for
                     feature networks with only VALID padding and even receptive
                     field size, but may need some additional calculation if other
                     padding is used (float tensor of shape [2])
    Returns:
      a BoxList holding a collection of N anchor boxes
    """
    # Sizes of the basis boxes.
    sqrt_ratios = tf.sqrt(aspect_ratios)
    basis_heights = scales / sqrt_ratios * base_anchor_size[0]
    basis_widths = scales * sqrt_ratios * base_anchor_size[1]

    # Coordinates of the grid points along each axis.
    grid_ys = (tf.to_float(tf.range(grid_height)) * anchor_stride[0] +
               anchor_offset[0])
    grid_xs = (tf.to_float(tf.range(grid_width)) * anchor_stride[1] +
               anchor_offset[1])
    grid_xs, grid_ys = ops.meshgrid(grid_xs, grid_ys)

    # Pair every basis size with every grid center.
    widths_grid, x_centers_grid = ops.meshgrid(basis_widths, grid_xs)
    heights_grid, y_centers_grid = ops.meshgrid(basis_heights, grid_ys)

    centers = tf.reshape(
        tf.stack([y_centers_grid, x_centers_grid], axis=3), [-1, 2])
    sizes = tf.reshape(
        tf.stack([heights_grid, widths_grid], axis=3), [-1, 2])
    return box_list.BoxList(_center_size_bbox_to_corners_bbox(centers, sizes))
Пример #8
0
    def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
                           match_list):
        """Run the hard example miner on anchorwise losses.

        The predicted box encodings are decoded against self.anchors and, for
        every image in the batch, wrapped in a BoxList whose 'scores' field
        holds the class predictions with the background column (index 0 of
        the last axis) sliced off.  These BoxLists are handed to
        self._hard_example_miner together with the losses and matches.

        Args:
          location_losses: Float tensor of shape [batch_size, num_anchors]
            representing anchorwise location losses.
          cls_losses: Float tensor of shape [batch_size, num_anchors]
            representing anchorwise classification losses.
          prediction_dict: a dictionary holding prediction tensors with
            1) box_encodings: float tensor of shape [batch_size, num_anchors,
              box_code_dimension] containing predicted boxes.
            2) class_predictions_with_background: float tensor of shape
              [batch_size, num_anchors, num_classes+1] containing class
              predictions (logits) for each of the anchors.  Note that this
              tensor *includes* background class predictions.
          match_list: a list of matcher.Match objects encoding the match between
            anchors and groundtruth boxes for each image of the batch,
            with rows of the Match objects corresponding to groundtruth boxes
            and columns corresponding to anchors.

        Returns:
          Whatever self._hard_example_miner returns; this is expected to be
            mined_location_loss: a float scalar with sum of localization
              losses from selected hard examples.
            mined_cls_loss: a float scalar with sum of classification losses
              from selected hard examples.
        """
        foreground_shape = [
            -1, self.anchors.num_boxes_static(), self.num_classes
        ]
        scores_with_background = prediction_dict[
            'class_predictions_with_background']
        # Start the slice at index 1 of the last axis to skip background.
        foreground_scores = tf.reshape(
            tf.slice(scores_with_background, [0, 0, 1], foreground_shape),
            foreground_shape)

        decoded = bcoder.batch_decode(prediction_dict['box_encodings'],
                                      self._box_coder, self.anchors)

        per_image_boxlists = []
        for image_boxes, image_scores in zip(tf.unstack(decoded),
                                             tf.unstack(foreground_scores)):
            image_boxlist = box_list.BoxList(image_boxes)
            image_boxlist.add_field('scores', image_scores)
            per_image_boxlists.append(image_boxlist)

        return self._hard_example_miner(
            location_losses=location_losses,
            cls_losses=cls_losses,
            decoded_boxlist_list=per_image_boxlists,
            match_list=match_list)
Пример #9
0
    def _decode(self, rel_codes, anchors):
        """Turn anchor-relative codes back into boxes with keypoints.

        The first four code components (ty, tx, th, tw) are decoded into box
        corners; the remaining components are keypoint coordinates, scaled by
        the anchor size and offset by the anchor center.

        Args:
          rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
            anchor-encoded boxes and keypoints
          anchors: BoxList of anchors.

        Returns:
          boxes: BoxList holding N bounding boxes, with the decoded keypoints
            stored under fields.BoxListFields.keypoints after being reshaped
            to [-1, num_keypoints, 2].
        """
        anchor_cy, anchor_cx, anchor_h, anchor_w = (
            anchors.get_center_coordinates_and_sizes())

        num_codes = tf.shape(rel_codes)[0]
        code_rows = tf.unstack(tf.transpose(rel_codes))
        code_y, code_x, code_h, code_w = code_rows[:4]
        # NOTE(review): this is a Python list of row tensors; the arithmetic
        # below presumably relies on TensorFlow's reflected operators to turn
        # it into a single tensor - confirm before restructuring.
        code_keypoints = code_rows[4:]
        if self._scale_factors:
            code_y = code_y / self._scale_factors[0]
            code_x = code_x / self._scale_factors[1]
            code_h = code_h / self._scale_factors[2]
            code_w = code_w / self._scale_factors[3]
            code_keypoints = code_keypoints / tf.tile(
                self._keypoint_scale_factors, [1, num_codes])

        # Box part: identical to the plain center/size decoding.
        box_w = tf.exp(code_w) * anchor_w
        box_h = tf.exp(code_h) * anchor_h
        half_h = box_h / 2.
        half_w = box_w / 2.
        center_y = code_y * anchor_h + anchor_cy
        center_x = code_x * anchor_w + anchor_cx
        corners = tf.stack([
            center_y - half_h,
            center_x - half_w,
            center_y + half_h,
            center_x + half_w,
        ])
        decoded = box_list.BoxList(tf.transpose(corners))

        # Keypoint part: repeat the (y, x) anchor rows once per keypoint so
        # they line up with the keypoint code rows.
        tile_multiples = [self._num_keypoints, 1]
        centers_tiled = tf.tile(tf.stack([anchor_cy, anchor_cx]),
                                tile_multiples)
        sizes_tiled = tf.tile(tf.stack([anchor_h, anchor_w]), tile_multiples)
        keypoints = code_keypoints * sizes_tiled + centers_tiled
        keypoints = tf.reshape(tf.transpose(keypoints),
                               [-1, self._num_keypoints, 2])
        decoded.add_field(fields.BoxListFields.keypoints, keypoints)
        return decoded
Пример #10
0
    def _decode(self, rel_codes, anchors):
        """Decode codes as `rel_codes * stddev + anchor box`.

        Args:
          rel_codes: a tensor representing N anchor-encoded boxes.
          anchors: BoxList of anchors.  It must carry a 'stddev' field, which
            is multiplied elementwise with rel_codes.

        Returns:
          boxes: BoxList holding N bounding boxes
        Raises:
          ValueError: if the anchors BoxList does not have a stddev field
        """
        if not anchors.has_field('stddev'):
            raise ValueError('anchors must have a stddev field')
        anchor_means = anchors.get()
        anchor_stddevs = anchors.get_field('stddev')
        return box_list.BoxList(rel_codes * anchor_stddevs + anchor_means)
Пример #11
0
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
    """Pad or clip the boxes and every extra field of a BoxList.

    The box tensor and each extra field are passed individually through
    shape_util.pad_or_clip_tensor with the same num_boxes.

    Args:
      boxlist: A BoxList with arbitrary of number of boxes.
      num_boxes: First num_boxes in boxlist are kept.
        The fields are zero-padded if num_boxes is bigger than the
        actual number of boxes.
      scope: name scope.

    Returns:
      BoxList with all fields padded or clipped.
    """
    with tf.name_scope(scope, 'PadOrClipBoxList'):
        resized = box_list.BoxList(
            shape_util.pad_or_clip_tensor(boxlist.get(), num_boxes))
        for name in boxlist.get_extra_fields():
            resized.add_field(
                name,
                shape_util.pad_or_clip_tensor(boxlist.get_field(name),
                                              num_boxes))
        return resized
Пример #12
0
    def _assign_targets(self, groundtruth_boxes_list,
                        groundtruth_classes_list):
        """Batch-assign classification and regression targets to anchors.

        Each groundtruth box tensor is wrapped in a BoxList, each class
        tensor gets one zero column prepended on its last axis (the
        background class), and both lists are forwarded to
        target_assigner.batch_assign_targets with self._target_assigner and
        self.anchors.

        Args:
          groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
            containing coordinates of the groundtruth boxes.
              Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
              format and assumed to be normalized and clipped
              relative to the image window with y_min <= y_max and x_min <= x_max.
          groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
            shape [num_boxes, num_classes] containing the class targets with the 0th
            index assumed to map to the first non-background class.

        Returns:
          The result of target_assigner.batch_assign_targets, expected to be:
          batch_cls_targets: a tensor with shape [batch_size, num_anchors,
            num_classes],
          batch_cls_weights: a tensor with shape [batch_size, num_anchors],
          batch_reg_targets: a tensor with shape [batch_size, num_anchors,
            box_code_dimension]
          batch_reg_weights: a tensor with shape [batch_size, num_anchors],
          match_list: a list of matcher.Match objects encoding the match between
            anchors and groundtruth boxes for each image of the batch,
            with rows of the Match objects corresponding to groundtruth boxes
            and columns corresponding to anchors.
        """
        gt_boxlists = [
            box_list.BoxList(gt_boxes) for gt_boxes in groundtruth_boxes_list
        ]
        # No padding on the box axis; one leading column on the class axis.
        prepend_background = [[0, 0], [1, 0]]
        gt_classes_with_background = [
            tf.pad(class_targets, prepend_background, mode='CONSTANT')
            for class_targets in groundtruth_classes_list
        ]
        return target_assigner.batch_assign_targets(
            self._target_assigner, self.anchors, gt_boxlists,
            gt_classes_with_background)
Пример #13
0
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
    """Clamp box corners to a window, optionally dropping empty boxes.

    Every corner coordinate is clamped into the window's range along its
    axis.  When filter_nonoverlapping is true, boxes whose clipped area is
    not strictly positive are removed.

    Args:
      boxlist: BoxList holding M_in boxes
      window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
        window to which the op should clip boxes.
      filter_nonoverlapping: whether to filter out boxes that do not overlap at
        all with the window.
      scope: name scope.

    Returns:
      a BoxList holding M_out boxes where M_out <= M_in
    """

    def _clamp(coords, lower, upper):
        # Cap at `upper` first, then raise to at least `lower`.
        return tf.maximum(tf.minimum(coords, upper), lower)

    with tf.name_scope(scope, 'ClipToWindow'):
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_top, win_left, win_bottom, win_right = tf.unstack(window)

        top_clipped = _clamp(top, win_top, win_bottom)
        bottom_clipped = _clamp(bottom, win_top, win_bottom)
        left_clipped = _clamp(left, win_left, win_right)
        right_clipped = _clamp(right, win_left, win_right)

        clipped = box_list.BoxList(
            tf.concat(
                [top_clipped, left_clipped, bottom_clipped, right_clipped],
                1))
        clipped = _copy_extra_fields(clipped, boxlist)
        if not filter_nonoverlapping:
            return clipped

        has_positive_area = tf.greater(area(clipped), 0.0)
        keep_indices = tf.cast(
            tf.reshape(tf.where(has_positive_area), [-1]), tf.int32)
        return gather(clipped, keep_indices)
Пример #14
0
def boolean_mask(boxlist, indicator, fields=None, scope=None):
    """Keep only the boxes flagged True by a boolean indicator.

    The box tensor and each selected field are filtered with
    tf.boolean_mask along their first dimension.  By default every extra
    field of the input is carried over; pass `fields` to restrict this.

    Args:
      boxlist: BoxList holding N boxes
      indicator: a rank-1 boolean tensor
      fields: (optional) list of fields to also gather from.  If None (default),
        all fields are gathered from.  Pass an empty fields list to only gather
        the box coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indicator
    Raises:
      ValueError: if `indicator` is not a rank-1 boolean tensor, or if a
        requested field is not present in boxlist.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')

        selected = box_list.BoxList(
            tf.boolean_mask(boxlist.get(), indicator))
        field_names = boxlist.get_extra_fields() if fields is None else fields
        for name in field_names:
            if not boxlist.has_field(name):
                raise ValueError('boxlist must contain all specified fields')
            selected.add_field(
                name, tf.boolean_mask(boxlist.get_field(name), indicator))
        return selected
Пример #15
0
def gather(boxlist, indices, fields=None, scope=None):
    """Gather boxes from BoxList according to indices and return new BoxList.

    By default, `gather` returns boxes corresponding to the input index list, as
    well as all additional fields stored in the boxlist (indexing into the
    first dimension).  However one can optionally only gather from a
    subset of fields.

    Args:
      boxlist: BoxList holding N boxes
      indices: a rank-1 tensor of type int32 / int64
      fields: (optional) list of fields to also gather from.  If None (default),
        all fields are gathered from.  Pass an empty fields list to only gather
        the box coordinates.
      scope: name scope.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indices
    Raises:
      ValueError: if `indices` does not have a statically known rank of 1,
        if `indices` is not of type int32 or int64, or if a specified field
        is not contained in boxlist.
    """
    with tf.name_scope(scope, 'Gather'):
        # Use `ndims` (None for unknown rank) rather than `as_list()`, which
        # raises its own generic error on unknown-rank shapes; this keeps the
        # error message meaningful and mirrors the check in `boolean_mask`.
        if indices.shape.ndims != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype not in (tf.int32, tf.int64):
            raise ValueError('indices should be an int32 / int64 tensor')
        subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subboxlist.add_field(
                field, tf.gather(boxlist.get_field(field), indices))
        return subboxlist
Пример #16
0
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Refine selected boxes by score-weighted voting over a pool of boxes.

    Implements box voting in the spirit of 'Object detection via a
    multi-region & semantic segmentation-aware CNN model', Gidaris and
    Komodakis, ICCV 2015.  For each box 'B' in selected_boxes, the set 'S' of
    boxes in pool_boxes whose IOU with B is strictly greater than iou_thresh
    is collected.  The location of B becomes the score-weighted average
    location of the boxes in S, and the score of B becomes the plain average
    score of the boxes in S.

    At graph execution time the op asserts that every selected box matches
    at least one pool box and that all pool scores are non-negative.

    Args:
      selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
        boxes are usually selected from pool_boxes using non max suppression.
      pool_boxes: BoxList containing a set of (possibly redundant) boxes.
      iou_thresh: (float scalar) iou threshold for matching boxes in
        selected_boxes and pool_boxes.

    Returns:
      BoxList containing averaged locations and scores for each box in
      selected_boxes.

    Raises:
      ValueError: if
        a) selected_boxes or pool_boxes is not a BoxList.
        b) if iou_thresh is not in [0, 1].
        c) pool_boxes does not have a scores field.
    """
    # The threshold is validated first, before the inputs are inspected.
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError("pool_boxes must have a 'scores' field")

    # is_match[i, j] is 1.0 when selected box i overlaps pool box j enough.
    overlaps = iou(selected_boxes, pool_boxes)
    is_match = tf.to_float(tf.greater(overlaps, iou_thresh))
    matches_per_box = tf.reduce_sum(is_match, 1)
    # TODO: Handle the case where some boxes in selected_boxes do not match to any
    # boxes in pool_boxes. For such boxes without any matches, we should return
    # the original boxes without voting.
    match_assert = tf.Assert(
        tf.reduce_all(tf.greater(matches_per_box, 0)),
        ['Each box in selected_boxes must match with at least one box '
         'in pool_boxes.'])

    pool_scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_assert = tf.Assert(
        tf.reduce_all(tf.greater_equal(pool_scores, 0)),
        ['Scores must be non negative.'])

    # Everything downstream depends on score_sums, so gating it on the
    # assertions gates the whole result.
    with tf.control_dependencies([scores_assert, match_assert]):
        score_sums = tf.matmul(is_match, pool_scores)
    mean_scores = tf.reshape(score_sums, [-1]) / matches_per_box

    weighted_pool = pool_boxes.get() * pool_scores
    voted_locations = tf.matmul(is_match, weighted_pool) / score_sums

    voted = box_list.BoxList(voted_locations)
    _copy_extra_fields(voted, selected_boxes)
    voted.add_field('scores', mean_scores)
    return voted
Пример #17
0
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Build the tensors needed to evaluate the model on one input.

    An input dictionary is dequeued from a prefetch queue, its image is
    given a leading dimension and run through the model's preprocess,
    predict and postprocess steps.  Detection boxes are converted to
    absolute coordinates using the image's height and width, and detection
    classes are shifted by a label id offset of 1.  Groundtruth tensors are
    added unless ignore_groundtruth is set.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    prefetch_queue = prefetcher.prefetch(create_input_dict_fn(), capacity=500)
    inputs = prefetch_queue.dequeue()

    image_batch = tf.expand_dims(inputs[fields.InputDataFields.image], 0)
    preprocessed = model.preprocess(tf.to_float(image_batch))
    detections = model.postprocess(model.predict(preprocessed))

    image_shape = tf.shape(image_batch)
    image_height = image_shape[1]
    image_width = image_shape[2]

    # Drop the leading dimension that was added to the image above.
    detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
    absolute_boxes = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_height, image_width)

    label_id_offset = 1
    tensor_dict = {
        'original_image': image_batch,
        'image_id': inputs[fields.InputDataFields.source_id],
        'detection_boxes': absolute_boxes.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'],
                                       axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }

    if 'detection_masks' in detections:
        box_masks = tf.squeeze(detections['detection_masks'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        image_masks = ops.reframe_box_masks_to_image_masks(
            box_masks, detection_boxes, image_height, image_width)
        # Binarise the reframed masks at 0.5.
        tensor_dict['detection_masks'] = tf.to_float(
            tf.greater(image_masks, 0.5))

    if ignore_groundtruth:
        return tensor_dict

    # Load groundtruth fields into tensor_dict.
    normalized_gt = box_list.BoxList(
        inputs[fields.InputDataFields.groundtruth_boxes])
    absolute_gt = box_list_ops.scale(normalized_gt, image_height, image_width)
    tensor_dict['groundtruth_boxes'] = absolute_gt.get()
    tensor_dict['groundtruth_classes'] = inputs[
        fields.InputDataFields.groundtruth_classes]
    tensor_dict['area'] = inputs[fields.InputDataFields.groundtruth_area]
    tensor_dict['is_crowd'] = inputs[
        fields.InputDataFields.groundtruth_is_crowd]
    tensor_dict['difficult'] = inputs[
        fields.InputDataFields.groundtruth_difficult]

    if 'detection_masks' in tensor_dict:
        tensor_dict['groundtruth_instance_masks'] = inputs[
            fields.InputDataFields.groundtruth_instance_masks]
    return tensor_dict
Пример #18
0
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

    This op greedily selects a subset of detection bounding boxes, pruning
    away boxes that have high IOU (intersection over union) overlap (> thresh)
    with already selected boxes.  It operates independently for each class for
    which scores are provided, pruning boxes with score less than a provided
    threshold prior to applying NMS.

    Please note that this operation is performed on *all* classes, therefore any
    background classes should be removed prior to calling this function.

    Args:
      boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
        number of classes or 1 depending on whether a separate box is predicted
        per class.
      scores: A [k, num_classes] float32 tensor containing the scores for each of
        the k detections. The second dimension must be statically defined.
      score_thresh: scalar threshold for score (low scoring boxes are removed).
      iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
        with previously selected boxes are removed).
      max_size_per_class: maximum number of retained boxes per class.
      max_total_size: maximum number of boxes retained over all classes. By
        default (0) returns all boxes retained after capping boxes per class.
      clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
        representing the window to clip and normalize boxes to before performing
        non-max suppression.
      change_coordinate_frame: Whether to normalize coordinates after clipping
        relative to clip_window (this can only be set to True if a clip_window
        is provided)
      masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
        containing box masks. `q` can be either number of classes or 1 depending
        on whether a separate mask is predicted per class. Masks are indexed
        with the same per-class index as `boxes`, so their second dimension is
        expected to match that of `boxes`.
      additional_fields: (optional) If not None, a dictionary that maps keys to
        tensors whose first dimensions are all of size `k`. After non-maximum
        suppression, all tensors corresponding to the selected boxes will be
        added to resulting BoxList.
      scope: name scope.

    Returns:
      a BoxList holding M boxes with a rank-1 scores field representing
        corresponding scores for each box with scores sorted in decreasing order
        and a rank-1 classes field representing a class label for each box.
        If masks or additional_fields are given, they are attached as fields
        to the per-class box lists before filtering and selection.

    Raises:
      ValueError: if iou_thresh is not in [0, 1]; if scores is not rank 2 or
        its second dimension is not statically defined; if boxes is not rank 3,
        its last dimension is not 4, or its second dimension is neither 1 nor
        the number of classes; or if change_coordinate_frame is True without
        a clip_window.
    """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        # The trailing space matters: adjacent literals are concatenated as-is.
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        # Plain Python int; validated above to be statically known.
        num_classes = scores.shape[1].value

        length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
            'Incorrect scores field length: actual vs expected.', num_scores,
            num_boxes
        ])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        per_class_masks_list = (tf.unstack(masks, axis=1)
                                if masks is not None else None)
        # When only one box is predicted per detection (q == 1), every class
        # shares that single set of boxes.
        boxes_shared_across_classes = len(per_class_boxes_list) == 1

        for class_idx in range(num_classes):
            boxes_idx = 0 if boxes_shared_across_classes else class_idx
            boxlist_and_class_scores = box_list.BoxList(
                per_class_boxes_list[boxes_idx])

            # Evaluate the length check before slicing out this class's scores.
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx],
                             tf.stack([num_scores, 1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if per_class_masks_list is not None:
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.masks,
                    per_class_masks_list[boxes_idx])
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)

            # Drop low-scoring boxes, then optionally clip / re-frame.
            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)

            # Never request more boxes than remain after filtering.
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            # Label every surviving box with this class index, using the
            # dtype and shape of the scores field.
            selected_scores = nms_result.get_field(
                fields.BoxListFields.scores)
            nms_result.add_field(fields.BoxListFields.classes,
                                 tf.zeros_like(selected_scores) + class_idx)
            selected_boxes_list.append(nms_result)

        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if max_total_size:
            # Cap the overall number of detections across all classes.
            total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(total_size))
        return sorted_boxes