def label_anchors(self, gt_boxes, gt_labels):
  """Labels anchors with ground truth inputs.

  Args:
    gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
      For each row, it stores [y0, x0, y1, x1] for four corners of a box.
    gt_labels: An integer tensor with shape [N, 1] representing groundtruth
      classes.

  Returns:
    score_targets_dict: An ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensors with
      shape [height_l, width_l, num_anchors]. The height_l and width_l
      represent the dimension of class logits at the l-th level.
    box_targets_dict: An ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensors with
      shape [height_l, width_l, num_anchors * 4]. The height_l and width_l
      represent the dimension of bounding box regression output at the l-th
      level.
  """
  gt_box_list = box_list.BoxList(gt_boxes)
  anchor_box_list = box_list.BoxList(self._anchor.boxes)

  # cls_targets, cls_weights and box_weights are not used.
  _, _, box_targets, _, matches = self._target_assigner.assign(
      anchor_box_list, gt_box_list, gt_labels)

  # score_targets contains the subsampled positive and negative anchors.
  score_targets, _, _ = self._get_rpn_samples(matches.match_results)

  # Unpacks labels.
  score_targets_dict = self._anchor.unpack_labels(score_targets)
  box_targets_dict = self._anchor.unpack_labels(box_targets)

  return score_targets_dict, box_targets_dict
def scale(boxlist, y_scale, x_scale, scope=None):
  """Scales box coordinates in x and y dimensions.

  Args:
    boxlist: BoxList holding N boxes.
    y_scale: (float) scalar tensor.
    x_scale: (float) scalar tensor.
    scope: name scope.

  Returns:
    boxlist: BoxList holding N boxes.
  """
  with tf.name_scope(scope, 'Scale'):
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxlist = box_list.BoxList(
        tf.concat([y_min, x_min, y_max, x_max], 1))
    return _copy_extra_fields(scaled_boxlist, boxlist)
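# The following is an illustrative sketch, not part of the original file: it
# shows one common use of `scale`, converting normalized boxes to absolute
# pixel coordinates. The function name and values are hypothetical; it assumes
# the module-level `tf` and `box_list` imports used throughout this file.
def _example_scale_usage():
  boxes = box_list.BoxList(tf.constant([[0.1, 0.2, 0.5, 0.8]]))
  # For a 480x640 image: y spans 0.1*480=48 to 0.5*480=240, and
  # x spans 0.2*640=128 to 0.8*640=512.
  return scale(boxes, y_scale=480.0, x_scale=640.0)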
def change_coordinate_frame(boxlist, window, scope=None):
  """Changes coordinate frame of the boxlist to be relative to window's frame.

  Given a window of the form [ymin, xmin, ymax, xmax], changes bounding box
  coordinates from boxlist to be relative to this window (e.g., the min corner
  maps to (0,0) and the max corner maps to (1,1)).

  An example use case is data augmentation, where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some window
  (window). In this case we need to change the coordinate frame of each
  groundtruth box to be relative to this new window.

  Args:
    boxlist: A BoxList object holding N boxes.
    window: A rank 1 tensor [4].
    scope: name scope.

  Returns:
    A BoxList object with N boxes.
  """
  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
    win_height = window[2] - window[0]
    win_width = window[3] - window[1]
    boxlist_new = scale(
        box_list.BoxList(boxlist.get() -
                         [window[0], window[1], window[0], window[1]]),
        1.0 / win_height, 1.0 / win_width)
    boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
    return boxlist_new
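# Illustrative sketch (hypothetical name and values): a worked example of
# `change_coordinate_frame`. A box coincident with the crop window should map
# to the full unit square.
def _example_change_coordinate_frame_usage():
  boxes = box_list.BoxList(tf.constant([[0.25, 0.25, 0.75, 0.75]]))
  window = tf.constant([0.25, 0.25, 0.75, 0.75])
  # Subtracting the window origin gives [0, 0, 0.5, 0.5]; scaling by the
  # inverse of the 0.5x0.5 window size gives [0, 0, 1, 1].
  return change_coordinate_frame(boxes, window)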
def _decode(self, rel_codes, anchors):
  """Decode relative codes to boxes.

  Args:
    rel_codes: a tensor representing N anchor-encoded boxes.
    anchors: BoxList of anchors.

  Returns:
    boxes: BoxList holding N bounding boxes.
  """
  ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
  ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
  if self._scale_factors:
    ty /= self._scale_factors[0]
    tx /= self._scale_factors[1]
    th /= self._scale_factors[2]
    tw /= self._scale_factors[3]
  w = tf.exp(tw) * wa
  h = tf.exp(th) * ha
  ycenter = ty * ha + ycenter_a
  xcenter = tx * wa + xcenter_a
  ymin = ycenter - h / 2.
  xmin = xcenter - w / 2.
  ymax = ycenter + h / 2.
  xmax = xcenter + w / 2.
  return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
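# For reference, `_decode` inverts the standard Faster R-CNN box encoding
# (ignoring scale factors):
#   ty = (ycenter - ycenter_a) / ha,  tx = (xcenter - xcenter_a) / wa,
#   th = log(h / ha),                 tw = log(w / wa).
# A quick illustrative check (hypothetical helper, assuming `coder` is an
# instance of this box coder): all-zero codes must decode back to the anchors
# themselves, since exp(0) = 1 and the center offsets vanish.
def _example_decode_identity(coder, anchors):
  zero_codes = tf.zeros_like(anchors.get())
  return coder._decode(zero_codes, anchors)  # Equals the anchors' corners.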
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
  """Scales boxes from normalized to pixel coordinates.

  Args:
    image: A 3D float32 tensor of shape [height, width, channels].
    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
      boxes in normalized coordinates. Each row is of the form
      [ymin, xmin, ymax, xmax].
    keypoints: (optional) rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.

  Returns:
    image: unchanged input image.
    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
      bounding boxes in pixel coordinates.
    scaled_keypoints: a 3D float32 tensor with shape
      [num_instances, num_keypoints, 2] containing the keypoints in pixel
      coordinates.
  """
  boxlist = box_list.BoxList(boxes)
  image_height = tf.shape(input=image)[0]
  image_width = tf.shape(input=image)[1]
  scaled_boxes = box_list_scale(boxlist, image_height, image_width).get()
  result = [image, scaled_boxes]
  if keypoints is not None:
    scaled_keypoints = keypoint_scale(keypoints, image_height, image_width)
    result.append(scaled_keypoints)
  return tuple(result)
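# Illustrative sketch (hypothetical name and shapes): converting normalized
# detector outputs to pixel coordinates, e.g. before drawing them on the
# image. Assumes the module-level `tf` import.
def _example_scale_boxes_to_pixel_coordinates():
  image = tf.zeros([480, 640, 3], dtype=tf.float32)
  boxes = tf.constant([[0.1, 0.2, 0.5, 0.8]])
  _, pixel_boxes = scale_boxes_to_pixel_coordinates(image, boxes)
  return pixel_boxes  # -> [[48., 128., 240., 512.]]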
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
  """Returns a regression target for each anchor.

  Args:
    anchors: a BoxList representing N anchors.
    groundtruth_boxes: a BoxList representing M groundtruth boxes.
    match: a matcher.Match object.

  Returns:
    reg_targets: a float32 tensor with shape [N, box_code_dimension]
  """
  matched_gt_boxes = match.gather_based_on_match(
      groundtruth_boxes.get(),
      unmatched_value=tf.zeros(4),
      ignored_value=tf.zeros(4))
  matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
  if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
    groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
    matched_keypoints = match.gather_based_on_match(
        groundtruth_keypoints,
        unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
        ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
    matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
  matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
  match_results_shape = shape_utils.combined_static_and_dynamic_shape(
      match.match_results)

  # Zero out the unmatched and ignored regression targets.
  unmatched_ignored_reg_targets = tf.tile(
      self._default_regression_target(), [match_results_shape[0], 1])
  matched_anchors_mask = match.matched_column_indicator()
  reg_targets = tf.where(matched_anchors_mask,
                         matched_reg_targets,
                         unmatched_ignored_reg_targets)
  return reg_targets
def label_anchors(self, gt_boxes, gt_labels):
  """Labels anchors with ground truth inputs.

  Args:
    gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
      For each row, it stores [y0, x0, y1, x1] for four corners of a box.
    gt_labels: An integer tensor with shape [N, 1] representing groundtruth
      classes.

  Returns:
    cls_targets_dict: An ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensors with
      shape [height_l, width_l, num_anchors_per_location]. The height_l and
      width_l represent the dimension of class logits at the l-th level.
    box_targets_dict: An ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensors with
      shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
      and width_l represent the dimension of bounding box regression output
      at the l-th level.
    num_positives: scalar tensor storing the number of positives in an image.
  """
  gt_box_list = box_list.BoxList(gt_boxes)
  anchor_box_list = box_list.BoxList(self._anchor.boxes)

  # The cls_weights and box_weights are not used.
  cls_targets, _, box_targets, _, matches = self._target_assigner.assign(
      anchor_box_list, gt_box_list, gt_labels)

  # Labels definition in matches.match_results:
  # (1) match_results[i] >= 0, meaning that column i is matched with row
  #     match_results[i].
  # (2) match_results[i] = -1, meaning that column i is not matched.
  # (3) match_results[i] = -2, meaning that column i is ignored.
  match_results = tf.expand_dims(matches.match_results, axis=1)
  cls_targets = tf.cast(cls_targets, tf.int32)
  cls_targets = tf.where(tf.equal(match_results, -1),
                         -tf.ones_like(cls_targets), cls_targets)
  cls_targets = tf.where(tf.equal(match_results, -2),
                         -2 * tf.ones_like(cls_targets), cls_targets)

  # Unpacks labels into multi-level representations.
  cls_targets_dict = self._anchor.unpack_labels(cls_targets)
  box_targets_dict = self._anchor.unpack_labels(box_targets)
  num_positives = tf.reduce_sum(
      tf.cast(tf.greater(matches.match_results, -1), tf.float32))

  return cls_targets_dict, box_targets_dict, num_positives
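# Standalone illustration (hypothetical values) of the relabeling performed in
# `label_anchors`: matched anchors keep their class target, unmatched anchors
# become -1 (background), and ignored anchors become -2.
def _example_match_relabeling():
  match_results = tf.constant([[3], [-1], [-2]])
  cls_targets = tf.constant([[7], [7], [7]], dtype=tf.int32)
  cls_targets = tf.where(tf.equal(match_results, -1),
                         -tf.ones_like(cls_targets), cls_targets)
  cls_targets = tf.where(tf.equal(match_results, -2),
                         -2 * tf.ones_like(cls_targets), cls_targets)
  return cls_targets  # -> [[7], [-1], [-2]]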
def concatenate(boxlists, fields=None, scope=None):
  """Concatenates a list of BoxLists.

  This op concatenates a list of input BoxLists into a larger BoxList. It also
  handles concatenation of BoxList fields as long as the field tensor shapes
  are equal except for the first dimension.

  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate. By default, all
      fields from the first BoxList in the list are included in the
      concatenation.
    scope: name scope.

  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in boxlists])

  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non-BoxList objects), or if requested fields are not contained
      in all boxlists
  """
  with tf.name_scope(scope, 'Concatenate'):
    if not isinstance(boxlists, list):
      raise ValueError('boxlists should be a list')
    if not boxlists:
      raise ValueError('boxlists should have nonzero length')
    for boxlist in boxlists:
      if not isinstance(boxlist, box_list.BoxList):
        raise ValueError('all elements of boxlists should be BoxList objects')
    concatenated = box_list.BoxList(
        tf.concat([boxlist.get() for boxlist in boxlists], 0))
    if fields is None:
      fields = boxlists[0].get_extra_fields()
    for field in fields:
      first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
      first_field_shape[0] = -1
      if None in first_field_shape:
        raise ValueError('field %s must have fully defined shape except for '
                         'the 0th dimension.' % field)
      for boxlist in boxlists:
        if not boxlist.has_field(field):
          raise ValueError('boxlist must contain all requested fields')
        field_shape = boxlist.get_field(field).get_shape().as_list()
        field_shape[0] = -1
        if field_shape != first_field_shape:
          raise ValueError('field %s must have same shape for all boxlists '
                           'except for the 0th dimension.' % field)
      concatenated_field = tf.concat(
          [boxlist.get_field(field) for boxlist in boxlists], 0)
      concatenated.add_field(field, concatenated_field)
    return concatenated
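# Illustrative sketch (hypothetical name and values): merging two BoxLists
# that share a 'scores' field; the result holds both boxes and both scores.
def _example_concatenate_usage():
  a = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
  a.add_field('scores', tf.constant([0.9]))
  b = box_list.BoxList(tf.constant([[0.5, 0.5, 1.0, 1.0]]))
  b.add_field('scores', tf.constant([0.8]))
  return concatenate([a, b])  # 2 boxes; 'scores' is concatenated as well.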
def sample_boxes_by_jittering(boxlist,
                              num_boxes_to_sample,
                              stddev=0.1,
                              scope=None):
  """Samples num_boxes_to_sample boxes by jittering around boxlist boxes.

  It is possible that this function might generate boxes with size 0. The
  larger the stddev, the more probable this is. For a small stddev of 0.1 this
  probability is very small.

  Args:
    boxlist: A boxlist containing N boxes in normalized coordinates.
    num_boxes_to_sample: A positive integer containing the number of boxes to
      sample.
    stddev: Standard deviation. This is used to draw random offsets for the
      box corners from a normal distribution. The offset is multiplied by the
      box size so will be larger in terms of pixels for larger boxes.
    scope: Name scope.

  Returns:
    sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
      normalized coordinates.
  """
  with tf.name_scope(scope, 'SampleBoxesByJittering'):
    num_boxes = boxlist.num_boxes()
    box_indices = tf.random_uniform([num_boxes_to_sample],
                                    minval=0,
                                    maxval=num_boxes,
                                    dtype=tf.int32)
    sampled_boxes = tf.gather(boxlist.get(), box_indices)
    sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
    sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
    rand_miny_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
    rand_minx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
    rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
    rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
    miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0]
    minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1]
    maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2]
    maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3]
    maxy = tf.maximum(miny, maxy)
    maxx = tf.maximum(minx, maxx)
    sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1)
    sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0)
    return box_list.BoxList(sampled_boxes)
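# Illustrative sketch (hypothetical name and values): drawing eight jittered
# variants of a single groundtruth box, e.g. to generate extra box proposals
# around the annotation.
def _example_sample_boxes_by_jittering():
  gt = box_list.BoxList(tf.constant([[0.2, 0.2, 0.6, 0.7]]))
  return sample_boxes_by_jittering(gt, num_boxes_to_sample=8, stddev=0.05)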
def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
  """Gathers boxes from BoxList according to indices into a new BoxList.

  By default, `gather` returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the first
  dimension). However one can optionally only gather from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indices: a rank-1 tensor of type int32 / int64
    fields: (optional) list of fields to also gather from. If None (default),
      all fields are gathered from. Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indices

  Raises:
    ValueError: if specified field is not contained in boxlist or if the
      indices are not of type int32
  """
  with tf.name_scope(scope, 'Gather'):
    if len(indices.shape.as_list()) != 1:
      raise ValueError('indices should have rank 1')
    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
      raise ValueError('indices should be an int32 / int64 tensor')
    gather_op = tf.gather
    if use_static_shapes:
      gather_op = ops.matmul_gather_on_zeroth_axis
    subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
    if fields is None:
      fields = boxlist.get_extra_fields()
    # Avoid mutating a caller-provided `fields` list in place.
    fields = fields + ['boxes']
    for field in fields:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      subfieldlist = gather_op(boxlist.get_field(field), indices)
      subboxlist.add_field(field, subfieldlist)
    return subboxlist
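# Illustrative sketch (hypothetical name and values): gathering two boxes by
# index. The indices are hard-coded for brevity; in practice they usually come
# from tf.nn.top_k or tf.image.non_max_suppression.
def _example_gather_usage():
  boxes = box_list.BoxList(
      tf.constant([[0.0, 0.0, 0.5, 0.5],
                   [0.1, 0.1, 0.6, 0.6],
                   [0.5, 0.5, 1.0, 1.0]]))
  boxes.add_field('scores', tf.constant([0.3, 0.9, 0.7]))
  # Keeps rows 1 and 2 of both the boxes and the 'scores' field.
  return gather(boxes, tf.constant([1, 2], dtype=tf.int32))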
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
  """Clips bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box corners)
  to a window, optionally filtering out boxes that do not overlap at all with
  the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the
      [y_min, x_min, y_max, x_max] window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
    y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
    x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
    x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
    clipped = box_list.BoxList(
        tf.concat(
            [y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped], 1))
    clipped = _copy_extra_fields(clipped, boxlist)
    if filter_nonoverlapping:
      areas = area(clipped)
      nonzero_area_indices = tf.cast(
          tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
      clipped = gather(clipped, nonzero_area_indices)
    return clipped
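# Illustrative sketch (hypothetical name and values): clipping to the unit
# window. The first box is trimmed to [0, 0, 0.5, 0.5]; the second lies
# entirely outside, collapses to zero area, and is filtered out.
def _example_clip_to_window_usage():
  boxes = box_list.BoxList(
      tf.constant([[-0.2, -0.2, 0.5, 0.5],
                   [1.1, 1.1, 1.5, 1.5]]))
  return clip_to_window(boxes, tf.constant([0.0, 0.0, 1.0, 1.0]))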
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
  """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015.
  For each box 'B' in selected_boxes, we find the set 'S' of boxes in
  pool_boxes with iou overlap greater than iou_thresh. The location of B is
  set to the weighted average location of boxes in S (scores are used for
  weighting), and the score of B is set to the average score of boxes in S.

  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.

  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.

  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
  if not 0.0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if not isinstance(selected_boxes, box_list.BoxList):
    raise ValueError('selected_boxes must be a BoxList')
  if not isinstance(pool_boxes, box_list.BoxList):
    raise ValueError('pool_boxes must be a BoxList')
  if not pool_boxes.has_field('scores'):
    raise ValueError('pool_boxes must have a \'scores\' field')

  iou_ = iou(selected_boxes, pool_boxes)
  match_indicator = tf.cast(tf.greater(iou_, iou_thresh), dtype=tf.float32)
  num_matches = tf.reduce_sum(match_indicator, 1)
  # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
  # match to any boxes in pool_boxes. For such boxes without any matches, we
  # should return the original boxes without voting.
  match_assert = tf.Assert(
      tf.reduce_all(tf.greater(num_matches, 0)),
      ['Each box in selected_boxes must match with at least one box '
       'in pool_boxes.'])

  scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
  scores_assert = tf.Assert(
      tf.reduce_all(tf.greater_equal(scores, 0)),
      ['Scores must be non negative.'])

  with tf.control_dependencies([scores_assert, match_assert]):
    sum_scores = tf.matmul(match_indicator, scores)
  averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

  box_locations = tf.matmul(match_indicator,
                            pool_boxes.get() * scores) / sum_scores
  averaged_boxes = box_list.BoxList(box_locations)
  _copy_extra_fields(averaged_boxes, selected_boxes)
  averaged_boxes.add_field('scores', averaged_scores)
  return averaged_boxes
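# Illustrative sketch (hypothetical name and values) of the voting step. In
# practice `selected` comes from non-max suppression over `pool`. Here the
# selected box is replaced by the score-weighted average of the two
# overlapping pool boxes, and its score by their mean: (0.8 + 0.4) / 2 = 0.6.
def _example_box_voting_usage():
  pool = box_list.BoxList(
      tf.constant([[0.0, 0.0, 0.5, 0.5],
                   [0.05, 0.05, 0.55, 0.55]]))
  pool.add_field('scores', tf.constant([0.8, 0.4]))
  selected = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
  return box_voting(selected, pool, iou_thresh=0.5)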
def boolean_mask(boxlist,
                 indicator,
                 fields=None,
                 scope=None,
                 use_static_shapes=False,
                 indicator_sum=None):
  """Selects boxes from BoxList according to indicator into a new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the
  boxlist (indexing into the first dimension). However one can optionally only
  draw from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from. If None (default),
      all fields are gathered from. Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: An integer containing the sum of the `indicator` vector.
      Only required if `use_static_shapes` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator

  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    if use_static_shapes:
      if not (indicator_sum and isinstance(indicator_sum, int)):
        raise ValueError('`indicator_sum` must be of type int')
      selected_positions = tf.cast(indicator, dtype=tf.float32)
      indexed_positions = tf.cast(
          tf.multiply(tf.cumsum(selected_positions), selected_positions),
          dtype=tf.int32)
      one_hot_selector = tf.one_hot(
          indexed_positions - 1, indicator_sum, dtype=tf.float32)
      sampled_indices = tf.cast(
          tf.tensordot(
              tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32),
              one_hot_selector,
              axes=[0, 0]),
          dtype=tf.int32)
      return gather(boxlist, sampled_indices, use_static_shapes=True)
    else:
      subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
      if fields is None:
        fields = boxlist.get_extra_fields()
      for field in fields:
        if not boxlist.has_field(field):
          raise ValueError('boxlist must contain all specified fields')
        subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
        subboxlist.add_field(field, subfieldlist)
      return subboxlist
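# Illustrative sketch (hypothetical name and values): keeping only boxes whose
# score clears a threshold, with the 'scores' field masked consistently.
def _example_boolean_mask_usage():
  boxes = box_list.BoxList(
      tf.constant([[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 1.0, 1.0]]))
  scores = tf.constant([0.9, 0.2])
  boxes.add_field('scores', scores)
  return boolean_mask(boxes, tf.greater(scores, 0.5))  # Keeps only box 0.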
def _strict_random_crop_image(image,
                              boxes,
                              labels,
                              masks=None,
                              keypoints=None,
                              min_object_covered=1.0,
                              aspect_ratio_range=(0.75, 1.33),
                              area_range=(0.1, 1.0),
                              overlap_thresh=0.3,
                              clip_boxes=True):
  """Performs random crop.

  Note: Keypoint coordinates that are outside the crop will be set to NaN,
  which is consistent with the original keypoint encoding for non-existing
  keypoints. This function always crops the image and is supposed to be used
  by `random_crop_image` function which sometimes returns the image unchanged.

  Args:
    image: rank 3 float32 tensor containing 1 image ->
      [height, width, channels] with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes with shape
      [num_instances, 4]. Boxes are in normalized form meaning their
      coordinates vary between [0, 1]. Each row is in the form of
      [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
      [num_instances, height, width] containing instance masks. The masks are
      of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
      at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
      original image.
    overlap_thresh: minimum overlap thresh with new cropped image to keep the
      box.
    clip_boxes: whether to clip the boxes to the cropped image.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes. Boxes are in
      normalized form.
    labels: new labels.

    If masks or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
      containing instance masks.
    keypoints: rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]
  """
  with tf.name_scope('RandomCropImage', values=[image, boxes]):
    image_shape = tf.shape(image)

    # boxes are [N, 4]. Lets first make them [N, 1, 4].
    boxes_expanded = tf.expand_dims(
        tf.clip_by_value(boxes, clip_value_min=0.0, clip_value_max=1.0), 1)

    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        image_shape,
        bounding_boxes=boxes_expanded,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)

    im_box_begin, im_box_size, im_box = sample_distorted_bounding_box

    new_image = tf.slice(image, im_box_begin, im_box_size)
    new_image.set_shape([None, None, image.get_shape()[2]])

    # [1, 4]
    im_box_rank2 = tf.squeeze(im_box, axis=[0])
    # [4]
    im_box_rank1 = tf.squeeze(im_box)

    boxlist = box_list.BoxList(boxes)
    boxlist.add_field('labels', labels)

    im_boxlist = box_list.BoxList(im_box_rank2)

    # remove boxes that are completely outside the cropped image
    boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
        boxlist, im_box_rank1)

    # remove boxes that do not sufficiently overlap the cropped image
    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
        boxlist, im_boxlist, overlap_thresh)

    # change the coordinates of the remaining boxes
    new_labels = overlapping_boxlist.get_field('labels')
    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
                                                       im_box_rank1)
    new_boxes = new_boxlist.get()
    if clip_boxes:
      new_boxes = tf.clip_by_value(
          new_boxes, clip_value_min=0.0, clip_value_max=1.0)

    result = [new_image, new_boxes, new_labels]

    if masks is not None:
      masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
      masks_of_boxes_completely_inside_window = tf.gather(
          masks_of_boxes_inside_window, keep_ids)
      masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
      masks_box_size = [-1, im_box_size[0], im_box_size[1]]
      new_masks = tf.slice(masks_of_boxes_completely_inside_window,
                           masks_box_begin, masks_box_size)
      result.append(new_masks)

    if keypoints is not None:
      keypoints_of_boxes_inside_window = tf.gather(keypoints,
                                                   inside_window_ids)
      keypoints_of_boxes_completely_inside_window = tf.gather(
          keypoints_of_boxes_inside_window, keep_ids)
      new_keypoints = keypoint_change_coordinate_frame(
          keypoints_of_boxes_completely_inside_window, im_box_rank1)
      if clip_boxes:
        new_keypoints = keypoint_prune_outside_window(new_keypoints,
                                                      [0.0, 0.0, 1.0, 1.0])
      result.append(new_keypoints)
    return tuple(result)
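# Illustrative sketch (hypothetical name and shapes): one strict crop of an
# image with two labeled boxes. In the real pipeline this helper is wrapped by
# `random_crop_image`, which sometimes returns the input unchanged.
def _example_strict_random_crop_image():
  image = tf.random_uniform([480, 640, 3])
  boxes = tf.constant([[0.1, 0.1, 0.6, 0.6],
                       [0.4, 0.4, 0.9, 0.9]])
  labels = tf.constant([1, 2], dtype=tf.int32)
  return _strict_random_crop_image(image, boxes, labels)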