def call(self, input):
    # Foreground class probabilities, [batch, num_anchors]
    class_probs = input[0][:, :, 1]
    # Bounding-box refinements, [batch, num_anchors, 4]; undo the std-dev
    # normalization applied to the regression targets during training.
    bbox_offset = input[1]
    bbox_offset = bbox_offset * np.reshape(self.config.BBOX_STD_DEV, [1, 1, 4])
    anchors = self.anchors

    # Keep only the top-k anchors by foreground score before NMS.
    pre_nms_limit = min(self.config.PRE_NMS_LIMIT, self.anchors.shape[0])
    ids = tf.nn.top_k(class_probs, pre_nms_limit, sorted=True,
                      name="top_anchors").indices

    # Slice per batch item (images per GPU).
    class_probs = utils.batch_slice([class_probs, ids],
                                    lambda x, y: tf.gather(x, y),
                                    self.config.IMAGES_PER_GPU)
    bbox_offset = utils.batch_slice([bbox_offset, ids],
                                    lambda x, y: tf.gather(x, y),
                                    self.config.IMAGES_PER_GPU)
    anchors = utils.batch_slice(ids,
                                lambda x: tf.gather(anchors, x),
                                self.config.IMAGES_PER_GPU,
                                names=["pre_nms_anchors"])

    # Apply the predicted offsets to the anchors to get refined boxes that
    # sit closer to the closest foreground object.
    bboxes = utils.batch_slice([anchors, bbox_offset],
                               lambda x, y: utils.apply_bbox_offset(x, y),
                               self.config.IMAGES_PER_GPU,
                               names=["refined_anchors"])

    # Clip the refined boxes to the image window.
    h, w = self.config.IMAGE_SHAPE[:2]
    window = np.array([0, 0, h, w], dtype=np.float32)
    bboxes = utils.batch_slice(bboxes,
                               lambda x: utils.clip_boxes(x, window),
                               self.config.IMAGES_PER_GPU,
                               names=["refined_anchors_clipped"])

    # Normalize coordinates to the 0..1 range and generate proposals by NMS.
    normalized_bboxes = bboxes / np.array([[h, w, h, w]])

    def nms(normalized_bboxes, scores):
        ids = tf.image.non_max_suppression(normalized_bboxes, scores,
                                           self.num_proposal,
                                           self.nms_threshold,
                                           name="rpn_non_max_suppression")
        proposals = tf.gather(normalized_bboxes, ids)
        # Pad with zeros so every image yields exactly num_proposal boxes.
        padding = tf.maximum(self.num_proposal - tf.shape(proposals)[0], 0)
        proposals = tf.pad(proposals, [(0, padding), (0, 0)])
        return proposals

    proposals = utils.batch_slice([normalized_bboxes, class_probs], nms,
                                  self.config.IMAGES_PER_GPU)
    return proposals
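`utils.apply_bbox_offset` is not shown in this listing. Since the boxes are later normalized by image height and width, it presumably implements the standard (dy, dx, log(dh), log(dw)) refinement used by Mask R-CNN-style code. A minimal NumPy sketch under that assumption (the function name and conventions here are illustrative, not the project's actual implementation):

import numpy as np

def apply_bbox_offset_np(boxes, deltas):
    """Hypothetical stand-in for utils.apply_bbox_offset.

    Applies (dy, dx, log(dh), log(dw)) deltas to (y1, x1, y2, x2) boxes.
    The real implementation operates on TF tensors, not NumPy arrays.
    """
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * heights
    center_x = boxes[:, 1] + 0.5 * widths
    # Shift the center and rescale the size.
    center_y += deltas[:, 0] * heights
    center_x += deltas[:, 1] * widths
    heights *= np.exp(deltas[:, 2])
    widths *= np.exp(deltas[:, 3])
    # Back to corner coordinates.
    y1 = center_y - 0.5 * heights
    x1 = center_x - 0.5 * widths
    return np.stack([y1, x1, y1 + heights, x1 + widths], axis=1)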
def bbox_reg(self, boxes, box_deltas, im):
    # Drop the batch-index column before converting to NumPy.
    if CUDA_AVAILABLE:
        boxes = boxes.data[:, 1:].cpu().numpy()
        box_deltas = box_deltas.data.cpu().numpy()
    else:
        boxes = boxes.data[:, 1:].numpy()
        box_deltas = box_deltas.data.numpy()
    # Apply the predicted deltas, then clip to the image boundaries.
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
    return _tovar(pred_boxes)
def get_roi_boxes(self, anchors, rpn_map, rpn_bbox_deltas, im):
    # TODO: im_info is hard-coded; it should come from the input image.
    im_info = (100, 100, 1)
    if CUDA_AVAILABLE:
        bbox_deltas = rpn_bbox_deltas.data.cpu().numpy()
    else:
        bbox_deltas = rpn_bbox_deltas.data.numpy()
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # The first _num_anchors channels are background probabilities; the
    # second set are the foreground probabilities, which we want.
    # scores = bottom[0].data[:, self._num_anchors:, :, :]
    if CUDA_AVAILABLE:
        scores = rpn_map.data[:, self._num_anchors:, :, :].cpu().numpy()
    else:
        scores = rpn_map.data[:, self._num_anchors:, :, :].numpy()
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # 1. Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im.size()[-2:])
    # 3. Remove predicted boxes with either height or width < threshold.
    #    (NOTE: min_size is converted to the input image scale stored in im_info[2].)
    keep = filter_boxes(proposals, self.min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    # 4. Sort all (proposal, score) pairs by score from highest to lowest.
    # 5. Take the top pre_nms_topN (e.g. 6000).
    order = scores.ravel().argsort()[::-1]
    if self.pre_nms_topN > 0:
        order = order[:self.pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6. Apply NMS (e.g. threshold = 0.7).
    # 7. Take after_nms_topN (e.g. 300).
    # 8. Return the top proposals (-> RoIs top).
    keep = nms(np.hstack((proposals, scores)), self.nms_thresh)
    if self.post_nms_topN > 0:
        keep = keep[:self.post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    return proposals, scores
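`filter_boxes` and `clip_boxes` are imported from elsewhere. Under the usual Faster R-CNN convention of (x1, y1, x2, y2) boxes, they can be sketched in NumPy as follows (an assumption for illustration, not necessarily the project's exact code):

import numpy as np

def filter_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

def clip_boxes(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to a (height, width) image."""
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)  # x1
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)  # y1
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)  # x2
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)  # y2
    return boxes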
def generate_proposals(data):
    # Extract the backbone feature map.
    feature_map = CNN_model_cut.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')

    # Run the RPN head: per-anchor objectness scores and box regressions.
    RPN_results = RPN_model.predict(padded_fcmap)
    anchor_probs = RPN_results[0].reshape((-1, 1))
    anchor_targets = RPN_results[1].reshape((-1, 4))

    # Generate the original anchors over the feature grid.
    feature_size = feature_map.shape[1]
    number_feature_points = feature_size * feature_size
    feature_stride = int(image_size / feature_size)
    base_anchors = generate_anchors(feature_stride, feature_stride,
                                    ratios=ANCHOR_RATIOS,
                                    scales=ANCHOR_SCALES)
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    original_anchors = (base_anchors.reshape((1, anchor_number, 4)) +
                        shifts.reshape((1, number_feature_points, 4))
                        .transpose((1, 0, 2)))
    original_anchors = original_anchors.reshape((-1, 4))

    # Turn the RPN regressions into proposals and clip them to the image.
    proposals = bbox_transform_inv(original_anchors, anchor_targets)
    proposals = clip_boxes(proposals, (data.shape[0], data.shape[1]))

    # Keep only the N highest-scoring proposals.
    high_to_low_scores = anchor_probs.ravel().argsort()[::-1]
    high_to_low_scores = high_to_low_scores[0:N]
    proposals = proposals[high_to_low_scores, :]
    anchor_probs = anchor_probs[high_to_low_scores]

    del original_anchors, RPN_results, feature_map, padded_fcmap
    return proposals, anchor_probs
def bbox_reg(self, boxes, box_deltas, im):
    boxes = boxes.data[:, 1:].numpy()
    box_deltas = box_deltas.data.numpy()
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
    return to_var(pred_boxes)
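Both bbox_reg variants lean on `bbox_transform_inv`, which in the py-faster-rcnn lineage inverts the (dx, dy, dw, dh) regression parameterization. A minimal NumPy sketch, assuming that convention:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) regressions to (x1, y1, x2, y2) boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    # Predicted centers and sizes.
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights
    # Back to corner coordinates.
    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes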
def forward(self, input):
    """
    Parameters
    ----------
    input - list containing:
        cls_prob_alls: (BS, H, W, Ax2) output of the RPN (here: Feature
            Pyramid Network), probability of bg or fg;
        bbox_pred_alls: (BS, H, W, Ax4) box-regression output of the RPN;
        im_info: a list of [image_height, image_width, scale_ratios];
        rpn_shapes: width and height of each feature map;

    Returns
    -------
    rpn_rois: (1 x H x W x A, 5), e.g. [0, x1, y1, x2, y2]

    Algorithm:
        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS with threshold 0.7 to remaining proposals
        take after_nms_topN proposals after NMS
        return the top proposals (-> RoIs top, scores top)
    """
    scores = input[0][:, :, 1]   # batch_size x num_rois
    bbox_deltas = input[1]       # batch_size x num_rois x 4
    im_info = input[2]
    feat_shapes = input[3]       # rpn_shapes from the docstring
    batch_size = bbox_deltas.size(0)

    anchors = torch.from_numpy(
        generate_anchors_all_pyramids(self.fpn_scales, self.anchor_ratios,
                                      feat_shapes, self.feat_strides,
                                      self.fpn_anchor_stride)).type_as(scores)
    num_anchors = anchors.size(0)
    anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)

    # 1. Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info, batch_size)
    # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, self.post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # 3. Remove predicted boxes with either height or width < threshold.
        #    (NOTE: convert min_size to the input image scale stored in im_info[2].)
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # 4. Sort all (proposal, score) pairs by score from highest to lowest.
        # 5. Take the top pre_nms_topN (e.g. 6000).
        order_single = order[i]
        if self.pre_nms_topN > 0 and self.pre_nms_topN < scores_keep.numel():
            order_single = order_single[:self.pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. Apply NMS (e.g. threshold = 0.7).
        # 7. Take after_nms_topN (e.g. 300).
        # 8. Return the top proposals (-> RoIs top).
        keep_idx_i = nms(proposals_single, scores_single, self.rpn_nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)
        if self.post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:self.post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # Pad with zeros at the end if fewer than post_nms_topN survive.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single
    return output

def backward(self, top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass

def reshape(self, bottom, top):
    """Reshaping happens during the call to forward."""
    pass

def _filter_boxes(self, boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
    hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
    keep = ((ws >= min_size) & (hs >= min_size))
    return keep
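The `nms` used in forward() takes (boxes, scores, threshold) and returns keep indices. If it is backed by torchvision, a thin adapter like the following would match that call signature (an assumption; the original repo may ship its own CUDA kernel instead):

import torch
from torchvision.ops import nms as tv_nms

def nms(boxes, scores, iou_threshold):
    """Adapter: torchvision expects 1-D scores, but forward() passes (N, 1)."""
    return tv_nms(boxes, scores.view(-1), iou_threshold)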
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # Base anchors are 9 anchors w.r.t. a tile (0, 0, w_stride-1, h_stride-1).
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Run the trained RPN to predict scores and deltas.
    res = rpn_model.query_cnn(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # Convert the deltas (dx, dy, ...) into concrete box coordinates and
    # clip away anchors that fall outside the image.
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # Remove small boxes; the threshold here is 40 pixels.
    keep = filter_boxes(proposals, small_box_threshold)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Apply NMS to the top 6000, then keep the top 300.
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Add the ground-truth boxes to the proposals as well.
    proposals = np.vstack((proposals, gt_boxes))

    # Calculate overlaps of proposals and gt_boxes.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  #?

    # Subsample foreground regions without replacement.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)
    # Subsample background regions without replacement.
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = min(BATCH - fg_rois_per_this_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from the various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # Regression targets: one 4-vector slot per category.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # One-hot ground-truth category.
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
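`bbox_overlaps` computes the full IoU matrix between proposals and ground-truth boxes, which drives the fg/bg assignment above. A vectorized NumPy sketch of that computation, assuming inclusive (x1, y1, x2, y2) pixel coordinates as in the +1 conventions used throughout these snippets:

import numpy as np

def bbox_overlaps(boxes, gt_boxes):
    """Return an (N, K) matrix of IoU between boxes and gt_boxes."""
    # Pairwise intersection coordinates via broadcasting: (N, 1) vs (K,).
    ix1 = np.maximum(boxes[:, None, 0], gt_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], gt_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], gt_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], gt_boxes[None, :, 3])
    iw = np.maximum(ix2 - ix1 + 1, 0)
    ih = np.maximum(iy2 - iy1 + 1, 0)
    inter = iw * ih
    area_a = ((boxes[:, 2] - boxes[:, 0] + 1) *
              (boxes[:, 3] - boxes[:, 1] + 1))[:, None]
    area_b = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1) *
              (gt_boxes[:, 3] - gt_boxes[:, 1] + 1))[None, :]
    return inter / (area_a + area_b - inter)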
def get_text_lines_oriented(self, text_proposals, scores, im_size):
    """
    text_proposals: boxes
    """
    # tp = text proposal. Build the graph first to find which small boxes
    # make up each text line.
    tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
    text_lines = np.zeros((len(tp_groups), 8), np.float32)

    for index, tp_indices in enumerate(tp_groups):
        # All small boxes belonging to this text line.
        text_line_boxes = text_proposals[list(tp_indices)]
        # Center x, y of each small box.
        X = (text_line_boxes[:, 0] + text_line_boxes[:, 2]) / 2
        Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2
        # Least-squares line fit through the box centers.
        z1 = np.polyfit(X, Y, 1)

        x0 = np.min(text_line_boxes[:, 0])  # smallest x of the text line
        x1 = np.max(text_line_boxes[:, 2])  # largest x of the text line
        offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5  # half box width

        # Fit a line through the top-left corners of all small boxes, then
        # evaluate it at the extreme left/right x of the text line.
        lt_y, rt_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1],
                                x0 + offset, x1 - offset)
        # Same, using the bottom-left corners.
        lb_y, rb_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3],
                                x0 + offset, x1 - offset)

        # The mean score of all small boxes serves as the text-line score.
        score = scores[list(tp_indices)].sum() / float(len(tp_indices))

        text_lines[index, 0] = x0
        text_lines[index, 1] = min(lt_y, rt_y)  # smaller y of the top edge
        text_lines[index, 2] = x1
        text_lines[index, 3] = max(lb_y, rb_y)  # larger y of the bottom edge
        text_lines[index, 4] = score
        text_lines[index, 5] = z1[0]  # slope k of the center-line fit
        text_lines[index, 6] = z1[1]  # intercept b of the center-line fit
        # Mean height of the small boxes.
        height = np.mean(text_line_boxes[:, 3] - text_line_boxes[:, 1])
        text_lines[index, 7] = height + 2.5

    text_recs = np.zeros((len(text_lines), 9), np.float32)
    index = 0
    for line in text_lines:
        # From the center line and the height, derive the intercepts of the
        # top and bottom edges of the text line.
        b1 = line[6] - line[7] / 2
        b2 = line[6] + line[7] / 2
        x1 = line[0]
        y1 = line[5] * line[0] + b1  # top left
        x2 = line[2]
        y2 = line[5] * line[2] + b1  # top right
        x3 = line[0]
        y3 = line[5] * line[0] + b2  # bottom left
        x4 = line[2]
        y4 = line[5] * line[2] + b2  # bottom right
        disX = x2 - x1
        disY = y2 - y1
        width = np.sqrt(disX * disX + disY * disY)  # text-line width
        fTmp0 = y3 - y1  # text-line height
        fTmp1 = fTmp0 * disY / width
        x = np.fabs(fTmp1 * disX / width)  # compensation along x
        y = np.fabs(fTmp1 * disY / width)  # compensation along y
        if line[5] < 0:
            x1 -= x
            y1 += y
            x4 += x
            y4 -= y
        else:
            x2 += x
            y2 += y
            x3 -= x
            y3 -= y
        text_recs[index, 0] = x1
        text_recs[index, 1] = y1
        text_recs[index, 2] = x2
        text_recs[index, 3] = y2
        text_recs[index, 4] = x3
        text_recs[index, 5] = y3
        text_recs[index, 6] = x4
        text_recs[index, 7] = y4
        text_recs[index, 8] = line[4]
        index = index + 1

    text_recs = clip_boxes(text_recs, im_size)
    return text_recs
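`self.fit_y` is not shown here. Given how it is called (x samples, y samples, and two query x positions), it presumably fits a line through the points and evaluates it at the two x values. A minimal sketch under that assumption:

import numpy as np

def fit_y(X, Y, x1, x2):
    """Fit a line through (X, Y) and evaluate it at x1 and x2."""
    # If all boxes share a single x coordinate the fit is degenerate;
    # fall back to the constant y value.
    if np.sum(X == X[0]) == len(X):
        return Y[0], Y[0]
    p = np.poly1d(np.polyfit(X, Y, 1))
    return p(x1), p(x2)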
def refine_detections(rois, probs, deltas, window, config):
    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    indices = tf.stack([tf.range(tf.shape(probs)[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply bounding box deltas
    refined_rois = utils.apply_bbox_offset(
        rois, deltas_specific * config.BBOX_STD_DEV)
    # Convert coordinates to image domain
    # TODO: better to keep them normalized until later
    height, width = config.IMAGE_SHAPE[:2]
    refined_rois *= tf.constant([height, width, height, width],
                                dtype=tf.float32)
    # Clip boxes to image window
    refined_rois = utils.clip_boxes(refined_rois, window)
    # Round and cast to int since we're dealing with pixels now
    refined_rois = tf.cast(tf.math.rint(refined_rois), tf.int32)
    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(
            class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(conf_keep, 0))
        keep = tf.sparse.to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.cast(tf.gather(pre_nms_rois, ixs), tf.float32),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Map indices back into the keep tensor
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)], mode='CONSTANT',
                            constant_values=-1)
        # Set shape so map_fn() can infer the result shape
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                tf.expand_dims(nms_keep, 0))
    keep = tf.sparse.to_dense(keep)[0]

    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are in image domain.
    detections = tf.concat(
        [
            tf.cast(tf.gather(refined_rois, keep), tf.float32),
            tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
            tf.gather(class_scores, keep)[..., tf.newaxis]
        ],
        axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
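The keep/nms_keep merging above relies on tf.sets.intersection over int64 index tensors. A tiny standalone demonstration of that idiom (TF 2.x, eager mode):

import tensorflow as tf

keep = tf.constant([0, 2, 3, 7], dtype=tf.int64)
nms_keep = tf.constant([2, 7, 9], dtype=tf.int64)
# tf.sets.* operate on the last dimension, hence the expand_dims.
inter = tf.sets.intersection(tf.expand_dims(keep, 0),
                             tf.expand_dims(nms_keep, 0))
print(tf.sparse.to_dense(inter)[0].numpy())  # -> [2 7]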
def produce_batch(filepath, gt_boxes, h_w, category):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # Feed the image to the pretrained model and get the feature map.
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Calculate the output width/height stride.
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    # Generate base anchors according to the output stride.
    # Base anchors are 9 anchors w.r.t. a tile (0, 0, w_stride-1, h_stride-1).
    base_anchors = generate_anchors(w_stride, h_stride)
    # Slice tiles according to image size and stride.
    # Each 1x1x1532 feature map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # Apply the base anchors to all tiles to get num_feature_map * 9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Feed the feature map to the pretrained RPN model; get proposal scores and bboxes.
    res = rpn_model.predict(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # Transform proposals to bbox values (x1, y1, x2, y2) and clip to the image.
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # Remove small boxes; the threshold here is 40 pixels.
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Apply NMS to the top 6000, then keep the top 300.
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Add gt_boxes to the proposals.
    proposals = np.vstack((proposals, gt_boxes))

    # Calculate overlaps of proposals and gt_boxes.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  #?

    # Subsample foreground regions without replacement.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)
    # Subsample background regions without replacement.
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = min(BATCH - fg_rois_per_this_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from the various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # Regression targets: one 4-vector slot per category.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # One-hot ground-truth category.
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
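`py_cpu_nms` follows the classic greedy NMS from the Fast R-CNN codebase: it consumes an (N, 5) array of [x1, y1, x2, y2, score] rows (the np.hstack above) and returns the surviving row indices. A sketch of that standard implementation for reference:

import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression on [x1, y1, x2, y2, score] rows."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box with the remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes that overlap the winner below the threshold.
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep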