def compute_bbox_regression_targets(rois, overlaps, labels, cfg):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :param cfg: config providing cfg.TRAIN.BBOX_REGRESSION_THRESH
    :return: targets[i][class, dx, dy, dw, dh] k * 5
    """
    # Ensure ROIs are floats
    rois = rois.astype(np.float64, copy=False)

    # Sanity check
    if len(rois) != len(overlaps):
        print('bbox regression: length of rois and overlaps does not match')

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong: zero ground-truth rois')

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
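# The helpers in this file rely on bbox_overlaps(boxes, query_boxes), usually a
# compiled Cython kernel in these codebases. Below is a minimal NumPy sketch of
# its assumed semantics (an n x k IoU matrix with the Pascal VOC +1 pixel
# convention); it is a reference illustration, not the original kernel.
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Return IoU matrix of shape (n, k) for boxes (n, 4) vs query_boxes (k, 4)."""
    # areas under the inclusive-pixel (+1) convention used throughout this file
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    # pairwise intersection widths/heights, clipped at zero
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - \
         np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - \
         np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1
    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)
    inter = iw * ih
    union = areas[:, None] + query_areas[None, :] - inter
    return inter / union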
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """
    given ground truth, prepare roidb
    :param box_list: [image_index] ndarray of [box_index][x1, y1, x2, y2]
    :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    """
    assert len(box_list) == self.num_images, \
        'number of boxes matrix must match number of images'
    roidb = []
    for i in range(self.num_images):
        roi_rec = dict()
        roi_rec['image'] = gt_roidb[i]['image']
        roi_rec['height'] = gt_roidb[i]['height']
        roi_rec['width'] = gt_roidb[i]['width']

        boxes = box_list[i]
        if boxes.shape[1] == 5:
            boxes = boxes[:, :4]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # n boxes and k gt_boxes => n * k overlap
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            # for each of the n boxes, keep only the maximum overlap
            # (it must be greater than zero to assign a class)
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        roi_rec.update({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': overlaps,
            'max_classes': overlaps.argmax(axis=1),
            'max_overlaps': overlaps.max(axis=1),
            'flipped': False
        })

        # background roi => background class
        zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0]
        assert all(roi_rec['max_classes'][zero_indexes] == 0)
        # foreground roi => foreground class
        nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0]
        assert all(roi_rec['max_classes'][nonzero_indexes] != 0)

        roidb.append(roi_rec)

    return roidb
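# A toy, self-contained illustration (hypothetical numbers) of the max-overlap
# bookkeeping above: each proposal stores, per class, the IoU with its
# best-matching gt box, from which max_classes / max_overlaps follow.
import numpy as np

gt_classes = np.array([2, 1])                # classes of two gt boxes
gt_overlaps = np.array([[0.8, 0.1],          # proposal 0 vs gt 0 and gt 1
                        [0.0, 0.0],          # proposal 1 matches nothing
                        [0.3, 0.6]])         # proposal 2 best matches gt 1
num_classes = 3
overlaps = np.zeros((3, num_classes), dtype=np.float32)
argmaxes = gt_overlaps.argmax(axis=1)
maxes = gt_overlaps.max(axis=1)
I = np.where(maxes > 0)[0]
overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
print(overlaps.argmax(axis=1))  # max_classes -> [2 0 1]
print(overlaps.max(axis=1))     # max_overlaps -> [0.8 0.  0.6]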
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes), shape (n, k)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # gt box with max overlap, (n,)
    max_overlaps = overlaps.max(axis=1)      # the max overlap itself, (n,)
    labels = gt_boxes[gt_assignment, 4]      # label of the assigned gt, (n,)

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    fg_num = len(fg_inds)
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]
    # offset = offsets[keep_inds]
    # cls = cls_score[keep_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)  # (labels, targets), (n, 5)
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)
    return labels, rois, bbox_targets, bbox_inside_weights, keep_inds, fg_num
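# _sample_rois above calls _get_bbox_regression_labels to expand the compact
# (n, 5) [class, dx, dy, dw, dh] targets into per-class columns. A minimal
# sketch of the standard Fast R-CNN expansion follows, assuming inside weights
# of (1.0, 1.0, 1.0, 1.0); the project's real helper may differ in detail.
import numpy as np

def _get_bbox_regression_labels_sketch(bbox_target_data, num_classes):
    """Expand (n, 5) target data into (n, 4*num_classes) targets/weights."""
    clss = bbox_target_data[:, 0].astype(int)
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:       # background rows stay all-zero
        start = 4 * clss[ind]               # column block of this roi's class
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights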
def sample_rois(rois,
                fg_rois_per_image,
                rois_per_image,
                num_classes,
                cfg,
                labels=None,
                overlaps=None,
                bbox_targets=None,
                gt_boxes=None):
    if labels is None:
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # index of the gt with max overlap for each roi
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]  # look up the corresponding label
        labels = labels.astype(np.int32)

    # find foreground/background samples meeting the thresholds, then sample
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes,
                                size=fg_rois_per_this_image,
                                replace=False)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                          & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_indexes.size)
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes,
                                size=bg_rois_per_this_image,
                                replace=False)
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad with random rois until the batch reaches rois_per_image
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    labels = labels[keep_indexes]
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))  # (batch, 5)

    bbox_targets, bbox_weights = expand_bbox_regression_targets(
        bbox_target_data, num_classes, cfg)
    return rois, labels, bbox_targets, bbox_weights
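# Both samplers encode boxes with bbox_transform(ex_rois, gt_rois). A minimal
# NumPy sketch of the standard R-CNN delta encoding it is assumed to implement
# (same +1 width/height convention as bbox_overlaps) is shown below; the
# project's own version may clip or vectorize differently.
import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    """Return (n, 4) deltas [dx, dy, dw, dh] mapping ex_rois onto gt_rois."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w   # center shift, normalized by roi size
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)      # log-space scale change
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()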
def compute_tp_fp_fn(cls, boxes, gt_boxes, threshold):
    # cls holds per-box scores; detections are boxes scoring above threshold
    gt_boxes_num = gt_boxes.shape[0]
    positive_inds = tf.where(cls > threshold)
    positive_inds = positive_inds.numpy()
    positive_num = positive_inds.shape[0]
    positive_boxes = boxes.numpy()[positive_inds, :]
    positive_boxes = np.reshape(positive_boxes, (-1, 4))
    overlaps = bbox_overlaps(
        np.ascontiguousarray(positive_boxes[:, :], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, 1:-1], dtype=np.float64))  # (n, k)
    gt_assignment = overlaps.argmax(axis=1)  # gt box with max overlap, (n,)
    max_overlaps = overlaps.max(axis=1)      # the max overlap itself
    positive_overlaps = np.where(max_overlaps > 0.5)
    gt_inds = gt_assignment[positive_overlaps]
    gt_inds = np.unique(gt_inds)
    # each gt box counts at most once as a true positive
    TP = gt_inds.size
    FP = positive_num - TP
    FN = gt_boxes_num - TP
    return TP, FP, FN
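# A short usage note (hypothetical numbers): the TP/FP/FN counts returned
# above plug directly into the usual precision/recall/F1 definitions.
TP, FP, FN = 8, 2, 4  # e.g. from compute_tp_fp_fn(scores, boxes, gt_boxes, 0.5)
precision = TP / (TP + FP)   # 0.8: fraction of detections that hit a gt box
recall = TP / (TP + FN)      # ~0.67: fraction of gt boxes that were found
f1 = 2 * precision * recall / (precision + recall)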
def gpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4,
                    device_id=0):
    """
    A wrapper function; note we already know the class of boxes and masks
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which masks should be aggregated together
    candidate_inds = []
    # weight for each element in candidate_inds
    candidate_weights = []
    # start position for each candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in range(num_classes)]

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variables for gpu mask voting
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in range(num_boxes):
            cur_ov = bbox_overlaps(
                boxes.astype(np.float64),
                t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large;
    # select only the subset of them useful for mask merging
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in range(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in range(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds,
                                                 candidate_start,
                                                 candidate_weights,
                                                 binary_thresh, im_height,
                                                 im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0])
                             & (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
def cpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4):
    """
    Wrapper function for mask voting; note we already know the class of boxes
    and masks.
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # apply nms and sort to keep the top detections according to their scores
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    num_detect = boxes.shape[0]
    res_mask = [[] for _ in range(num_detect)]
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i, 0].astype(np.float32),
                          (box[2] - box[0] + 1, box[3] - box[1] + 1))
        res_mask[i] = mask

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(
                boxes.astype(np.float64),
                t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format masks when passing them to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask,
                                                      cur_weights, im_width,
                                                      im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
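# cpu_mask_voting above defers to mask_aggregation, which in codebases of this
# family (e.g. FCIS) pastes each weighted, binarized mask into an image-sized
# canvas and then crops the result to its tight box. The sketch below captures
# those assumed semantics; the empty-mask fallback is our addition.
import numpy as np

def mask_aggregation_sketch(boxes, masks, mask_weights, im_width, im_height,
                            binary_thresh):
    """Return (aggregated mask cropped to its tight box, that box)."""
    im_mask = np.zeros((im_height, im_width))
    for i in range(len(boxes)):
        box = np.round(boxes[i]).astype(int)
        # binarize each candidate mask, then accumulate it with its weight
        mask = (masks[i] >= binary_thresh).astype(float)
        im_mask[box[1]:box[3] + 1, box[0]:box[2] + 1] += mask * mask_weights[i]
    r, c = np.where(im_mask >= binary_thresh)
    if len(r) == 0:  # nothing survived the threshold (fallback, assumption)
        return np.zeros((1, 1)), np.zeros((4, ), dtype=np.float32)
    min_y, max_y = np.min(r), np.max(r)
    min_x, max_x = np.min(c), np.max(c)
    clipped_mask = im_mask[min_y:max_y + 1, min_x:max_x + 1]
    clipped_box = np.array((min_x, min_y, max_x, max_y), dtype=np.float32)
    return clipped_mask, clipped_box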
def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, feat_stride,
                            anchor_scales):
    """
    Python version

    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # Only a minibatch of 1 is supported
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_dims[1] + _allowed_border)  # width
        & (all_anchors[:, 3] < im_dims[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, the anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IoU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # bbox_targets: the deltas (relative to anchors) that Faster R-CNN should
    # try to predict at each anchor
    # TODO: this "weights" business might be deprecated; requires investigation
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to the original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)
    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)
    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, \
        rpn_bbox_outside_weights
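# A tiny self-contained demo of the shift-grid broadcast used above: K
# per-cell shifts are added to A base anchors to yield all K*A anchors.
# Numbers here are illustrative, not the project's real anchor settings.
import numpy as np

feat_stride = 16
height, width = 2, 3                             # toy 2x3 feature map -> K = 6 cells
base_anchors = np.array([[-8., -8., 8., 8.],     # one square "anchor"
                         [-16., -4., 16., 4.]])  # one wide "anchor"; A = 2
shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)
A, K = base_anchors.shape[0], shifts.shape[0]
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))       # (K, A, 4)
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)  # (12, 4): every anchor at every feature-map cell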
def assign_anchor(feat_shape,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride=16,
                  scales=(8, 16, 32),
                  ratios=(0.5, 1, 2),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape
    :param gt_boxes: assign ground truth
    :param im_info: filter out anchors overlapped with edges
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
        'label': of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width)
        'bbox_target': of shape (batch_size, num_anchors * 4, feat_height, feat_width)
        'bbox_inside_weight': *todo* mark the assigned anchors
        'bbox_outside_weight': used to normalize the bbox_loss, all weights sum to RPN_POSITIVE_WEIGHT
    """
    def _unmap(data, count, inds, fill=0):
        """unmap a subset inds of data back into the original data of size count"""
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info', im_info)
        print('height', feat_height, 'width', feat_width)
        print('gt_boxes shape', gt_boxes.shape)
        print('gt_boxes', gt_boxes)

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border)
                           & (all_anchors[:, 1] >= -allowed_border)
                           & (all_anchors[:, 2] < im_info[1] + allowed_border)
                           & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, the anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to the original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
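# A hedged usage sketch for assign_anchor. It assumes this module's helpers
# (generate_anchors, bbox_transform, bbox_overlaps) are importable and mocks a
# minimal config with only the fields the function reads; the real project
# config will differ.
from types import SimpleNamespace
import numpy as np

cfg = SimpleNamespace(TRAIN=SimpleNamespace(
    RPN_CLOBBER_POSITIVES=False,
    RPN_NEGATIVE_OVERLAP=0.3,
    RPN_POSITIVE_OVERLAP=0.7,
    RPN_FG_FRACTION=0.5,
    RPN_BATCH_SIZE=256,
    RPN_BBOX_WEIGHTS=(1.0, 1.0, 1.0, 1.0)))

feat_shape = (1, 512, 38, 50)                    # (N, C, H, W) of the RPN feature map
im_info = np.array([[600., 800., 1.0]])          # (height, width, scale)
gt_boxes = np.array([[100., 100., 300., 260.]])  # one toy ground-truth box

label = assign_anchor(feat_shape, gt_boxes, im_info, cfg)
print(label['label'].shape)        # (1, A * feat_height * feat_width)
print(label['bbox_target'].shape)  # (1, A * 4, feat_height, feat_width)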
def _sample_rois(all_rois, gt_boxes, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes), shape (n, k)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, 1:5], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # gt box with max overlap, (n,)
    max_overlaps = overlaps.max(axis=1)      # the max overlap itself
    # binary labels: 0 where a roi overlaps no gt box, 1 otherwise
    labels = np.where(max_overlaps[:] == 0,
                      np.zeros(gt_assignment.shape, dtype='int32'),
                      np.ones(gt_assignment.shape, 'int32'))
    # labels0 = tf.gather(gt_boxes, gt_assignment, axis=0)
    # labels1 = labels0[:, 4]
    # labels = tf.where(max_overlaps==0, labels1, tf.zeros(labels1.shape, tf.int32))

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # This variant keeps all qualifying foreground RoIs instead of subsampling
    fg_rois_per_this_image = fg_inds.size
    # Sample foreground regions without replacement (disabled in this variant)
    # if fg_inds.size > 0:
    #     fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
    #                          replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = bg_inds.size
    # Sample background regions without replacement (disabled in this variant)
    # if bg_inds.size > 0:
    #     bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
    #                          replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    fg_num = len(fg_inds)
    # labels = tf.gather(labels, keep_inds, axis=0)
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    # labels_fg = tf.cast(labels[:fg_rois_per_this_image], 'int32')
    # labels_bg = tf.zeros((labels[fg_rois_per_this_image:].shape[0],), dtype='int32')
    # labels = tf.concat((labels_fg, labels_bg), axis=-1)
    rois = all_rois[keep_inds]

    # temp = gt_boxes[gt_assignment[keep_inds], :4]
    temp = tf.gather(gt_boxes, gt_assignment[keep_inds])
    temp1 = tf.cast(temp[:, :4], 'float32')
    bbox_target_data = _compute_targets(rois[:, 1:5], temp1,
                                        labels)  # (labels, targets), (n, 5)
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, fg_num)
    return labels, rois, bbox_targets, bbox_inside_weights, keep_inds, fg_num
def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None):
    """
    evaluate detection proposal recall metrics
    record max overlap value for each gt box; return vector of overlap values
    :param roidb: used to evaluate
    :param candidate_boxes: if not given, use roidb's non-gt boxes
    :param thresholds: array-like recall thresholds
    :return: all_log_info: string accumulating, per area range,
        the average recall (ar) and the recalls at each IoU overlap threshold
    """
    all_log_info = ''
    area_names = ['all', '0-25', '25-50', '50-100', '100-200', '200-300',
                  '300-inf']
    area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2],
                   [50**2, 100**2], [100**2, 200**2], [200**2, 300**2],
                   [300**2, 1e5**2]]
    area_counts = []
    for area_name, area_range in zip(area_names[1:], area_ranges[1:]):
        area_count = 0
        for i in range(self.num_images):
            if candidate_boxes is None:
                # default is to use the non-gt boxes from roidb
                non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                boxes = roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            boxes_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
                          (boxes[:, 3] - boxes[:, 1] + 1)
            valid_range_inds = np.where((boxes_areas >= area_range[0])
                                        & (boxes_areas < area_range[1]))[0]
            area_count += len(valid_range_inds)
        area_counts.append(area_count)
    total_counts = float(sum(area_counts))
    for area_name, area_count in zip(area_names[1:], area_counts):
        log_info = 'percentage of {} {}'.format(area_name,
                                                area_count / total_counts)
        print(log_info)
        all_log_info += log_info
    log_info = 'average number of proposal {}'.format(total_counts /
                                                      self.num_images)
    print(log_info)
    all_log_info += log_info

    for area_name, area_range in zip(area_names, area_ranges):
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # checking for max_overlaps == 1 avoids including crowd annotations
            max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1)
            gt_inds = np.where((roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = roidb[i]['boxes'][gt_inds, :]
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                       (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas < area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # default is to use the non-gt boxes from roidb
                non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                boxes = roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue

            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # iterate over whichever is smaller: proposals or gt boxes
            rounds = min(boxes.shape[0], gt_boxes.shape[0])
            for j in range(rounds):
                # find which proposal maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # get the IoU amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is covered by the most IoU
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0), '%s\n%s\n%s' % (boxes, gt_boxes,
                                                      overlaps)
                # find the proposal box that covers the best-covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the IoU coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded IoU coverage levels
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each IoU threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()

        # print results
        log_info = 'average recall for {}: {:.3f}'.format(area_name, ar)
        print(log_info)
        all_log_info += log_info
        for threshold, recall in zip(thresholds, recalls):
            log_info = 'recall @{:.2f}: {:.3f}'.format(threshold, recall)
            print(log_info)
            all_log_info += log_info

    return all_log_info
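# A tiny self-contained example (made-up overlaps) of the recall computation
# at the end of evaluate_recall: each gt box contributes its best IoU, and
# recall at threshold t is the fraction of gt boxes with best IoU >= t.
import numpy as np

gt_overlaps = np.array([0.92, 0.55, 0.71, 0.30, 0.88])  # best IoU per gt box
num_pos = len(gt_overlaps)
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).sum() / float(num_pos)
                    for t in thresholds])
print(recalls.mean())  # average recall (AR) over the IoU thresholds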