def _compute_targets(gt_rois, ex_rois): """Compute bounding-box regression targets for an image. gt_rois: ground truth rois ex_rois: example rois """ K = ex_rois.shape[0] N = gt_rois.shape[0] # Ensure ROIs are floats gt_rois = gt_rois.astype(np.float, copy=False) ex_rois = ex_rois.astype(np.float, copy=False) # bbox targets: (x1,y1,x2,y2,ex_rois_ind,subreg_ind) targets = np.zeros((0, 7), dtype=np.float32) if K == 0 or N == 0: return targets # For each region, find out objects that are adjacent # Match objects to sub-regions with maximum overlaps. # Objects with large overlaps with any sub-regions are given priority. overlaps = bbox_overlaps(ex_rois, gt_rois) max_overlaps = overlaps.max(axis=1) for k in xrange(K): if max_overlaps[k] < cfg.SEAR.ADJ_THRESH: continue re = ex_rois[k, :] L = np.array([[re[2]-re[0], re[3]-re[1], re[2]-re[0], re[3]-re[1]]]) delta = np.array([[re[0], re[1], re[0], re[1]]]) # sub-regions` s_re = (L * cfg.SEAR.SUBREGION) + delta s_re = s_re.astype(np.float, copy=False) # compute the overlaps between sub-regions and each objects sre_gt_overlaps = bbox_overlaps(s_re, gt_rois) # find out the objects that are actually adjacent adj_th = (sre_gt_overlaps[0] >= cfg.SEAR.ADJ_THRESH) match_inds = np.where(adj_th)[0] sre_gt_overlaps[:, ~adj_th] = -1 # adj_th = (sre_gt_overlaps >= cfg.SEAR.ADJ_THRESH) # match_inds = np.where(np.any(adj_th, axis=0))[0] if match_inds.shape[0]>0: # there is object to match for _ in xrange(min(cfg.SEAR.NUM_SUBREG, match_inds.shape[0])): reg_idx, gt_idx = np.unravel_index(sre_gt_overlaps.argmax(), sre_gt_overlaps.shape) # no more valid match # if sre_gt_overlaps[reg_idx, gt_idx] < cfg.SEAR.ADJ_THRESH: # break t_ki = _compute_bbox_deltas(ex_rois[[k], :], gt_rois[[gt_idx], :]) new_target = np.hstack((t_ki, np.array([[k, reg_idx, overlaps[k, gt_idx]]]))) targets = np.vstack((targets, new_target)) sre_gt_overlaps[reg_idx, :] = -1 sre_gt_overlaps[:, gt_idx] = -1 return targets
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, dontcare):
    """Generate a random sample of RoIs comprising foreground and background
    examples, dropping background RoIs that overlap "don't care" boxes.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are the box and column 4
            the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted (foreground + background).
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        dontcare: array of "don't care" boxes; may be empty.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the sampled
        RoIs.  Foreground RoIs come first; labels of the background RoIs are
        set to 0.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # The count is used as a sample size and as a slice bound below, so it
    # must be an int even when the caller passes a float (e.g. from np.round).
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Keep only background RoIs whose best overlap with a don't-care box
    # stays below FG_THRESH.
    if dontcare.size != 0:
        dontcare_overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[bg_inds, 1:5], dtype=np.float64),
            np.ascontiguousarray(dontcare, dtype=np.float64))
        bg_inds = bg_inds[dontcare_overlaps.max(axis=1) < cfg.TRAIN.FG_THRESH]

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image box arrays.

    Args:
        box_list: one box array per image; must contain ``self.num_images``
            entries.
        gt_roidb: optional ground-truth roidb (``"boxes"`` and
            ``"gt_classes"`` per image) used to fill the class overlaps, or
            ``None`` to leave every overlap at zero.

    Returns:
        A list with one dict per image holding ``"boxes"``, an all-zero
        ``"gt_classes"`` vector, a sparse ``"gt_overlaps"`` matrix of shape
        (num_boxes, num_classes) and ``"flipped": False``.
    """
    assert len(box_list) == self.num_images, "Number of boxes must match number of ground-truth images"

    entries = []
    for image_idx in range(self.num_images):
        boxes = box_list[image_idx]
        num_boxes = boxes.shape[0]
        class_overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None:
            gt_entry = gt_roidb[image_idx]
            gt_boxes = gt_entry["boxes"]
            gt_classes = gt_entry["gt_classes"]
            pair_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                          gt_boxes.astype(np.float64))
            # With no ground-truth columns the matrix simply stays all-zero.
            if pair_overlaps.shape[1] > 0:
                best_gt = pair_overlaps.argmax(axis=1)
                best_overlap = pair_overlaps.max(axis=1)
                hit = np.where(best_overlap > 0)[0]
                # Each box records its best overlap under that GT's class.
                class_overlaps[hit, gt_classes[best_gt[hit]]] = best_overlap[hit]

        entries.append({
            "boxes": boxes,
            "gt_classes": np.zeros((num_boxes,), dtype=np.int32),
            "gt_overlaps": scipy.sparse.csr_matrix(class_overlaps),
            "flipped": False,
        })
    return entries
def _anchor_target_layer(anchors, gt_boxes, im_info, feat_stride, num_anchors, rpn_cls_score):
    """Label the anchors that lie inside the image as positive/negative.

    Args:
        anchors: anchor boxes, one per row, read as (x1, y1, x2, y2).
        gt_boxes: ground-truth boxes passed to ``bbox_overlaps``.
        im_info: unused in this body.
        feat_stride: stride that converts feature-map size to image size.
        num_anchors: unused in this body.
        rpn_cls_score: score map; its axes 1 and 2 give height and width.

    Labels follow the convention 1 = positive, 0 = negative, -1 = ignore.

    NOTE(review): the body ends after the labels are assigned and returns
    nothing, so the labels never leave this function - confirm whether the
    remainder of the layer was lost.
    """
    height, width = rpn_cls_score.shape[1:3]

    # Keep only anchors strictly inside the image.
    indexs = np.where((anchors[:, 0] > 0) &
                      (anchors[:, 1] > 0) &
                      (anchors[:, 2] < width * feat_stride) &
                      (anchors[:, 3] < height * feat_stride))[0]
    inside_anchors = anchors[indexs]

    labels = np.full((len(indexs),), -1, dtype=np.float32)

    # overlaps between the anchors and the gt boxes, shape (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(inside_anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    arg_max_overlaps = np.argmax(overlaps, axis=1)
    max_overlaps = overlaps[np.arange(overlaps.shape[0]), arg_max_overlaps]
    gt_arg_max_overlaps = np.argmax(overlaps, axis=0)
    gt_max_overlaps = overlaps[gt_arg_max_overlaps, np.arange(overlaps.shape[1])]
    # np.where returns (rows, cols); only the anchor (row) indices may be
    # used to index the 1-D label vector.  Indexing with the whole tuple
    # raises IndexError.
    gt_arg_max_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Order matters: positives assigned later override earlier negatives.
    labels[max_overlaps < 0.3] = 0
    labels[gt_arg_max_overlaps] = 1
    labels[max_overlaps > 0.7] = 1
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image box arrays.

    For every image the result holds the boxes, an all-zero ``gt_classes``
    vector, a sparse (num_boxes, num_classes) ``gt_overlaps`` matrix, a
    ``flipped`` flag set to False and an all-zero ``seg_areas`` vector.
    Overlaps are only filled when ``gt_roidb`` is given and the image has at
    least one ground-truth box.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    result = []
    for image_idx in range(self.num_images):
        boxes = box_list[image_idx]
        num_boxes = boxes.shape[0]
        class_overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        has_gt = gt_roidb is not None and gt_roidb[image_idx]['boxes'].size > 0
        if has_gt:
            gt_boxes = gt_roidb[image_idx]['boxes']
            gt_classes = gt_roidb[image_idx]['gt_classes']
            pair_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                          gt_boxes.astype(np.float64))
            best_gt = pair_overlaps.argmax(axis=1)
            best_overlap = pair_overlaps.max(axis=1)
            hit = np.where(best_overlap > 0)[0]
            # Each box records its best overlap under that GT's class.
            class_overlaps[hit, gt_classes[best_gt[hit]]] = best_overlap[hit]

        result.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': scipy.sparse.csr_matrix(class_overlaps),
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return result
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def extract_pos_and_neg_feat(self):
    """Collect per-class positive features (and a pool of negatives).

    For every image in ``self.imdb.image_index`` the stored ``boxes`` and
    ``feat`` arrays are loaded and compared against the ground truth read
    from ``gts``.  A box whose overlap with a ground-truth box exceeds 0.5
    contributes its feature row to that box's class (class id minus one).
    Boxes whose overlap with every ground-truth box is at most 0.2 are
    candidates for the negative pool, from which up to 50 are drawn per
    image.

    Returns:
        A dict mapping class index (0..19) to an (n, 4096) array of positive
        features.

    NOTE(review): the negative pool ``neg`` is filled but neither returned
    nor stored, exactly as before - confirm whether callers need it.
    """
    pos = {cls: np.zeros([0, 4096]) for cls in range(20)}
    neg = np.zeros([50000, 4096])
    neg_cnt = 0

    for i, image_name in enumerate(self.imdb.image_index):
        print(str(i) + " "),
        data = sio.loadmat(os.path.join(self.DATA_ROOT_PATH, image_name))
        boxes = data['boxes']
        feat = data['feat']

        gt_boxes = gts['boxes'][0][i]
        gt_classes = gts['class'][0][i]
        # overlaps has one row per ground-truth box, one column per box
        overlaps = bbox_overlaps(gt_boxes.astype(np.float64),
                                 boxes.astype(np.float64))

        for idx in range(len(gt_boxes)):
            hits = np.where(overlaps[idx] > 0.5)[0]
            if hits.size:
                cls = gt_classes[idx][0] - 1
                pos[cls] = np.vstack([pos[cls], feat[hits, :]])

        # The black list must hold *box* indices (columns): it is subtracted
        # from the set of feature rows below.  The previous code stored the
        # ground-truth index instead.
        black_list = set(np.where((overlaps > 0.2).any(axis=0))[0].tolist())

        if neg_cnt < neg.shape[0]:
            cand = sorted(set(range(feat.shape[0])) - black_list)
            rndidx = np.random.permutation(len(cand))[0:50]
            # Never write past the end of the pool, and cope with images
            # that offer fewer than 50 candidates.
            room = neg.shape[0] - neg_cnt
            negidx = np.array(cand, dtype=int)[rndidx][:room]
            neg[neg_cnt:neg_cnt + len(negidx)] = feat[negidx, :]
            # Advance the write position; it used to stay at 0, so every
            # image overwrote the same 50 rows.
            neg_cnt += len(negidx)

    return pos
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, sample_type='fpn', k0=4):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are the box and column 4
            the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted (foreground + background).
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        sample_type: with ``'fpn'`` a per-RoI pyramid level is returned too.
        k0: offset used in the pyramid-level formula.

    Returns:
        ``(rois, labels, bbox_targets, bbox_inside_weights, layer_index)``
        when ``sample_type == 'fpn'``, otherwise the same tuple without
        ``layer_index``.  ``layer_index`` is
        ``floor(k0 + log2(sqrt(w * h) / 224))`` limited to the range [2, 5].
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # The count is used as a sample size and as a slice bound below, so it
    # must be an int even when the caller passes a float (e.g. from np.round).
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    if sample_type == 'fpn':
        w = rois[:, 3] - rois[:, 1]
        h = rois[:, 4] - rois[:, 2]
        s = w * h
        # Degenerate boxes get a tiny positive area so log2 stays defined.
        s[s <= 0] = 1e-6
        layer_index = np.floor(k0 + np.log2(np.sqrt(s) / 224))
        layer_index[layer_index < 2] = 2
        layer_index[layer_index > 5] = 5
        return rois, labels, bbox_targets, bbox_inside_weights, layer_index
    else:
        return rois, labels, bbox_targets, bbox_inside_weights
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal arrays.

    At most ``cfg.MAX_PROPOSAL_NO`` boxes are kept per image.  For every
    image the result holds the boxes, an all-zero ``gt_classes`` vector, a
    sparse (num_boxes, num_classes) ``gt_overlaps`` matrix and
    ``flipped = False``.  Overlaps are filled from ``gt_roidb`` (keys
    ``'gt_boxes'`` and ``'gt_classes'``) when it is given; images without
    ground-truth boxes keep an all-zero matrix.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images. %s vs. %s' % (len(box_list), self.num_images)
    roidb = []
    print('create_roidb_from_box_list() start')
    for i in range(self.num_images):
        max_proposal_box = cfg.MAX_PROPOSAL_NO
        boxes = box_list[i][:max_proposal_box]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None:
            gt_boxes = gt_roidb[i]['gt_boxes']
            # argmax over zero ground-truth columns raises, so images
            # without ground truth are skipped here.
            if gt_boxes.shape[0] > 0:
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                # Each box records its best overlap under that GT's class.
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({'boxes': boxes,
                      'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                      'gt_overlaps': overlaps,
                      'flipped': False})
    print('create_roidb_from_box_list() end')
    return roidb
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Unlike the classic sampler, a fixed number of ``rois_per_image`` RoIs is
    always produced: when one of the two pools is too small (or empty) the
    other is sampled with replacement to fill up.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        all_scores: per-RoI scores, indexed the same way as ``all_rois``.
        gt_boxes: ground-truth array; columns 0:4 are the box.  The class
            label sits in column 12 when ``cfg.FRAME_REG`` is set, else in
            column 4, and the columns before it are the regression source.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to return.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights,
        poly_targets, poly_inside_weights)``.

    Raises:
        ValueError: if there are neither foreground nor background RoIs.
            This replaces a leftover ``pdb.set_trace()`` that stalled or
            aborted non-interactive runs.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    # Column of the class label == number of leading target columns.
    label_col = 12 if cfg.FRAME_REG else 4
    labels = gt_boxes[gt_assignment, label_col]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when the pool is too small.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        raise ValueError('no foreground or background RoIs available to sample')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data, poly_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :label_col], labels)

    bbox_targets, bbox_inside_weights, poly_targets, poly_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, poly_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights,\
        poly_targets, poly_inside_weights
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples, together with their pose annotations.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are the box, column 4 the
            class label and columns 5, 6 and 7 the three pose values.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted (foreground + background).
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights, poses_a, poses_e,
        poses_t)``.  Foreground RoIs come first; background RoIs get label 0
        and pose value -1.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]
    poses_a = gt_boxes[gt_assignment, 5]
    poses_e = gt_boxes[gt_assignment, 6]
    poses_t = gt_boxes[gt_assignment, 7]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # The count is used as a sample size and as a slice bound below, so it
    # must be an int even when the caller passes a float (e.g. from np.round).
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    poses_a = poses_a[keep_inds]
    poses_e = poses_e[keep_inds]
    poses_t = poses_t[keep_inds]

    # Clamp labels for the background RoIs to 0 and their poses to -1
    labels[fg_rois_per_this_image:] = 0
    poses_a[fg_rois_per_this_image:] = -1
    poses_e[fg_rois_per_this_image:] = -1
    poses_t[fg_rois_per_this_image:] = -1
    rois = all_rois[keep_inds]

    # Diagnostic output kept from the original; list() keeps the printed
    # form a list of pairs on every Python version.
    print(list(zip(labels, poses_a)))

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights, poses_a, poses_e, poses_t
def calc_precision_recall(all_boxes, imdb):
    """Print precision/recall of detections and save the bad cases.

    A detection counts as a hit when, padded by 10 pixels, it encloses the
    ground-truth box it overlaps most.  Several hits on the same ground-truth
    box count once.  Every image whose hit count differs from its number of
    detections or of ground-truth boxes is reported as a bad case and
    rendered into ``<cfg.ROOT_DIR>/data/bad_case_<imdb.name>``; that
    directory is emptied at the start of each run.

    Args:
        all_boxes: per-image detection arrays; the last column is dropped
            before boxes are compared.
        imdb: dataset object providing ``name``, ``roidb`` and
            ``image_path_at``.

    Images without detections or without ground truth contribute zero hits
    instead of raising, and precision/recall are reported as 0.0 when their
    denominator is zero.
    """
    res_num = {'tp': 0, 'gt': 0, 'det': 0, 'bad_case': 0}

    # save bad case result
    bad_case_output_dir = os.path.join(cfg.ROOT_DIR, 'data', 'bad_case_' + imdb.name)
    if not os.path.exists(bad_case_output_dir):
        os.makedirs(bad_case_output_dir)
    else:
        for f in os.listdir(bad_case_output_dir):
            os.remove(os.path.join(bad_case_output_dir, f))

    gt_roidb = imdb.roidb
    outside_pad = 10

    def bounding(box, gt_box):
        # True when box, grown by outside_pad, contains gt_box.
        return np.all((box[:2] <= gt_box[:2] + outside_pad) &
                      (box[2:] >= gt_box[2:] - outside_pad))

    for im_i, boxes in enumerate(all_boxes):
        gt_boxes = gt_roidb[im_i]['boxes']

        if len(boxes) > 0 and len(gt_boxes) > 0:
            gt_overlaps = bbox_overlaps(boxes[:, :-1].astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            tp_inds = np.zeros((argmaxes.shape[0]), dtype=bool)
            for box_i, box in enumerate(boxes):
                if bounding(box[:-1], gt_boxes[argmaxes[box_i]]):
                    tp_inds[box_i] = True
            # Count each ground-truth box at most once.
            tp_num = np.unique(argmaxes[tp_inds]).size
        else:
            # argmax over an empty axis would raise; nothing can match.
            tp_num = 0

        res_num['tp'] = res_num['tp'] + tp_num
        res_num['gt'] = res_num['gt'] + len(gt_boxes)
        res_num['det'] = res_num['det'] + len(boxes)

        if tp_num != len(boxes) or tp_num != len(gt_boxes):
            res_num['bad_case'] = res_num['bad_case'] + 1
            img_path = imdb.image_path_at(im_i)
            im = cv2.imread(img_path)
            bad_name = os.path.splitext(os.path.basename(img_path))[0]
            res_im_file = os.path.join(bad_case_output_dir, '{:s}.jpg'.format(bad_name))
            save_detection_res(im, res_im_file, boxes, gt_boxes)
            print('images: {:d}/{:d}  !!!  BAD CASE'.format(im_i, len(all_boxes)))
        else:
            print('images: {:d}/{:d}'.format(im_i, len(all_boxes)))

    precision = float(res_num['tp']) / float(res_num['det']) if res_num['det'] else 0.0
    recall = float(res_num['tp']) / float(res_num['gt']) if res_num['gt'] else 0.0

    print('=' * 20)
    print('final bad case number: {:d}'.format(res_num['bad_case']))
    print('final precision: {:.3f}, recall: {:.3f}.'.format(precision, recall))
    print('=' * 20)
def compare(name, dets, thresh):
    """Pair the NMS result for ``dets`` with a ground-truth-aware variant.

    Returns:
        ``(suppressed, gt_sup)``.  ``suppressed`` is the output of
        ``nms_proposal(dets, thresh)``.  ``gt_sup`` is a copy of it in which
        every detection whose best overlap with the boxes from
        ``nms_gt(name)`` exceeds 0.5 is replaced by ``-(gt_index + 1)``.
    """
    suppressed = nms_proposal(dets, thresh)
    gt = nms_gt(name)

    det_boxes = np.ascontiguousarray(dets[:, 0:4], dtype=np.float64)
    gt_boxes = np.ascontiguousarray(gt, dtype=np.float64)
    overlaps = bbox_overlaps(det_boxes, gt_boxes)

    best_gt = overlaps.argmax(axis=1)
    best_overlap = overlaps[np.arange(len(overlaps)), best_gt]
    matched = np.where(best_overlap > 0.5)

    gt_sup = np.array(suppressed)
    # Negative, 1-based ground-truth index marks a matched detection.
    gt_sup[matched] = -(best_gt[matched] + 1)
    return suppressed, gt_sup
def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info):
    """Split all RoIs into foreground/background and build their targets.

    RoIs whose best overlap reaches ``cfg.TRAIN.BBOX_THRESH`` are foreground
    and are placed first; the rest follow as background with label 0.

    Returns:
        ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
        bbox_targets, bbox_inside_weights)``.  ``pos_masks`` holds one
        ``cfg.MASK_SIZE`` square mask per kept RoI (zero for background).
        Each ``top_mask_info`` row stores the ground-truth index, the two
        mask-info values, the label, the rounded RoI box and the rounded
        ground-truth box; background rows are filled with -1.
    """
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground first, background after it.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    num_fg = len(fg_inds)
    num_kept = len(keep_inds)

    labels = labels[keep_inds]
    # Background RoIs carry label 0
    labels[num_fg:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, 21)

    # Boxes are divided by the image scale before masks are intersected.
    scaled_rois = rois[:, 1:5] / float(im_scale)
    scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

    pos_masks = np.zeros((num_kept, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    top_mask_info = np.zeros((num_kept, 12))
    top_mask_info[num_fg:, :] = -1

    for i, val in enumerate(fg_inds):
        gt_idx = gt_assignment[val]
        gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
        ex_box = np.around(scaled_rois[i]).astype(int)
        gt_mask_info = mask_info[gt_idx]
        # Crop the stored mask to the extent recorded in mask_info.
        gt_mask = gt_masks[gt_idx][0:gt_mask_info[0], 0:gt_mask_info[1]]
        # regression target is the intersection of bounding box and gt mask
        pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)

        top_mask_info[i, 0] = gt_idx
        top_mask_info[i, 1] = gt_mask_info[0]
        top_mask_info[i, 2] = gt_mask_info[1]
        top_mask_info[i, 3] = labels[i]
        top_mask_info[i, 4:8] = ex_box
        top_mask_info[i, 8:12] = gt_box

    return labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info, bbox_targets, bbox_inside_weights
def _matched_information(self, curr_retrieved, curr_gt):
    """Flag which retrieved boxes match a ground-truth box per threshold.

    Returns:
        A boolean array of shape (number of thresholds in
        ``self.all_matched_threshold``, number of retrieved boxes).  Entry
        ``[k, i]`` is True when box ``i`` overlaps its best ground-truth box
        by at least threshold ``k`` and that ground-truth box has not been
        claimed by an earlier box at the same threshold.
    """
    thresholds = self.all_matched_threshold
    is_matched = np.zeros((len(thresholds), len(curr_retrieved)), dtype=bool)
    if len(curr_retrieved) == 0 or len(curr_gt) == 0:
        return is_matched

    gt_overlaps = bbox_overlaps(curr_retrieved.astype(np.float64),
                                curr_gt.astype(np.float64))
    best_gt = gt_overlaps.argmax(axis=1)

    for k, matched_threshold in enumerate(thresholds):
        # Every ground-truth box may be claimed once per threshold.
        claimed = np.zeros(len(curr_gt), dtype=bool)
        for i, j in enumerate(best_gt):
            if gt_overlaps[i, j] >= matched_threshold and not claimed[j]:
                claimed[j] = True
                is_matched[k, i] = True
    return is_matched
def evaluate_recall(self, candidate_boxes=None, ar_thresh=0.5):
    """Evaluate how well candidate boxes cover the ground-truth boxes.

    Args:
        candidate_boxes: optional list with one box array per image.  When
            ``None`` the roidb boxes whose ``gt_classes`` entry is 0 are used.
        ar_thresh: unused; kept for interface compatibility.

    Returns:
        ``(ar, gt_overlaps, recalls, thresholds)`` where ``gt_overlaps`` is
        the sorted vector of the overlap obtained by each ground-truth box,
        ``recalls[i]`` the fraction of them reaching ``thresholds[i]``
        (0.5 to 1.0 in steps of 0.001) and ``ar`` twice the area under that
        curve.

    Ground-truth boxes are matched greedily, each to a different candidate.
    When an image has fewer candidates than ground-truth boxes, the
    unmatched ones keep an overlap of 0 (this used to trip an assertion).

    NOTE(review): images without any candidate box are skipped entirely, so
    their ground-truth boxes do not count towards the recall.
    """
    # Record max overlap value for each gt box
    gt_overlaps = np.zeros(0)
    for i in range(self.num_images):
        entry = self.roidb[i]
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        if candidate_boxes is None:
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue

        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Only as many pairs as the smaller side allows can be formed.
        for j in range(min(gt_boxes.shape[0], boxes.shape[0])):
            # Pick the best remaining (box, gt) pair ...
            argmax_overlaps = overlaps.argmax(axis=0)
            max_overlaps = overlaps.max(axis=0)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert(gt_ovr >= 0)
            box_ind = argmax_overlaps[gt_ind]
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert(_gt_overlaps[j] == gt_ovr)
            # ... and retire both so neither is used again.
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    num_pos = gt_overlaps.size
    gt_overlaps = np.sort(gt_overlaps)
    step = 0.001
    thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0)
    recalls = np.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = 2 * np.trapz(recalls, thresholds)

    return ar, gt_overlaps, recalls, thresholds
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image box arrays, checking box bounds.

    Every image is read from disk and each box is asserted to have its
    column 2 strictly below the image width.  For every image the result
    holds the boxes, an all-zero ``gt_classes`` vector, a sparse
    (num_boxes, num_classes) ``gt_overlaps`` matrix and ``flipped = False``.
    Overlaps are only filled for images that have ground-truth boxes.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    result = []
    for image_idx in range(self.num_images):
        boxes = box_list[image_idx]

        # Sanity check: no box may reach past the right image border.
        image = cv2.imread(self.image_path_at(image_idx))
        width = image.shape[1]
        assert all(box[2] < width for box in boxes)

        num_boxes = boxes.shape[0]
        class_overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None:
            gt_boxes = gt_roidb[image_idx]['boxes']
            gt_classes = gt_roidb[image_idx]['gt_classes']
            if gt_boxes.shape[0] != 0:
                pair_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                              gt_boxes.astype(np.float64))
                best_gt = pair_overlaps.argmax(axis=1)
                best_overlap = pair_overlaps.max(axis=1)
                hit = np.where(best_overlap > 0)[0]
                # Each box records its best overlap under that GT's class.
                class_overlaps[hit, gt_classes[best_gt[hit]]] = best_overlap[hit]

        result.append({'boxes': boxes,
                       'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                       'gt_overlaps': scipy.sparse.csr_matrix(class_overlaps),
                       'flipped': False})
    return result
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image box arrays.

    Args:
        box_list: one box array per image; must contain ``self.num_images``
            entries.
        gt_roidb: optional ground-truth roidb (``'boxes'`` and
            ``'gt_classes'`` per image), or ``None``.

    Returns:
        A list with one dict per image holding ``'boxes'``, an all-zero
        ``'gt_classes'`` vector, a sparse (num_boxes, num_classes)
        ``'gt_overlaps'`` matrix and ``'flipped': False``.  Images without
        ground-truth boxes get an all-zero overlap matrix.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        # argmax over zero ground-truth columns raises, so images without
        # ground truth are skipped here.
        if gt_roidb is not None and gt_roidb[i]['boxes'].shape[0] > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            # Each box records its best overlap under that GT's class.
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({'boxes': boxes,
                      'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                      'gt_overlaps': overlaps,
                      'flipped': False})
    print('roidb size = %d' % (len(roidb)))
    return roidb
def evalCorLoc2(imdb, nms_dets, overlap=0.5):
    """Compute per-class localisation accuracy over all ground-truth boxes.

    Args:
        imdb: dataset object providing ``gt_roidb()`` and ``num_classes``.
        nms_dets: ``nms_dets[class][image]`` holds the detections of one
            class in one image (empty when there are none).
        overlap: a ground-truth box counts as localised when its best
            overlap with a detection is strictly greater than this value.

    Returns:
        An array with, for every class except class 0, the fraction of
        ground-truth boxes that were localised.  Only images that have both
        detections and ground truth for the class are counted; classes
        without any counted box yield NaN.
    """
    num_classes = len(nms_dets)
    num_images = len(nms_dets[0])
    gt = imdb.gt_roidb()
    pos = np.zeros(imdb.num_classes)
    tot = np.zeros(imdb.num_classes)
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = nms_dets[cls_ind][im_ind]
            # len() works for both an empty list and an empty array;
            # comparing an array with [] is not a valid emptiness test.
            if len(dets) == 0:
                continue
            sel = gt[im_ind]['gt_classes'] == cls_ind
            if not np.any(sel):
                continue
            gtdet = (gt[im_ind]['boxes'][sel]).astype(np.float64, copy=False)
            dets = dets.astype(np.float64, copy=False)
            ovr = bbox_overlaps(gtdet, dets)
            tot[cls_ind] += gtdet.shape[0]
            pos[cls_ind] += np.sum(ovr.max(1) > overlap)
    corloc = pos[1:] / tot[1:]
    return corloc
def create_roidb_from_box_list(self, box_list, gt_roidb, weight_list=None):
    """Build a roidb from per-image box arrays and optional box weights.

    For every image the result holds the boxes, the ground-truth boxes (an
    empty list when ``gt_roidb`` is None), an all-zero ``gt_classes``
    vector, a sparse (num_boxes, num_classes) ``gt_overlaps`` matrix,
    ``flipped = False`` and the image's entry of ``weight_list`` (or None).
    A notice is printed for every image when no weights are supplied.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    have_gt = gt_roidb is not None
    have_weights = weight_list is not None

    result = []
    for image_idx in range(self.num_images):
        boxes = box_list[image_idx]
        num_boxes = boxes.shape[0]
        class_overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        gt_boxes = gt_roidb[image_idx]['boxes'] if have_gt else []
        # Need at least one ground-truth box for argmax
        if have_gt and gt_boxes.shape[0] > 0:
            gt_classes = gt_roidb[image_idx]['gt_classes']
            pair_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                          gt_boxes.astype(np.float64))
            best_gt = pair_overlaps.argmax(axis=1)
            best_overlap = pair_overlaps.max(axis=1)
            hit = np.where(best_overlap > 0)[0]
            # Each box records its best overlap under that GT's class.
            class_overlaps[hit, gt_classes[best_gt[hit]]] = best_overlap[hit]

        if have_weights:
            weight = weight_list[image_idx]
            assert weight.shape[0] == num_boxes, 'weight num should be same as boxes num'
        else:
            weight = None
            print('weight is None\n')

        result.append({'boxes': boxes,
                       'gt_boxes': gt_boxes,
                       'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                       'gt_overlaps': scipy.sparse.csr_matrix(class_overlaps),
                       'flipped': False,
                       'weight': weight})
    return result
def evalCorLoc(imdb, nms_dets, overlap=0.5):
    """Compute per-class CorLoc using only the first detection per image.

    Args:
        imdb: dataset object providing ``gt_roidb()`` and ``num_classes``.
        nms_dets: ``nms_dets[class][image]`` holds the detections of one
            class in one image; only the first row is evaluated.
        overlap: an image counts as localised when the first detection
            overlaps some ground-truth box of the class by at least this
            value.

    Returns:
        An array with, for every class except class 0, the fraction of
        images containing the class that were localised; classes that never
        occur yield NaN.

    Raises:
        ValueError: if a class has no detections for an image.  The message
            is still printed first; the error replaces an abort that relied
            on an undefined name.
    """
    num_classes = len(nms_dets)
    num_images = len(nms_dets[0])
    gt = imdb.gt_roidb()
    pos = np.zeros(imdb.num_classes)
    tot = np.zeros(imdb.num_classes)
    for cls_ind in range(1, num_classes):
        for im_ind in range(num_images):
            dets = nms_dets[cls_ind][im_ind]
            # len() works for both an empty list and an empty array;
            # comparing an array with [] is not a valid emptiness test.
            if len(dets) == 0:
                print("Error, no detections!")
                raise ValueError(
                    'no detections for class %d in image %d' % (cls_ind, im_ind))
            sel = gt[im_ind]['gt_classes'] == cls_ind
            if not np.any(sel):
                continue
            gtdet = (gt[im_ind]['boxes'][sel]).astype(np.float64, copy=False)
            dets = dets.astype(np.float64, copy=False)
            ovr = bbox_overlaps(gtdet, dets[:1])
            # One vote per image that contains the class.
            tot[cls_ind] += 1
            pos[cls_ind] += ovr.max() >= overlap
    corloc = pos[1:] / tot[1:]
    return corloc
def fixMatOverlap(args):
    """Clip predicted boxes to the image and recompute their overlap matrix.

    Args:
        args: a single tuple ``(mat_overlap_file, gt_file, im_file, out_file,
            idx)``.  It is unpacked inside the body because tuple parameters
            in the signature are Python-2-only syntax; callers still pass
            one tuple.

    The archive at ``mat_overlap_file`` supplies ``pred_scores``,
    ``gt_boxes_size`` and ``pred_boxes``.  Box columns 0 and 1 are clipped
    to be at least 0, column 2 to at most ``im.shape[0]`` and column 3 to at
    most ``im.shape[1]``.  The overlaps between the clipped boxes and the
    converted ground-truth boxes are then written to ``out_file`` together
    with the other arrays, under the key ``mat_overlap``.
    """
    (mat_overlap_file, gt_file, im_file, out_file, idx) = args
    print(idx)

    # Only the image shape is needed.
    im = scipy.misc.imread(im_file)

    gt_boxes = np.load(gt_file)
    gt_boxes = np.array([psr.convertBBoxFormatToStandard(gt_box) for gt_box in gt_boxes])

    # Close the archive as soon as the needed arrays are read.  The stale
    # 'mat_overlap' entry is no longer loaded because it is recomputed below.
    with np.load(mat_overlap_file) as mat_info:
        pred_scores = mat_info['pred_scores']
        gt_boxes_size = mat_info['gt_boxes_size']
        pred_boxes = mat_info['pred_boxes']

    pred_boxes[:, 1] = np.maximum(pred_boxes[:, 1], 0)
    pred_boxes[:, 0] = np.maximum(pred_boxes[:, 0], 0)
    pred_boxes[:, 2] = np.minimum(pred_boxes[:, 2], im.shape[0])
    pred_boxes[:, 3] = np.minimum(pred_boxes[:, 3], im.shape[1])

    mat_overlap_new = cython_bbox.bbox_overlaps(
        np.array(pred_boxes, dtype=np.float64),
        np.array(gt_boxes, dtype=np.float64))

    np.savez(out_file,
             pred_scores=pred_scores,
             gt_boxes_size=gt_boxes_size,
             mat_overlap=mat_overlap_new,
             pred_boxes=pred_boxes)
def bbox_vote(dets_NMS, dets_all, thresh=0.5):
    """Refine NMS survivors by score-weighted voting over all detections.

    Args:
        dets_NMS: detections kept by NMS; columns 0:4 are the box and column
            4 the score.
        dets_all: all detections, same column layout.
        thresh: minimum overlap for a detection to vote for a survivor.

    Returns:
        An array shaped like ``dets_NMS``.  Each box is the score-weighted
        mean of the boxes voting for it.  The score is the original one,
        unless ``cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE`` exceeds 1, in which
        case it is capped by an agreement score computed from the voters.
    """
    dets_voted = np.zeros_like(dets_NMS)

    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float64))

    for i, det in enumerate(dets_NMS):
        voters = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert(len(voters) > 0)

        scores = voters[:, 4]
        weighted_box = np.dot(scores, voters[:, 0:4])
        dets_voted[i][0:4] = weighted_box / sum(scores)
        # Start from the original score
        dets_voted[i][4] = det[4]

        if cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE > 1:
            n_agreement = cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE
            w_empty = cfg.TEST.BBOX_VOTE_WEIGHT_EMPTY
            n_detected = len(scores)

            if n_detected >= n_agreement:
                # Enough voters: average the strongest n_agreement scores.
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                # Too few voters: the missing ones count with weight w_empty.
                new_score = np.average(scores) * (n_detected * 1.0 + (n_agreement - n_detected) * w_empty) / n_agreement

            # Voting may lower the score but never raise it.
            dets_voted[i][4] = min(new_score, dets_voted[i][4])

    return dets_voted
def prepare_roidb(imdb): """Enrich the imdb's roidb by adding some derived quantities that are useful for training. This function precomputes the maximum overlap, taken over ground-truth boxes, between each ROI and each ground-truth box. The class with maximum overlap is also recorded. """ cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: imdb._roidb = cPickle.load(fid) print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file) return roidb = imdb.roidb for i in xrange(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) boxes = roidb[i]['boxes'] labels = roidb[i]['gt_classes'] info_boxes = np.zeros((0, 18), dtype=np.float32) if boxes.shape[0] == 0: roidb[i]['info_boxes'] = info_boxes continue # compute grid boxes s = PIL.Image.open(imdb.image_path_at(i)).size image_height = s[1] image_width = s[0] boxes_grid, cx, cy = get_boxes_grid(image_height, image_width) # for each scale for scale_ind, scale in enumerate(cfg.TRAIN.SCALES): boxes_rescaled = boxes * scale # compute overlap overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float)) max_overlaps = overlaps.max(axis = 1) argmax_overlaps = overlaps.argmax(axis = 1) max_classes = labels[argmax_overlaps] # select positive boxes fg_inds = [] for k in xrange(1, imdb.num_classes): fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) if len(fg_inds) > 0: gt_inds = argmax_overlaps[fg_inds] # bounding box regression targets gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:]) # scale mapping for RoI pooling scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind] scale_map = cfg.TRAIN.SCALES[scale_ind_map] # contruct the list of positive boxes # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target) info_box = np.zeros((len(fg_inds), 18), dtype=np.float32) info_box[:, 0] = cx[fg_inds] 
info_box[:, 1] = cy[fg_inds] info_box[:, 2] = scale_ind info_box[:, 3:7] = boxes_grid[fg_inds,:] info_box[:, 7] = scale_ind_map info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale info_box[:, 12] = labels[gt_inds] info_box[:, 14:] = gt_targets info_boxes = np.vstack((info_boxes, info_box)) roidb[i]['info_boxes'] = info_boxes with open(cache_file, 'wb') as fid: cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) print 'wrote gt roidb prepared to {}'.format(cache_file)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, bg_aux_label):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as the box.
        gt_boxes: ground-truth rows; columns 0:4 are the box, column 4 the
            class label and, when present, column 5 an auxiliary label.
            Rows whose column 3 is negative mark ignored regions.
        fg_rois_per_image: upper bound on sampled foreground RoIs.
        rois_per_image: total number of RoIs aimed for.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        bg_aux_label: auxiliary label assigned to background RoIs.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights, aux_label)``;
        ``aux_label`` is ``None`` when ``gt_boxes`` has no auxiliary column.
    """
    # Remove boxes that overlaps with ignored gt boxes
    ignored_mask = gt_boxes[:, 3] < 0
    gt_ignored_boxes = gt_boxes[ignored_mask, :]
    gt_boxes = gt_boxes[np.logical_not(ignored_mask), :]

    # np.float64 replaces the ``np.float`` alias (removed in NumPy 1.24);
    # the dtype is unchanged.
    if len(gt_ignored_boxes):
        ignored_overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_ignored_boxes[:, :4], dtype=np.float64))
        max_ignored_overlaps = ignored_overlaps.max(axis=1)
        # FIXME: Remove this hardcoded constant
        all_rois = all_rois[max_ignored_overlaps < 0.4, :]

    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.array(np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0],
                       dtype=int)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

    # "Near" background: overlap within [BG_THRESH_LO, BG_THRESH_HI); at most
    # NEAR_FRACTION of the background budget.
    NEAR_FRACTION = 0.2
    bg_near_cnt = int(np.floor(bg_rois_per_this_image * NEAR_FRACTION))
    bg_near_inds = np.array(
        np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                 (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0],
        dtype=int)
    bg_near_cnt = min(bg_near_cnt, bg_near_inds.size)
    if bg_near_inds.size > 0:
        bg_near_inds = npr.choice(bg_near_inds, size=bg_near_cnt,
                                  replace=False)

    # "Far" background: overlap below 0.01, drawn from the first 300 such
    # RoIs, filling what is left of the background budget.
    bg_far_cnt = bg_rois_per_this_image - bg_near_cnt
    bg_far_inds = np.array((np.where(max_overlaps < 0.01)[0])[:300],
                           dtype=int)
    bg_far_cnt = int(min(bg_far_cnt, bg_far_inds.size))
    # Same guard as the other two sampling steps: drawing from an empty pool
    # raises ValueError on NumPy versions that reject an empty population.
    if bg_far_inds.size > 0:
        bg_far_inds = npr.choice(bg_far_inds, size=bg_far_cnt, replace=False)

    bg_inds = np.append(bg_near_inds, bg_far_inds)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    # Auxiliary label if available
    aux_label = None
    if gt_boxes.shape[1] > 5:
        aux_label = gt_boxes[gt_assignment, 5]
        aux_label = aux_label[keep_inds]
        aux_label[fg_rois_per_this_image:] = bg_aux_label

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights, aux_label
def forward(self, bottom, top):
    """Build subject/object relationship blobs for one image and copy them
    into ``top``.

    ``bottom`` supplies, in order: RPN boxes, box regression output, class
    scores, image info, the image index and the image data.  When the
    image's annotation has a ``relationship`` attribute, detections are
    decoded, filtered per class with NMS and matched against the annotated
    subject/object boxes; every matched pair contributes classemes, RoIs,
    encoded boxes and a predicate label.  If that fails or yields nothing,
    the blobs remembered in ``self._prev_blob`` are used instead.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout,
    in particular the position of the two visualisation calls.
    """
    # prep incoming data==========
    rpn_boxes = bottom[0].data.copy()
    bbox_pred = bottom[1].data
    scores = bottom[2].data
    im_info = bottom[3].data[0]
    im_idx = int(bottom[4].data)
    im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()
    m = self.meta
    im_id = self._image_id[im_idx]
    r_anno = self.r_anno[im_id]
    # prep done============

    # prep blobs for forward
    blobs = {}
    s_classeme = []
    s_rois = []
    s_rois_encoded = []
    o_classeme = []
    o_rois = []
    o_rois_encoded = []
    relation_label = []

    gt_boxes = []
    if hasattr(r_anno, 'relationship'):
        # im_info[2] is used as the image scale: dividing by it maps boxes
        # and sizes back to the un-scaled image.
        rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
        boxes = rpn_boxes_img_coor
        boxes = bbox_transform_inv(boxes, bbox_pred)
        boxes = clip_boxes(boxes, (im_info[0] / im_info[2],
                                   im_info[1] / im_info[2]))

        cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
        im_data = im_data.astype(np.uint8)
        origsz = (im_info[1] / im_info[2], im_info[0] / im_info[2])
        im_data = cv2.resize(im_data, origsz)
        thresh_final = .5

        res_locations = []
        res_classemes = []
        res_cls_confs = []
        boxes_tosort = []
        for j in xrange(1, 101):
            inds = np.where(scores[:, j] > .3)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                                  inds[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            # pred_boxes = clip_boxes(pred_boxes, im.shape)
            if len(cls_scores) <= 0:
                boxes_tosort.append(cls_dets)
                continue

            # last column of res_loc keeps the row index into rpn_boxes
            res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
            res_classeme = scores[inds]
            res_cls_conf = np.column_stack(
                (np.zeros(cls_scores.shape[0]) + j, cls_scores))
            keep = nms(cls_dets[:, :5], .3)  # nms threshold
            cls_dets = cls_dets[keep, :]
            res_loc = res_loc[keep]
            res_classeme = res_classeme[keep]
            res_cls_conf = res_cls_conf[keep]
            res_classemes.extend(res_classeme)
            res_locations.extend(res_loc)
            res_cls_confs.extend(res_cls_conf)
            boxes_tosort.append(cls_dets)

        try:
            # final class confidence
            inds = np.where(np.array(res_cls_confs)[:, 1] > thresh_final)[0]

            classemes = np.array(res_classemes)[inds]
            locations = np.array(res_locations)[inds]
            cls_confs = np.array(res_cls_confs)[inds]
            # decide what to pass to top
            # limit max
            w, h = self.meta['train/' + im_id + '/w'][...], \
                self.meta['train/' + im_id + '/h'][...]
            if not isinstance(r_anno.relationship, np.ndarray):
                r_anno.relationship = [r_anno.relationship]
            for r in xrange(len(r_anno.relationship)):
                if not hasattr(r_anno.relationship[r], 'phrase'):
                    continue
                predicate = r_anno.relationship[r].phrase[1]
                # annotation boxes are stored as (ymin, ymax, xmin, xmax)
                ymin, ymax, xmin, xmax = r_anno.relationship[r].subBox
                sub_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(sub_bbox)

                ymin, ymax, xmin, xmax = r_anno.relationship[r].objBox
                obj_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(obj_bbox)
                # np.float64 replaces the ``np.float`` alias that NumPy
                # >= 1.24 no longer provides; the dtype is the same.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray([sub_bbox, obj_bbox],
                                         dtype=np.float64),
                    np.ascontiguousarray(locations, dtype=np.float64))
                if overlaps.shape[0] == 0:
                    continue

                # up to 40 best detections per box, best first ...
                sub_sorted = overlaps[0].argsort()[-40:][::-1]
                obj_sorted = overlaps[1].argsort()[-40:][::-1]
                # ... with everything below 0.6 overlap trimmed off the tail
                while len(sub_sorted) > 0 and \
                        overlaps[0][sub_sorted[-1]] < .6:
                    sub_sorted = sub_sorted[:-1]
                while len(obj_sorted) > 0 and \
                        overlaps[1][obj_sorted[-1]] < .6:
                    obj_sorted = obj_sorted[:-1]

                if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                    continue

                cnt = 0
                for s in sub_sorted[:1]:  # sub_idx:
                    for o in obj_sorted[:1]:  # obj_idx:
                        if s != o and cnt < 20:
                            sub_clsmemes = classemes[s]
                            obj_clsmemes = classemes[o]
                            sub_box_encoded = bbox_transform(
                                np.array([[0, 0, w, h]]),
                                np.array([locations[s]]))[0]
                            obj_box_encoded = bbox_transform(
                                np.array([[0, 0, w, h]]),
                                np.array([locations[o]]))[0]
                            relation = self.meta[
                                'meta/pre/name2idx/' + predicate][...]
                            # all done, now we put forward
                            s_classeme.append(sub_clsmemes)
                            o_classeme.append(obj_clsmemes)
                            s_rois.append(rpn_boxes[locations[s][-1]])
                            o_rois.append(rpn_boxes[locations[o][-1]])
                            s_rois_encoded.append(sub_box_encoded)
                            o_rois_encoded.append(obj_box_encoded)
                            relation_label.append(np.float32(relation))
                            cnt += 1
            # final step copy all the stuff for forward
            blobs['s_classeme'] = np.array(s_classeme)
            blobs['o_classeme'] = np.array(o_classeme)
            blobs['s_rois'] = np.array(s_rois)
            blobs['o_rois'] = np.array(o_rois)
            blobs['s_rois_encoded'] = np.array(s_rois_encoded)
            blobs['o_rois_encoded'] = np.array(o_rois_encoded)
            blobs['relation_label'] = np.array(relation_label)
        except Exception:
            # Best-effort fallback on any processing error.  Narrowed from
            # a bare ``except:`` so that KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            blobs = self._prev_blob
        if blobs['s_classeme'].shape[0] == 0:
            blobs = self._prev_blob

        # Kept inside this branch: boxes_tosort, rpn_boxes_img_coor and
        # thresh_final are bound only here.
        visualize_gt(im_data, gt_boxes)
        visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m, thresh_final)
    else:
        blobs = self._prev_blob

    for blob_name, blob in blobs.iteritems():
        top_ind = self._name_to_top_map[blob_name]
        # Reshape net's input blobs
        top[top_ind].reshape(*(blob.shape))
        # Copy data into net's input blobs
        top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    # this becomes a dummy for forward in case things fail: keep only the
    # first sample of every blob and mark its relation label as -1
    if blobs['relation_label'][0] != -1:
        for blob_name, blob in blobs.iteritems():
            blobs[blob_name] = blob[0, np.newaxis]
            if blob_name == 'relation_label':
                blobs[blob_name][...] = -1
        self._prev_blob = blobs
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32),
                           feature_stride=16):
    """Compute mean and standard deviation of RPN box-regression targets.

    Args:
        net: network exposing ``blobs['data']``, ``blobs['im_info']`` and
            ``blobs['rpn/output']``; it is run once per candidate input
            size to learn the output feature-map size.
        imdb: image database; its ``roidb`` is passed through
            ``filter_roidb`` first.
        anchor_scales: scales handed to ``generate_anchors``.
        feature_stride: pixel distance between neighbouring anchor centres.

    Returns:
        ``(means, stds)`` of the targets of all foreground anchors,
        computed per target coordinate.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0

    roidb = filter_roidb(imdb.roidb)

    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        probe_data = np.zeros((1, 3, i, i), dtype=np.float32)
        probe_info = np.asarray([[i, i, 1.0]], dtype=np.float32)
        net.blobs['data'].reshape(*(probe_data.shape))
        net.blobs['im_info'].reshape(*(probe_info.shape))
        # Only the resulting blob shape matters; the outputs are discarded.
        net.forward(data=probe_data, im_info=probe_info)
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float64 replaces the ``np.float`` alias (removed in NumPy 1.24).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than
        #    RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))

        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(
            np.float32, copy=False)

        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    means = sums / counts
    # Var[x] = E[x^2] - E[x]^2
    stds = np.sqrt(squared_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
def get_label(anchor_list, gt_box, image_raw_size, batch_size):
    """Compute RPN class labels, box-regression targets and loss weights.

    Args:
        anchor_list: (N, 4) array of anchors, indexed as x1, y1, x2, y2.
        gt_box: ground-truth boxes; columns 0:4 are indexed the same way.
        image_raw_size: sequence whose element 0 bounds the y coordinates
            and element 1 the x coordinates of anchors counted as inside.
        batch_size: number of labelled anchors aimed for.

    Returns:
        ``(label, bbox_target, in_weight, out_weight)``:

        * ``label`` -- (N,) array; 1 positive, 0 negative, -1 ignored.
        * ``bbox_target`` -- (N, 4) array of (dx, dy, dw, dh); zero for
          anchors outside the image.
        * ``in_weight`` -- (N, 4) float32, 1 on positive anchors.
        * ``out_weight`` -- (N, 4) float32, 1/num_examples on every
          labelled anchor.
    """
    num_anchors = len(anchor_list)

    # class label
    label = np.full(num_anchors, -1.0)

    inside_idx = np.where((anchor_list[:, 0] >= 0) &
                          (anchor_list[:, 1] >= 0) &
                          (anchor_list[:, 2] < image_raw_size[1]) &
                          (anchor_list[:, 3] < image_raw_size[0]))[0]
    inside_mask = np.zeros(num_anchors, dtype=bool)
    inside_mask[inside_idx] = True

    # np.float64 replaces the ``np.float`` alias (removed in NumPy 1.24).
    over_lap_matrix = bbox_overlaps(
        np.ascontiguousarray(anchor_list, dtype=np.float64),
        np.ascontiguousarray(gt_box, dtype=np.float64))
    anchor_max_idx = over_lap_matrix.argmax(axis=1)
    over_lap_max = over_lap_matrix[np.arange(num_anchors), anchor_max_idx]

    label[over_lap_max >= RPN_POSITIVE_OVERLAP] = 1
    label[over_lap_max < RPN_NEGATIVE_OVERLAP] = 0
    # Anchors crossing the image border are ignored.  A boolean mask replaces
    # the former per-anchor ``i not in inside_idx`` scan, which was quadratic.
    label[~inside_mask] = -1

    # For every gt box, the inside anchor(s) with the highest overlap are
    # positive.  Overlaps of outside anchors are treated as zero and a
    # strictly positive overlap is required, so neither a border-crossing
    # anchor nor the zero-overlap anchors of an unmatched gt box are
    # promoted -- the semantics of the earlier loop-based implementation.
    inside_overlaps = np.where(inside_mask[:, np.newaxis],
                               over_lap_matrix, 0.0)
    gt_max = inside_overlaps.max(axis=0)
    gt_max_index = np.where((inside_overlaps == gt_max) &
                            (inside_overlaps > 0))[0]
    label[gt_max_index] = 1

    # subsample positives if there are too many
    fg_num = int(RPN_FG_FACTOR * batch_size)
    fg_index = np.where(label == 1)[0]
    if len(fg_index) > fg_num:
        remove_index = np.random.choice(
            fg_index, size=(len(fg_index) - fg_num), replace=False)
        label[remove_index] = -1

    # subsample negatives so that positives + negatives <= batch_size
    bg_num = batch_size - np.sum(label == 1)
    bg_index = np.where(label == 0)[0]
    if len(bg_index) > bg_num:
        remove_index = np.random.choice(
            bg_index, size=(len(bg_index) - bg_num), replace=False)
        label[remove_index] = -1

    in_weight = np.zeros([num_anchors, 4], dtype=np.float32)
    out_weight = np.zeros([num_anchors, 4], dtype=np.float32)

    # bbox label
    dx = np.zeros(num_anchors)
    dy = np.zeros(num_anchors)
    dw = np.zeros(num_anchors)
    dh = np.zeros(num_anchors)

    ws = anchor_list[inside_idx, 2] - anchor_list[inside_idx, 0] + 1.0
    hs = anchor_list[inside_idx, 3] - anchor_list[inside_idx, 1] + 1.0
    center_xs = anchor_list[inside_idx, 0] + ws / 2
    center_ys = anchor_list[inside_idx, 1] + hs / 2

    # every anchor regresses towards the gt box it overlaps most
    gt_target = gt_box[anchor_max_idx]
    target_w = gt_target[inside_idx, 2] - gt_target[inside_idx, 0] + 1.0
    target_h = gt_target[inside_idx, 3] - gt_target[inside_idx, 1] + 1.0
    target_center_x = gt_target[inside_idx, 0] + target_w / 2.0
    target_center_y = gt_target[inside_idx, 1] + target_h / 2.0

    dx[inside_idx] = (target_center_x - center_xs) / ws
    dy[inside_idx] = (target_center_y - center_ys) / hs
    dw[inside_idx] = np.log(target_w / ws)
    dh[inside_idx] = np.log(target_h / hs)

    num_examples = np.sum(label >= 0)
    in_weight[label == 1] = [1.0] * 4
    out_weight[label == 1] = [1.0 / num_examples] * 4
    out_weight[label == 0] = [1.0 / num_examples] * 4

    bbox_target = np.vstack((dx, dy, dw, dh)).transpose()
    return label, bbox_target, in_weight, out_weight
for i in xrange(len(imdb.image_index)): # Load gt boxes gt_inds = np.where(roidb[i]['gt_classes'] >= 0)[0] gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) gt_boxes[:, 0:4] = roidb[i]['boxes'][gt_inds, :] * im_scale gt_boxes[:, 4] = roidb[i]['gt_classes'][gt_inds] # label: 1 is positive, 0 is negative, -1 is dont care #labels = np.empty((len(inds_inside), ), dtype=np.float32) labels = np.empty((total_anchors, ), dtype=np.float32) labels.fill(-1) # Computer overlap overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) # gt index #max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) # anchor index gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] # bg label: assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap
def _load_pascal3d_voxel_exemplar_annotation(self, index):
    """
    Read the boxes and subcategory labels of image ``index`` from its txt
    file in the pascal subcategory exemplar format.

    For the 'val' image set the plain pascal annotation loader is used
    instead.  When cfg.IS_RPN is set, the per-class counters
    self._num_boxes_all and self._num_boxes_covered are updated with how
    many gt boxes are covered by the grid boxes / anchors.
    """
    if self._image_set == 'val':
        return self._load_pascal_annotation(index)

    filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                            index + '.txt')
    assert os.path.exists(filename), \
        'Path does not exist: {}'.format(filename)

    # The file lists every object twice: as-is and flipped.  Entries with
    # subclass -1 are dropped.
    lines = []
    lines_flipped = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            subcls = int(fields[1])
            is_flip = int(fields[2])
            if subcls == -1:
                continue
            destination = lines if is_flip == 0 else lines_flipped
            destination.append(line)

    num_objs = len(lines)

    # subclass of the flipped counterpart of every object
    assert (num_objs == len(lines_flipped)), 'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    for ix, line in enumerate(lines_flipped):
        gt_subclasses_flipped[ix] = int(line.split()[1])

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)

    for ix, line in enumerate(lines):
        fields = line.split()
        cls = self._class_to_ind[fields[0]]
        subcls = int(fields[1])
        # Make pixel indexes 0-based
        boxes[ix, :] = [float(n) - 1 for n in fields[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # gt boxes at every training scale, stacked scale after scale
            scaled_stack = [np.zeros((0, 4), dtype=np.float32)]
            for scale in cfg.TRAIN.SCALES:
                scaled_stack.append(boxes * scale)
            boxes_all = np.vstack(scaled_stack)
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # grid boxes for this image size
            image_width, image_height = PIL.Image.open(
                self.image_path_from_index(index)).size
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # overlap between grid boxes and multi-scale gt boxes
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                          boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # object id of every row of boxes_all
                object_of_row = np.tile(range(num_objs),
                                        len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    covered = (gt_classes_all == k) & \
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1])
                    fg_inds.extend(np.where(covered)[0])
                index_covered = np.unique(object_of_row[fg_inds])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            image_width, image_height = PIL.Image.open(
                self.image_path_from_index(index)).size

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            flat_x = shift_x.ravel()
            flat_y = shift_y.ravel()
            shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                          gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    covered = (gt_classes == k) & \
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1])
                    fg_inds.extend(np.where(covered)[0])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, anchor_scales, anchor_ratios):
    """Same as the anchor target layer in original Fast/er RCNN

    Labels every anchor as positive (1), negative (0) or ignored (-1),
    computes box-regression targets towards the best-overlapping gt box and
    the inside/outside loss weights.

    Args:
        rpn_cls_score: score map; its dimensions 1 and 2 are taken as the
            feature-map height and width.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` and
            ``_compute_targets``.
        im_info: image info; row 0 holds (height, width, ...).
        _feat_stride: not used by this function.
        all_anchors: (total_anchors, 4) anchors over the whole feature map.
        anchor_scales, anchor_ratios: their lengths give the number of
            anchors per feature-map cell.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes (1, 1, A*height, width) for
        the labels and (1, height, width, A*4) for the other three.
    """
    A = len(anchor_scales) * len(anchor_ratios)
    total_anchors = all_anchors.shape[0]

    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 replaces the ``np.float`` alias (removed in NumPy 1.24).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # all anchors tying for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, \
        rpn_bbox_outside_weights
def pose_target_layer(rois, bbox_prob, bbox_pred, gt_boxes, poses,
                      is_training):
    """Build pose-regression targets for refined RoIs.

    Every RoI box is replaced by the box predicted for the RoI's own class,
    the RoIs are matched to ground-truth boxes by overlap, and quaternion
    targets/weights are produced by ``_compute_pose_targets``.

    Args:
        rois: tensor of RoIs; column 0 is used as batch id, column 1 as
            class, columns 2:6 as the box and column 6 receives the class
            probability.
        bbox_prob: tensor of per-class probabilities (RoIs x classes).
        bbox_pred: tensor of per-class box deltas.
        gt_boxes: tensor indexed (batch, object, field); fields 0:4 are the
            box, field 4 the class, and objects whose last field is > 0 are
            used.
        poses: tensor indexed (batch, object, 9); fields 2:6 are used as
            the quaternion.  NOTE: field 0 of every used pose is
            overwritten in place with the batch index.
        is_training: when true, only positive RoIs are kept (if any).

    Returns:
        CUDA tensors ``(rois, poses_target, poses_weight)``.
    """
    rois = rois.detach().cpu().numpy()
    bbox_prob = bbox_prob.detach().cpu().numpy()
    bbox_pred = bbox_pred.detach().cpu().numpy()
    gt_boxes = gt_boxes.detach().cpu().numpy()
    num_classes = bbox_prob.shape[1]

    # process boxes
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                       (num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                        (num_classes))
        bbox_pred *= stds
        bbox_pred += means

    boxes = rois[:, 2:6].copy()
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)

    # assign boxes
    for i in range(rois.shape[0]):
        cls = int(rois[i, 1])
        rois[i, 2:6] = pred_boxes[i, cls * 4:cls * 4 + 4]
        rois[i, 6] = bbox_prob[i, cls]

    # convert boxes to (batch_ids, x1, y1, x2, y2, cls)
    roi_blob = rois[:, (0, 2, 3, 4, 5, 1)]

    # Rows are gathered in lists and stacked once instead of growing the
    # arrays with np.concatenate per object (quadratic copying).
    gt_rows = []
    pose_rows = [np.zeros((0, 9), dtype=np.float32)]
    for i in range(gt_boxes.shape[0]):
        for j in range(gt_boxes.shape[1]):
            if gt_boxes[i, j, -1] > 0:
                gt_box = np.zeros((1, 6), dtype=np.float32)
                gt_box[0, 0] = i
                gt_box[0, 1:5] = gt_boxes[i, j, :4]
                gt_box[0, 5] = gt_boxes[i, j, 4]
                gt_rows.append(gt_box)
                # kept from the original: tags the caller's pose tensor
                # with the batch index
                poses[i, j, 0] = i
                pose_rows.append(
                    poses[i, j, :].detach().cpu().numpy().reshape(1, 9))

    if gt_rows:
        gt_box_blob = np.concatenate(gt_rows, axis=0)
    else:
        gt_box_blob = np.zeros((0, 6), dtype=np.float32)
    pose_blob = np.concatenate(pose_rows, axis=0)

    if gt_box_blob.shape[0] == 0:
        # no ground truth at all: zero targets and zero weights
        num = rois.shape[0]
        poses_target = np.zeros((num, 4 * num_classes), dtype=np.float32)
        poses_weight = np.zeros((num, 4 * num_classes), dtype=np.float32)
    else:
        # overlaps: (rois x gt_boxes)
        # np.float64 replaces the ``np.float`` alias (removed in NumPy 1.24).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(roi_blob[:, :5], dtype=np.float64),
            np.ascontiguousarray(gt_box_blob[:, :5], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_box_blob[gt_assignment, 5]
        quaternions = pose_blob[gt_assignment, 2:6]

        # RoIs below FG_THRESH_POSE overlap become background ...
        bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH_POSE)[0]
        labels[bg_inds] = 0

        # ... and so do RoIs whose class differs from the matched gt class
        bg_inds = np.where(roi_blob[:, -1] != labels)[0]
        labels[bg_inds] = 0

        # in training, only use the positive boxes for pose regression
        if is_training:
            fg_inds = np.where(labels > 0)[0]
            if len(fg_inds) > 0:
                rois = rois[fg_inds, :]
                quaternions = quaternions[fg_inds, :]
                labels = labels[fg_inds]

        # pose regression targets and weights
        poses_target, poses_weight = _compute_pose_targets(
            quaternions, labels, num_classes)

    return torch.from_numpy(rois).cuda(), \
        torch.from_numpy(poses_target).cuda(), \
        torch.from_numpy(poses_weight).cuda()
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal boxes.

    For every image the proposals are matched to the ground-truth boxes
    (when ``gt_roidb`` is given); each proposal with a positive overlap
    records, in the column of its best-matching gt box's class, the overlap
    value, the gt subclass indexes and, if viewpoints are enabled in the
    config, the gt azimuth/elevation/rotation.

    Args:
        box_list: sequence with one (num_boxes, 4) array per image; its
            length must equal ``self.num_images``.
        gt_roidb: ground-truth roidb (list of dicts) or ``None``.

    Returns:
        List with one dict per image. The per-class matrices are stored as
        ``scipy.sparse.csr_matrix``; the ``gt_view*`` keys are present only
        when ``cfg.TRAIN.VIEWPOINT`` or ``cfg.TEST.VIEWPOINT`` is set.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    # The flag does not depend on the image, so evaluate it once.
    use_viewpoint = bool(cfg.TRAIN.VIEWPOINT or cfg.TEST.VIEWPOINT)
    # (key suffix, column of the gt_viewpoints array)
    view_axes = (('azimuth', 0), ('elevation', 1), ('rotation', 2))

    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        per_class_shape = (num_boxes, self.num_classes)

        overlaps = np.zeros(per_class_shape, dtype=np.float32)
        subindexes = np.zeros(per_class_shape, dtype=np.int32)
        subindexes_flipped = np.zeros(per_class_shape, dtype=np.int32)
        viewindexes = {}
        if use_viewpoint:
            for axis_name, _ in view_axes:
                for suffix in ('', '_flipped'):
                    viewindexes[axis_name + suffix] = np.zeros(
                        per_class_shape, dtype=np.float32)

        if gt_roidb is not None:
            gt_boxes = gt_roidb[i]['boxes']
            if gt_boxes.shape[0] != 0 and num_boxes != 0:
                gt_classes = gt_roidb[i]['gt_classes']
                gt_subclasses = gt_roidb[i]['gt_subclasses']
                gt_subclasses_flipped = gt_roidb[i]['gt_subclasses_flipped']
                if use_viewpoint:
                    gt_viewpoints = gt_roidb[i]['gt_viewpoints']
                    gt_viewpoints_flipped = gt_roidb[i][
                        'gt_viewpoints_flipped']

                # np.float64 replaces the removed ``np.float`` alias.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)

                # Proposals that overlap at least one gt box, the gt box
                # each of them matches best, and that box's class.
                matched = np.where(maxes > 0)[0]
                best_gt = argmaxes[matched]
                best_cls = gt_classes[best_gt]

                overlaps[matched, best_cls] = maxes[matched]
                subindexes[matched, best_cls] = gt_subclasses[best_gt]
                subindexes_flipped[matched, best_cls] = \
                    gt_subclasses_flipped[best_gt]
                if use_viewpoint:
                    for axis_name, col in view_axes:
                        viewindexes[axis_name][matched, best_cls] = \
                            gt_viewpoints[best_gt, col]
                        viewindexes[axis_name + '_flipped'][
                            matched, best_cls] = \
                            gt_viewpoints_flipped[best_gt, col]

        entry = {
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_subclasses': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_subclasses_flipped': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': scipy.sparse.csr_matrix(overlaps),
            'gt_subindexes': scipy.sparse.csr_matrix(subindexes),
            'gt_subindexes_flipped':
                scipy.sparse.csr_matrix(subindexes_flipped),
            'flipped': False
        }
        if use_viewpoint:
            entry['gt_viewpoints'] = np.zeros((num_boxes, 3),
                                              dtype=np.float32)
            entry['gt_viewpoints_flipped'] = np.zeros((num_boxes, 3),
                                                      dtype=np.float32)
            for name, dense in viewindexes.items():
                entry['gt_viewindexes_' + name] = \
                    scipy.sparse.csr_matrix(dense)
        roidb.append(entry)

    return roidb
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as foreground (1), background (0) or ignored (-1)
    from its overlap with the ground-truth boxes, subsamples the labels to
    the configured RPN batch size, and computes box-regression targets and
    loss weights.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` is read, as
            (height, width).
        gt_boxes: ground-truth boxes, one row per box.
        im_info: ``im_info[0]`` is used as image height, ``im_info[1]`` as
            image width.
        _feat_stride: accepted for interface compatibility; not used here.
        all_anchors: (total_anchors, 4) anchors as (x1, y1, x2, y2).
        num_anchors: number of anchors per feature-map location (A).

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes (1, 1, A * height, width)
        for the labels and (1, height, width, A * 4) for the other three.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((len(inds_inside), ), -1, dtype=np.float32)

    # overlaps[i, j] is the overlap of anchor i with gt box j.
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # Per anchor: index and value of its best-matching gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # Per gt box: the best overlap any anchor achieves with it ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and the row indices of ALL anchors reaching that maximum for some
    # gt box (ties included; np.where yields them in row-major order).
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression target of each inside anchor w.r.t. its best gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive ones have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # Split the total weight between the positives and the negatives.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # targets and weights keep the (1, H, W, A * 4) layout
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training.

    For every image the path is stored under ``'image'`` and an
    ``'info_boxes'`` array with 18 columns is built: for each training scale,
    the grid boxes whose best-overlapping ground-truth box has a foreground
    class and an overlap >= ``cfg.TRAIN.FG_THRESH`` are recorded as

        0-1   cx, cy            7      mapped scale index
        2     scale index       8-11   grid box at the mapped scale
        3-6   grid box          12     gt class label
        14-17 regression target (column 13 is left at zero)

    The enriched roidb is cached on disk; when the cache file exists it is
    loaded into ``imdb._roidb`` instead and nothing is recomputed.

    Returns:
        None. ``imdb.roidb`` entries are modified in place.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE: unpickling executes arbitrary code; the cache directory must
        # only contain files written by this function.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(
            imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        empty_info = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = empty_info
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
        # Same for every scale; np.float64 replaces the removed ``np.float``.
        boxes_grid_f64 = boxes_grid.astype(np.float64)

        # One info block per scale, stacked once at the end; the leading
        # empty block fixes the shape/dtype when no scale has positives.
        info_blocks = [empty_info]
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid_f64,
                                     boxes_rescaled.astype(np.float64))
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes, grouped by class (class 0 is skipped)
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) &
                             (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])
            if len(fg_inds) == 0:
                continue

            gt_inds = argmax_overlaps[fg_inds]
            # bounding box regression targets
            gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                          boxes_rescaled[gt_inds, :])
            # scale mapping for RoI pooling
            scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
            scale_map = cfg.TRAIN.SCALES[scale_ind_map]

            # construct the list of positive boxes
            # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label,
            #  gt_sublabel, target)
            info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
            info_box[:, 0] = cx[fg_inds]
            info_box[:, 1] = cy[fg_inds]
            info_box[:, 2] = scale_ind
            info_box[:, 3:7] = boxes_grid[fg_inds, :]
            info_box[:, 7] = scale_ind_map
            info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
            info_box[:, 12] = labels[gt_inds]
            info_box[:, 14:] = gt_targets
            info_blocks.append(info_box)

        roidb[i]['info_boxes'] = np.vstack(info_blocks)

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are returned; sampling falls back to
    drawing with replacement when there are not enough candidates.

    Args:
        all_rois: RoIs whose columns 1:5 are (x1, y1, x2, y2).
        all_scores: per-RoI scores, indexed like ``all_rois``.
        gt_boxes: ground-truth boxes as (x1, y1, x2, y2, label).
        fg_rois_per_image: maximum number of foreground RoIs.
        rois_per_image: total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``
        with the foreground RoIs first and the background RoIs after them.

    Raises:
        ValueError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes); only the coordinate columns are compared.
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # Per RoI: index of the best-overlapping gt box, that overlap, and the
    # label of that gt box.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Cap the foreground count, fill the rest with background (with
        # replacement if there are too few background candidates).
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        # Foreground only: the whole batch is foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Background only: the whole batch is background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into a debugger, which would
        # stall a non-interactive training job.
        raise ValueError(
            'No foreground or background RoIs to sample: every RoI has a '
            'maximum overlap below cfg.TRAIN.BG_THRESH_LO')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Convert (roi, matched gt box) pairs into regression deltas; the
    # labels are passed along so the result rows carry them.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Expand the compact targets into per-class targets and weights.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def forward(self, bottom, top):
    """Compute RPN training targets for one image.

    Algorithm:
        for each (H, W) location i
            generate the anchor boxes centered on cell i
        filter out-of-image anchors
        measure GT overlap and label anchors fg (1) / bg (0) / ignore (-1)
        subsample labels and compute regression targets and weights

    Args:
        bottom: ``bottom[0]`` is the score map (only its shape is read; the
            batch size must be 1), ``bottom[1]`` holds the GT boxes
            (x1, y1, x2, y2, label) and ``bottom[2]`` the im_info row, read
            as (height, width, scale).
        top: four output blobs, reshaped and filled with labels
            (1, 1, A * H, W), bbox targets, bbox inside weights and bbox
            outside weights (each (1, A * 4, H, W)).
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = bottom[0].data.shape[-2:]
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    # im_info
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] >= -self._allowed_border) &
        (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((len(inds_inside), ), -1, dtype=np.float32)

    # overlaps between the anchors and the gt boxes: one row per anchor,
    # one column per gt box.
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Per anchor: index and value of the best-overlapping gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Per gt box: best overlap reached by any anchor ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and the rows of all anchors that reach it (ties included).
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Note: the positive and negative thresholds need not be contiguous;
    # anchors whose best overlap lies in the gap between
    # RPN_NEGATIVE_OVERLAP and RPN_POSITIVE_OVERLAP keep the label -1 and
    # are ignored.
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1  # dropped positives become "ignore"

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression target of every inside anchor w.r.t. its best gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only positive anchors contribute to the regression loss.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        # Running statistics of the positive targets across calls.
        self._sums += bbox_targets[labels == 1, :].sum(axis=0)
        self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        self._counts += np.sum(labels == 1)
        means = self._sums / self._counts
        stds = np.sqrt(self._squared_sums / self._counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        self._fg_sum += np.sum(labels == 1)
        self._bg_sum += np.sum(labels == 0)
        self._count += 1
        print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
        print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets: (1, H, W, A * 4) -> (1, A * 4, H, W)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_inside_weights.shape[2] == height
    assert bbox_inside_weights.shape[3] == width
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_outside_weights.shape[2] == height
    assert bbox_outside_weights.shape[3] == width
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def _load_kitti_voxel_exemplar_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the KITTI voxel
    exemplar format.

    Each annotation line is split on whitespace and read as: class name,
    subclass id, flip flag, then four box coordinates. Lines with subclass
    -1 are skipped; lines with flip flag 0 describe the objects, the others
    their flipped counterparts (both lists must have the same length).

    When ``cfg.IS_RPN`` is set, the per-class counters
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` are updated with
    how many gt boxes are covered by the grid boxes / anchors.

    Args:
        index: image index string; the annotation file is
            ``<index>.txt`` under the subclass directory.

    Returns:
        Dict with ``boxes``, ``gt_classes``, ``gt_subclasses``,
        ``gt_subclasses_flipped``, sparse ``gt_overlaps`` /
        ``gt_subindexes`` / ``gt_subindexes_flipped`` and ``flipped``.
    """
    if self._image_set == 'training':
        prefix = 'train' if self._seq_name != 'trainval' else 'trainval'
    else:
        prefix = ''

    # Without a prefix, or without an annotation file, the image simply has
    # no objects.
    lines = []
    lines_flipped = []
    if prefix:
        filename = os.path.join(self._kitti_tracking_path, cfg.SUBCLS_NAME,
                                prefix, index + '.txt')
        if os.path.exists(filename):
            print(filename)
            # the annotation file contains flipped objects
            with open(filename) as f:
                for line in f:
                    words = line.split()
                    subcls = int(words[1])
                    is_flip = int(words[2])
                    if subcls == -1:
                        continue
                    if is_flip == 0:
                        lines.append(line)
                    else:
                        lines_flipped.append(line)

    num_objs = len(lines)

    # store information of flipped objects
    assert (num_objs == len(lines_flipped)), \
        'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    for ix, line in enumerate(lines_flipped):
        gt_subclasses_flipped[ix] = int(line.split()[1])

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        boxes[ix, :] = [float(n) for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in
            # multi-scales: rescale the gt boxes, one copy per scale
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            # (np.float64 replaces the removed ``np.float`` alias)
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # Maps each row of boxes_all back to its original object.
                obj_index = np.tile(np.arange(num_objs),
                                    len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                # An object counts once even if covered at several scales.
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(),
                                shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
def get_minibatch(self):
    """Sample ``self._batch_size`` (subject, object, predicate) examples.

    Training images are drawn at random. For each annotated relationship
    of a drawn image, the subject and object boxes are matched to the
    precomputed regions in ``self.vgg_data``; the relationship is used only
    if both have a region with overlap >= 0.7 and the two best regions are
    different. Sampling continues until the batch is full.

    NOTE(review): the loop never terminates if no training image yields a
    usable relationship -- confirm the data guarantees at least one.

    Returns:
        Dict of arrays with keys ``classeme_s``, ``classeme_o``,
        ``visual_s``, ``visual_o``, ``location_s``, ``location_o`` and
        ``label``.
    """
    annotations = self.rdata['annotation_train']

    classeme_s, classeme_o = [], []
    visual_s, visual_o = [], []
    loc_s, loc_o = [], []
    labels = []

    cnt = 0
    while cnt < self._batch_size:
        idx = np.random.choice(len(annotations), 1)
        anno = annotations[idx[0]]
        im_id = anno.filename.split('.')[0]
        if im_id not in self.vgg_data:
            continue
        classemes = self.vgg_data[im_id]['classemes']
        visuals = self.vgg_data[im_id]['visuals']
        locations = self.vgg_data[im_id]['locations']

        if not hasattr(anno, 'relationship'):
            continue
        # A single relationship is not wrapped in an array; normalise it
        # (this rewrites the annotation object in place).
        if not isinstance(anno.relationship, np.ndarray):
            anno.relationship = [anno.relationship]

        for rel in anno.relationship:
            if not hasattr(rel, 'phrase'):
                continue
            predicate = rel.phrase[1]
            # Annotation boxes are stored as (ymin, ymax, xmin, xmax).
            ymin, ymax, xmin, xmax = rel.subBox
            sub_bbox = [xmin, ymin, xmax, ymax]
            ymin, ymax, xmin, xmax = rel.objBox
            obj_bbox = [xmin, ymin, xmax, ymax]

            # Row 0: subject vs. regions, row 1: object vs. regions.
            # np.float64 replaces the removed ``np.float`` alias.
            overlaps = bbox_overlaps(
                np.ascontiguousarray([sub_bbox, obj_bbox], dtype=np.float64),
                np.ascontiguousarray(locations, dtype=np.float64))
            if overlaps.size == 0:
                # No candidate regions to match against.
                continue

            # Up to 30 best regions in descending overlap order, then drop
            # the tail that falls below the 0.7 threshold.
            sub_sorted = overlaps[0].argsort()[-30:][::-1]
            obj_sorted = overlaps[1].argsort()[-30:][::-1]
            while len(sub_sorted) > 0 and overlaps[0][sub_sorted[-1]] < .7:
                sub_sorted = sub_sorted[:-1]
            while len(obj_sorted) > 0 and overlaps[1][obj_sorted[-1]] < .7:
                obj_sorted = obj_sorted[:-1]
            if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                continue

            # Only the single best region of each is used.
            s = sub_sorted[0]
            o = obj_sorted[0]
            if s != o:
                # Each box is encoded relative to the other one.
                sub_box_encoded = bbox_transform(
                    np.array([locations[o]]), np.array([locations[s]]))[0]
                obj_box_encoded = bbox_transform(
                    np.array([locations[s]]), np.array([locations[o]]))[0]
                relation = self.meta['meta/pre/name2idx/' + predicate][...]

                labels.append(np.float32(relation))
                classeme_s.append(classemes[s])
                classeme_o.append(classemes[o])
                visual_s.append(visuals[s])
                visual_o.append(visuals[o])
                loc_s.append(sub_box_encoded)
                loc_o.append(obj_box_encoded)
                cnt += 1

            if cnt >= self._batch_size:
                break

    blobs = {}
    blobs['classeme_s'] = np.array(classeme_s)
    blobs['classeme_o'] = np.array(classeme_o)
    blobs['visual_s'] = np.array(visual_s)
    blobs['visual_o'] = np.array(visual_o)
    blobs['location_s'] = np.array(loc_s)
    blobs['location_o'] = np.array(loc_o)
    blobs['label'] = np.array(labels)
    return blobs
def forward(self, step = 1):
    """Load the next usable sample and build RoIs, labels and box targets.

    The cursor ``self.iter`` is advanced by ``step`` (wrapping around the end
    of ``self.imagelist``) until a sample whose first ground-truth array is
    non-empty is found.  Boxes are then split into foreground (overlap with a
    ground-truth box >= ``self.config['select_overlap']``) and background, and
    up to ``self.config['batch_size']`` of them are sampled without
    replacement, foreground first.

    Args:
        step: how far to advance ``self.iter`` per visited sample.

    Returns:
        tuple ``(blobs, rois, labels, bbox_targets, bbox_weights)`` where
        ``blobs`` is the stack of preprocessed images, ``rois`` has one
        ``(frame_index, box...)`` row per sampled box and frame, ``labels`` is
        1 for foreground and 0 for background, and the targets are normalised
        with ``self.bbox_mean`` / ``self.bbox_std``.
    """
    selected = False
    while not selected:
        index = self.index[self.iter]
        img_names = self.imagelist[index]
        proc_imgs = []
        for img_name in img_names:
            img_path = osp.join(self.root_dir, img_name)
            assert osp.isfile(img_path)
            # NOTE(review): only the scale of the last image is kept and
            # applied to all RoIs below - presumably all frames share it.
            proc_img, scale = _get_image_blob(cv2.imread(img_path))
            proc_imgs.append(proc_img)
        blobs = np.vstack(proc_imgs)
        bboxes = self.bbox[index][0][:, :4]
        gts = self.gt[index]
        self.iter += step
        if self.iter >= len(self.imagelist):
            self.iter -= len(self.imagelist)
        if gts[0].shape[0] > 0:
            selected = True

    batch_size = self.config['batch_size']
    select_overlap = self.config['select_overlap']

    # sample rois
    overlaps = bbox_overlaps(np.require(bboxes, dtype=np.float64),
                             np.require(gts[0], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    fg_inds = np.where(max_overlaps >= select_overlap)[0]
    # Guard against the case when an image has fewer than batch_size
    # foreground RoIs
    fg_rois_per_this_image = min(batch_size, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    bg_inds = np.where(max_overlaps < select_overlap)[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = batch_size - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    keep_ids = np.append(fg_inds, bg_inds)
    # Size everything by the number of RoIs actually sampled.  This equals
    # batch_size whenever the image has at least that many boxes; sizing by
    # batch_size unconditionally made the concatenate below fail otherwise.
    num_rois = keep_ids.size

    labels = np.ones((num_rois, 1), dtype=np.float64)
    labels[fg_rois_per_this_image:] = 0

    # n * 1 * 4, repeated once per frame
    rois = bboxes[keep_ids][:, np.newaxis, :]
    rois = np.tile(rois, (1, self.length, 1))
    rois = rois * scale  # scale rois to match image scale
    # Prefix each roi with the index of the frame it belongs to.
    assignment = np.tile(np.arange(self.length), (num_rois, 1))[:, :, np.newaxis]
    rois = np.concatenate((assignment, rois), axis=2).reshape((-1, 5))

    # compute targets and weights
    fg_mask = labels.flatten().astype('bool')
    matched_gt = gt_assignment[keep_ids]
    bbox_targets = []
    bbox_weights = []
    for gt in gts[1:]:
        cur_bbox_targets = bbox_transform(gts[0][matched_gt], gt[matched_gt])
        cur_bbox_weights = np.zeros_like(cur_bbox_targets)
        # Only foreground RoIs contribute to the regression loss.
        cur_bbox_weights[fg_mask, ...] = 1
        bbox_targets.append(cur_bbox_targets)
        bbox_weights.append(cur_bbox_weights)
    bbox_targets = np.hstack(bbox_targets)
    bbox_weights = np.hstack(bbox_weights)
    bbox_targets = (bbox_targets - self.bbox_mean) / self.bbox_std
    return blobs, rois, labels, bbox_targets, bbox_weights
def ssd_gengt_layer(batch_pred_conf, prior_boxes, batch_gt_boxes):
    """Generate per-prior class labels and box regression targets for a batch.

    For every image, priors are matched to ground-truth boxes in two stages:
    a greedy one-to-one stage that repeatedly takes the globally best
    (ground truth, free prior) pair, and - when
    ``cfg.TRAIN.overlap_threshold`` is positive - a per-prior stage that marks
    every remaining prior whose best overlap exceeds that threshold.  If
    ``cfg.TRAIN.do_neg_mining`` is set, the unmatched priors with the highest
    non-background confidence are marked as negatives.

    Args:
        batch_pred_conf: per-image class scores, one row per prior.
        prior_boxes: prior boxes; multiplied by ``cfg.image_size`` before the
            overlap computation.
        batch_gt_boxes: per-image ground-truth rows ``(box..., label)``.  Rows
            from the first one whose first column is approximately -1 onward
            are dropped.

    Returns:
        tuple ``(labels, deltas)`` as int32 / float32 arrays.  A label is the
        ground-truth class (column 4) for positives, 0 for mined negatives and
        -1 for priors that are left unassigned.
    """
    batch_labels = []
    batch_deltas = []
    overlap_threshold = cfg.TRAIN.overlap_threshold
    negative_mining_thresh = cfg.TRAIN.neg_overlap
    for tl in range(len(batch_gt_boxes)):
        pred_conf = batch_pred_conf[tl]
        gt_boxes = batch_gt_boxes[tl]
        # argmax of a boolean mask yields the first True (or 0 if none), so
        # the mask is re-checked at that position before truncating.
        first_ignore = np.argmax(np.fabs(gt_boxes[:, 0] - -1) < 1e-3)
        if np.fabs(gt_boxes[first_ignore, 0] - -1) < 1e-3:
            gt_boxes = gt_boxes[:first_ignore]
        num_gt_boxes = len(gt_boxes)
        num_anchors = len(prior_boxes)
        num_positive = 0

        overlaps = bbox_overlaps(
            np.ascontiguousarray(prior_boxes * cfg.image_size, dtype=np.float64),
            np.ascontiguousarray(gt_boxes * cfg.image_size, dtype=np.float64))

        # anchor_flags: -1 unassigned, 1 positive, 0 mined negative
        anchor_flags = np.empty((len(prior_boxes),), dtype=np.int32)
        anchor_flags.fill(-1)
        gt_flags = np.empty((len(prior_boxes),), dtype=bool)
        gt_flags.fill(False)
        max_matches_iou = np.empty((len(prior_boxes),), dtype=np.float32)
        max_matches_iou.fill(-1.0)
        max_matches_gtid = np.empty((len(prior_boxes),), dtype=np.int32)
        max_matches_gtid.fill(-1)

        # gt_boxes match priors: for each gt keep its num_gt_boxes best
        # priors, best first.  That many candidates always suffice because
        # at most num_gt_boxes - 1 priors can be taken by other gt boxes.
        queues = []
        queue_tops = []
        for i in range(len(gt_boxes)):
            inds = np.argpartition(
                overlaps[:, i], num_anchors - num_gt_boxes)[-num_gt_boxes:]
            sort_inds = np.argsort(overlaps[inds, i])[::-1]
            queues.append(inds[sort_inds])
            queue_tops.append(0)

        for i in range(num_gt_boxes):
            max_overlap = 1e-6
            best_gt = -1
            best_anchor = -1
            for j in range(num_gt_boxes):
                if gt_flags[j]:
                    continue
                # Skip candidates already claimed by another gt box.
                while anchor_flags[queues[j][queue_tops[j]]] != -1:
                    queue_tops[j] += 1
                _anchor = queues[j][queue_tops[j]]
                if max_overlap < overlaps[_anchor][j]:
                    max_overlap = overlaps[_anchor][j]
                    best_gt = j
                    best_anchor = _anchor
            if best_gt == -1:
                # No remaining gt box overlaps any free prior.  Without this
                # guard the -1 sentinels were used as indices, which flagged
                # the last prior as positive and over-counted num_positive.
                break
            anchor_flags[best_anchor] = 1
            gt_flags[best_gt] = True
            max_matches_iou[best_anchor] = max_overlap
            max_matches_gtid[best_anchor] = best_gt
            num_positive += 1

        anchor_argmax_iou = overlaps.argmax(axis=1)
        anchor_max_iou = overlaps[np.arange(num_anchors), anchor_argmax_iou]

        # priors match gt_boxes
        if overlap_threshold > 0:
            inds = np.where((anchor_max_iou > 1e-6) & (anchor_flags != 1))
            max_matches_iou[inds] = anchor_max_iou[inds]
            max_matches_gtid[inds] = anchor_argmax_iou[inds]
            inds = np.where(
                (anchor_max_iou > overlap_threshold) & (anchor_flags != 1))
            gt_flags[anchor_argmax_iou[inds]] = True
            anchor_flags[inds] = 1
            num_positive += len(inds[0])

        # Negative mining: softmax over classes, then the largest
        # probability among columns 1.. for every prior.
        max_pred_conf_head = np.max(pred_conf, axis=1, keepdims=True)
        pred_conf = np.exp(pred_conf - max_pred_conf_head)
        max_pred_conf = np.max(
            pred_conf[:, 1:], axis=1, keepdims=True) / \
            np.sum(pred_conf, axis=1, keepdims=True)

        if cfg.TRAIN.do_neg_mining:
            num_negative = int(num_positive * cfg.TRAIN.neg_pos_ratio)
            if num_negative > (num_anchors - num_positive):
                num_negative = num_anchors - num_positive
            if num_negative > 0:
                inds = np.where((anchor_flags != 1) & (
                    max_matches_iou < negative_mining_thresh))
                max_matches_iou[inds] = anchor_max_iou[inds]
                max_matches_gtid[inds] = anchor_argmax_iou[inds]
                neg_inds = np.where((anchor_flags != 1) & (
                    max_matches_iou < negative_mining_thresh))[0]
                # Hardest negatives first.
                order = max_pred_conf[neg_inds].argsort(axis=0)[::-1]
                anchor_flags[neg_inds[order[:num_negative, 0]]] = 0

        labels = np.array(anchor_flags)
        inds = np.where(anchor_flags == 1)
        labels[inds] = gt_boxes[max_matches_gtid[inds], 4]
        deltas = bbox_transform(
            prior_boxes, gt_boxes[max_matches_gtid, :][:, :4].astype(
                np.float32, copy=False))
        batch_labels.append(labels)
        batch_deltas.append(deltas)
    return np.asarray(batch_labels, dtype=np.int32), np.asarray(
        batch_deltas, dtype=np.float32)
def _load_pascal_annotation(self, index):
    """
    Load image and bounding boxes info from XML file in the PASCAL VOC
    format.

    When ``cfg.IS_RPN`` is set, the method additionally updates
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` with per-class
    counts of ground-truth boxes and of those covered by a grid box
    (multi-scale mode) or an anchor (single-scale mode) with overlap of at
    least ``cfg.TRAIN.FG_THRESH[class - 1]``.

    Args:
        index: image identifier; the annotation is read from
            ``<data_path>/Annotations/<index>.xml``.

    Returns:
        dict with ``boxes``, ``gt_classes``, ``gt_overlaps`` (sparse one-hot
        class matrix), the all-zero sub-class placeholders and
        ``flipped=False``.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    # NOTE(review): boxes are uint16, so a 0-based coordinate of -1 (a value
    # of 0 in the XML) cannot be represented - confirm it never occurs.
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        name = str(get_data_from_tag(obj, "name")).lower().strip()
        # Unknown class names are mapped to class 0.
        if name in self._classes:
            cls = self._class_to_ind[name]
        else:
            cls = 0
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # Maps every rescaled box back to its original object index
                # (named so that it no longer shadows the ``index`` argument).
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes_all == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes' : boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps' : overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped' : False}
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 hold the box coordinates.
        gt_boxes: ground-truth rows; columns 0:4 are the box, column 4 the
            class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        tuple ``(labels, rois, bbox_targets, bbox_inside_weights,
        gt_boxes_kept, rois_pos, gt_assignment_pos)`` where ``gt_boxes_kept``
        are the ground-truth rows assigned to the sampled RoIs, ``rois_pos``
        the sampled foreground RoIs and ``gt_assignment_pos`` their
        ground-truth indices.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image),
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    # Positive rois are returned separately, in their own float32 buffer.
    rois_pos = np.zeros((fg_inds.size, 5), dtype=np.float32)
    rois_pos[:, :] = all_rois[fg_inds]
    gt_assignment_pos = gt_assignment[fg_inds]

    kept_gt = gt_assignment[keep_inds]
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[kept_gt, :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    # gt_boxes rows keep their original coordinates and class.
    return (labels, rois, bbox_targets, bbox_inside_weights,
            gt_boxes[kept_gt, :], rois_pos, gt_assignment_pos)
# Measure how often a foreground relation RoI overlaps a background relation
# RoI by more than `thresh`, averaged over the training batches.
cfg.TRAIN.USE_FLIPPED = False
imdb, roidb = combined_roidb('visual_genome_train_rel')
num_images = len(roidb)
data_layer = RoIDataLayer(imdb, roidb, bbox_means, bbox_stds)
epoch = 10
thresh = 0.8
fg_bg = AverageMeter()
print_freq = 100
for e in range(epoch):
    for i in range(num_images):
        blobs = data_layer.forward()
        predicates = blobs['predicates']
        rel_rois = blobs['rel_rois']
        # Non-zero predicate => foreground relation, zero => background.
        fg_rel_inds = np.where(predicates)[0]
        bg_rel_inds = np.where(predicates == 0)[0]
        num_fg_bg_pair = len(fg_rel_inds) * len(bg_rel_inds)
        # A batch without foreground or without background relations has no
        # pairs to rate; skipping it avoids a division by zero.
        if num_fg_bg_pair > 0:
            # Column 0 of rel_rois is dropped before the overlap computation.
            fg_rel_rois = rel_rois[fg_rel_inds, 1:]
            bg_rel_rois = rel_rois[bg_rel_inds, 1:]
            fg_bg_overlaps = bbox_overlaps(fg_rel_rois, bg_rel_rois)
            fg_inds, bg_inds = np.where(fg_bg_overlaps > thresh)
            num = len(fg_inds)
            # float() keeps this a true division under Python 2 as well.
            fg_bg.update(float(num) / num_fg_bg_pair, num_fg_bg_pair)
        if i > 0 and i % print_freq == 0:
            print('(fg/bg)Val: {fg_bg.val:.3f}| (fg/bg)Avg: {fg_bg.avg:.3f}'.format(fg_bg=fg_bg))
    print('epoch {0}: (fg/bg)Val: {fg_bg.val:.3f}| (fg/bg)Avg: {fg_bg.avg:.3f}'.format(e, fg_bg=fg_bg))
def forward(self, bottom, top):
    """Select hard examples and emit them, padded to the input RoI count.

    An input row ``i`` is "hard" when its classification disagrees with its
    overlap: best overlap > 0.5 while ``cls_score[i][0] > cls_score[i][1]``
    (emitted with label 1), or best overlap < 0.5 while
    ``cls_score[i][0] < cls_score[i][1]`` (emitted with label 0).  If no row
    is hard, all rows are used with the labels from ``bottom[4]``.  The
    selection is then repeated cyclically until it has as many entries as
    ``bottom[7]``.

    Args:
        bottom: input blobs; this method reads ``bottom[0]`` (rows with box
            coordinates in columns 1:5), ``bottom[1]`` (ground-truth boxes),
            ``bottom[2]`` (class scores), ``bottom[4]`` (labels) and
            ``bottom[7]`` (RoIs).
        top: output blobs, reshaped and filled in place: class scores,
            labels, bbox targets, box predictions, inside weights, outside
            weights and RoIs of the selected rows.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    bbox_pred = bottom[0].data
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    cls_score = bottom[2].data

    # calculate overlaps
    overlaps = bbox_overlaps(
        np.ascontiguousarray(bbox_pred[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    # sift the boxes
    hard_indexs = []
    labels = []
    for i, max_overlap in enumerate(max_overlaps):
        if max_overlap > 0.5 and cls_score[i][0] > cls_score[i][1]:
            hard_indexs.append(i)
            labels.append(1)
        if max_overlap < 0.5 and cls_score[i][0] < cls_score[i][1]:
            hard_indexs.append(i)
            labels.append(0)
    if len(hard_indexs) == 0:
        # Both must be real lists: they may be appended to below.
        hard_indexs = list(range(len(bbox_pred)))
        labels = list(bottom[4].data)

    # Pad by cycling through the hard examples found above.
    hard_num = len(hard_indexs)
    for i in range(hard_num, len(bottom[7].data)):
        hard_indexs.append(hard_indexs[i % hard_num])
        labels.append(labels[i % hard_num])
    labels = np.array(labels, dtype=np.float32)

    bbox_target_data = _compute_targets(
        bbox_pred[hard_indexs, 1:5], gt_boxes[gt_assignment[hard_indexs], :4],
        labels)
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, self._num_classes)

    hard_num = len(bottom[7].data)
    # top: 'cls_score_OHEM'
    top[0].reshape(hard_num, cls_score.shape[1])
    top[0].data[...] = cls_score[hard_indexs]
    # top: 'labels_OHEM'
    top[1].reshape(hard_num)
    top[1].data[...] = labels
    # top: 'bbox_targets_OHEM'
    top[2].reshape(hard_num, self._num_classes * 4)
    top[2].data[...] = bbox_targets
    # top: 'bbox_pred_OHEM'
    top[3].reshape(hard_num, self._num_classes * 4)
    top[3].data[...] = bbox_pred[hard_indexs]
    # top: 'bbox_inside_weights_OHEM'
    top[4].reshape(hard_num, self._num_classes * 4)
    top[4].data[...] = bbox_inside_weights
    # top: 'bbox_outside_weights_OHEM'
    top[5].reshape(hard_num, self._num_classes * 4)
    top[5].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32)
    # top: 'rois_OHEM'
    top[6].reshape(hard_num, 5)
    top[6].data[...] = bottom[7].data[hard_indexs]
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: optional per-image list of proposal boxes.  When
            omitted, the roidb entries with ``gt_classes == 0`` are used.
        thresholds: IoU thresholds to evaluate; defaults to 0.5:0.05:0.95.
        area: name of the ground-truth area range to restrict to.
        limit: optional maximum number of proposals used per image.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: if ``area`` is not a known area range.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: each round takes the best remaining
        # (proposal, gt) pair and retires both.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, num_attr_classes, num_rel_classes, ignore_label):
    """Generate a random sample of RoIs comprising foreground and background
    examples, with optional attribute and relation targets.

    Args:
        all_rois: candidate RoIs; columns 1:5 hold the box coordinates.
        gt_boxes: ground-truth rows laid out as
            (x1, y1, x2, y2, label, attributes[16], relations[num_objs]).
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        num_attr_classes: attribute targets are produced when this is > 0.
        num_rel_classes: relation targets are produced when this is > 0.
        ignore_label: value marking entries that carry no target.

    Returns:
        tuple ``(labels, rois, bbox_targets, bbox_inside_weights, attributes,
        relations)``; ``attributes`` / ``relations`` are ``None`` when the
        corresponding class count is not positive.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    # GT boxes (x1, y1, x2, y2, label, attributes[16], relations[num_objs])
    has_attributes = num_attr_classes > 0
    if has_attributes:
        assert gt_boxes.shape[1] >= 21
    has_relations = num_rel_classes > 0
    if has_relations:
        assert gt_boxes.shape[0] == gt_boxes.shape[1]-21, \
            'relationships not found in gt_boxes, item length is only %d' % gt_boxes.shape[1]

    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0

    # Ground-truth row assigned to every sampled foreground RoI.
    fg_gt = np.array(gt_assignment[fg_inds])

    if has_attributes:
        # Rows past the sampled foreground count stay at ignore_label.
        attributes = np.ones((fg_rois_per_image, 16)) * ignore_label
        attributes[:fg_rois_per_this_image, :] = gt_boxes[fg_gt, 5:21]
        # Zero entries in every column but the first are treated as "ignore".
        np.place(attributes[:, 1:], attributes[:, 1:] == 0, ignore_label)
    else:
        attributes = None

    if has_relations:
        # Relation matrix between the sampled foreground RoIs, looked up via
        # the ground-truth objects they were assigned to.
        expand_rels = gt_boxes[fg_gt, 21:].T[fg_gt].T
        num_relations_per_this_image = np.count_nonzero(expand_rels)
        # Keep an equal number of 'no relation' outputs, the rest can be ignore
        expand_rels = expand_rels.flatten()
        no_rel_inds = np.where(expand_rels == 0)[0]
        if len(no_rel_inds) > num_relations_per_this_image:
            no_rel_inds = npr.choice(no_rel_inds, size=num_relations_per_this_image, replace=False)
        np.place(expand_rels, expand_rels == 0, ignore_label)
        expand_rels[no_rel_inds] = 0
        relations = np.ones((fg_rois_per_image, fg_rois_per_image), dtype=np.float64) * ignore_label
        relations[:fg_rois_per_this_image, :fg_rois_per_this_image] = expand_rels.reshape(
            (fg_rois_per_this_image, fg_rois_per_this_image))
        relations = relations.reshape((relations.size, 1, 1, 1))
    else:
        relations = None

    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights, attributes, relations
def _sample_rois(all_rois, gt_boxes, gt_masks, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples, together with a resized ground-truth mask per RoI.

    Args:
        all_rois: candidate RoIs; columns 1:5 hold (x1, y1, x2, y2).
        gt_boxes: ground-truth rows; columns 0:4 are the box, column 4 the
            class label.
        gt_masks: one mask per ground-truth box, indexed (gt, row, column).
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        tuple ``(labels_data, rois, bbox_targets, bbox_inside_weights,
        mask_gt, label_weights, mask_weights)``.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired).  Cast to int like the
    # foreground count so a float rois_per_image cannot reach npr.choice.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Sample gt_masks: clip each foreground RoI's assigned mask to the RoI
    # region and resize it to (scale x scale).  Background RoIs keep an
    # all-zero mask.
    mask_gt_keep = gt_masks[gt_assignment[keep_inds], :, :]
    scale = cfg.TRAIN.ROI_OUTPUT_SIZE*2
    mask_gt_data = np.zeros((len(keep_inds), scale, scale))
    for i in range(len(keep_inds)):
        if labels[i] > 0:
            roi = rois[i, 1:5]
            if cfg.DEBUG:
                print('_sample_roi')
                print('i: '+ str(i) +' labels[i]:' + str(labels[i]))
                print('roi' +str(roi[0]) + ' ' + str(roi[1]) + ' ' + str(roi[2]) + ' ' + str(roi[3]))
            # Rows are indexed by y, columns by x; both ends are inclusive.
            # NOTE(review): an RoI lying outside the mask yields an empty
            # clip and a division by zero below - confirm RoIs are clipped
            # to the image upstream.
            mask_gt_clip = mask_gt_keep[i,
                                        int(round(roi[1])) : int(round(roi[3]))+1,
                                        int(round(roi[0])) : int(round(roi[2]))+1]
            if cfg.DEBUG:
                print('mask_gt_keep.shape[1]: ' +str(mask_gt_keep.shape[1]))
                print('mask_gt_keep.shape[2]: ' + str(mask_gt_keep.shape[2]))
                print('mask_gt_clip.shape[0]: ' +str(mask_gt_clip.shape[0]))
                print('mask_gt_clip.shape[1]: ' + str(mask_gt_clip.shape[1]))
            fx = float(scale)/mask_gt_clip.shape[1]
            fy = float(scale)/mask_gt_clip.shape[0]
            if cfg.DEBUG:
                print('mask_gt_clip.shape[0]: ' +str(mask_gt_clip.shape[0]))
                print('mask_gt_clip.shape[1]: ' + str(mask_gt_clip.shape[1]))
                print('scale: ' +str(scale))
                print('fx:' +str(fx))
                print('fy:' +str(fy))
            mask_gt_data[i, :, :] = np.round(
                cv2.resize(mask_gt_clip, None, fx=fx, fy=fy))

    labels_data = labels
    bbox_targets, bbox_inside_weights, labels, label_weights, mask_gt, mask_weights = \
        _get_bbox_regression_labels(bbox_target_data, labels_data, mask_gt_data, num_classes)

    if cfg.TRACE:
        print('========sample rois========')
        print('fg_inds')
        print(fg_inds)
        print('bg_inds')
        print(bg_inds)
        print('rois: ')
        print(rois[0:5, :])
        print('labels: ')
        print(labels[0:5, :])
        print('label_weights: ')
        print(label_weights[0:5, :])
        print('bbox_targets: ')
        print(bbox_targets[0:5, 4*59:4*60])
        print('mask_weighs: ')
        print(mask_weights[0:5, :, :, 59])
        print('save mask_gt')
        # NOTE(review): hard-coded class index 59 and absolute output path;
        # this trace branch only works on the original author's machine.
        cv2.imwrite('/home/chsiyuan/Documents/542FinalProject/experiments/mask_gt.png',mask_gt[0,:,:,59]*255)

    return labels_data, rois, bbox_targets, bbox_inside_weights, mask_gt, label_weights, mask_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride=[16, ], anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Args:
        rpn_cls_score: score map whose shape is (1, H, W, ...); only the
            shape is used.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: per-image rows of (height, width, scale); only row 0 is used.
        data: unused by this function.
        _feat_stride: stride of the score map relative to the image.  The
            list default is never mutated.
        anchor_scales: anchor scales passed to ``generate_anchors``.  The
            list default is never mutated.

    Returns:
        tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``.  Labels are 1 (positive), 0 (negative)
        or -1 (don't care) with shape (1, 1, A * H, W); the other three have
        shape (1, A * 4, H, W).

    Raises:
        AssertionError: if the batch holds more than one item.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height {} width {}'.format(height, width))
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # max overlapped ground-truth box for each anchor (filtered by inds_inside)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # max overlapped anchor for each ground-truth box; the np.where pass
    # picks up every anchor that ties for a ground-truth box's maximum
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets that map each anchor onto its max-overlapped
    # ground-truth box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only positive anchors contribute to the regression loss.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Label anchors and build RPN regression targets and loss weights.

    Same as the anchor target layer in original Fast/er RCNN.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` is read, as
            ``(height, width)``.
        gt_boxes: ground-truth boxes, one row per box; passed whole to
            ``bbox_overlaps`` and ``_compute_targets``.
        im_info: image info; row 0 is used, with ``[0]`` bounding anchor
            ``y2`` (height) and ``[1]`` bounding anchor ``x2`` (width).
        _feat_stride: not used by this function (kept for the call
            signature).
        all_anchors: ``(total_anchors, 4)`` array of ``x1, y1, x2, y2``.
        num_anchors: number of anchors per feature-map cell (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, height, width * A, 1)`` with 1 = positive, 0 = negative,
        -1 = don't care; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is what the removed `np.float` alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    rpn_labels = labels.reshape((1, height, width * A, 1))

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Unlike the original version, a fixed number of regions
    (``rois_per_image``) is always drawn: when too few candidates exist
    they are sampled with replacement.

    Args:
        all_rois: candidate RoIs; columns 1:5 are the box coordinates.
        all_scores: per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0:4 are coordinates and
            column 4 is the class label.
        fg_rois_per_image: upper bound on foreground RoIs to draw.
        rois_per_image: total number of RoIs to draw.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` with foreground samples first, followed by
        background samples whose labels are clamped to 0.

    Raises:
        ValueError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is what the removed `np.float` alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # guard against an image with fewer than fg_rois_per_image fg RoIs
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously a pdb.set_trace() breakpoint; fail loudly instead of
        # blocking (or silently producing an empty batch) in training runs.
        raise ValueError(
            'No foreground or background RoIs to sample: every RoI has '
            'max overlap below both FG_THRESH and BG_THRESH_LO')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def forward(self, bottom, top):
    """Compute RPN anchor labels, regression targets and loss weights.

    Inputs (``bottom``):
        bottom[0]: score map; only its shape is read (batch size must be
            1, last two dims are ``height, width``).
        bottom[1]: GT boxes (x1, y1, x2, y2, label).
        bottom[2]: im_info; row 0 is used, ``[0]`` as image height,
            ``[1]`` as image width, ``[2]`` printed as scale when DEBUG.

    Outputs (``top``), each reshaped to the produced array:
        top[0]: labels, shape ``(1, 1, A * height, width)``
            (1 = positive, 0 = negative, -1 = don't care).
        top[1]: bbox targets, shape ``(1, A * 4, height, width)``.
        top[2]: bbox inside weights, same shape as top[1].
        top[3]: bbox outside weights, same shape as top[1].
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = bottom[0].data.shape[-2:]
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    # im_info
    im_info = bottom[2].data[0, :]

    # Debug output uses single-argument print() so the text is identical
    # under Python 2 and Python 3.
    if DEBUG:
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] >= -self._allowed_border) &
        (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is what the removed `np.float` alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only positive anchors contribute to the regression loss
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        # running statistics of the positive targets across calls
        self._sums += bbox_targets[labels == 1, :].sum(axis=0)
        self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        self._counts += np.sum(labels == 1)
        means = self._sums / self._counts
        stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        self._fg_sum += np.sum(labels == 1)
        self._bg_sum += np.sum(labels == 0)
        self._count += 1
        print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
        print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_inside_weights.shape[2] == height
    assert bbox_inside_weights.shape[3] == width
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_outside_weights.shape[2] == height
    assert bbox_outside_weights.shape[3] == width
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def forward(self, bottom, top):
    """Assign a label, regression target and loss weights to every anchor.

    Inputs (``bottom``):
        bottom[0]: rpn_cls_score, the foreground/background
            classification map; only its shape is used (batch size must
            be 1, last two dims are ``height, width`` of the feature map).
        bottom[1]: GT boxes (x1, y1, x2, y2, label).
        bottom[2]: im_info; row 0 is used, ``[0]`` as image height,
            ``[1]`` as image width, ``[2]`` printed as scale when DEBUG.

    Outputs (``top``), each reshaped to the produced array:
        top[0]: labels, shape ``(1, 1, A * height, width)``
            (1 = positive, 0 = negative, -1 = don't care).
        top[1]: bbox targets, shape ``(1, A * 4, height, width)``.
        top[2]: bbox inside weights, same shape as top[1].
        top[3]: bbox outside weights, same shape as top[1].
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    # single item batches in training
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W): h, w of the feature map
    height, width = bottom[0].data.shape[-2:]
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    # im_info
    im_info = bottom[2].data[0, :]

    # Debug output uses single-argument print() so the text is identical
    # under Python 2 and Python 3.
    if DEBUG:
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    # shifts are expressed in original-image pixels (cell index * stride)
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] >= -self._allowed_border) &
        (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is what the removed `np.float` alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # the index of the max-overlap gt box for each anchor
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # the index of the max-overlap anchor for each gt box
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # assign smooth-L1 loss weights; anchors that are not labelled 1
    # keep a zero inside weight
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        # running statistics of the positive targets across calls
        self._sums += bbox_targets[labels == 1, :].sum(axis=0)
        self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        self._counts += np.sum(labels == 1)
        means = self._sums / self._counts
        stds = np.sqrt(self._squared_sums / self._counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors:
    # inside anchors keep their values, all other anchors get `fill`
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        self._fg_sum += np.sum(labels == 1)
        self._bg_sum += np.sum(labels == 0)
        self._count += 1
        print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
        print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_inside_weights.shape[2] == height
    assert bbox_inside_weights.shape[3] == width
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_outside_weights.shape[2] == height
    assert bbox_outside_weights.shape[3] == width
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def evaluation(self, output_dir):
    """Score saved pose results against ground truth, then plot and print them.

    If ``<output_dir>/results_deepim.mat`` exists, the per-object metrics
    are loaded from it. Otherwise they are computed from the per-image
    ``%04d_%06d.mat`` result files in ``output_dir`` and saved to that
    file. For each class (plus an aggregate ``'all'`` entry) a figure
    ``<output_dir>/<class>.png`` is written with accuracy curves for the
    ``adi`` distance (labelled symmetry / ADD-S), the ``add`` distance
    (labelled non-symmetry / ADD) and the translation error, plus one
    rotation-error histogram per refinement stage. Finally the ADD and
    ADD-S tables are printed.

    Args:
        output_dir: directory holding the per-image result files; also
            receives the cached metrics and the figures.
    """
    self.write_dop_results(output_dir)

    filename = os.path.join(output_dir, 'results_deepim.mat')
    num_iterations = cfg.TEST.ITERNUM
    if os.path.exists(filename):
        # reuse the metrics cached by a previous run
        results_all = scipy.io.loadmat(filename)
        print('load results from file')
        print(filename)
        distances_sys = results_all['distances_sys']
        distances_non = results_all['distances_non']
        errors_rotation = results_all['errors_rotation']
        errors_translation = results_all['errors_translation']
        results_seq_id = results_all['results_seq_id'].flatten()
        results_frame_id = results_all['results_frame_id'].flatten()
        results_object_id = results_all['results_object_id'].flatten()
        results_cls_id = results_all['results_cls_id'].flatten()
    else:
        # save results
        # column 0 holds the initial pose, column k + 1 refinement step k
        num_max = 200000
        num_results = num_iterations + 1
        distances_sys = np.zeros((num_max, num_results), dtype=np.float32)
        distances_non = np.zeros((num_max, num_results), dtype=np.float32)
        errors_rotation = np.zeros((num_max, num_results), dtype=np.float32)
        errors_translation = np.zeros((num_max, num_results), dtype=np.float32)
        results_seq_id = np.zeros((num_max, ), dtype=np.float32)
        results_frame_id = np.zeros((num_max, ), dtype=np.float32)
        results_object_id = np.zeros((num_max, ), dtype=np.float32)
        results_cls_id = np.zeros((num_max, ), dtype=np.float32)

        # for each image
        count = -1
        for i in range(len(self._mapping)):
            s, c, f = self._mapping[i]
            # only every _BOP_EVAL_SUBSAMPLING_FACTOR-th frame is evaluated
            is_testing = f % _BOP_EVAL_SUBSAMPLING_FACTOR == 0
            if not is_testing:
                continue

            # intrinsics
            intrinsics = self._intrinsics[c]
            intrinsic_matrix = np.eye(3, dtype=np.float32)
            intrinsic_matrix[0, 0] = intrinsics['fx']
            intrinsic_matrix[1, 1] = intrinsics['fy']
            intrinsic_matrix[0, 2] = intrinsics['ppx']
            intrinsic_matrix[1, 2] = intrinsics['ppy']

            # parse keyframe name
            scene_id, im_id = self.get_bop_id_from_idx(i)

            # load result
            filename = os.path.join(output_dir,
                                    '%04d_%06d.mat' % (scene_id, im_id))
            print(filename)
            if not os.path.exists(filename):
                print('file %s not exist' % (filename))
                continue
            result = scipy.io.loadmat(filename)

            # load gt
            d = os.path.join(self._data_dir, self._sequences[s],
                             self._serials[c])
            label_file = os.path.join(d, self._label_format.format(f))
            label = np.load(label_file)
            cls_indexes = np.array(self._ycb_ids[s]).flatten()

            # poses
            poses = label['pose_y']
            if len(poses.shape) == 2:
                poses = np.reshape(poses, (1, 3, 4))
            num_poses = poses.shape[0]
            assert num_poses == len(
                cls_indexes
            ), 'number of poses not equal to number of objects'

            # instance label
            im_label = label['seg']
            instance_ids = np.unique(im_label)
            # drop the 0 and 255 ids; the size guards keep a label image
            # that contains nothing else from raising IndexError
            if instance_ids.size and instance_ids[0] == 0:
                instance_ids = instance_ids[1:]
            if instance_ids.size and instance_ids[-1] == 255:
                instance_ids = instance_ids[:-1]

            # for each gt poses
            for cls in instance_ids:
                # find the number of pixels of the object
                pixels = np.sum(im_label == cls)
                if pixels < 200:
                    continue
                count += 1

                # find the pose
                object_index = np.where(cls_indexes == cls)[0][0]
                RT_gt = poses[object_index, :, :]
                box_gt = self.compute_box(cls - 1, intrinsic_matrix, RT_gt)
                results_seq_id[count] = scene_id
                results_frame_id[count] = im_id
                results_object_id[count] = object_index
                results_cls_id[count] = cls

                # network result: rois whose class matches this object
                roi_index = []
                if len(result['rois']) > 0:
                    for k in range(result['rois'].shape[0]):
                        ind = int(result['rois'][k, 1])
                        if cls == cfg.TRAIN.CLASSES[ind] + 1:
                            roi_index.append(k)

                # select the roi
                if len(roi_index) > 1:
                    # overlaps: (rois x gt_boxes)
                    roi_blob = result['rois'][roi_index, :]
                    roi_blob = roi_blob[:, (0, 2, 3, 4, 5, 1)]
                    gt_box_blob = np.zeros((1, 5), dtype=np.float32)
                    gt_box_blob[0, 1:] = box_gt
                    # np.float64 is what the removed `np.float` alias
                    # resolved to
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(roi_blob[:, :5],
                                             dtype=np.float64),
                        np.ascontiguousarray(gt_box_blob,
                                             dtype=np.float64)).flatten()
                    assignment = overlaps.argmax()
                    roi_index = [roi_index[assignment]]

                if len(roi_index) > 0:
                    # index with a scalar so int() receives a true scalar
                    # rather than a 1-element array
                    roi = roi_index[0]
                    RT = np.zeros((3, 4), dtype=np.float32)
                    ind = int(result['rois'][roi, 1])
                    points = self._points[ind]

                    # initial pose
                    RT[:3, :3] = quat2mat(
                        result['poses_init'][roi, 2:6].flatten())
                    RT[:, 3] = result['poses_init'][roi, 6:]
                    distances_sys[count, 0] = adi(RT[:3, :3], RT[:, 3],
                                                  RT_gt[:3, :3], RT_gt[:, 3],
                                                  points)
                    distances_non[count, 0] = add(RT[:3, :3], RT[:, 3],
                                                  RT_gt[:3, :3], RT_gt[:, 3],
                                                  points)
                    errors_rotation[count, 0] = re(RT[:3, :3], RT_gt[:3, :3])
                    errors_translation[count, 0] = te(RT[:, 3], RT_gt[:, 3])

                    # pose after refinement
                    for k in range(num_iterations):
                        RT[:3, :3] = quat2mat(
                            result['poses_est'][k][roi, 2:6].flatten())
                        RT[:, 3] = result['poses_est'][k][roi, 6:]
                        distances_sys[count, k + 1] = adi(
                            RT[:3, :3], RT[:, 3], RT_gt[:3, :3], RT_gt[:, 3],
                            points)
                        distances_non[count, k + 1] = add(
                            RT[:3, :3], RT[:, 3], RT_gt[:3, :3], RT_gt[:, 3],
                            points)
                        errors_rotation[count, k + 1] = re(
                            RT[:3, :3], RT_gt[:3, :3])
                        errors_translation[count, k + 1] = te(
                            RT[:, 3], RT_gt[:, 3])
                else:
                    # no detection for this object: counted as missed
                    distances_sys[count, :] = np.inf
                    distances_non[count, :] = np.inf
                    errors_rotation[count, :] = np.inf
                    errors_translation[count, :] = np.inf

        # trim the preallocated buffers to the rows actually filled
        distances_sys = distances_sys[:count + 1, :]
        distances_non = distances_non[:count + 1, :]
        errors_rotation = errors_rotation[:count + 1, :]
        errors_translation = errors_translation[:count + 1, :]
        results_seq_id = results_seq_id[:count + 1]
        results_frame_id = results_frame_id[:count + 1]
        results_object_id = results_object_id[:count + 1]
        results_cls_id = results_cls_id[:count + 1]

        results_all = {
            'distances_sys': distances_sys,
            'distances_non': distances_non,
            'errors_rotation': errors_rotation,
            'errors_translation': errors_translation,
            'results_seq_id': results_seq_id,
            'results_frame_id': results_frame_id,
            'results_object_id': results_object_id,
            'results_cls_id': results_cls_id
        }

        filename = os.path.join(output_dir, 'results_deepim.mat')
        scipy.io.savemat(filename, results_all)

    # print the results
    # for each class
    import matplotlib.pyplot as plt
    max_distance = 0.1
    color = ['r', 'g', 'b', 'y', 'c']
    index_plot = [0]
    leng = ['Initial']
    for k in range(num_iterations):
        leng.append('Iteration %d' % (k + 1))
        index_plot.append(k + 1)
    num = len(leng)
    ADD = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
    ADDS = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
    TS = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
    classes = list(copy.copy(self._classes_all))
    classes.append('all')

    def plot_accuracy(fig, position, errors, index, scores, k, xlabel):
        # Draw one accuracy-vs-threshold subplot for class k and store the
        # VOCap score of every stage in scores[k, :].
        ax = fig.add_subplot(3, 3, position)
        lengs = []
        for i in index_plot:
            # fancy indexing returns a copy, so `errors` is not modified
            D = errors[index, i]
            D[D > max_distance] = np.inf
            d = np.sort(D)
            n = len(d)
            accuracy = np.cumsum(np.ones((n, ), np.float32)) / n
            # cycle the palette so more than len(color) stages still plot
            plt.plot(d, accuracy, color[i % len(color)], linewidth=2)
            scores[k, i] = VOCap(d, accuracy)
            lengs.append('%s (%.2f)' % (leng[i], scores[k, i] * 100))
            print('%s, %s: %d objects missed' %
                  (classes[k], leng[i], np.sum(np.isinf(D))))
        ax.legend(lengs)
        plt.xlabel(xlabel)
        plt.ylabel('accuracy')
        ax.set_title(classes[k])

    for k in range(self._num_classes_all + 1):
        if k == self._num_classes_all:
            index = range(len(results_cls_id))
        else:
            index = np.where(results_cls_id == k + 1)[0]

        if len(index) == 0:
            continue
        print('%s: %d objects' % (classes[k], len(index)))

        # create the figure only for classes that have results
        fig = plt.figure(figsize=(16.0, 14.0))

        # distance symmetry
        plot_accuracy(fig, 1, distances_sys, index, ADDS, k,
                      'Average distance threshold in meter (symmetry)')

        # distance non-symmetry
        plot_accuracy(fig, 2, distances_non, index, ADD, k,
                      'Average distance threshold in meter (non-symmetry)')

        # translation
        plot_accuracy(fig, 3, errors_translation, index, TS, k,
                      'Translation threshold in meter')

        # rotation histogram
        # NOTE(review): the 3x3 grid leaves positions 4..9 for these, so
        # this presumably supports at most 6 stages - confirm for larger
        # cfg.TEST.ITERNUM.
        subplot_pos = 4
        for i in index_plot:
            ax = fig.add_subplot(3, 3, subplot_pos)
            D = errors_rotation[index, i]
            D = D[np.isfinite(D)]
            ax.hist(D, bins=range(0, 190, 10), range=(0, 180))
            plt.xlabel('Rotation angle error')
            plt.ylabel('count')
            ax.set_title(leng[i])
            subplot_pos += 1

        # mng = plt.get_current_fig_manager()
        # mng.full_screen_toggle()
        filename = output_dir + '/' + classes[k] + '.png'
        plt.savefig(filename)
        # plt.show()
        # release the figure; otherwise one stays open per class
        plt.close(fig)

    # print ADD
    for i in range(cfg.TEST.ITERNUM + 1):
        if i == 0:
            prefix = 'Initial'
        else:
            prefix = 'Iteration %d' % (i)
        print('==================ADD %s======================' % (prefix))
        for k in range(len(classes)):
            print('%s: %f' % (classes[k], ADD[k, i]))
        # the mean excludes the aggregate 'all' row
        print('mean: %f' % (np.mean(ADD[:-1, i])))
        for k in range(len(classes)):
            print('%f' % (ADD[k, i]))
        print(cfg.TRAIN.SNAPSHOT_INFIX)
        print('===========================================')

        # print ADD-S
        print('==================ADD-S %s====================' % (prefix))
        for k in range(len(classes)):
            print('%s: %f' % (classes[k], ADDS[k, i]))
        print('mean: %f' % (np.mean(ADDS[:-1, i])))
        for k in range(len(classes)):
            print('%f' % (ADDS[k, i]))
        print(cfg.TRAIN.SNAPSHOT_INFIX)
        print('===========================================')
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ],
                        anchor_scales=[4, 8, 16, 32]):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets for a single image.

    Parameters
    ----------
    rpn_cls_score: (1, Ax2, H, W) bg/fg scores of the previous conv layer.
        Only its shape is read here (batch size must be 1; H and W come
        from axes 2 and 3).
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not; may be None
    dontcare_areas: (D, 4), areas that may contain small unlabeled objects.
        D may be 0 and the argument may be None.
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratio]
    _feat_stride: downsampling ratio of the feature map relative to the
        input image. The default list is never mutated.
    anchor_scales: scales applied to the base anchor. The default list is
        never mutated.

    Returns
    -------
    rpn_labels: (1, A*H, W, 1); for each anchor 0 denotes bg, 1 fg,
        -1 dontcare
    rpn_bbox_targets: (1, A*4, H, W) regression targets from
        ``_compute_targets``
    rpn_bbox_inside_weights: (1, A*4, H, W); set to
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS on fg anchors, 0 elsewhere
    rpn_bbox_outside_weights: (1, A*4, H, W); per-anchor weights used to
        balance fg/bg, because their counts may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print ('anchors:')
        print (_anchors)
        print ('anchor shapes:')
        print (np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Running statistics; only read by the DEBUG sections below.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate the anchor boxes centered on cell i
    #   filter out-of-image anchors
    #   measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # pytorch layout (bs, c, h, w)
    height, width = rpn_cls_score.shape[2:4]

    if DEBUG:
        print ('AnchorTargetLayer: height', height, 'width', width)
        print ('')
        print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print ('scale: {}'.format(im_info[2]))
        print ('height, width: ({}, {})'.format(height, width))
        print ('rpn: gt_boxes.shape', gt_boxes.shape)
        print ('rpn: gt_boxes', gt_boxes)

    # 1. Generate shifted anchors, one set per feature-map cell
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print ('total_anchors', total_anchors)
        print ('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print ('anchors.shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)  # best gt per anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # best anchor per gt
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersecs shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # summed over dontcare areas
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occluded, truncated or
    # difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # hard boxes x anchors
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            hard_max_overlaps = hard_overlaps.max(axis=0)  # per anchor
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            # also ignore the best-matching anchor of every hard box
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Negative config value selects this fixed weighting:
        # 1 for positives, 0 for negatives.
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The "+ 1" belongs in the denominator: it keeps the division
        # defined when a class has no samples. Previously it was added
        # to the quotient, which yielded weights > 1 and still divided
        # by zero for an empty class.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print ('means:')
        print (means)
        print ('stdevs:')
        print (stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print ('rpn: max max_overlap', np.max(max_overlaps))
        print ('rpn: num_positive', np.sum(labels == 1))
        print ('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print ('rpn: num_positive avg', _fg_sum / _count)
        print ('rpn: num_negative avg', _bg_sum / _count)

    # labels: (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, A*H, W, 1)
    labels = labels.reshape((1, height, width, A))
    labels = labels.transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width)).transpose(0, 2, 3, 1)

    # bbox_targets: (K*A, 4) -> (1, H, W, A*4) -> (1, A*4, H, W)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights, same layout as bbox_targets
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights, same layout as bbox_targets
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                    area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: optional per-image sequence of proposal box
            arrays. When None, the non-ground-truth boxes stored in
            ``self.roidb`` (entries with ``gt_classes == 0``) are used.
        thresholds: optional vector of IoU thresholds. Defaults to
            0.50, 0.55, ..., 0.95.
        area: name of the ground-truth area range to evaluate; one of
            'all', 'small', 'medium', 'large', '96-128', '128-256',
            '256-512', '512-inf'.
        limit: optional maximum number of proposals kept per image
            (the first ``limit`` rows).

    Returns:
        A tuple ``(ar, gt_overlaps, recalls, thresholds)``:
            ar: average recall (mean of ``recalls``)
            gt_overlaps: sorted vector of the recorded ground-truth overlaps
            recalls: vector of recalls at each IoU overlap threshold
            thresholds: vector of IoU overlap thresholds

    Raises:
        AssertionError: if ``area`` is not a known range name.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0**2, 1e5**2],      # all
                   [0**2, 32**2],       # small
                   [32**2, 96**2],      # medium
                   [96**2, 1e5**2],     # large
                   [96**2, 128**2],     # 96-128
                   [128**2, 256**2],    # 128-256
                   [256**2, 512**2],    # 256-512
                   [512**2, 1e5**2],    # 512-inf
                   ]
    # `in` works on Python 2 and 3; dict.has_key() is Python 2 only.
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 replaces the removed np.float alias (same dtype).
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert(gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert(_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return ar, gt_overlaps, recalls, thresholds
def forward(self, bottom, top):
    """Compute anchor labels, regression targets and loss weights.

    Inputs read from ``bottom``:
        bottom[0]: score map; only its shape is used (batch size must be
            1, the last two axes give H and W)
        bottom[1]: ground-truth boxes (x1, y1, x2, y2, label)
        bottom[2]: im_info; row 0 is indexed as [height, width, ...]
        bottom[4]: class scores, read only when hard mining is active

    Outputs written to ``top``:
        top[0]: labels, shape (1, 1, A*H, W); 1 fg, 0 bg, -1 ignore
        top[1]: bbox regression targets, shape (1, A*4, H, W)
        top[2]: bbox inside weights, shape (1, A*4, H, W)
        top[3]: bbox outside weights, shape (1, A*4, H, W)
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = bottom[0].data.shape[-2:]
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    # im_info
    im_info = bottom[2].data[0, :]

    # 1. Generate shifted anchors, one set per feature-map cell
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] >= -self._allowed_border) &
        (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
    )[0]

    # keep only inside anchors
    if inds_inside.shape[0] == 0:
        # If no anchors inside use whatever anchors we have
        inds_inside = np.arange(0, all_anchors.shape[0])

    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # assign bg labels first so that positive labels can clobber them
    labels[max_overlaps < cfg.TRAIN.ANCHOR_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= self._positive_overlap] = 1

    # Subsample positives
    num_fg = int(cfg.TRAIN.ANCHOR_FG_FRACTION * cfg.TRAIN.ANCHORS_PER_BATCH)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if self._hard_mining and cfg.TRAIN.HARD_POSITIVE_MINING:
            # Score channels from index A onward, flattened in
            # (H, W, channel) order and restricted to inside anchors.
            # NOTE(review): presumably the foreground scores - confirm
            # against the layer that feeds bottom[4].
            ohem_scores = bottom[4].data[:, self._num_anchors:, :, :]
            ohem_scores = ohem_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            # keep the num_fg positives with the lowest scores
            pos_ohem_scores = 1 - ohem_scores[fg_inds]
            order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
            labels[fg_inds] = -1
            labels[ohem_sampled_fgs] = 1
        else:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

    # Subsample negatives
    n_fg = np.sum(labels == 1)
    num_bg = cfg.TRAIN.ANCHORS_PER_BATCH - n_fg
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if not self._hard_mining:
            # randomly sub-sample negatives
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
        else:
            # keep the num_bg negatives with the highest scores
            ohem_scores = bottom[4].data[:, self._num_anchors:, :, :]
            ohem_scores = ohem_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            neg_ohem_scores = ohem_scores[bg_inds]
            order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
            labels[bg_inds] = -1
            labels[ohem_sampled_bgs] = 0

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, 1, A*H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets: (K*A, 4) -> (1, H, W, A*4) -> (1, A*4, H, W)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_inside_weights.shape[2] == height
    assert bbox_inside_weights.shape[3] == width
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_outside_weights.shape[2] == height
    assert bbox_outside_weights.shape[3] == width
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that are
    useful for training.

    For every image, a dense grid of candidate boxes is generated and
    matched against the (rescaled) ground-truth boxes. Grid boxes whose
    best overlap reaches cfg.TRAIN.FG_THRESH are stored, together with
    their regression targets, in ``roidb[i]['info_boxes']``. The image
    path is stored in ``roidb[i]['image']``.

    The enriched roidb is pickled to
    ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``. If that file
    already exists it is loaded into ``imdb._roidb`` and nothing is
    recomputed.

    Each row of ``info_boxes`` has 18 columns:
        0      cx
        1      cy
        2      scale_ind
        3:7    grid box
        7      scale_ind_map
        8:12   grid box mapped to the RoI-pooling scale
        12     gt label
        13     not written here (stays 0)
        14:18  regression targets
    """
    # If a cache file exists, load it and return immediately.
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        # Ground-truth boxes and their class labels.
        # NOTE(review): an earlier note says these boxes are not in
        # original-image coordinates (they are multiplied by each scale
        # below) - confirm against the roidb producer.
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        # Starts as an empty (0, 18) array; rows are appended per scale.
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        # Input: the image height and width.
        # Output: boxes_grid, rows of [x1, y1, x2, y2], plus per-box
        # center coordinates cx and cy (both are indexed with the same
        # fg_inds as boxes_grid below). These are generated candidate
        # boxes, not ground truth.
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap (np.float64 replaces the removed np.float
            # alias; same dtype)
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            # For every grid box, find its best-matching gt box:
            # the largest IoU ...
            max_overlaps = overlaps.max(axis=1)
            # ... the index of the gt box achieving it ...
            argmax_overlaps = overlaps.argmax(axis=1)
            # ... and that gt box's class.
            max_classes = labels[argmax_overlaps]

            # select positive boxes: walk the foreground classes and
            # collect grid boxes whose IoU reaches the fg threshold
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) &
                                        (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                # index of the gt box matched to each fg box
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets between each fg box
                # and its matched gt box (four values per box; they fill
                # columns 14:18 below)
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # scale mapping for RoI pooling
                # NOTE(review): an earlier note questions whether
                # cfg.TRAIN.SCALE_MAPPING is defined - verify the config.
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]

                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map,
                #  gt_label, gt_sublabel, target)
                # The 18 columns describe ONE box each; they are not
                # 9 anchors x 2 scores.
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
def anchor_target_layer(rpn_cls_prob, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors, target_name):
    """Same as the anchor target layer in original Fast/er RCNN, with
    optional online hard example mining (OHEM) when subsampling.

    Args:
        rpn_cls_prob: class probabilities; axes 1 and 2 are read as
            (height, width) and the last axis holds the per-anchor
            channels (channels from index ``num_anchors`` onward are used
            as the mining score).
        gt_boxes: ground-truth boxes, one row per box.
        im_info: indexed as [image_height, image_width, ...].
        _feat_stride: unused in this function; kept for interface
            compatibility.
        all_anchors: (total_anchors, 4) anchors already shifted over the
            feature map.
        num_anchors: number of anchors per feature-map cell (A).
        target_name: name of the detection module; "M3" allows anchors to
            extend 512 px past the image border, any other value allows
            none.

    Returns:
        rpn_labels: shape (1, 1, A*H, W); 1 fg, 0 bg, -1 ignore
        rpn_bbox_targets: shape (1, H, W, A*4)
        rpn_bbox_inside_weights: shape (1, H, W, A*4)
        rpn_bbox_outside_weights: shape (1, H, W, A*4)
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    hard_mining = cfg.TRAIN.HARD_POSITIVE_MINING

    # allow boxes to sit over the edge by a small amount
    # follow the SSH setting
    if target_name == "M3":
        _allowed_border = 512
    else:
        _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_prob.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # Avoids 'ValueError: attempt to get argmax of an empty sequence'
    # during training.
    if inds_inside.shape[0] == 0:
        # If no anchors inside use whatever anchors we have
        inds_inside = np.arange(0, total_anchors)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # ---- subsample positive labels (OHEM when hard_mining is set) ----
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if hard_mining:
            # NOTE(review): channels from num_anchors onward are
            # presumably the foreground probabilities - confirm.
            ohem_scores = rpn_cls_prob[:, :, :, num_anchors:]
            ohem_scores = ohem_scores.reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            # keep the num_fg positives with the lowest scores
            pos_ohem_scores = 1 - ohem_scores[fg_inds]
            order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
            labels[fg_inds] = -1
            labels[ohem_sampled_fgs] = 1
        else:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

    # ---- subsample negative labels (OHEM when hard_mining is set) ----
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if not hard_mining:
            # randomly sub-sampling negatives
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
        else:
            # keep the num_bg negatives with the highest scores
            ohem_scores = rpn_cls_prob[:, :, :, num_anchors:]
            ohem_scores = ohem_scores.reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            neg_ohem_scores = ohem_scores[bg_inds]
            order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
            labels[bg_inds] = -1
            labels[ohem_sampled_bgs] = 0

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, 1, A*H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets: (K*A, 4) -> (1, H, W, A*4)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights, same layout as bbox_targets
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights, same layout as bbox_targets
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights