def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes,
                 sample_type='fpn', k0=4):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    if sample_type == 'fpn':
        w = (rois[:, 3] - rois[:, 1])
        h = (rois[:, 4] - rois[:, 2])
        s = w * h
        s[s <= 0] = 1e-6
        layer_index = np.floor(k0 + np.log2(np.sqrt(s) / 224))
        layer_index[layer_index < 2] = 2
        layer_index[layer_index > 5] = 5
        # rois: [512, 5], labels: [512]
        return rois, labels, bbox_targets, bbox_inside_weights, layer_index
    else:
        return rois, labels, bbox_targets, bbox_inside_weights
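# --- Illustrative sketch (not from the original code) ---
# The 'fpn' branch above assigns each RoI to a pyramid level with
# k = floor(k0 + log2(sqrt(w*h) / 224)), clipped to [2, 5]. The helper below
# isolates that formula so it can be sanity-checked on a few toy boxes;
# assign_fpn_level and canonical_size are hypothetical names, not part of the
# original repository.
import numpy as np


def assign_fpn_level(boxes, k0=4, canonical_size=224, min_level=2, max_level=5):
    """boxes: (N, 4) [x1, y1, x2, y2]; returns an (N,) array of pyramid levels."""
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    s = np.maximum(w * h, 1e-6)  # guard against degenerate boxes, as in _sample_rois
    k = np.floor(k0 + np.log2(np.sqrt(s) / canonical_size))
    return np.clip(k, min_level, max_level).astype(np.int64)


boxes = np.array([[0, 0, 32, 32], [0, 0, 224, 224], [0, 0, 800, 800]], dtype=np.float64)
print(assign_fpn_level(boxes))  # [2 4 5]: small boxes map to low levels, large boxes to high levels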
def forward(self, regressions, anchors, annotations, iou_thresh=0.5):
    losses = []
    batch_size = regressions.shape[0]
    for j in range(batch_size):
        regression = regressions[j, :, :]
        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
        if bbox_annotation.shape[0] == 0:
            losses.append(torch.tensor(0).float().cuda())
            continue
        indicator = bbox_overlaps(
            min_area_square(anchors[j, :, :]),
            min_area_square(bbox_annotation[:, :-1])
        )
        overlaps = rbox_overlaps(
            anchors[j, :, :].cpu().numpy(),
            bbox_annotation[:, :-1].cpu().numpy(),
            indicator.cpu().numpy(),
            thresh=1e-1
        )
        if not torch.is_tensor(overlaps):
            overlaps = torch.from_numpy(overlaps).cuda()
        iou_max, iou_argmax = torch.max(overlaps, dim=1)
        positive_indices = torch.ge(iou_max, iou_thresh)
        assigned_annotations = bbox_annotation[iou_argmax, :]
        if positive_indices.sum() > 0:
            all_rois = anchors[j, positive_indices, :]
            gt_boxes = assigned_annotations[positive_indices, :]
            targets = self.box_coder.encode(all_rois, gt_boxes)
            loss = self.criteron(regression[positive_indices, :], targets)
            losses.append(loss)
        else:
            losses.append(torch.tensor(0).float().cuda())
    return torch.stack(losses).mean(dim=0, keepdim=True)
def _compute_targets(rois, overlaps, labels):
    """
    Compute bounding-box regression targets for an image.
    For each RoI, find the corresponding gt box, then compute the distance.
    """
    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
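# --- Illustrative sketch (not from the original code) ---
# _compute_targets above delegates to bbox_transform, which is not defined in
# this section. The sketch below is the standard Faster R-CNN box
# parameterization (tx, ty, tw, th); the project's own bbox_transform may
# differ in details (e.g. whether it uses the +1.0 width/height convention).
import numpy as np


def bbox_transform_sketch(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()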
def create_roidb_from_box_list(self, box_list, gt_roidb):
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                        gt_boxes.astype(np.float))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    npr.seed(cfg.RNG_SEED)
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        all_rois[:, 1:5].data,
        gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) +
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        all_rois[:, 1:5].data,
        gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) +
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    # overlaps (rois x gt_boxes)
    overlaps = bbox_overlaps(np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
                             np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    # For each anchor, the index of the gt box with the largest overlap
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Label of the gt box with the largest overlap
    labels = gt_boxes[gt_assignment, 4]
    # Select foreground rois: those at or above the foreground threshold
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Background rois lie between the background thresholds
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # bg_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH_HI)[0]

    # Make sure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        # Background rois per image = total rois minus foreground rois
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # If there are fewer backgrounds than needed, set replace to True so the
        # same element may be picked repeatedly
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # No background rois; if there are fewer foregrounds than the total, allow repeats
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # No foregrounds; if there are fewer backgrounds than the total, allow repeats
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Otherwise, drop into the debugger
        print(1)
        import pdb
        pdb.set_trace()

    # The foreground and background indices selected above
    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # Set the background labels to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]
    # Return the target data: the label plus the four regression targets tx, ty, tw, th
    bbox_target_data = _compute_targets(rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    # Return the per-class target boxes and the inside weights
    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(bbox_target_data, num_classes)
    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _get_proposal_clusters(all_rois, proposals, im_labels):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    # gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    # cls_loss_weights = gt_scores[gt_assignment, 0]
    #
    # # Select foreground RoIs as those with >= FG_THRESH overlap
    # fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    #
    # # Select background RoIs as those with < FG_THRESH overlap
    # bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
    #
    # ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    # cls_loss_weights[ig_inds] = 0.0
    #
    # labels[bg_inds] = 0
    # gt_assignment[bg_inds] = -1
    #
    # img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    # pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    # pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    # pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    #
    # for i in xrange(gt_boxes.shape[0]):
    #     po_index = np.where(gt_assignment == i)[0]
    #     img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
    #     pc_labels[i] = gt_labels[i, 0]
    #     pc_count[i] = len(po_index)
    #     pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return max_overlaps, labels
def forward(self, classifications, anchors, annotations, iou_thresh=0.5):
    losses = []
    batch_size = classifications.shape[0]
    for j in range(batch_size):
        classification = classifications[j, :, :]
        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
        if bbox_annotation.shape[0] == 0:
            losses.append(torch.tensor(0).float().cuda())
            continue
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
        indicator = bbox_overlaps(
            min_area_square(anchors[j, :, :]),
            min_area_square(bbox_annotation[:, :-1])
        )
        overlaps = rbox_overlaps(
            anchors[j, :, :].cpu().numpy(),
            bbox_annotation[:, :-1].cpu().numpy(),
            indicator.cpu().numpy(),
            thresh=1e-1
        )
        if not torch.is_tensor(overlaps):
            overlaps = torch.from_numpy(overlaps).cuda()
        iou_max, iou_argmax = torch.max(overlaps, dim=1)
        targets = (torch.ones(classification.shape) * -1).cuda()
        targets[torch.lt(iou_max, 0.4), :] = 0
        positive_indices = torch.ge(iou_max, iou_thresh)
        num_positive_anchors = positive_indices.sum()
        assigned_annotations = bbox_annotation[iou_argmax, :]
        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1
        alpha_factor = torch.ones(targets.shape).cuda() * self.alpha
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
        bin_cross_entropy = -(targets * torch.log(classification) +
                              (1.0 - targets) * torch.log(1.0 - classification))
        cls_loss = focal_weight * bin_cross_entropy
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros(cls_loss.shape).cuda())
        losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
    return torch.stack(losses).mean(dim=0, keepdim=True)
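# --- Illustrative sketch (not from the original code) ---
# The classification loss above is an alpha-balanced focal loss,
# FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), applied elementwise to a
# clamped prediction with targets in {1, 0, -1 (ignore)}. The CPU-only helper
# below (hypothetical name focal_weight_bce; alpha/gamma defaults chosen for
# illustration) mirrors the torch.where logic without the ignore handling or
# CUDA transfers.
import torch


def focal_weight_bce(classification, targets, alpha=0.25, gamma=2.0):
    classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
    alpha_factor = torch.where(targets == 1.0,
                               torch.full_like(targets, alpha),
                               torch.full_like(targets, 1.0 - alpha))
    focal_weight = torch.where(targets == 1.0, 1.0 - classification, classification)
    focal_weight = alpha_factor * focal_weight.pow(gamma)
    bce = -(targets * torch.log(classification) +
            (1.0 - targets) * torch.log(1.0 - classification))
    return focal_weight * bce


p = torch.tensor([0.9, 0.1, 0.9])   # predicted probabilities
t = torch.tensor([1.0, 1.0, 0.0])   # targets
print(focal_weight_bce(p, t))       # the well-classified positive contributes almost nothing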
def bbox_vote(dets_NMS, dets_all, thresh=0.5):
    # Empty matrix with the same shape and type
    dets_voted = np.zeros_like(dets_NMS)

    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float))

    # for each survived box
    for i, det in enumerate(dets_NMS):
        dets_overlapped = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert (len(dets_overlapped) > 0)

        boxes = dets_overlapped[:, 0:4]
        scores = dets_overlapped[:, 4]

        out_box = np.dot(scores, boxes)

        dets_voted[i][0:4] = out_box / sum(scores)  # Weighted bounding boxes
        dets_voted[i][4] = det[4]  # Keep the original score

        # Weighted scores (if enabled)
        if cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE > 1:
            n_agreement = cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE
            w_empty = cfg.TEST.BBOX_VOTE_WEIGHT_EMPTY

            n_detected = len(scores)

            if n_detected >= n_agreement:
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                new_score = np.average(scores) * (
                    n_detected * 1.0 + (n_agreement - n_detected) * w_empty) / n_agreement

            dets_voted[i][4] = min(new_score, dets_voted[i][4])

    return dets_voted
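# --- Illustrative worked example (hypothetical numbers, not from the original code) ---
# bbox_vote replaces each NMS survivor's coordinates with the score-weighted
# average of all detections that overlap it by at least `thresh`; the two-box
# case below shows the arithmetic in isolation.
import numpy as np

boxes = np.array([[10., 10., 50., 50.],
                  [12., 12., 54., 54.]])
scores = np.array([0.9, 0.6])
voted = np.dot(scores, boxes) / scores.sum()
print(voted)  # [10.8 10.8 51.6 51.6]: pulled toward the higher-scoring detection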
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[16, ],
                        anchor_scales=[16, ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels: (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes
                      (may contain some transform) that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, because the numbers
                              of bgs and fgs may differ significantly
    """
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # generate the basic anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    # height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]  # image height, width and number of channels
    # Locate the anchors on the feature map and add the deltas to get the
    # anchors' true coordinates in the original image.

    # Algorithm:
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # height and width of the feature map

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between feature-map and real-image anchors
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial dimension, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # drop every anchor that extends beyond the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # keep the anchors that lie inside the image
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # At this point the anchors are ready
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialize the labels, all -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the overlap between anchors and gt boxes, used to label the anchors
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    # with x anchors and y gt_boxes this returns an (x, y) array holding the
    # overlap between every anchor and every gt box
    argmax_overlaps = overlaps.argmax(axis=1)  # (A) for each anchor, the gt box with the largest overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G) for each gt box, the anchor with the largest overlap
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label the background first: overlap below 0.3

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the anchor with the largest overlap for each gt is foreground

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # overlaps above 0.7 are foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:  # dontcare areas are not considered here for now
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # hard samples are not considered here for now
    # preclude hard samples that are highly occluded, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    # limit the number of positive samples to at most 128
    # TODO: this may need revisiting later, since with character fragments the
    # number of positive samples can be large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)  # randomly disable some positive samples
        labels[disable_inds] = -1  # set them to -1

    # subsample negative labels if we have too many
    # the total of positives and negatives is 256 with at most 128 positives;
    # if there are fewer than 128 positives, the remainder is filled with
    # negatives to reach 256 samples
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
        # print "was %s inds, disabling %s, now %s inds" % (
        #     len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # labels are assigned; now compute the rpn-box regression targets
    # --------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(
        anchors, gt_boxes[argmax_overlaps, :])  # targets are the offsets between each anchor and its gt box

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # use uniform weights for now: positives get 1, negatives get 0
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))  # +1 in the denominator guards against division by zero
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: 1 for foreground, 0 for background
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    # anchors beyond the image boundary were dropped at the start; add them back now
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)  # their targets are 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights padded with 0
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights padded with 0

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))  # reshape
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
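# --- Illustrative sketch (not from the original code) ---
# The anchor target layers in this section call an _unmap helper that is not
# defined here. Below is a minimal numpy version of what such a helper
# typically does (scatter per-inside-anchor values back to the full anchor
# set, padding the dropped anchors with `fill`); the repository's own
# implementation may differ.
import numpy as np


def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items (of size count)."""
    if len(data.shape) == 1:
        ret = np.empty((count,), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret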
def forward(self, bottom, top):
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    h = []
    w = []
    for i in range(5):
        height, width = bottom[i].data.shape[-2:]
        h.append(height)
        w.append(width)
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[5].data
    # im_info
    im_info = bottom[6].data[0, :]

    all_anchors_list = []
    inds_inside_list = []
    total_anchors = 0
    feat_strides = self._feat_stride
    ratios = self._ratios
    scales = self._scales

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                        ratios=ratios,
                                        scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height = h[feat_id]
        feat_width = w[feat_id]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
            (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float),
                                 gt_boxes.astype(np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCHSIZE == -1 else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCHSIZE == -1 else \
        cfg.TRAIN.RPN_BATCHSIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    # if balance_scale_bg:
    #     num_bg_scale = num_bg / len(feat_strides)
    #     for feat_id in range(0, len(feat_strides)):
    #         bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
    #         if len(bg_ind_scale) > num_bg_scale:
    #             disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
    #             fpn_labels[disable_inds] = -1
    # else:
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        fpn_labels[disable_inds] = -1

    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :],
            gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
        # fpn_bbox_targets[:] = bbox_transform(fpn_anchors, gt_boxes[argmax_overlaps, :4])
    # fpn_bbox_targets = (fpn_bbox_targets - np.array(cfg.TRAIN.BBOX_MEANS)) / np.array(cfg.TRAIN.BBOX_STDS)
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    fpn_bbox_outside_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(fpn_labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(fpn_labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(fpn_labels == 0))
    fpn_bbox_outside_weights[fpn_labels == 1, :] = positive_weights
    fpn_bbox_outside_weights[fpn_labels == 0, :] = negative_weights

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    bbox_outside_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        # map up to original set of anchors
        labels = _unmap(
            fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(
            fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(
            fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_outside_weights = _unmap(
            fpn_bbox_outside_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors, fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))
        bbox_outside_weights = bbox_outside_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_outside_weights = bbox_outside_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)
        bbox_outside_weight_list.append(bbox_outside_weights)
        # label.update({'label_p' + str(feat_id + feat_id_start): labels,
        #               'bbox_target_p' + str(feat_id + feat_id_start): bbox_targets,
        #               'bbox_weight_p' + str(feat_id + feat_id_start): bbox_weights})

    labels = np.concatenate(label_list, axis=1)
    bbox_targets = np.concatenate(bbox_target_list, axis=2)
    bbox_inside_weights = np.concatenate(bbox_weight_list, axis=2)
    bbox_outside_weights = np.concatenate(bbox_outside_weight_list, axis=2)

    # print bbox_targets.shape
    # print bbox_inside_weights.shape
    # print bbox_outside_weights.shape
    # print labels.shape

    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, gt_texts, gt_pair, fg_rois_per_image,
                 rois_per_image, num_classes, gt_rois_per_image):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    no_gt_size = all_rois.size(0) - gt_rois_per_image
    # print("all_rois")
    # print(all_rois)
    overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]
    texts = [gt_texts[i] for i in gt_assignment]
    pair = torch.LongTensor([int(gt_pair[i]) for i in gt_assignment]).cuda()
    pair_ = torch.FloatTensor([int(gt_pair[i]) for i in gt_assignment]).cuda()

    #####################################################################
    ##### Till now, the GT class and other info can be implemented ######
    ##### into the proposal regions.                                #####
    #####################################################################
    '''
    print("pair")
    print(pair)
    print("labels")
    print(labels)
    print("all_scores")
    print(all_scores)
    '''
    '''
    bar_inds = (labels == 9).nonzero().view(-1)
    print("bar_inds")
    print(bar_inds)
    '''
    '''
    bar_inds = ((labels == 9) + (pair == 0) == 2).nonzero().view(-1)
    # bar_inds = ((pair_ == 0)).nonzero().view(-1)
    print("bar_inds")
    print(bar_inds)
    '''

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps[:no_gt_size] >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps[:no_gt_size] < cfg.TRAIN.BG_THRESH_HI) +
               (max_overlaps[:no_gt_size] >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    '''
    if (gt_rois_per_image > 0):
        gt_inds = torch.arange(no_gt_size, gt_rois_per_image + no_gt_size)
    '''
    gt_inds = torch.arange(no_gt_size, gt_rois_per_image + no_gt_size).long().cuda()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat(
        [fg_inds[:fg_rois_per_image - gt_rois_per_image], gt_inds, bg_inds], 0)

    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    texts = [texts[i] for i in keep_inds]
    pair = torch.LongTensor([int(pair[i]) for i in keep_inds])
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    # print("after clamp")
    # print(labels)
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, texts, pair, rois, roi_scores, bbox_targets, bbox_inside_weights
def anchor_target_layer_torch(gt_boxes, gt_boxes_dc, info, all_anchors, num_anchors,
                              height, width, dev):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    # print('num anchors')
    # print(num_anchors)
    # print(im_info[1])
    # print(im_info[0])
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = torch.where(
        (all_anchors[:, 0] >= info[0] - _allowed_border) &  # width_min
        (all_anchors[:, 1] >= info[2] - _allowed_border) &  # height_min
        (all_anchors[:, 2] < info[1] + _allowed_border) &   # width_max
        (all_anchors[:, 3] < info[3] + _allowed_border)     # height_max
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # Subset of anchors within image boundary
    labels = torch.full((len(inds_inside), ), -1, dtype=torch.int64).to(device=dev)
    # labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # from utils.bbox import bbox_overlaps
    overlaps = bbox_overlaps(anchors.contiguous(), gt_boxes.contiguous())
    if cfg.TRAIN.IGNORE_DC:
        overlaps_dc = bbox_overlaps(anchors.contiguous(), gt_boxes_dc.contiguous())
        overlaps_dc_idx = torch.argwhere(overlaps_dc > cfg.TRAIN.DC_THRESH)
        labels[overlaps_dc_idx[:, 0]] = -1
    # overlaps: (N, K) overlap between boxes and query_boxes
    argmax_overlaps = overlaps.argmax(dim=1)
    # grab subset of 2D array to only get [:, max_overlap_index]
    max_overlaps = overlaps[torch.arange(len(inds_inside)).to(device=dev), argmax_overlaps]
    # max_overlaps_2 = torch.index_select(overlaps, 0, argmax_overlaps)
    gt_argmax_overlaps = overlaps.argmax(dim=0)
    # grab same subset of 2D array to get corresponding GT boxes with their max overlap counterpart
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               torch.arange(overlaps.shape[1]).to(device=dev)]
    gt_max_overlaps = torch.clamp(gt_max_overlaps, torch.finfo(torch.float32).eps, float('inf'))
    gt_argmax_overlaps = torch.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    # gt_argmax_overlaps is an index subset of the anchors that max overlap with a gt box
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    # anything else needs a large overlap as well
    nz_max_overlaps = max_overlaps.nonzero()
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = torch.where(labels == 1)[0]
    # TODO: Really, randomly select indices to disable? Why not worst ones? At least dont do this for the argmax..
    # If too many foreground entries
    if len(fg_inds) > num_fg:
        perm = torch.randperm(fg_inds.numel(), device=dev)[num_fg:]
        fg_inds_subset = fg_inds[perm]
        labels[fg_inds_subset] = -1

    # subsample negative labels if we have too many
    fg_sum = torch.sum(labels == 1)
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - fg_sum
    bg_inds = torch.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        perm = torch.randperm(bg_inds.numel(), device=dev)[num_bg:]
        bg_inds_subset = bg_inds[perm]
        labels[bg_inds_subset] = -1

    # Find target bounding boxes
    # bbox_targets = torch.zeros((len(inds_inside), 4), dtype=torch.float32).to(device=dev)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    # print('GT BOXES')
    # print(bbox_targets.shape)
    bbox_inside_weights = torch.zeros((len(inds_inside), 4), dtype=torch.float32).to(device=dev)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = torch.from_numpy(
        np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS, dtype=np.float32)).to(device=dev)

    bbox_outside_weights = torch.zeros((len(inds_inside), 4), dtype=torch.float32).to(device=dev)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling); num_examples is a max of 256 by default
        num_examples = torch.sum(labels >= 0)
        # positive_weights = torch.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = torch.ones((1, 4)) * 1.0 / num_examples
        positive_weights = 1.0 / float(num_examples)
        negative_weights = 1.0 / float(num_examples)
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # TODO: Broken
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / torch.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / torch.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    # print('bbox weights')
    # print(bbox_outside_weights)
    # print(bbox_inside_weights)

    # map up to original set of anchors
    labels = _unmap(labels.type(dtype=torch.float32), total_anchors, inds_inside, fill=-1, dev=dev)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0, dev=dev)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0, dev=dev)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0, dev=dev)

    # labels
    labels = labels.reshape((1, height, width, A)).permute(0, 3, 1, 2)
    # labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],    # all
        [0 ** 2, 32 ** 2],     # small
        [32 ** 2, 96 ** 2],    # medium
        [96 ** 2, 1e5 ** 2],   # large
        [96 ** 2, 128 ** 2],   # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],  # 512-inf
    ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
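# --- Illustrative worked example (hypothetical numbers, not from the original code) ---
# The tail of evaluate_recall turns the per-gt best-IoU vector into a recall
# curve and averages it; when num_pos equals the number of gt boxes, the
# recall at each threshold is simply the fraction of gt boxes covered at or
# above that IoU.
import numpy as np

gt_overlaps = np.array([0.35, 0.55, 0.72, 0.91])   # best IoU found for each gt box
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).mean() for t in thresholds])
print(recalls.mean())                              # the 'ar' value returned above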
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    overlaps = bbox_overlaps(boxes.astype(dtype=np.float32, copy=False),
                             boxes.astype(dtype=np.float32, copy=False))

    return (overlaps > iou_threshold).astype(np.float32)
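# --- Illustrative usage (hypothetical boxes, not from the original code) ---
# _build_graph returns a dense 0/1 adjacency matrix over the proposals: entry
# (i, j) is 1.0 whenever IoU(boxes[i], boxes[j]) exceeds iou_threshold, so for
# thresholds below 1 the diagonal is always 1.0. This assumes the module's
# bbox_overlaps is importable alongside _build_graph.
import numpy as np

boxes = np.array([[0, 0, 10, 10],
                  [1, 1, 11, 11],
                  [50, 50, 60, 60]], dtype=np.float32)
adjacency = _build_graph(boxes, iou_threshold=0.4)
# adjacency[0, 1] == 1.0 (heavy overlap); adjacency[0, 2] == 0.0 (disjoint boxes)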
def _sample_rois(all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        all_rois[:, 1:5].data,
        gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    '''
    add weights items by pseudo scores
    '''
    gt_weights = gt_weights.detach().data
    gt_weights_tile = gt_weights.view(1, -1).expand_as(overlaps)
    loss_weights = gt_weights_tile[torch.arange(0, overlaps.size(0)).long(), gt_assignment]
    # print((gt_assignment == 1).sum())
    # print(loss_weights)
    '''
    end of modification
    '''

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) +
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    '''
    modified by jiajie
    '''
    # loss_weights = loss_weights[keep_inds].contiguous() + 1.0
    loss_weights = loss_weights[keep_inds].contiguous()
    loss_weights[int(fg_rois_per_image):] = 1.0
    '''
    end of modification
    '''
    # bbox_outside_weights[labels == 1, :] = loss_weights[labels == 1].reshape(-1, 1) * positive_weights
    # bbox_outside_weights[labels == 0, :] = loss_weights[labels == 0].reshape(-1, 1) * negative_weights

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights
def _write_detect_results_file(self, all_boxes, conf_thresh, iou_thresh, net_name): #added by yuesongtian filename = cfg.ROOT_DIR + '/output/FP_Net_end2end/voc_2007_test/' + str( len(self.classes)) + '_' + net_name + '.txt' print 'Writing detection results to {}'.format(filename) if not os.path.exists(filename): os.system(r'touch %s' % filename) with open(filename, 'wt') as f: for im_ind, index in enumerate(self.image_index): f.write('{:s}'.format(index)) for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue max_gt_overlaps = self.roidb[im_ind][ 'gt_overlaps'].toarray().max(axis=1) gt_inds = np.where( (self.roidb[im_ind]['gt_classes'] == cls_ind) & (max_gt_overlaps == 1))[0] gt_boxes = self.roidb[im_ind]['boxes'][gt_inds, :] gt_areas = self.roidb[im_ind]['seg_areas'][gt_inds] valid_gt_inds = np.where((gt_areas >= 0**2) & (gt_areas <= 1e5**2))[0] gt_boxes = gt_boxes[valid_gt_inds, :] if (len(all_boxes[cls_ind][im_ind]) == 0): continue inds = np.where( all_boxes[cls_ind][im_ind][:, 4] > conf_thresh)[0] dets_ = all_boxes[cls_ind][im_ind][inds, :] if dets_ == [] or gt_boxes.shape[0] == 0: continue overlaps = bbox_overlaps(dets_.astype(np.float), gt_boxes.astype(np.float)) if overlaps.shape[0] == 1: #print 'overlaps is ', overlaps, overlaps.shape[0], dets_.shape, gt_boxes.shape argmax_overlaps = overlaps.argmax(axis=0) max_overlaps = overlaps.max(axis=0) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() if gt_ovr >= iou_thresh: box_ind = argmax_overlaps[gt_ind] f.write( ' {:.1f}({:.3f}) {:.1f} {:.1f} {:.1f} {:.1f}' .format(cls_ind, dets_[box_ind, -1], dets_[box_ind, 0] + 1, dets_[box_ind, 1] + 1, dets_[box_ind, 2] + 1, dets_[box_ind, 3] + 1)) if (gt_boxes.shape[0] == 1): f.write('*') f.write(' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1, gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1)) else: f.write('*') for gt_index in range(gt_boxes.shape[0]): f.write( ' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[gt_index, 0] + 1, gt_boxes[gt_index, 1] + 1, gt_boxes[gt_index, 2] + 1, gt_boxes[gt_index, 3] + 1)) elif overlaps.shape[0] > 1: for j in xrange(gt_boxes.shape[0]): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 
'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() if gt_ovr < 0: break assert (gt_ovr >= 0) box_ind = argmax_overlaps[gt_ind] if gt_ovr < iou_thresh: overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 continue # write box > iou_thresh to f f.write( ' {:.1f}({:.3f}) {:.1f} {:.1f} {:.1f} {:.1f}' .format(cls_ind, dets_[box_ind, -1], dets_[box_ind, 0] + 1, dets_[box_ind, 1] + 1, dets_[box_ind, 2] + 1, dets_[box_ind, 3] + 1)) # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 if (gt_boxes.shape[0] == 1): f.write('*') f.write(' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1, gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1)) else: f.write('*') for gt_index in range(gt_boxes.shape[0]): f.write( ' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[gt_index, 0] + 1, gt_boxes[gt_index, 1] + 1, gt_boxes[gt_index, 2] + 1, gt_boxes[gt_index, 3] + 1)) else: if (gt_boxes.shape[0] == 1): f.write('*') f.write(' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1, gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1)) else: f.write('*') for gt_index in range(gt_boxes.shape[0]): f.write( ' {:.1f} {:.1f} {:.1f} {:.1f}'.format( gt_boxes[gt_index, 0] + 1, gt_boxes[gt_index, 1] + 1, gt_boxes[gt_index, 2] + 1, gt_boxes[gt_index, 3] + 1)) f.write('\n') f.close() return filename
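The writer above pairs detections and ground truth with a greedy one-to-one matching: repeatedly take the best-covered gt box, record its best proposal, then invalidate both. A standalone sketch of just that matching step, assuming overlaps is a (num_dets, num_gts) IoU matrix (the helper name greedy_match is not from the original; it breaks early once the best remaining IoU falls below the threshold, which yields the same matches as the original loop):

import numpy as np

def greedy_match(overlaps, iou_thresh=0.5):
    overlaps = overlaps.copy()                      # (num_dets, num_gts)
    matches = []                                    # list of (det_ind, gt_ind, iou)
    for _ in range(overlaps.shape[1]):
        argmax_overlaps = overlaps.argmax(axis=0)   # best det for each gt
        max_overlaps = overlaps.max(axis=0)
        gt_ind = max_overlaps.argmax()              # best-covered gt
        gt_ovr = max_overlaps.max()
        if gt_ovr < iou_thresh:
            break
        det_ind = argmax_overlaps[gt_ind]
        matches.append((det_ind, gt_ind, gt_ovr))
        overlaps[det_ind, :] = -1                   # mark the det as used
        overlaps[:, gt_ind] = -1                    # mark the gt as used
    return matches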
def anchor_target_layer(gt_boxes, gt_boxes_dc, info, _feat_stride, all_anchors, num_anchors, height, width): """Same as the anchor target layer in original Fast/er RCNN """ A = num_anchors #print('num anchors') #print(num_anchors) #print(info[1]) #print(info[0]) total_anchors = all_anchors.shape[0] K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # only keep anchors inside the frame #TODO: Torchify #TODO: Subtract minimum value between GT boxes and anchors as to not get the overlaps issue (maybe also track and see it happen?) inds_inside = np.where( (all_anchors[:, 0] >= info[0] - _allowed_border) & #width_max (all_anchors[:, 1] >= info[2] - _allowed_border) & #height_min (all_anchors[:, 2] < info[1] + _allowed_border) & # width_max (all_anchors[:, 3] < info[3] + _allowed_border) # height_max )[0] # keep only inside anchors anchors = all_anchors[inds_inside, :] # label: 1 is positive, 0 is negative, -1 is dont care #Subset of anchors within image boundary labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) #from utils.bbox import bbox_overlaps overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) #np.set_printoptions(threshold=np.inf) #print('----------------------------------------------') #overlaps_trimmed = overlaps[~np.all(overlaps == 0, axis=1)] #print(overlaps_trimmed) #print('----------------------------------------------') if cfg.TRAIN.IGNORE_DC: overlaps_dc = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes_dc, dtype=np.float)) overlaps_dc_idx = np.argwhere(overlaps_dc > cfg.TRAIN.DC_THRESH) labels[overlaps_dc_idx[:, 0]] = -1 #overlaps: (N, K) overlap between boxes and query_boxes argmax_overlaps = overlaps.argmax( axis=1) #Best fiting GT for each anchor (1,N) gt_argmax_overlaps = overlaps.argmax( axis=0) #Best fitting anchor for each GT box (K,1) #grab subset of 2D array to only get [:,max_overlap_index] #max_overlaps = overlaps.take(argmax_overlaps,axis=1) #np.set_printoptions(threshold=np.inf) #print(argmax_overlaps) #print(overlaps) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] #max_overlaps = overlaps[:, argmax_overlaps] #max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] #grab same subset of 2D array to get corresponding GT boxes with their max overlap counterpart #gt_max_overlaps = overlaps[gt_argmax_overlaps, # np.arange(overlaps.shape[1])] #TODO: How the f**k does this work #gt_max_overlaps = overlaps[gt_argmax_overlaps,:] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap #gt_argmax_overlaps is an index subset of the anchors that max overlap with a gt box labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU #anything else needs a large overlap as well #TODO: Distance based overlap threshold? 
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] #TODO: Really, randomly select indices to disable? Why not worst ones? At least dont do this for the argmax.. #If too many foreground entries if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #Find target bounding boxes bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) #print('GT BOXES') #print(bbox_targets.shape) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) #Create a mask where labels == 1 bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) #Sample weighting is turned off if int(cfg.TRAIN.RPN_POSITIVE_WEIGHT) == -1: # uniform weighting of examples (given non-uniform sampling) num_examples is a max of 256 by default num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights #print('bbox weights') #print(bbox_outside_weights) #print(bbox_inside_weights) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) #labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
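_unmap is called above but not defined in this snippet. In this family of codebases it is usually a small helper that scatters the per-inside-anchor arrays back onto the full anchor set; a sketch under that assumption:

import numpy as np

def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items
    (of size count), filling the untouched slots with `fill`."""
    if len(data.shape) == 1:
        ret = np.empty((count, ), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret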
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None): """Evaluate detection proposal recall metrics. Returns: results: dictionary of results with keys 'ar': average recall 'recalls': vector recalls at each IoU overlap threshold 'thresholds': vector of IoU overlap thresholds 'gt_overlaps': vector of all ground-truth overlaps """ # Record max overlap value for each gt box # Return vector of overlap values areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3, '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} area_ranges = [[0 ** 2, 1e5 ** 2], # all [0 ** 2, 32 ** 2], # small [32 ** 2, 96 ** 2], # medium [96 ** 2, 1e5 ** 2], # large [96 ** 2, 128 ** 2], # 96-128 [128 ** 2, 256 ** 2], # 128-256 [256 ** 2, 512 ** 2], # 256-512 [512 ** 2, 1e5 ** 2], # 512-inf ] assert area in areas, 'unknown area range: {}'.format(area) area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) num_pos = 0 for i in range(self.num_images): # Checking for max_overlaps == 1 avoids including crowd annotations # (...pretty hacking :/) max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0] gt_boxes = self.roidb[i]['boxes'][gt_inds, :] gt_areas = self.roidb[i]['seg_areas'][gt_inds] valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] gt_boxes = gt_boxes[valid_gt_inds, :] num_pos += len(valid_gt_inds) if candidate_boxes is None: # If candidate_boxes is not supplied, the default is to use the # non-ground-truth boxes from this roidb non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] boxes = self.roidb[i]['boxes'][non_gt_inds, :] else: boxes = candidate_boxes[i] if boxes.shape[0] == 0: continue if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) _gt_overlaps = np.zeros((gt_boxes.shape[0])) for j in range(gt_boxes.shape[0]): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() assert (gt_ovr >= 0) # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert (_gt_overlaps[j] == gt_ovr) # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) gt_overlaps = np.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = np.arange(0.5, 0.95 + 1e-5, step) recalls = np.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps}
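bbox_overlaps, used throughout these functions, is normally a compiled (Cython/CUDA) op. For reference, a plain NumPy equivalent with the same pixel-inclusive (+1) convention; this is a sketch, not necessarily the project's actual implementation:

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """IoU between each box (N, 4) and each query box (K, 4), boxes as [x1, y1, x2, y2]."""
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        q_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                  (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw > 0:
                ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                      max(boxes[n, 1], query_boxes[k, 1]) + 1)
                if ih > 0:
                    box_area = ((boxes[n, 2] - boxes[n, 0] + 1) *
                                (boxes[n, 3] - boxes[n, 1] + 1))
                    ua = box_area + q_area - iw * ih
                    overlaps[n, k] = iw * ih / ua
    return overlaps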
def get_refine_supervision(self, refine_prob, ss_boxes, image_level_label): ''' refine_prob: num_box x 20 or num_box x 21 ss_boxes; num_box x 4 image_level_label: 1 dim vector with 20 elements ''' cls_prob = refine_prob.data.cpu().numpy() #rois = ss_boxes.numpy() roi_per_image = cfg.TRAIN.MIL_BATCHSIZE if refine_prob.shape[1] == self._num_classes + 1: cls_prob = cls_prob[:, 1:] roi_labels = np.zeros([refine_prob.shape[0], self._num_classes + 1], dtype = np.int32) # num_box x 21 roi_labels[:,0] = 1 # the 0th elements is the bg roi_weights = np.zeros((refine_prob.shape[0], 1), dtype=np.float32) # num_box x 1 weights of the rois max_score_box = np.zeros((0, 4), dtype = np.float32) max_box_score = np.zeros((0, 1), dtype = np.float32) max_box_classes = np.zeros((0, 1), dtype = np.int32) #print('ss_boxes ', ss_boxes[:5,:]) for i in range(self._num_classes): if image_level_label[0, i] == 1: cls_prob_tmp = cls_prob[:, i] max_index = np.argmax(cls_prob_tmp) max_score_box = np.concatenate((max_score_box, ss_boxes[max_index, 1:].reshape(1, -1)), axis=0) max_box_classes = np.concatenate((max_box_classes, (i+1)*np.ones((1, 1), dtype=np.int32)), axis=0) max_box_score = np.concatenate((max_box_score, cls_prob_tmp[max_index]*np.ones((1, 1), dtype=np.float32)), axis=0) #print('image_level_labels ', image_level_label) #print('max_box_class ', max_box_classes) #print('max_box_score ', max_box_score) overlaps = bbox_overlaps(ss_boxes[:,1:], max_score_box) gt_assignment = overlaps.argmax(axis=1) max_over_laps = overlaps.max(axis=1) #print('max_over_laps', max_over_laps.max()) #print('over laps', overlaps.shape) roi_weights[:, 0] = max_box_score[gt_assignment, 0] labels = max_box_classes[gt_assignment, 0] fg_inds = np.where(max_over_laps > cfg.TRAIN.MIL_FG_THRESH)[0] roi_labels[fg_inds,labels[fg_inds]] = 1 roi_labels[fg_inds, 0] = 0 bg_inds = (np.array(max_over_laps >= cfg.TRAIN.MIL_BG_THRESH_LO, dtype=np.int32) + \ np.array(max_over_laps < cfg.TRAIN.MIL_BG_THRESH_HI, dtype=np.int32)==2).nonzero()[0] if len(fg_inds) > 0 and len(bg_inds) > 0: fg_rois_num = min(cfg.TRAIN.MIL_NUM_FG, len(fg_inds)) fg_inds = fg_inds[np.random.choice(np.arange(0, len(fg_inds)), size=int(fg_rois_num), replace=False)] bg_rois_num = min(cfg.TRAIN.MIL_NUM_BG, len(bg_inds)) bg_inds = bg_inds[np.random.choice(np.arange(0, len(bg_inds)), size=int(bg_rois_num), replace=False)] elif len(fg_inds) > 0: fg_rois_num = min(cfg.TRAIN.MIL_NUM_FG, len(fg_inds)) fg_inds = fg_inds[np.random.choice(np.arange(0, len(fg_inds)), size=int(fg_rois_num), replace=False)] elif len(bg_inds) > 0: bg_rois_num = min(cfg.TRAIN.MIL_NUM_BG, len(bg_inds)) bg_inds = bg_inds[np.random.choice(np.arange(0, len(bg_inds)), size=int(bg_rois_num), replace=False)] else: import pdb pdb.set_trace() # print(len(fg_inds), len(bg_inds)) keep_inds = np.concatenate([fg_inds, bg_inds]) return roi_labels[keep_inds, :], roi_weights[keep_inds,0].reshape(-1,1), keep_inds
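The supervision built above (one-hot roi_labels plus per-RoI roi_weights) is typically consumed as a weighted cross-entropy over the refinement branch's softmax output, as in OICR-style training. A minimal sketch assuming that use (the function name and the mean reduction are assumptions):

import torch

def refine_loss(refine_prob, roi_labels, roi_weights, eps=1e-6):
    # refine_prob: (M, C+1) softmax scores for the kept RoIs
    # roi_labels:  (M, C+1) one-hot targets; roi_weights: (M, 1)
    roi_labels = torch.as_tensor(roi_labels, dtype=refine_prob.dtype, device=refine_prob.device)
    roi_weights = torch.as_tensor(roi_weights, dtype=refine_prob.dtype, device=refine_prob.device)
    log_p = torch.log(refine_prob + eps)
    # per-RoI cross entropy against the one-hot target, scaled by the pseudo score
    return -(roi_weights * (roi_labels * log_p).sum(dim=1, keepdim=True)).mean()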
def _sample_rois(all_rois, all_scores, gt_boxes, gt_masks, fg_rois_per_image, rois_per_image, num_classes): """Generate a random sample of RoIs comprising foreground and background examples. Return: - labels: (Nkp, ) - rois : (Nkp, 5), [0 x1 y1 x2 y2] - roi_scores : (Nkp, ) - bbox_targets: (Nkp, 4k) - bbox_inside_weights: (Nkp, 4k) """ # overlaps: (rois x gt_boxes) all_rois_data = all_rois.data gt_boxes_data = gt_boxes.data overlaps = bbox_overlaps(all_rois_data[:, 1:5], gt_boxes_data[:, :4]) max_overlaps, gt_assignment = overlaps.max(1) # cuda tensor labels = gt_boxes[gt_assignment, [4]] # cuda Variable # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) # Guard against the case when an image has fewer than fg_rois_per_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = ( (max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) # Small modification to the original version where we ensure a fixed number of regions are sampled if fg_inds.numel() > 0 and bg_inds.numel() > 0: fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] bg_rois_per_image = rois_per_image - fg_rois_per_image to_replace = bg_inds.numel() < bg_rois_per_image bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] elif fg_inds.numel() > 0: to_replace = fg_inds.numel() < rois_per_image fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] fg_rois_per_image = rois_per_image elif fg_inds.numel() == 0: # we always make fg_inds.numel() > 0 zeros = Variable(all_rois.data.new(gt_boxes.size(0), 1)) all_rois = torch.cat((all_rois, torch.cat( (zeros, gt_boxes[:, :-1]), 1)), 0) # not sure if it a wise appending, but anyway i am not using it all_scores = torch.cat((all_scores, zeros), 0) return _sample_rois(all_rois, all_scores, gt_boxes, gt_masks, fg_rois_per_image, rois_per_image, num_classes) # elif bg_inds.numel() > 0: # to_replace = bg_inds.numel() < rois_per_image # bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] # fg_rois_per_image = 0 else: import pdb pdb.set_trace() # The indices that we're selecting (both fg and bg) keep_inds = torch.cat([fg_inds, bg_inds], 0) # Select sampled values from various arrays: labels = labels[keep_inds].contiguous() # Clamp labels for the background RoIs to 0 labels[int(fg_rois_per_image):] = 0 rois = all_rois[keep_inds].contiguous() roi_scores = all_scores[keep_inds].contiguous() bbox_target_data = _compute_targets( rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) bbox_targets, bbox_inside_weights = \ _get_bbox_regression_labels(bbox_target_data, num_classes) # Get masks, float (num_boxes, 14, 14) # corresponding to the selected boxes mask_targets = torch.FloatTensor(fg_inds.numel(), cfg.MASK_SIZE, cfg.MASK_SIZE).cuda() mix = 0 for i in fg_inds.cpu().numpy().tolist(): roi = all_rois_data[i] # tensor [xyxyc] cropped = gt_masks[gt_assignment[i], int(roi[2]):int(roi[4]) + 1, int(roi[1]):int(roi[3]) + 1] # uint8 {0,1} cropped = imresize(cropped, (cfg.MASK_SIZE, cfg.MASK_SIZE), interp='nearest') # still uint8 
{0,1} cropped = cropped.astype(np.float32) # float32, range [0,1] mask_targets[mix, :, :] = torch.from_numpy(cropped).cuda() mix += 1 assert mask_targets.max() <= 1.0001 return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_targets
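The mask_targets produced by this sampler would normally supervise a mask head over the foreground RoIs only. A minimal sketch of that loss, assuming a per-class mask head that outputs raw logits (all names here are hypothetical):

import torch
import torch.nn.functional as F

def mask_loss(mask_pred, mask_targets, fg_labels):
    # mask_pred: (N_fg, num_classes, S, S) raw logits; mask_targets: (N_fg, S, S) in {0, 1}
    idx = torch.arange(mask_pred.size(0), device=mask_pred.device)
    pred = mask_pred[idx, fg_labels.long()]           # pick the gt-class channel per RoI
    return F.binary_cross_entropy_with_logits(pred, mask_targets)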
def forward(self, bottom, top): # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert bottom[0].data.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = bottom[0].data.shape[-2:] # GT boxes (x1, y1, x2, y2, label) gt_boxes = bottom[1].data # im_info im_info = bottom[2].data[0, :] if DEBUG: print '' print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) print 'scale: {}'.format(im_info[2]) print 'height, width: ({}, {})'.format(height, width) print 'rpn: gt_boxes.shape', gt_boxes.shape print 'rpn: gt_boxes', gt_boxes # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * self._feat_stride shift_y = np.arange(0, height) * self._feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height )[0] if DEBUG: print 'total_anchors', total_anchors print 'inds_inside', len(inds_inside) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print 'anchors.shape', anchors.shape # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) if gt_boxes.shape[0] != 0: overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 else: labels.fill(0) # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), 
replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) if gt_boxes.shape[0] != 0: bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: self._sums += bbox_targets[labels == 1, :].sum(axis=0) self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) self._counts += np.sum(labels == 1) means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means**2) print 'means:' print means print 'stdevs:' print stds # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: if gt_boxes.shape[0] != 0: print 'rpn: max max_overlap', np.max(max_overlaps) else: print 'rpn: max max_overlap', 0 print 'rpn: num_positive', np.sum(labels == 1) print 'rpn: num_negative', np.sum(labels == 0) self._fg_sum += np.sum(labels == 1) self._bg_sum += np.sum(labels == 0) self._count += 1 print 'rpn: num_positive avg', self._fg_sum / self._count print 'rpn: num_negative avg', self._bg_sum / self._count # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) top[0].reshape(*labels.shape) top[0].data[...] = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) top[1].reshape(*bbox_targets.shape) top[1].data[...] = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) assert bbox_inside_weights.shape[2] == height assert bbox_inside_weights.shape[3] == width top[2].reshape(*bbox_inside_weights.shape) top[2].data[...] = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) assert bbox_outside_weights.shape[2] == height assert bbox_outside_weights.shape[3] == width top[3].reshape(*bbox_outside_weights.shape) top[3].data[...] = bbox_outside_weights
def _sample_rois_manually(gt_boxes_origin, fg_rois_per_image, rois_per_image, num_classes, gt_truncated, im_info): """Args: gt_boxes_origin: Variable, [gt_num, 5], [x1, y1, x2, y2, class_id] fg_rois_per_image: int, 64 rois_per_image: float, 256.0 num_classes: int, 21 gt_truncated: ndarray.bool, [gt_num] """ fg_num = fg_rois_per_image rois_per_image = int(rois_per_image) gt_boxes_origin = gt_boxes_origin.data.cpu() img_width = float(im_info[0]) img_height = float(im_info[1]) """Remove truncated gt_boxes""" gt_truncated = gt_truncated.astype(int) gt_truncated = torch.from_numpy(gt_truncated) truncated_idx = (gt_truncated == 0).nonzero().view(-1) if len(truncated_idx) != 0: gt_boxes = torch.index_select(gt_boxes_origin, 0, truncated_idx) untruncted_gt_num = len(gt_boxes) """get width and height of every untruncated gt_box""" width = gt_boxes[:, 2] - gt_boxes[:, 0] # x2-x1 height = gt_boxes[:, 3] - gt_boxes[:, 1] """for every untruncated gt_box:""" for i in range(untruncted_gt_num): # get the number of fg_rois that the ith gt should generate. if i == untruncted_gt_num - 1: fg_num_per_gt = fg_rois_per_image - ( untruncted_gt_num - 1) * int( fg_rois_per_image / untruncted_gt_num) else: fg_num_per_gt = int(fg_rois_per_image / untruncted_gt_num) # get the width and height delta. delta = torch.rand(fg_num_per_gt, 4) * 0.2 - 0.1 # [-0.1, 0.1) delta = delta * torch.FloatTensor( [width[i], height[i], width[i], height[i]]) if i == 0: fg_rois = delta + gt_boxes[i, :-1] labels = torch.ones(fg_num_per_gt) * gt_boxes[i, 4] else: fg_rois = torch.cat((fg_rois, delta + gt_boxes[i, :-1])) labels = torch.cat( (labels, torch.ones(fg_num_per_gt) * gt_boxes[i, 4])) """manage the boundary""" fg_rois[:, 0] = torch.max(torch.FloatTensor([0]), fg_rois[:, 0]) fg_rois[:, 1] = torch.min(torch.FloatTensor([img_width]), fg_rois[:, 1]) fg_rois[:, 2] = torch.max(torch.FloatTensor([0]), fg_rois[:, 2]) fg_rois[:, 3] = torch.min(torch.FloatTensor([img_height]), fg_rois[:, 3]) else: fg_num = 0 fg_rois = torch.FloatTensor() gt_boxes = torch.FloatTensor() labels = torch.FloatTensor() """v3.0: generate truncated_rois""" if len(gt_boxes) != 0: truncated_rois, truncated_label, truncated_rois_num = genarate_truncated_rois( gt_boxes, fg_rois_per_image) else: truncated_rois = torch.FloatTensor() truncated_label = torch.FloatTensor() truncated_rois_num = 0 """ generate bg_rois """ bg_num = rois_per_image - fg_num - truncated_rois_num x1_bg = (torch.rand(bg_num * 2) * img_width).type(torch.FloatTensor) y1_bg = (torch.rand(bg_num * 2) * img_height).type(torch.FloatTensor) if fg_num != 0: bg_width = torch.min(width) + torch.rand( bg_num * 2) * (torch.max(width) - torch.min(width)) bg_height = torch.min(height) + torch.rand( bg_num * 2) * (torch.max(height) - torch.min(height)) else: width_origin = gt_boxes_origin[:, 2] - gt_boxes_origin[:, 0] # x2-x1 height_origin = gt_boxes_origin[:, 3] - gt_boxes_origin[:, 1] bg_width = torch.min(width_origin) + torch.rand( bg_num * 2) * (torch.max(width_origin) - torch.min(width_origin)) bg_height = torch.min(height_origin) + torch.rand( bg_num * 2) * (torch.max(height_origin) - torch.min(height_origin)) x2_bg = x1_bg + bg_width y2_bg = y1_bg + bg_height bg_rois = torch.cat((torch.unsqueeze(x1_bg, 1), torch.unsqueeze( y1_bg, 1), torch.unsqueeze(x2_bg, 1), torch.unsqueeze(y2_bg, 1)), 1) """cannot overlap with every gt""" overlaps = bbox_overlaps(bg_rois, gt_boxes_origin[:, :-1]) max_overlaps, _ = overlaps.max(1) bg_inds = (max_overlaps == 0).nonzero().view(-1) if len(bg_inds) != 0: bg_rois = 
bg_rois[bg_inds] else: # Rare case: gt too large, no bg bg_rois = torch.unsqueeze(torch.FloatTensor([10, 10, 20, 20]), 0) # manage the bound bg_inds = (bg_rois[:, 0] >= 0).numpy() & (bg_rois[:, 1] <= img_width).numpy() & \ (bg_rois[:, 2] >= 0).numpy() & (bg_rois[:, 3] <= img_height).numpy() if max(bg_inds == 0): bg_rois = torch.unsqueeze(torch.FloatTensor([10, 10, 20, 20]), 0) bg_inds = np.asarray([1]) bg_inds = torch.FloatTensor(bg_inds.astype(float)).nonzero().view(-1) """select 256-64 bg randomly""" to_replace = bg_inds.numel() < bg_num bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_num), replace=to_replace)).long()] bg_rois = bg_rois[bg_inds] """set return vars""" rois = torch.cat((fg_rois, truncated_rois, bg_rois), 0) rois = torch.cat((torch.zeros(len(rois), 1), rois), 1) # add 0s at first column. rois = Variable(rois.type(torch.cuda.FloatTensor), requires_grad=True) labels = torch.cat((labels, truncated_label, torch.zeros(bg_num))) labels = Variable(labels.type(torch.cuda.FloatTensor), requires_grad=False) roi_scores = Variable(torch.zeros(256, 1).type(torch.cuda.FloatTensor), requires_grad=True) bbox_targets = torch.zeros(256, num_classes * 4).type(torch.cuda.FloatTensor) bbox_inside_weights = torch.zeros(256, num_classes * 4).type( torch.cuda.FloatTensor) assert len(rois) == 256, "len" return labels, rois, roi_scores, bbox_targets, bbox_inside_weights """return:
def forward(self, bottom, top): assert bottom[0].data.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = bottom[0].data.shape[-2:] # GT boxes (x1, y1, x2, y2) gt_boxes = bottom[1].data # im_info im_info = bottom[2].data[0, :] # side_pos side_pos = bottom[3].data if DEBUG: print '' print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) print 'height, width: ({}, {})'.format(height, width) print 'rpn: gt_boxes.shape', gt_boxes.shape print 'rpn: gt_boxes' print gt_boxes print 'rpn: side_pos.shape', side_pos.shape print 'rpn: side_pos' print side_pos A = self._num_anchors all_anchors = self.anchor_generator.locate_anchors((height, width), self._feat_stride) total_anchors = all_anchors.shape[0] # only keep anchors inside the image inds_inside = np.where((all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0) & (all_anchors[:, 2] < im_info[1]) & # width (all_anchors[:, 3] < im_info[0]) # height )[0] if DEBUG: print 'total_anchors', total_anchors print 'inside_anchors', len(inds_inside) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print 'anchors.shape', anchors.shape # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) init_gt_argmax_overlaps = gt_argmax_overlaps gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if DEBUG: print "overlaps shape", overlaps.shape print "argmax_overlaps shape", argmax_overlaps.shape print "gt_argmax_overlaps shape", gt_argmax_overlaps.shape print "init_gt_argmax_overlaps shape", init_gt_argmax_overlaps.shape print "init_gt_argmax_overlaps" print init_gt_argmax_overlaps print "max overlaps anchors" print anchors[init_gt_argmax_overlaps] # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN_RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN_RPN_POSITIVE_OVERLAP] = 1 if DEBUG: print "before sample" print "positive anchor num", np.sum(labels == 1) print "negative anchor num", np.sum(labels == 0) # sample positive labels if we have too many num_fg = int(cfg.TRAIN_RPN_FG_FRACTION * cfg.TRAIN_RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # sample negative labels if we have too many num_bg = cfg.TRAIN_RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 if DEBUG: print "after sample" print "positive anchor num", np.sum(labels == 1) print "positive anchor", np.where(labels == 1)[0] print "negative anchor num", np.sum(labels == 0) bbox_targets = np.zeros((len(inds_inside), 2), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 2), dtype=np.float32) 
bbox_inside_weights[labels == 1, :] = np.array([1, 1]) bbox_outside_weights = np.zeros((len(inds_inside), 2), dtype=np.float32) bbox_outside_weights[labels == 1, :] = np.array([1, 1]) if DEBUG: print "before map:" print "labels.shape", labels.shape print "bbox_targets.shape", bbox_targets.shape print "bbox_inside_weights.shape", bbox_inside_weights.shape print "bbox_outside_weights.shape", bbox_outside_weights.shape # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) max_anchor_inds = inds_inside[init_gt_argmax_overlaps] if DEBUG: print "max anchors" print all_anchors[max_anchor_inds] sr_targets = np.empty((total_anchors, ), dtype=np.float32) sr_targets.fill(0) sr_anchor_inds = [] for i in range(len(side_pos)): if side_pos[i] < 0: continue inds = max_anchor_inds[i] side = side_pos[i] line_num = int(inds) / int(10 * width) for x in [-10, 0, 10]: tmp_inds = inds + x tmp_line_num = int(tmp_inds) / int(10 * width) if tmp_line_num == line_num: center = (all_anchors[tmp_inds][0] + all_anchors[tmp_inds][2]) / 2.0 if abs(center - side) > cfg.TRAIN_SIDE_REFINE_MAX: continue sr_anchor_inds.append(tmp_inds) sr_targets[tmp_inds] = (side - center) / cfg.TEXT_PROPOSALS_WIDTH sr_anchor_inds = [ x for x in sr_anchor_inds if sr_anchor_inds.count(x) == 1 ] if len(sr_anchor_inds) > cfg.TRAIN_SR_BATCH: sr_anchor_inds = npr.choice(sr_anchor_inds, size=(cfg.TRAIN_SR_BATCH), replace=False) sr_inside_weights = np.empty((total_anchors, ), dtype=np.float32) sr_inside_weights.fill(0) sr_inside_weights[sr_anchor_inds] = 1 sr_outside_weights = np.empty((total_anchors, ), dtype=np.float32) sr_outside_weights.fill(0) sr_outside_weights[sr_anchor_inds] = 1 if DEBUG: print "after map:" print "labels.shape", labels.shape print "bbox_targets.shape", bbox_targets.shape print "bbox_inside_weights.shape", bbox_inside_weights.shape print "bbox_outside_weights.shape", bbox_outside_weights.shape print "sr_targets.shape", sr_targets.shape print "sr_inside_weights.shape", sr_inside_weights.shape print "sr_outside_weights.shape", sr_outside_weights.shape print "side refinement:" print "sr_anchor_inds", sr_anchor_inds print "sr_anchor", all_anchors[sr_anchor_inds] print "sr_targets", sr_targets[sr_anchor_inds] print "sr_inside_weights", sr_inside_weights[sr_anchor_inds] print "sr_outside_weights", sr_outside_weights[sr_anchor_inds] # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) top[0].reshape(*labels.shape) top[0].data[...] = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2) top[1].reshape(*bbox_targets.shape) top[1].data[...] = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2) assert bbox_inside_weights.shape[2] == height assert bbox_inside_weights.shape[3] == width top[2].reshape(*bbox_inside_weights.shape) top[2].data[...] = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2) assert bbox_outside_weights.shape[2] == height assert bbox_outside_weights.shape[3] == width top[3].reshape(*bbox_outside_weights.shape) top[3].data[...] 
= bbox_outside_weights # sr_targets sr_targets = sr_targets \ .reshape((1, height, width, A)).transpose(0, 3, 1, 2) top[4].reshape(*sr_targets.shape) top[4].data[...] = sr_targets # sr_inside_weights sr_inside_weights = sr_inside_weights \ .reshape((1, height, width, A)).transpose(0, 3, 1, 2) assert sr_inside_weights.shape[2] == height assert sr_inside_weights.shape[3] == width top[5].reshape(*sr_inside_weights.shape) top[5].data[...] = sr_inside_weights # sr_outside_weights sr_outside_weights = sr_outside_weights \ .reshape((1, height, width, A)).transpose(0, 3, 1, 2) assert sr_outside_weights.shape[2] == height assert sr_outside_weights.shape[3] == width top[6].reshape(*sr_outside_weights.shape) top[6].data[...] = sr_outside_weights
def prepare_roidb(imdb): """Enrich the imdb's roidb by adding some derived quantities that are useful for training. This function precomputes the maximum overlap, taken over ground-truth boxes, between each ROI and each ground-truth box. The class with maximum overlap is also recorded. """ cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: imdb._roidb = cPickle.load(fid) print '{} gt roidb prepared loaded from {}'.format( imdb.name, cache_file) return roidb = imdb.roidb for i in xrange(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) boxes = roidb[i]['boxes'] labels = roidb[i]['gt_classes'] info_boxes = np.zeros((0, 18), dtype=np.float32) if boxes.shape[0] == 0: roidb[i]['info_boxes'] = info_boxes continue # compute grid boxes s = PIL.Image.open(imdb.image_path_at(i)).size image_height = s[1] image_width = s[0] boxes_grid, cx, cy = get_boxes_grid(image_height, image_width) # for each scale for scale_ind, scale in enumerate(cfg.TRAIN.SCALES): boxes_rescaled = boxes * scale # compute overlap overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float)) max_overlaps = overlaps.max(axis=1) argmax_overlaps = overlaps.argmax(axis=1) max_classes = labels[argmax_overlaps] # select positive boxes fg_inds = [] for k in xrange(1, imdb.num_classes): fg_inds.extend( np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) if len(fg_inds) > 0: gt_inds = argmax_overlaps[fg_inds] # bounding box regression targets gt_targets = _compute_targets(boxes_grid[fg_inds, :], boxes_rescaled[gt_inds, :]) # scale mapping for RoI pooling scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind] scale_map = cfg.TRAIN.SCALES[scale_ind_map] # contruct the list of positive boxes # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target) info_box = np.zeros((len(fg_inds), 18), dtype=np.float32) info_box[:, 0] = cx[fg_inds] info_box[:, 1] = cy[fg_inds] info_box[:, 2] = scale_ind info_box[:, 3:7] = boxes_grid[fg_inds, :] info_box[:, 7] = scale_ind_map info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale info_box[:, 12] = labels[gt_inds] info_box[:, 14:] = gt_targets info_boxes = np.vstack((info_boxes, info_box)) roidb[i]['info_boxes'] = info_boxes with open(cache_file, 'wb') as fid: cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) print 'wrote gt roidb prepared to {}'.format(cache_file)
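Each row of info_boxes built above packs several fields into 18 columns. A small hypothetical named-index helper makes that layout explicit (column 13 is the gt_sublabel slot mentioned in the comment, left at zero by this snippet):

# Hypothetical constants describing the info_boxes row layout used above.
INFO_CX, INFO_CY, INFO_SCALE_IND = 0, 1, 2
INFO_BOX = slice(3, 7)            # grid box at the sampled scale
INFO_SCALE_IND_MAP = 7
INFO_BOX_MAP = slice(8, 12)       # grid box mapped to the RoI-pooling scale
INFO_GT_LABEL = 12
INFO_GT_SUBLABEL = 13             # unused here, stays zero
INFO_TARGETS = slice(14, 18)      # bbox regression targets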
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): """Same as the anchor target layer in original Fast/er RCNN """ A = num_anchors total_anchors = all_anchors.shape[0] K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] # keep only inside anchors anchors = all_anchors[inds_inside, :] # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice( fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice( bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) # only the positive ones have regression targets bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = ( cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ( (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = 
_unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap( bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap( bbox_outside_weights, total_anchors, inds_inside, fill=0) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
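_compute_targets(anchors, gt_boxes[argmax_overlaps, :]) above produces the usual (dx, dy, dw, dh) deltas. For reference, the standard delta encoding these helpers typically wrap; this is a sketch of the common bbox_transform, not necessarily this repository's exact version:

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    # widths/heights/centers of the example (anchor) boxes
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
    # widths/heights/centers of the ground-truth boxes
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    # normalized center offsets and log-scale size ratios
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()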
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.
    1. filter the anchors  2. compute IoU  3. label positives/negatives by IoU
    4. keep 256 labels  5. compute the regression targets
    6. map back to the original 19494 anchors
    """
    A = num_anchors  # 9
    total_anchors = all_anchors.shape[0]  # 19494 anchors
    K = total_anchors / num_anchors
    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # height=57, width=38
    # only keep anchors inside the image: indices whose coordinates satisfy
    # (x1 >= 0, y1 >= 0, x2 < W, y2 < H)
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # overlaps[inds_inside, gt]
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)  # indices, shape [inds_inside]
    max_overlaps = overlaps[np.arange(len(inds_inside)),
                            argmax_overlaps]  # row-wise max: which gt box each anchor belongs to
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # column-wise argmax, shape [gt]
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]  # column-wise max: best anchor for each gt, shape [gt]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]  # row indices whose overlap equals gt_max_overlaps
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives: rows entirely below 0.3 get a negative label,
        # i.e. this box does not overlap any gt enough
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # fg label: for each gt, the anchor with highest overlap; the best row is always positive
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU; rows whose overlap exceeds 0.7 are also positive
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # subsample positive labels if we have too many; foreground count is at most half the batch size
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]  # indices of the foreground anchors
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1  # surplus foreground becomes don't-care
    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1  # surplus background becomes don't-care
    # RPN_BATCHSIZE labels remain
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bounding-box regression between each kept anchor and its max-IoU gt
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets [1.0, 1.0, 1.0, 1.0]
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)  # 256
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    # initialize the weights used when training the rest of the network: [1.0, 1.0, 1.0, 1.0] / num_examples
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # [label, 19494, 7061,,-1]
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
    # mapped back to the original 19494 anchors
    # labels
    labels = labels.reshape(
        (1, height, width, A)).transpose(0, 3, 1, 2)  # [1,A,height,width]
    labels = labels.reshape((1, 1, A * height, width))  # [1,1,A * height,width]
    rpn_labels = labels
    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))  # [1,height,width,9*4]
    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))  # [1,height,width,9*4]
    rpn_bbox_inside_weights = bbox_inside_weights
    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))  # [1,height,width,9*4]
    rpn_bbox_outside_weights = bbox_outside_weights
    # labels [1,1,A*height,width]; regression targets [1,height,width,9*4];
    # inside weights [1,height,width,9*4]; outside weights [1,height,width,9*4]
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): """Same as the anchor target layer in original Fast/er RCNN """ A = num_anchors total_anchors = all_anchors.shape[0] K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] # keep only inside anchors anchors = all_anchors[inds_inside, :] # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) # only the positive ones have regression targets bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = 
_unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def forward(self, classifications, regressions, anchors, annotations,iou_thres=0.5): cls_losses = [] reg_losses = [] batch_size = classifications.shape[0] all_pred_boxes = self.box_coder.decode(anchors, regressions, mode='xywht') for j in range(batch_size): classification = classifications[j, :, :] regression = regressions[j, :, :] bbox_annotation = annotations[j, :, :] bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1] pred_boxes = all_pred_boxes[j, :, :] if bbox_annotation.shape[0] == 0: cls_losses.append(torch.tensor(0).float().cuda()) reg_losses.append(torch.tensor(0).float().cuda()) continue classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) indicator = bbox_overlaps( min_area_square(anchors[j, :, :]), min_area_square(bbox_annotation[:, :-1]) ) ious = rbox_overlaps( anchors[j, :, :].cpu().numpy(), bbox_annotation[:, :-1].cpu().numpy(), indicator.cpu().numpy(), thresh=1e-1 ) if not torch.is_tensor(ious): ious = torch.from_numpy(ious).cuda() iou_max, iou_argmax = torch.max(ious, dim=1) positive_indices = torch.ge(iou_max, iou_thres) max_gt, argmax_gt = ious.max(0) if (max_gt < iou_thres).any(): positive_indices[argmax_gt[max_gt < iou_thres]]=1 # cls loss cls_targets = (torch.ones(classification.shape) * -1).cuda() cls_targets[torch.lt(iou_max, iou_thres - 0.1), :] = 0 num_positive_anchors = positive_indices.sum() assigned_annotations = bbox_annotation[iou_argmax, :] cls_targets[positive_indices, :] = 0 cls_targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1 alpha_factor = torch.ones(cls_targets.shape).cuda() * self.alpha alpha_factor = torch.where(torch.eq(cls_targets, 1.), alpha_factor, 1. - alpha_factor) focal_weight = torch.where(torch.eq(cls_targets, 1.), 1. - classification, classification) focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) bin_cross_entropy = -(cls_targets * torch.log(classification+1e-6) + (1.0 - cls_targets) * torch.log(1.0 - classification+1e-6)) cls_loss = focal_weight * bin_cross_entropy cls_loss = torch.where(torch.ne(cls_targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) cls_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0)) # reg loss if positive_indices.sum() > 0: all_rois = anchors[j, positive_indices, :] gt_boxes = assigned_annotations[positive_indices, :] reg_targets = self.box_coder.encode(all_rois, gt_boxes) reg_loss = self.criteron(regression[positive_indices, :], reg_targets) reg_losses.append(reg_loss) if not torch.isfinite(reg_loss) : import ipdb; ipdb.set_trace() else: reg_losses.append(torch.tensor(0).float().cuda()) loss_cls = torch.stack(cls_losses).mean(dim=0, keepdim=True) loss_reg = torch.stack(reg_losses).mean(dim=0, keepdim=True) return loss_cls, loss_reg
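The classification branch above applies a focal loss to sigmoid-style probabilities, with ignore entries (-1) zeroed out. Isolating just the focal weighting for clarity; the alpha/gamma defaults here are the common RetinaNet values, not necessarily this module's settings:

import torch

def focal_bce(p, targets, alpha=0.25, gamma=2.0, eps=1e-6):
    # p: predicted probabilities in (0, 1); targets: {0, 1}; the caller masks out -1 entries
    alpha_factor = torch.where(targets == 1., torch.full_like(p, alpha), torch.full_like(p, 1. - alpha))
    focal_weight = torch.where(targets == 1., 1. - p, p)
    focal_weight = alpha_factor * focal_weight.pow(gamma)
    bce = -(targets * torch.log(p + eps) + (1. - targets) * torch.log(1. - p + eps))
    return focal_weight * bce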
def _sample_rois(all_rois, all_scores, all_anchors_3d, gt_boxes, true_gt_boxes,
                 gt_boxes_dc, fg_rois_per_frame, rois_per_frame, num_classes,
                 num_bbox_target_elem):
    """Generate a random sample of RoIs comprising foreground and background examples.
    This provides the 'best-case scenario' target for the proposal layer.

    Arguments:
        all_rois          -> all RoIs generated by the RPN (Nx5) where dim1 = [k, x1, y1, x2, y2]
        all_scores        -> predicted softmax value of the winning class for each RoI, generated by the RPN
        all_anchors_3d    -> 3D anchor associated with each RoI
        gt_boxes          -> all gt boxes (Nx5) where dim1 = [x1, y1, x2, y2, k]
        true_gt_boxes     -> all gt boxes in 3D form (Nx8) where dim1 = [xc, yc, zc, l, w, h, ry, k]
        gt_boxes_dc       -> bounding boxes covering don't-care areas (Nx4)
        fg_rois_per_frame -> maximum number of foreground RoIs to submit to the 2nd stage
        rois_per_frame    -> total number of RoIs (foreground + background) to sample per frame
    """
    max_overlaps_dc = torch.tensor([])
    # Remove all RoIs that cover don't-care areas
    if cfg.TRAIN.IGNORE_DC and list(gt_boxes_dc.size())[0] > 0:
        # NxK output: N = num RoIs, K = num don't-care boxes in the frame
        overlaps_dc = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes_dc[:, :4].data)
        # Max overlap of each RoI with any don't-care box
        max_overlaps_dc, _ = overlaps_dc.max(1)
        dc_inds = (max_overlaps_dc < cfg.TRAIN.DC_THRESH).nonzero().view(-1)
        dc_filtered_rois = all_rois[dc_inds, :]
        dc_filtered_scores = all_scores[dc_inds, :]
        dc_filtered_anchors_3d = all_anchors_3d[dc_inds, :]
    else:
        dc_filtered_rois = all_rois
        dc_filtered_scores = all_scores
        dc_filtered_anchors_3d = all_anchors_3d

    # overlaps: (rois x gt_boxes); NxK output with N = num RoIs, K = num gt boxes in the frame
    overlaps = bbox_overlaps(dc_filtered_rois[:, 1:5].data, gt_boxes[:, :4].data)
    # Max overlap of each RoI with any gt box, and the index of that gt box
    max_overlaps, gt_assignment = overlaps.max(1)
    # Indexing with [gt_assignment, [4]] gathers, for each RoI, column 4 (the class)
    # of its assigned gt box
    labels = gt_boxes[gt_assignment, [4]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)).nonzero().view(-1)

    # Small modification to the original version: ensure a fixed number of regions are sampled,
    # guarding against frames with fewer than fg_rois_per_frame foreground RoIs
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_frame = min(fg_rois_per_frame, fg_inds.numel())
        fg_inds = fg_inds[torch_choice(fg_inds.numel(), int(fg_rois_per_frame),
                                       gt_boxes.device, to_replace=False)]
        bg_rois_per_frame = rois_per_frame - fg_rois_per_frame
        # Sample background indices with replacement if there are too few of them
        to_replace = bg_inds.numel() < bg_rois_per_frame
        bg_inds = bg_inds[torch_choice(bg_inds.numel(), int(bg_rois_per_frame),
                                       gt_boxes.device, to_replace=to_replace)]
    elif fg_inds.numel() > 0:
        # Only foreground RoIs were generated
        to_replace = fg_inds.numel() < rois_per_frame
        fg_inds = fg_inds[torch_choice(fg_inds.numel(), int(rois_per_frame),
                                       gt_boxes.device, to_replace=to_replace)]
        fg_rois_per_frame = rois_per_frame
    elif bg_inds.numel() > 0:
        # Only background RoIs were generated
        to_replace = bg_inds.numel() < rois_per_frame
        bg_inds = bg_inds[torch_choice(bg_inds.numel(), int(rois_per_frame),
                                       gt_boxes.device, to_replace=to_replace)]
        fg_rois_per_frame = 0
    else:
        # Neither foreground nor background RoIs exist; drop into the debugger
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_frame):] = 0
    rois = dc_filtered_rois[keep_inds].contiguous()
    roi_scores = dc_filtered_scores[keep_inds].contiguous()
    anchors_3d = dc_filtered_anchors_3d[keep_inds].contiguous()

    # bbox_target_data holds the regression deltas (with the class label in column 0)
    if cfg.NET_TYPE == 'lidar':
        # TODO: Multiple anchors??
        bbox_target_data = _compute_lidar_targets(
            rois[:, 1:5].data, anchors_3d.data,
            true_gt_boxes[gt_assignment[keep_inds]][:, :-1].data, labels.data)
    elif cfg.NET_TYPE == 'image':
        bbox_target_data = _compute_targets(
            rois[:, 1:5].data,
            gt_boxes[gt_assignment[keep_inds]][:, :4].data,
            labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes, num_bbox_target_elem)

    return labels, rois, anchors_3d, roi_scores, bbox_targets, bbox_inside_weights
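# The sampler above relies on a torch_choice helper that is not defined in this file.
# Inferred purely from its call sites, torch_choice(n, size, device, to_replace) appears
# to return `size` indices drawn from range(n); the sketch below is one plausible
# implementation under that assumption, not the actual helper used by the codebase.
import torch

def torch_choice(n, size, device, to_replace=False):
    """Sample `size` indices from range(n), with or without replacement."""
    if to_replace:
        # Sampling with replacement: independent uniform draws
        return torch.randint(0, n, (size,), device=device)
    # Sampling without replacement: take the first `size` entries of a random permutation
    return torch.randperm(n, device=device)[:size]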
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, parsing_labels=None):
    """Generate a random sample of RoIs comprising foreground and background examples."""
    # overlaps: (rois x gt_boxes), e.g. (2000, 15) -- IoU between every RoI and every gt box
    overlaps = bbox_overlaps(
        all_rois[:, 1:5].data,
        gt_boxes[:, :4].data)
    # For each RoI, take the gt box with the highest IoU as its target;
    # max_overlaps is the IoU between each RoI and its assigned gt box
    max_overlaps, gt_assignment = overlaps.max(1)
    # Class label assigned to each RoI; gt_boxes is (num_gt, 5)
    labels = gt_boxes[gt_assignment, [4]]
    if cfg.SUB_CATEGORY:
        sub_labels = gt_boxes[gt_assignment, [5]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI),
    # i.e. RoIs with IoU in [0.1, 0.5) are treated as background
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)).nonzero().view(-1)

    # Small modification to the original version: ensure a fixed number of regions are sampled,
    # guarding against the case when an image has fewer than fg_rois_per_image foreground RoIs
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        # Neither foreground nor background RoIs exist; drop into the debugger
        import pdb
        pdb.set_trace()

    if cfg.DO_PARSING:
        mask_rois = all_rois[fg_inds]           # e.g. (64, 5)
        mask_cls_labels = labels[fg_inds]
        assert parsing_labels.size(0) == 1
        # parsing_labels (48, 320, 320) -> (48, 1, 28, 28)
        # TODO:
        # parsing_labels = parsing_labels[0][gt_assignment[fg_inds], :, :]
        # gt_assignment only gives the index of the assigned gt box, while the label comes
        # from the fifth column of that gt box, so gt box i does not necessarily have label i
        parsing_labels = parsing_labels[0][labels.data.long()[fg_inds], :, :]  # (batch, channel, h, w), batch == 1
        mask_parsing_labels = gen_mask_parsing_labels(parsing_labels, mask_rois)
        mask_unit = {}
        mask_unit['mask_rois'] = mask_rois
        mask_unit['mask_cls_labels'] = mask_cls_labels
        mask_unit['mask_parsing_labels'] = mask_parsing_labels

    # The indices that we're selecting (both fg and bg),
    # e.g. the 256 RoIs (fg + bg) chosen out of the 2000 proposals
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()     # class labels of the selected RoIs, (256,)
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    if cfg.SUB_CATEGORY:
        sub_labels = sub_labels[keep_inds].contiguous()
        sub_labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()          # keep only the selected RoIs
    roi_scores = all_scores[keep_inds].contiguous()  # the RPN's predicted objectness score for each selected RoI

    # Feed the selected RoIs, the coordinates of their assigned gt boxes and the matched
    # classes into _compute_targets:
    #   RoI coordinates:        rois[:, 1:5].data                         (256, 4)
    #   matched gt coordinates: gt_boxes[gt_assignment[keep_inds]][:, :4] (256, 4)
    #   matched classes:        labels.data                               (256,)
    # Returns (256, 5): the class plus 4 regression values
    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    if cfg.SUB_CATEGORY:
        if cfg.DO_PARSING:
            return labels, sub_labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_unit
        else:
            return labels, sub_labels, rois, roi_scores, bbox_targets, bbox_inside_weights
    else:
        if cfg.DO_PARSING:
            return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_unit
        else:
            return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
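# Both samplers above hand their (class, tx, ty, tw, th) rows to _get_bbox_regression_labels,
# which is defined elsewhere. The sketch below shows the conventional py-faster-rcnn-style
# expansion into per-class target columns, using numpy arrays for simplicity; it is an
# assumption about what that helper does here (the `weights` default stands in for
# cfg.TRAIN.BBOX_INSIDE_WEIGHTS), not its actual implementation in this codebase.
import numpy as np

def _get_bbox_regression_labels_sketch(bbox_target_data, num_classes,
                                       weights=(1.0, 1.0, 1.0, 1.0)):
    """Expand Nx5 [cls, tx, ty, tw, th] rows into Nx(4*num_classes) targets plus
    inside weights that are non-zero only in the assigned class's four columns."""
    clss = bbox_target_data[:, 0].astype(np.int64)
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        start = 4 * clss[ind]
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = weights
    return bbox_targets, bbox_inside_weights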