def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image: for each RoI,
    find its assigned ground-truth box and compute the regression offsets
    to it."""
    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
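# All of the snippets in this section lean on a `bbox_overlaps(boxes, query_boxes)`
# helper (usually a compiled Cython extension) that returns a pairwise IoU matrix.
# Below is a minimal NumPy sketch of the semantics assumed here, for reference only;
# it uses the classic "+1" pixel convention and corner-format (x1, y1, x2, y2) boxes,
# whereas the tracking snippets further down pass (x, y, w, h) boxes to their own helper.
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between boxes (N, 4) and query_boxes (K, 4); returns (N, K)."""
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    # Intersection rectangle, broadcast to shape (N, K)
    ix1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
    iw = np.clip(ix2 - ix1 + 1, 0, None)
    ih = np.clip(iy2 - iy1 + 1, 0, None)
    inter = iw * ih
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    return inter / (areas[:, None] + query_areas[None, :] - inter)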
def evaluate_signal_proposal(p_bbox_list, g_bbox_list, threshold):
    """Evaluate single-class proposal performance.

    :param p_bbox_list: predicted boxes, shape [n, 4] as
        [left_top_x, left_top_y, right_bottom_x, right_bottom_y]
    :param g_bbox_list: ground-truth boxes, shape [n, 4]
    :param threshold: IoU threshold for counting a prediction as correct
    :return: (precision, recall)
    """
    overlaps = bbox_overlaps(np.ascontiguousarray(p_bbox_list, dtype=np.float),
                             np.ascontiguousarray(g_bbox_list, dtype=np.float))

    # precision: fraction of predictions whose best IoU reaches the threshold
    max_p_overlaps = np.max(overlaps, axis=1)
    filter = np.where(max_p_overlaps >= threshold)
    precision_TP = len(filter[0])
    precision = float(precision_TP) / float(len(p_bbox_list))

    # recall: fraction of ground-truth boxes matched by at least one accepted
    # prediction (count unique gt indices so that several predictions matching
    # the same gt box are not double-counted)
    filted_overlaps = overlaps[filter[0]]
    max_index = np.argmax(filted_overlaps, axis=1)
    recall = float(len(np.unique(max_index))) / float(len(g_bbox_list))
    return precision, recall
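# Hypothetical toy call of evaluate_signal_proposal (assumes the function and the
# module-level bbox_overlaps are importable): three predictions against two gt boxes.
import numpy as np

pred = np.array([[10, 10, 50, 50], [60, 60, 100, 100], [200, 200, 240, 240]], dtype=float)
gt = np.array([[12, 12, 52, 52], [65, 58, 102, 98]], dtype=float)
precision, recall = evaluate_signal_proposal(pred, gt, 0.5)
# Two of the three predictions reach IoU 0.5 and both gt boxes are covered,
# so precision ~= 0.67 and recall == 1.0.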
def report(self, bbox):
    """Called by the tracker to report a result. `bbox` should be in
    (x, y, w, h) form."""
    gt = self._ground_truth[self._cur - 1]
    # Results are reported one box at a time, so take the first (and only)
    # entry of the returned overlaps
    overlap = bbox_overlaps([gt], [bbox])[0]
    self._overlaps[self._video_name].append(overlap)
def create_roidb_from_box_list(self, box_list, gt_roidb): assert len(box_list) == self.num_images, \ 'Number of boxes must match number of ground-truth images' roidb = [] for i in range(self.num_images): boxes = box_list[i] num_boxes = boxes.shape[0] overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: gt_boxes = gt_roidb[i]['boxes'] gt_classes = gt_roidb[i]['gt_classes'] gt_overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) argmaxes = gt_overlaps.argmax(axis=1) maxes = gt_overlaps.max(axis=1) I = np.where(maxes > 0)[0] overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] overlaps = scipy.sparse.csr_matrix(overlaps) roidb.append({ 'boxes': boxes, 'gt_classes': np.zeros((num_boxes, ), dtype=np.int32), 'gt_overlaps': overlaps, 'flipped': False, 'seg_areas': np.zeros((num_boxes, ), dtype=np.float32), }) return roidb
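# The per-class overlap matrix built above is stored as a scipy CSR matrix. A small
# illustrative sketch (toy shapes) of how consumers typically read it back, mirroring
# the access pattern used by evaluate_recall further down.
import numpy as np
import scipy.sparse

dense = np.zeros((3, 4), dtype=np.float32)   # (num_boxes, num_classes)
dense[0, 2] = 0.8                            # box 0 overlaps a class-2 gt with IoU 0.8
gt_overlaps = scipy.sparse.csr_matrix(dense)

max_overlaps = gt_overlaps.toarray().max(axis=1)     # array([0.8, 0. , 0. ])
max_classes = gt_overlaps.toarray().argmax(axis=1)   # array([2, 0, 0])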
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): """Generate a random sample of RoIs comprising foreground and background examples. """ # overlaps: (rois x gt_boxes) overlaps = bbox_overlaps( np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) gt_assignment = overlaps.argmax(axis=1) max_overlaps = overlaps.max(axis=1) labels = gt_boxes[gt_assignment, 4] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0] # Guard against the case when an image has fewer than fg_rois_per_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) & (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0] print('max_overlaps', max_overlaps) print('fg_inds', fg_inds) print('bg_inds', bg_inds) # Small modification to the original version where we ensure a fixed number of regions are sampled if fg_inds.size > 0 and bg_inds.size > 0: fg_rois_per_image = min(fg_rois_per_image, fg_inds.size) fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False) bg_rois_per_image = rois_per_image - fg_rois_per_image to_replace = bg_inds.size < bg_rois_per_image bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace) elif fg_inds.size > 0: to_replace = fg_inds.size < rois_per_image fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace) fg_rois_per_image = rois_per_image elif bg_inds.size > 0: to_replace = bg_inds.size < rois_per_image bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace) fg_rois_per_image = 0 else: import pdb pdb.set_trace() # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Select sampled values from various arrays: labels = labels[keep_inds] # Clamp labels for the background RoIs to 0 labels[int(fg_rois_per_image):] = 0 rois = all_rois[keep_inds] roi_scores = all_scores[keep_inds] bbox_target_data = _compute_targets( rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) bbox_targets, bbox_inside_weights = \ _get_bbox_regression_labels(bbox_target_data, num_classes) return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
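# _get_bbox_regression_labels is not shown in this section. In Faster R-CNN style
# codebases it conventionally expands the (N, 5) rows of (class, dx, dy, dw, dh)
# produced by _compute_targets into the (N, 4 * num_classes) layout expected by a
# class-specific regression head. A hedged NumPy sketch of that assumed behaviour:
import numpy as np

def _get_bbox_regression_labels_sketch(bbox_target_data, num_classes):
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        start = 4 * int(clss[ind])
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.0  # cfg usually supplies these weights
    return bbox_targets, bbox_inside_weights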
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): """Generate a random sample of RoIs comprising foreground and background examples. """ # overlaps: (rois x gt_boxes) overlaps = bbox_overlaps( all_rois[:, 1:5].data, gt_boxes[:, :4].data) max_overlaps, gt_assignment = overlaps.max(1) labels = gt_boxes[gt_assignment, [4]] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) # Guard against the case when an image has fewer than fg_rois_per_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = (((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)) == 2).nonzero().view(-1) # Small modification to the original version where we ensure a fixed number of regions are sampled if fg_inds.numel() > 0 and bg_inds.numel() > 0: fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] bg_rois_per_image = rois_per_image - fg_rois_per_image to_replace = bg_inds.numel() < bg_rois_per_image bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] elif fg_inds.numel() > 0: to_replace = fg_inds.numel() < rois_per_image fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] fg_rois_per_image = rois_per_image elif bg_inds.numel() > 0: to_replace = bg_inds.numel() < rois_per_image bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] fg_rois_per_image = 0 else: import pdb pdb.set_trace() # The indices that we're selecting (both fg and bg) keep_inds = torch.cat([fg_inds, bg_inds], 0) # Select sampled values from various arrays: labels = labels[keep_inds].contiguous() # Clamp labels for the background RoIs to 0 labels[int(fg_rois_per_image):] = 0 rois = all_rois[keep_inds].contiguous() roi_scores = all_scores[keep_inds].contiguous() bbox_target_data = _compute_targets( rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) bbox_targets, bbox_inside_weights = \ _get_bbox_regression_labels(bbox_target_data, num_classes) return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def uniform_aspect_sample(im, bbox, params, num, stype): assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form." assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0." assert len(params) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format(len(params)) assert num > 0, "Number of samples should be larger than 0." im_shape = im.shape im_w = im_shape[1] im_h = im_shape[0] # Calculate average of width and height centerx = bbox[0] + bbox[2] / 2. centery = bbox[1] + bbox[3] / 2. xrand = params[0] * bbox[2] * (npr.rand(num, 1) * 2 - 1) yrand = params[1] * bbox[3] * (npr.rand(num, 1) * 2 - 1) wrand = bbox[2] * (1.05 ** (npr.rand(num, 1) * 4 - 2)) hrand = bbox[3] * (1.05 ** (npr.rand(num, 1) * 4 - 2)) ws = wrand * (1.05 ** npr.rand(num, 1)) hs = hrand * (1.05 ** npr.rand(num, 1)) bboxes = [] for i in range(num): cx = centerx + xrand[i, 0] cy = centery + yrand[i, 0] hw = ws[i, 0] / 2. hh = hs[i, 0] / 2. box = ( max(0, int(cx - hw)), max(0, int(cy - hh)), min(im_w, int(cx + hw)), min(im_h, int(cy + hh)) ) sample = (box[0], box[1], box[2] - box[0], box[3] - box[1]) if int(sample[2]) <= 0 or int(sample[3]) <= 0: continue overlap = bbox_overlaps([bbox], [sample])[0] if overlap > params[3]: bboxes.append({ 'img': im, 'box': sample, 'label': 1, 'overlap': overlap }) elif overlap < params[4]: bboxes.append({ 'img': im, 'box': sample, 'label': 0, 'overlap': overlap }) return bboxes
def fusion_target(rois, gt_labels, gt_boxes, gt_boxes3d): CFG = EasyDict() CFG.TRAIN = EasyDict() CFG.TRAIN.RCNN_BATCH_SIZE = 128 CFG.TRAIN.RCNN_FG_FRACTION = 0.25 CFG.TRAIN.RCNN_FG_THRESH_LO = 0.5 # Include "ground-truth" in the set of candidate rois rois = rois.reshape(-1, 5) # Proposal (i, x1, y1, x2, y2) coming from RPN num = len(gt_boxes) zeros = np.zeros((num, 1), dtype=np.float32) extended_rois = np.vstack((rois, np.hstack((zeros, gt_boxes)))) assert np.all( extended_rois[:, 0] == 0), 'Only single image batches are supported' rois_per_image = CFG.TRAIN.RCNN_BATCH_SIZE fg_rois_per_image = np.round(CFG.TRAIN.RCNN_FG_FRACTION * rois_per_image) # overlaps: (rois x gt_boxes) overlaps = bbox_overlaps( np.ascontiguousarray(extended_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) max_overlaps = overlaps.max(axis=1) gt_assignment = overlaps.argmax(axis=1) labels = gt_labels[gt_assignment] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= CFG.TRAIN.RCNN_FG_THRESH_LO)[0] # Select false positive fp_inds = np.where((max_overlaps < 0.01))[0] # The indices that we're selecting (both fg and bg) keep = np.append(fg_inds, fp_inds) rois = extended_rois[keep] labels = labels[keep] labels[fg_inds.size:] = 0 gt_boxes3d = gt_boxes3d[gt_assignment[keep]] et_boxes = rois[:, 1:5] et_boxes3d = top_box_to_box3d(et_boxes) targets = box3d_transform(et_boxes3d, gt_boxes3d) targets[np.where(labels == 0), :, :] = 0 return rois, labels, targets
def evaluate_signal_bbox(p_bbox_list, g_bbox_list, threshold):
    """Evaluate single-class detection recall.

    :param p_bbox_list: predicted boxes, shape [n, 4] as
        [left_top_x, left_top_y, right_bottom_x, right_bottom_y]
    :param g_bbox_list: ground-truth boxes, shape [n, 4]
    :param threshold: IoU threshold for counting a ground-truth box as recalled
    :return: recall
    """
    overlaps = bbox_overlaps(np.ascontiguousarray(p_bbox_list, dtype=np.float),
                             np.ascontiguousarray(g_bbox_list, dtype=np.float),
                             1)
    # Best IoU achieved for each ground-truth box
    max_overlaps = np.max(overlaps, axis=0)
    filter = np.where(max_overlaps >= threshold)[0]
    recall = float(len(filter)) / float(len(g_bbox_list))
    return recall
def anchor_target_layer(rpn_cls_score, rpn_cls_prob, im_name, gt_boxes_large, gt_ishard,
                        dontcare_areas, im_info, _feat_stride=[16, ], anchor_scales=[16, ]):
    """
    Split the ground-truth boxes into fine-grained strips and implement the
    side-refinement described in the paper.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes_large: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    :return:
    """
    gt_boxes = split_frame(gt_boxes_large)
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the base anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors
    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0
    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    im_info = im_info[0]  # image height, width and number of channels
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'
    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # feature-map height and width
    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order; shift_x (H, W), shift_y (H, W)
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between feature-map anchors and image anchors, (H*W, 4)
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial cells, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # keep only anchors that lie inside the image; drop anchors that stick out
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # anchors inside the image, (In, 4)
    if DEBUG:
        print('anchors.shape', anchors.shape)
    # The anchors are ready now
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialise all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the anchor/gt-box overlaps, used to label the anchors;
    # with x anchors and y gt_boxes this returns an (x, y) array holding
    # the overlap between every anchor and every gt box
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)  # (A) index of the gt box with the largest overlap for each anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]  # e.g. with 900 inside anchors: (900,), each anchor's largest overlap value
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G) for each gt box, the anchor with the largest overlap, e.g. (3,)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]  # e.g. with 3 gts: (3,), the overlap values of the anchors found above
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]  # indices of all anchors that tie for the maximum overlap with some gt

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the anchor with the largest overlap at each position is treated as foreground
    # mark samples whose IoU is below the negative threshold as background
    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # overlap above 0.7 is treated as foreground

    # Added fix: the negatives include regions at the very top and bottom of the
    # image that do contain text; such samples would confuse training, so drop
    # the negatives located near the top and bottom of the image.
    bg_anchor_index = labels == 0
    y_anchor = anchors[:, 3]
    top_anchor_index = y_anchor < min(anchors[:, 1]) + 50
    bottom_anchor_index = y_anchor > max(anchors[:, 3]) - 50
    assert top_anchor_index.shape == bottom_anchor_index.shape
    top_bottom_anchor_index = top_anchor_index + bottom_anchor_index
    bg_topbottom_anchor_index = bg_anchor_index * top_bottom_anchor_index
    labels[bg_topbottom_anchor_index] = -1
    # Visualise the positives at this point to see what they look like
    # vis_labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    # vis_training_sample(vis_labels, all_anchors, im_name, gt_boxes)
    if DEBUG:
        print('before subsampling:')
        print('positives: ' + str(len(np.where(labels == 1)[0])))
        print('negatives: ' + str(len(np.where(labels == 0)[0])))
        print('ignored: ' + str(len(np.where(labels == -1)[0])))

    # The first pass over this image's labels is done;
    # now generate the remaining targets.
    v_target, o_target = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # regression targets (offsets between anchors and gt boxes)

    # The loss also needs the j index and the k index, so compute both here and
    # return them to help the loss computation.
    # j index (valid indices): positive anchors, or anchors whose overlap with a gt exceeds 0.5
    # positive anchors
    positive_index = np.where(labels == 1)[0]  # shape (p,), p should not exceed 128
    # ignore_index = np.where(labels == -1)[0]  # shape (n,), n is large because many anchors are ignored
    keep_index = np.where(labels != -1)[0]
    _ = np.where(max_overlaps > 0.5)[0]  # shape (c,), indices of anchors with overlap above 0.5
    remove_ignore = list()
    for i in range(_.shape[0]):
        if i in keep_index:
            remove_ignore.append(_[i])
    remove_ignore = np.array(remove_ignore)
    effect_index = np.append(positive_index, remove_ignore)
    remove_repeat = np.array(list(set(list(effect_index))))
    j_index = remove_repeat.astype(np.int32)
    j_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
    j_index1[j_index] = 1

    # k index: side (boundary) indices.
    # First find all gt boxes that can be considered side boxes; here we simply
    # take the boundary box plus one box on each side.
    # ori_gt_box = (gt_boxes / im_info[2]).astype(np.int32, copy=False)
    ori_gt_box = gt_boxes.astype(np.float32, copy=False)
    # Find the left/right boundary boxes; TODO: implement with matrix operations
    list_left_index = list()
    list_right_index = list()
    for i in range(ori_gt_box.shape[0]):
        if ori_gt_box[i][2] - ori_gt_box[i][0] != 15:
            list_left_index.append(i)
            if ori_gt_box[i][0] % 16 != 0:  # treat as a left boundary box
                list_left_index.append(i + 1)
            if (ori_gt_box[i][2] + 1) % 16 != 0:  # treat as a right boundary box
                list_left_index.append(i - 1)
        else:
            continue
    list_index1 = list_left_index + list_right_index
    # Remove indices that do not belong to the gt set and remove duplicates
    list_index2 = list(set(list_index1))
    list_index3 = sorted(list_index2)
    list_index4 = list()
    for index in list_index3:
        if index in range(ori_gt_box.shape[0]):
            list_index4.append(index)
    gt_side_index = np.array(list_index4).astype(np.int32)  # indices of the side gt boxes

    # We want the anchors that have the largest overlap with these side gt boxes;
    # those are the anchors we care about.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    anchor_side_index = gt_argmax_overlaps[gt_side_index]  # indices of the anchors with the largest overlap with the side gts
    # Also drop anchors whose overlap with the side box is 0: they are not anchors
    # we really care about, and keeping them would make o_loss abnormally large.
    # anchor_side_list = list()
    anchor_fg_side_list = list()
    anchor_nocare_side_list = list()
    for i in range(anchor_side_index.shape[0]):
        anchor_index = anchor_side_index[i]
        gt_index = gt_side_index[i]
        overlap = overlaps[anchor_index, gt_index]
        if overlap > 0.05:
            anchor_fg_side_list.append(anchor_index)
        elif overlap > 0:
            anchor_nocare_side_list.append(anchor_index)
        else:
            pass
    # These are the anchors with the largest intersection with each side box. Some of
    # them only have a tiny IoU with the gt (the gt strip is narrower than 16 pixels),
    # so they are labelled -1, meaning we do not care what the model predicts for them;
    # the ones with a sufficiently large IoU are labelled positive so that the model
    # still learns positives and negatives correctly.
    anchor_fg_side_index = np.array(anchor_fg_side_list, dtype=np.int32)
    anchor_nocare_side_index = np.array(anchor_nocare_side_list, dtype=np.int32)
    anchor_fg_side_index = np.array(sorted(list(set(list(anchor_fg_side_index))))).astype(np.int32)
    anchor_nocare_side_index = np.array(sorted(list(set(list(anchor_nocare_side_index))))).astype(np.int32)
    labels[anchor_fg_side_index] = 1
    labels[anchor_nocare_side_index] = -1
    k_index = anchor_fg_side_index.copy()
    k_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
    k_index1[k_index] = 1

    # map up to original set of anchors
    # Anchors outside the image were dropped at the beginning; add them back now.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    v_target = _unmap(v_target, total_anchors, inds_inside, fill=0)  # these anchors get target 0, i.e. no value
    o_target = _unmap(o_target, total_anchors, inds_inside, fill=0)
    j_index2 = _unmap(j_index1, total_anchors, inds_inside, fill=0).astype(np.int32)
    k_index2 = _unmap(k_index1, total_anchors, inds_inside, fill=0).astype(np.int32)
    # real_j_index = np.where(j_index2 == 1)[0]
    # real_k_index = np.where(k_index2 == 1)[0]

    if DEBUG:
        # Visualise the final positives to check that they are reasonable
        vis_training_sample(labels, all_anchors, im_name, gt_boxes)
    if DEBUG or SHOW_SOME:
        print('positives: ' + str(len(np.where(labels == 1)[0])))
        print('negatives: ' + str(len(np.where(labels == 0)[0])))
        print('ignored: ' + str(len(np.where(labels == -1)[0])))
    return labels, v_target, o_target, j_index2, k_index2
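# Toy illustration of the shift-and-broadcast step used by the anchor target layers
# above: every base anchor is replicated at every feature-map cell. Hypothetical
# 2x3 feature map, stride 16, two base anchors.
import numpy as np

feat_stride = 16
height, width = 2, 3
base_anchors = np.array([[-8, -8, 8, 8], [-16, -8, 16, 8]])           # (A, 4)

shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()     # (K, 4), K = H*W

A, K = base_anchors.shape[0], shifts.shape[0]
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(all_anchors.shape)  # (12, 4): one copy of each base anchor per cell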
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in the original Fast/er R-CNN."""
    height, width = rpn_cls_score.shape[1:3]
    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('all anchors size {}'.format(all_anchors.shape))
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    A = num_anchors
    total_anchors = all_anchors.shape[0]  # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False), H*W, 4
    K = total_anchors / num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    if DEBUG:
        print('anchors {} --> \n {}'.format(anchors.shape, anchors))
        print('gt boxes --> \n {}'.format(gt_boxes))
        print('anchors , gt boxes overlaps {} --> \n {}'.format(overlaps.shape, overlaps))

    # overlaps is [N, K]: N anchors, K gt boxes.
    # argmax(axis=1) finds, for each anchor (row), the gt box with the largest overlap.
    argmax_overlaps = overlaps.argmax(axis=1)  # (A) index of the best-overlapping gt box for each anchor
    if DEBUG:
        print('arg max over laps axis = 1 --> \n {}'.format(argmax_overlaps))
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]  # argmax_overlaps holds the column (gt) position of each row's maximum
    # argmax(axis=0) scans each column and finds the anchor (row) with the largest overlap for each gt box
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # anchor index with the largest overlap for each gt box
    if DEBUG:
        print('gt arg max over laps axis = 0 --> \n {}'.format(gt_argmax_overlaps))
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]  # all anchors whose overlap equals the per-gt maximum
    if DEBUG:
        print('gt arg max over laps 222 axis = 0 --> \n {}'.format(gt_argmax_overlaps))

    if not RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    # gt_argmax_overlaps marks, for each gt box, the anchor position with the largest overlap
    labels[gt_argmax_overlaps] = 1
    if DEBUG:
        print('labels --> \n {}'.format(labels))

    # fg label: above threshold IOU
    labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1

    if RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1
    if DEBUG:
        print('fg_inds --> {}'.format(fg_inds))
        print('labels --> \n {}'.format(labels))

    # subsample negative labels if we have too many
    num_bg = RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    if DEBUG:
        print('bg_inds --> {}'.format(bg_inds))
        print('labels --> \n {}'.format(labels))

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # argmax_overlaps gives, for each anchor, the row index of its best-matching gt box
    cgt_boxes = gt_boxes[argmax_overlaps, :]
    bbox_targets = _compute_targets(anchors, cgt_boxes)

    # bbox inside weights
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(RPN_BBOX_INSIDE_WEIGHTS)

    # bbox outside weights
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        # outside weights: 1 for foreground, 0 for background
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((RPN_POSITIVE_WEIGHT > 0) & (RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (RPN_POSITIVE_WEIGHT / np.sum(labels == 1))
        negative_weights = ((1.0 - RPN_POSITIVE_WEIGHT) / np.sum(labels == 0))
    # outside weights: 1 for foreground, 0 for background
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    if DEBUG:
        print('labels 2 --> \n {}'.format(labels))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    if DEBUG:
        print('labels 2 --> \n {}'.format(labels))
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    # labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    # labels = labels.reshape((1, 1, A * height, width))
    labels = labels.reshape((1, height, width, A))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
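# The _unmap helper referenced above is not shown in this section. Its assumed job is
# to scatter values computed only for the inside-image anchors back onto the full
# anchor set, padding everything else with a fill value. A minimal sketch under that
# assumption (the name _unmap_sketch is hypothetical):
import numpy as np

def _unmap_sketch(data, count, inds, fill=0):
    if data.ndim == 1:
        ret = np.full((count,), fill, dtype=np.float32)
        ret[inds] = data
    else:
        ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
        ret[inds, :] = data
    return ret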
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None): """Evaluate detection proposal recall metrics. Returns: results: dictionary of results with keys 'ar': average recall 'recalls': vector recalls at each IoU overlap threshold 'thresholds': vector of IoU overlap thresholds 'gt_overlaps': vector of all ground-truth overlaps """ # Record max overlap value for each gt box # Return vector of overlap values areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7 } area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2], # 512-inf ] assert area in areas, 'unknown area range: {}'.format(area) area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) num_pos = 0 for i in range(self.num_images): # Checking for max_overlaps == 1 avoids including crowd annotations # (...pretty hacking :/) max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max( axis=1) gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0] gt_boxes = self.roidb[i]['boxes'][gt_inds, :] gt_areas = self.roidb[i]['seg_areas'][gt_inds] valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] gt_boxes = gt_boxes[valid_gt_inds, :] num_pos += len(valid_gt_inds) if candidate_boxes is None: # If candidate_boxes is not supplied, the default is to use the # non-ground-truth boxes from this roidb non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] boxes = self.roidb[i]['boxes'][non_gt_inds, :] else: boxes = candidate_boxes[i] if boxes.shape[0] == 0: continue if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) _gt_overlaps = np.zeros((gt_boxes.shape[0])) for j in range(gt_boxes.shape[0]): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() assert (gt_ovr >= 0) # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert (_gt_overlaps[j] == gt_ovr) # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) gt_overlaps = np.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = np.arange(0.5, 0.95 + 1e-5, step) recalls = np.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return { 'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps }
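# Toy illustration of the recall sweep at the end of evaluate_recall: given the best
# IoU found for each gt box, recall at threshold t is the fraction of gt boxes whose
# best coverage reaches t.
import numpy as np

gt_overlaps = np.array([0.92, 0.55, 0.71, 0.48, 0.83])
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).mean() for t in thresholds])
print(recalls[0], recalls.mean())  # recall@0.5 = 0.8, plus the average recall over the sweep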
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info,
                        _feat_stride=[16, ], anchor_scales=[16, ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels: (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes (may contain some transform)
        that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4), weights of each box, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (HxWxA, 4), used to balance fg/bg, because the numbers of bg and fg
        anchors may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the base anchors, 10 in total
    _num_anchors = _anchors.shape[0]  # 10 anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    # height, width = rpn_cls_score.shape[1:3]
    im_info = im_info[0]  # image height, width and number of channels

    # Locate the anchors on the feature map and add the deltas to obtain the
    # true anchor coordinates on the original image.
    # Algorithm:
    # for each (H, W) location i
    #     generate 9 anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # feature-map height and width

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between feature-map anchors and image anchors
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 10 anchors
    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial cells, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # keep only anchors that lie inside the image; drop anchors that stick out
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # anchors inside the image
    # The anchors are ready now
    # --------------------------------------------------------------
    # label: >=1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialise all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the anchor/gt-box overlaps, used to label the anchors;
    # ascontiguousarray returns a memory-contiguous array
    # overlaps shape = [12402, 465]
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))  # with x anchors and y gt_boxes this returns an (x, y) array
    # argmax_overlaps shape (12402,): for each anchor, the index of the gt box with the largest IoU
    argmax_overlaps = overlaps.argmax(axis=1)  # (A)
    # the best overlap score of each anchor against the ground truth
    max_overlaps = np.max(overlaps, axis=1)
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G) for each gt box, the best-overlapping anchor among the 10 anchors per position
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label the background first: overlap below 0.3

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = gt_boxes[:, 4]  # the anchor with the largest overlap at each position is treated as foreground

    # fg label: above threshold IOU
    max_iou_pp = max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP  # 0.7
    labels[max_iou_pp] = gt_boxes[argmax_overlaps[max_iou_pp], 4]  # overlap above 0.7 is treated as foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:  # False
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    # Subsample the positives if there are too many; limit them to at most 128.
    # TODO this may need to change later: with character fragments the number of positives is large.
    # cfg.TRAIN.RPN_FG_FRACTION = 0.5, cfg.TRAIN.RPN_BATCHSIZE = 300
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly drop some positives
        labels[disable_inds] = -1  # set them to -1

    # subsample negative labels if we have too many
    # Subsample the negatives if there are too many. The total number of samples is 256
    # with at most 128 positives; if there are fewer than 128 positives, fill the gap
    # with negatives to reach 256 samples.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels >= 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
        # print "was %s inds, disabling %s, now %s inds" % (
        #     len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # Labels are done; now compute the rpn-box regression targets
    # --------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # regression targets (offsets between anchors and gt boxes)
    # bbox_targets.shape [inds_inside, 4]

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # use uniform weights for now: 1 for positives, 0 for negatives
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels >= 1)) + 1)
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels >= 1, :] = positive_weights  # outside weights: 1 for foreground, 0 for background
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    # Anchors outside the image were dropped at the beginning; add them back now.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)  # these anchors get target 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights filled with 0
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights filled with 0

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))  # reshape
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    # rpn_bbox_targets shape [1, 37, 40, 40]
    # rpn_labels shape (1, 37, 40, 10)
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
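# _compute_targets above delegates to a bbox_transform-style routine. A hedged sketch
# of the usual (dx, dy, dw, dh) parameterisation such a routine is expected to produce
# for corner-format anchor/gt pairs (not necessarily this repository's exact version):
import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    dx = (gt_cx - ex_cx) / ex_w      # normalised centre offsets
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)         # log-space size ratios
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()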
def mdnet_sample(im, bbox, params, num, stype): """Generate gaussian samples based on bbox :arg im: cv2's image bbox: ground-truth box(x, y, w, h) params: five-tuple(width, height, scale, pos_threshold, neg_threshold) of gaussian parameters num: number of samples :return bboxes: list of boxes { 'img' :img, 'box'(x, y, w, h), 'label': label, 'overlap': overlap } """ assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form." assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0." assert len( params ) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format( len(params)) assert num > 0, "Number of samples should be larger than 0." im_shape = im.shape im_w = im_shape[1] im_h = im_shape[0] # Calculate average of width and height centerx = bbox[0] + bbox[2] / 2 centery = bbox[1] + bbox[3] / 2 bboxes = [] cur_id = 0 while cur_id < num: # new box parameters _mean = (bbox[2] + bbox[3]) / 2 offsetx = rd.gauss(0, params[0] * _mean) offsety = rd.gauss(0, params[1] * _mean) scalex = rd.gauss(1, params[2]) # scaley = rd.gauss(1, params[2]) scaley = scalex # new box half width and half height hw = bbox[2] * scalex / 2 hh = bbox[3] * scaley / 2 # box is in the form of (x1, y1, x2, y2) box = (max(0, centerx + offsetx - hw), max(0, centery + offsety - hh), min(im_w, centerx + offsetx + hw), min(im_h, centery + offsety + hh)) # transform to (x, y, w, h) sample = (box[0], box[1], box[2] - box[0], box[3] - box[1]) if int(sample[2]) <= 0 or int(sample[3]) <= 0: continue # since there is only one query box, then take the first one in the overlaps overlap = bbox_overlaps([bbox], [sample])[0] if overlap > params[3]: bboxes.append({ 'img': im, 'box': sample, 'label': 1, 'overlap': overlap }) elif overlap < params[4]: bboxes.append({ 'img': im, 'box': sample, 'label': 0, 'overlap': overlap }) else: continue cur_id += 1 return bboxes
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info,
                        _feat_stride=[16, ], anchor_scales=[16, ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels: (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes (may contain some transform)
        that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4), weights of each box, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (HxWxA, 4), used to balance fg/bg, because the numbers of bg and fg
        anchors may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the base anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    # height, width = rpn_cls_score.shape[1:3]
    im_info = im_info[0]  # image height, width and number of channels

    # Locate the anchors on the feature map and add the deltas to obtain the
    # true anchor coordinates on the original image.
    # Algorithm:
    # for each (H, W) location i
    #     generate 9 anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # feature-map height and width

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order; shift_x (H, W), shift_y (H, W)
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between feature-map anchors and image anchors, (H*W, 4)
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial cells, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # keep only anchors that lie inside the image; drop anchors that stick out
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # anchors inside the image, (In, 4)
    if DEBUG:
        print('anchors.shape', anchors.shape)
    # The anchors are ready now
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialise all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the anchor/gt-box overlaps, used to label the anchors;
    # with x anchors and y gt_boxes this returns an (x, y) array holding
    # the overlap between every anchor and every gt box
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)  # (A) index of the gt box with the largest overlap for each anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]  # e.g. with 900 inside anchors: (900,), each anchor's largest overlap value
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G) for each gt box, the anchor with the largest overlap, e.g. (3,)
    if DEBUG:
        print('indices of the anchors with the largest overlap with each gt')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]  # e.g. with 3 gts: (3,), the overlap values of the anchors found above
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]  # indices of all anchors that tie for the maximum overlap with some gt
    if DEBUG:
        print('this step finds every anchor that ties for the per-gt maximum overlap,')
        print('i.e. the duplicates missed by the previous step')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label the background first: overlap below 0.3

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the anchor with the largest overlap at each position is treated as foreground

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # overlap above 0.7 is treated as foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    if DEBUG:
        print('before subsampling:')
        print('positives:', len(np.where(labels == 1)[0]))
        print('negatives:', len(np.where(labels == 0)[0]))
        print('ignored:', len(np.where(labels == -1)[0]))

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:  # for now we do not consider dontcare areas
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # for now we do not handle the hard-sample problem
    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    # Subsample the positives if there are too many; limit them to at most 128.
    # TODO this may need to change later: with character fragments the number of positives is large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly drop some positives
        labels[disable_inds] = -1  # set them to -1

    # subsample negative labels if we have too many
    # Subsample the negatives if there are too many. The total number of samples is 256
    # with at most 128 positives; if there are fewer than 128 positives, fill the gap
    # with negatives to reach 256 samples.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
        # print "was %s inds, disabling %s, now %s inds" % (
        #     len(bg_inds), len(disable_inds), np.sum(labels == 0))

    if DEBUG:
        print('after balancing positives and negatives:')
        print('positives:', len(np.where(labels == 1)[0]))
        print('negatives:', len(np.where(labels == 0)[0]))
        print('ignored:', len(np.where(labels == -1)[0]))

    # Labels are done; now compute the rpn-box regression targets
    # --------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # regression targets (offsets between anchors and gt boxes)

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # use uniform weights for now: 1 for positives, 0 for negatives
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1)) + 1)
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: 1 for foreground, 0 for background
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    # Anchors outside the image were dropped at the beginning; add them back now.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)  # these anchors get target 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights filled with 0
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights filled with 0

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))  # reshape
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
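# Sketch of how inside/outside weights like the ones produced above are conventionally
# consumed by an RPN smooth-L1 box loss (an assumption about the downstream loss, not
# code from this repository): inside weights mask out non-foreground anchors, outside
# weights normalise the foreground/background contributions.
import numpy as np

def rpn_smooth_l1_sketch(bbox_pred, bbox_targets, inside_w, outside_w, sigma=3.0):
    sigma2 = sigma ** 2
    diff = inside_w * (bbox_pred - bbox_targets)   # zero out non-foreground anchors
    abs_diff = np.abs(diff)
    per_elem = np.where(abs_diff < 1.0 / sigma2,
                        0.5 * sigma2 * diff ** 2,
                        abs_diff - 0.5 / sigma2)
    return np.sum(outside_w * per_elem)            # outside weights balance fg/bg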
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): """Same as the anchor target layer in original Fast/er RCNN """ A = num_anchors total_anchors = all_anchors.size()[0] K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # pytorch (bs, c, h, w) height, width = rpn_cls_score.size()[2:4] # only keep anchors inside the image inds_inside = ( (all_anchors.data[:, 0] >= -_allowed_border) & (all_anchors.data[:, 1] >= -_allowed_border) & (all_anchors.data[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors.data[:, 3] < im_info[0] + _allowed_border) # height ).nonzero()[:, 0].long() if DEBUG: print('total_anchors', total_anchors) print('inds_inside', inds_inside.size()[0]) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors.shape', anchors.size()) # label: 1 is positive, 0 is negative, -1 is dont care labels = inds_inside.new(inds_inside.size()[0]).fill_(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) shape is A x G overlaps = bbox_overlaps(anchors.data, gt_boxes[:, :4].data) max_overlaps, argmax_overlaps = torch.max(overlaps, dim=1) gt_max_overlaps, gt_argmax_overlaps = torch.max(overlaps, dim=0) gt_argmax_overlaps = (overlaps == ( gt_max_overlaps.unsqueeze(0).expand_as(overlaps))).nonzero()[:, 0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = (labels == 1).nonzero()[:, 0] if fg_inds.numel() > num_fg: inds = fg_inds.new( npr.choice(np.arange(0, fg_inds.numel()), size=int((len(fg_inds) - num_fg)), replace=False)).long() disable_inds = fg_inds[inds] labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - (labels == 1).sum() bg_inds = (labels == 0).nonzero()[:, 0] if bg_inds.numel() > num_bg: inds = bg_inds.new( npr.choice(np.arange(0, bg_inds.numel()), size=int((len(bg_inds) - num_bg)), replace=False)).long() disable_inds = bg_inds[inds] labels[disable_inds] = -1 bbox_targets = _compute_targets(anchors.data, gt_boxes[argmax_overlaps][:, :4].data) bbox_inside_weights = bbox_targets.new(inds_inside.size()[0], 4).zero_() # only the positive ones have regression targets inds = (labels == 1).nonzero().view(-1) # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds) dim_value = bbox_targets.new(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS).view( -1, 4).expand(inds.size(0), 4) bbox_inside_weights[inds, :] = dim_value bbox_outside_weights = bbox_targets.new(inds_inside.size()[0], 4).zero_() if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = (labels >= 0).sum() positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = 
(cfg.TRAIN.RPN_POSITIVE_WEIGHT / (labels == 1).sum()) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (labels == 0).sum()) inds = (labels == 1).nonzero().view(-1) # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds) dim_value = bbox_targets.new(positive_weights).view(-1, 4).expand( inds.size(0), 4) bbox_outside_weights[inds, :] = dim_value inds = (labels == 0).nonzero().view(-1) # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds) dim_value = bbox_targets.new(negative_weights).view(-1, 4).expand( inds.size(0), 4) bbox_outside_weights[inds, :] = dim_value # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # labels labels = labels.view((1, height, width, A)).permute(0, 3, 1, 2).contiguous() labels = labels.view((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .view((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .view((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .view((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
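# Note: the torch variant above assumes an _unmap helper that is not reproduced in this
# section. The following is a minimal sketch of such a helper (an assumption, not
# necessarily the project's actual implementation): it scatters values computed for the
# inside anchors back over the full anchor set.
import torch

def _unmap(data, count, inds, fill=0):
    """Map `data`, defined only for the anchors indexed by `inds`, back to a
    tensor covering all `count` anchors; filtered-out positions get `fill`."""
    if data.dim() == 1:
        ret = data.new(count).fill_(fill)
        ret[inds] = data
    else:
        ret = data.new(count, data.size(1)).fill_(fill)
        ret[inds, :] = data
    return ret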
def gaussian_sample(im, bbox, params, num, stype): assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form." assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0." assert len( params ) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format( len(params)) assert num > 0, "Number of samples should be larger than 0." im_shape = im.shape im_w = im_shape[1] im_h = im_shape[0] # Calculate average of width and height centerx = bbox[0] + bbox[2] / 2 centery = bbox[1] + bbox[3] / 2 ones = np.ones((num, 1)) neg_ones = -1 * ones mean = round((bbox[2] + bbox[3]) / 2.) min_ = np.min(np.hstack((ones, 0.5 * randn(num, 1))), axis=1) min_ = min_.reshape((min_.size, 1)) max_ = np.max(np.hstack((neg_ones, min_)), axis=1) offsetx = params[0] * mean * max_ min_ = np.min(np.hstack((ones, 0.5 * randn(num, 1))), axis=1) min_ = min_.reshape((min_.size, 1)) max_ = np.max(np.hstack((neg_ones, min_)), axis=1) offsety = params[1] * mean * max_ min_ = np.min(np.hstack((ones, 0.5 * randn(num, 1))), axis=1) min_ = min_.reshape((min_.size, 1)) max_ = params[2] * np.max(np.hstack((neg_ones, min_)), axis=1) scale = 1.05**max_ w = (bbox[2] * scale)[:, np.newaxis] h = (bbox[3] * scale)[:, np.newaxis] tens = np.array([10] * num)[:, np.newaxis] w_minus_10 = np.array(w - 10) h_minus_10 = np.array(h - 10) if stype == 'TRAIN': wmin_ = np.min(np.hstack((w_minus_10, w)), axis=1)[:, np.newaxis] hmin_ = np.min(np.hstack((h_minus_10, h)), axis=1)[:, np.newaxis] ws = np.max(np.hstack((tens, wmin_)), axis=1) hs = np.max(np.hstack((tens, hmin_)), axis=1) elif stype == 'TEST': ws = np.max(np.hstack((tens, w)), axis=1) hs = np.max(np.hstack((tens, h)), axis=1) bboxes = [] for i in range(num): hw = ws[i] / 2 hh = hs[i] / 2 box = (max(0, int(centerx + offsetx[i] - hw)), max(0, int(centery + offsety[i] - hh)), min(im_w, int(centerx + offsetx[i] + hw)), min(im_h, int(centery + offsety[i] + hh))) sample = (box[0], box[1], box[2] - box[0], box[3] - box[1]) if int(sample[2]) <= 0 or int(sample[3]) <= 0: continue overlap = bbox_overlaps([bbox], [sample])[0] if overlap > params[3]: bboxes.append({ 'img': im, 'box': sample, 'label': 1, 'overlap': overlap }) elif overlap < params[4]: bboxes.append({ 'img': im, 'box': sample, 'label': 0, 'overlap': overlap }) return bboxes
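# A short usage sketch for gaussian_sample. The image, box and five-tuple params
# (x-offset factor, y-offset factor, scale factor, positive IoU threshold, negative IoU
# threshold) below are hypothetical values chosen for illustration; the function itself
# additionally assumes that `randn` (numpy.random.randn) and the module's own
# `bbox_overlaps` are importable where it is defined.
import numpy as np

im = np.zeros((240, 320, 3), dtype=np.uint8)   # dummy H x W x C image
gt = (100, 60, 80, 40)                         # ground truth as (x, y, w, h)
params = (0.3, 0.3, 0.5, 0.7, 0.3)             # offsets, scale, pos/neg IoU thresholds
samples = gaussian_sample(im, gt, params, num=50, stype='TRAIN')
positives = [s for s in samples if s['label'] == 1]
negatives = [s for s in samples if s['label'] == 0]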
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride = [16,], anchor_scales = [16,]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes (possibly transformed) that are the regression objectives rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, mainly taken from the hyper-params in cfg rpn_bbox_outside_weights: (HxWxA, 4) used to balance fg/bg, because the numbers of bg and fg anchors may differ significantly """ _anchors = generate_anchors(scales=np.array(anchor_scales)) # generate the base anchors, 9 in total _num_anchors = _anchors.shape[0] # 9 anchors if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print(np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] # image height, width and number of channels # place anchors on the feature map and add the offsets to get their real coordinates in the input image # Algorithm: # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # feature-map height and width if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order # K is H x W shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # offsets that move the anchors from feature-map positions to real-image coordinates # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors # 9 anchors K = shifts.shape[0] # 50*37, feature-map width times height all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) # broadcast over the spatial dimension, then add all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image # keep only the anchors that lie inside the image; drop those that cross the boundary inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] # anchors that lie inside the image if DEBUG: print('anchors.shape', anchors.shape) # at this point the anchors are ready #-------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # initialize all labels to -1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G # compute the overlap between every anchor and every gt box, used to label the anchors overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) # with x anchors and y gt_boxes this returns an (x, y) array # holding the overlap between each anchor and each gt box argmax_overlaps = overlaps.argmax(axis=1) # (A) for each anchor, the gt box with the largest overlap max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) # (G) for each gt box, the anchor with the largest overlap gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # label the background first: overlap below 0.3 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # the anchor with the largest overlap for each gt is foreground # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # overlaps above 0.7 are foreground if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[0] > 0: # dontcare areas are not considered for now # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 # hard samples are not considered for now # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many # subsample the positives if there are too many, limiting them to at most 128 # TODO this may need to change later; with character fragments the number of positives can be large num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice( fg_inds, size=(len(fg_inds) - num_fg), replace=False) # randomly drop some positives labels[disable_inds] = -1 # set them to -1 # subsample negative labels if we have too many # subsample the negatives if there are too many; # the batch holds 256 samples with at most 128 positives, # and if there are fewer than 128 positives the remainder is filled with negatives to reach 256 num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice( bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) # labels are assigned; now compute the rpn-box regression targets #-------------------------------------------------------------- bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) # targets are the offsets between each anchor and its matched gt box bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) # inside weights: 1 for foreground, 0 otherwise bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # for now use simple weights: positives get 1, negatives get 0 # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) + 1 # positive_weights = np.ones((1, 4)) * 1.0 / num_examples # negative_weights = np.ones((1, 4)) * 1.0 / num_examples positive_weights = np.ones((1, 4)) negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1) + 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0) + 1)) bbox_outside_weights[labels == 1, :] = positive_weights # outside weights: foreground gets the positive weight, background gets the negative weight bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means ** 2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors # the anchors outside the image were dropped earlier; add them back here labels = _unmap(labels, total_anchors, inds_inside, fill=-1) # these anchors get label -1, i.e. dontcare bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) # their regression targets are 0, i.e. no value bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) # inside weights filled with 0 bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # outside weights filled with 0 if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels labels = labels.reshape((1, height, width, A)) # reshape the labels rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)) # reshape rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
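# The dontcare handling above assumes a bbox_intersections utility that is not shown
# here. A hedged pure-NumPy sketch of the usual semantics (intersection area of each
# box with each query box, normalised by the query box area); the project is assumed
# to ship a compiled version with this interface:
import numpy as np

def bbox_intersections_np(boxes, query_boxes):
    """boxes: (N, 4), query_boxes: (K, 4); returns an (N, K) array with the
    fraction of each query box covered by each box."""
    N, K = boxes.shape[0], query_boxes.shape[0]
    out = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        q_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                  (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                  max(boxes[n, 1], query_boxes[k, 1]) + 1)
            if iw > 0 and ih > 0:
                out[n, k] = iw * ih / q_area
    return out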
def anchor_target_layer(rpn_cls_score, gt_boxes_large, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[ 16, ]): """ Split the gt boxes into fine-grained boxes and implement the side-refinement from the paper. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- :return: """ gt_boxes = split_frame(gt_boxes_large) # gt_width = gt_boxes[:,2]-gt_boxes[:,0] _anchors = generate_anchors( scales=np.array(anchor_scales)) # generate the base anchors, 9 in total _num_anchors = _anchors.shape[0] # 9 anchors if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 im_info = im_info[0] # image height, width and number of channels assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # feature-map height and width if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride # (W) shift_y = np.arange(0, height) * _feat_stride # (H) shift_x, shift_y = np.meshgrid( shift_x, shift_y) # in W H order # shift_x (H, W) shift_y (H, W) # K is H x W shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel() )).transpose() # offsets that move the anchors from feature-map positions to real-image coordinates #(H*W, 4) # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors # 9 anchors K = shifts.shape[0] # 50*37, feature-map width times height all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) # broadcast over the spatial dimension, then add all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image # keep only the anchors that lie inside the image; drop those that cross the boundary inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] # anchors that lie inside the image (In, 4) if DEBUG: print('anchors.shape', anchors.shape) # at this point the anchors are ready #-------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(0) # initialize the labels; here everything starts as 0 (background) rather than the usual -1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G # compute the overlap between every anchor and every gt box, used to label the anchors overlaps = bbox_overlaps(np.ascontiguousarray( anchors, dtype=np.float), np.ascontiguousarray( gt_boxes, dtype=np.float)) # with x anchors and y gt_boxes this returns an (x, y) array # holding the overlap between each anchor and each gt box argmax_overlaps = overlaps.argmax( axis=1) # (A) for each anchor, the gt box with the largest overlap max_overlaps = overlaps[np.arange( len(inds_inside) ), argmax_overlaps] # e.g. with 900 inside anchors this is (900,), the largest overlap value of each anchor gt_argmax_overlaps = overlaps.argmax( axis=0) # (G) for each gt box, the anchor with the largest overlap # (3) if DEBUG: print('indices of the anchors with the largest overlap with each gt') print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape) gt_max_overlaps = overlaps[ gt_argmax_overlaps, np.arange( overlaps.shape[1] )] # e.g. with 3 gt boxes this is (3,), the overlap values of the anchors found in the previous step gt_argmax_overlaps = np.where( overlaps == gt_max_overlaps)[0] # (3,) the indices of the anchors that share the maximum overlap with a gt if DEBUG: print('this step finds every anchor that ties for the maximum overlap with a gt, adding duplicates missed in the previous step') print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape) if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # label the background first: overlap below 0.3 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # the anchor with the largest overlap for each gt is foreground # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # overlaps above 0.7 are foreground if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 if DEBUG: print('before filtering the sample counts:') print('positives:', len(np.where(labels == 1)[0])) print('negatives:', len(np.where(labels == 0)[0])) print('ignored:', len(np.where(labels == -1)[0])) # the number of positive and negative samples is no longer limited if DEBUG: print("after balancing positives and negatives:") print('positives:', len(np.where(labels == 1)[0])) print('negatives:', len(np.where(labels == 0)[0])) print('ignored:', len(np.where(labels == -1)[0])) # labels are assigned; now compute the rpn-box targets # v_target, o_target = _compute_targets(anchors, gt_boxes[ argmax_overlaps, :]) # targets are the offsets between each anchor and its matched gt box # the loss also needs the j index and the k index, so compute both here and return them to help with the loss computation # j index, the valid indices: positive anchors, or anchors whose overlap with a gt exceeds 0.5 # positive anchors positive_index = np.where(labels == 1)[0] # shape (p,), p should be at most 128 # # ignore_index = np.where(labels==-1)[0] # shape (n,), n is large because many anchors are ignored keep_index = np.where(labels != -1)[0] _ = np.where(max_overlaps > 0.5)[0] # shape (c,), the indices of anchors with overlap above 0.5 remove_ignore = list() for i in range(_.shape[0]): if i in keep_index: remove_ignore.append(_[i]) remove_ignore = np.array(remove_ignore) effect_index = np.append(positive_index, remove_ignore) remove_repeat = np.array(list(set(list(effect_index)))) j_index = remove_repeat.astype(np.int32) j_index1 = np.zeros((len(inds_inside)), dtype=np.int32) j_index1[j_index] = 1 # k index: the side (edge) indices # first find all gt boxes that can be treated as edge boxes; here simply the edge box plus one neighbour on each side #ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False) ori_gt_box = gt_boxes.astype(np.float32, copy=False) # find the left/right boundary boxes; TODO implement with matrix operations list_left_index = list() list_right_index = list() for i in range(ori_gt_box.shape[0]): if ori_gt_box[i][2] - ori_gt_box[i][0] != 15: list_left_index.append(i) else: continue list_index1 = list_left_index + list_right_index # remove indices that do not belong to the gt set and remove duplicates list_index2 = list(set(list_index1)) list_index3 = sorted(list_index2) list_index4 = list() for index in list_index3: if index in range(ori_gt_box.shape[0]): list_index4.append(index) # if DEBUG: # print("list_left_index", list_left_index) # print("list_right_index", list_right_index) # print("list_index1", list_index1) # print("list_index2", list_index2) # print("list_index3", list_index3) # print("list_index4", list_index4) gt_side_index = np.array(list_index4).astype(np.int32) # indices of the boundary gt boxes # get the indices of the anchors with the largest overlap with these gt boxes; those are the anchors we care about gt_argmax_overlaps = overlaps.argmax(axis=0) anchor_side_index = gt_argmax_overlaps[ gt_side_index] # e.g. 143 anchor indices with the largest overlaps with the gt boxes # also drop anchors whose overlap with a boundary box is 0; they are not anchors we really care about, and keeping them would make o_loss abnormally large anchor_side_list = list() for i in range(anchor_side_index.shape[0]): anchor_index = anchor_side_index[i] gt_index = gt_side_index[i] overlap = overlaps[anchor_index, gt_index] if overlap > 0: anchor_side_list.append(anchor_index) anchor_side_index = np.array(anchor_side_list, dtype=np.int32) anchor_side_index1 = np.array(sorted(list(set( list(anchor_side_index))))).astype(np.int32) k_index = anchor_side_index1 # (s,) s side indices; these do not account for the out-of-image anchors removed earlier, hence the unmapping below k_index1 = np.zeros((len(inds_inside)), dtype=np.int32) k_index1[k_index] = 1 if DEBUG: print('jIndex1:', j_index1.shape) print('k_index1:', k_index1.shape) # map up to original set of anchors # the anchors outside the image were dropped earlier; add them back here labels = _unmap(labels, total_anchors, inds_inside, fill=-1) # these anchors get label -1, i.e. dontcare v_target = _unmap(v_target, total_anchors, inds_inside, fill=0) # their targets are 0, i.e. no value o_target = _unmap(o_target, total_anchors, inds_inside, fill=0) j_index2 = _unmap(j_index1, total_anchors, inds_inside, fill=0).astype(np.int32) k_index2 = _unmap(k_index1, total_anchors, inds_inside, fill=0).astype(np.int32) # real_j_index = np.where(j_index2==1)[0] # real_k_index = np.where(k_index2==1)[0] if DEBUG: print('loss_1 index:', np.where(labels != -1)[0].shape[0]) print('j_index:', j_index.shape) print('k_index:', k_index.shape) print('j_index2:', j_index2.shape) print('k_index2:', k_index2.shape) print('label shape', labels.shape) print('v_target shape', v_target.shape) print('o_target shape', o_target.shape) return labels, v_target, o_target, j_index2, k_index2
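# The layer above calls split_frame before anything else, but that helper is not shown
# in this section. The sketch below is a hypothetical CTPN-style implementation, offered
# only as an assumption: it cuts each wide ground-truth box into stride-wide vertical
# slices aligned to the feature-map grid, with the end slices clipped to the original
# x1/x2 (which is what the `!= 15` width test above uses to spot side proposals).
import numpy as np

def split_frame(gt_boxes_large, stride=16):
    """gt_boxes_large: (G, 5) rows of [x1, y1, x2, y2, cls]; returns the
    stride-wide slices as an (M, 5) float32 array."""
    slices = []
    for x1, y1, x2, y2, cls in gt_boxes_large:
        first = int(x1) // stride
        last = int(x2) // stride
        for cell in range(first, last + 1):
            sx1 = max(x1, cell * stride)                 # clip the first slice to x1
            sx2 = min(x2, cell * stride + stride - 1)    # clip the last slice to x2
            slices.append([sx1, y1, sx2, y2, cls])
    return np.array(slices, dtype=np.float32)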
def anchor_target_layer(rpn_cls_score, rpn_cls_prob, im_name, gt_boxes_large, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[ 16, ]): """ Split the gt boxes into fine-grained boxes and implement the side-refinement from the paper. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- :return: """ global img_name if img_name != im_name: # first time training on this image flag_first = True else: flag_first = False img_name = im_name if flag_first: # if this is the first time we see this image, regenerate all the tmp objects gt_boxes = split_frame(gt_boxes_large) # gt_width = gt_boxes[:,2]-gt_boxes[:,0] _anchors = generate_anchors( scales=np.array(anchor_scales)) # generate the base anchors, 9 in total _num_anchors = _anchors.shape[0] # 9 anchors if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 im_info = im_info[0] # image height, width and number of channels assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # feature-map height and width if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride # (W) shift_y = np.arange(0, height) * _feat_stride # (H) shift_x, shift_y = np.meshgrid( shift_x, shift_y) # in W H order # shift_x (H, W) shift_y (H, W) # K is H x W shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel() )).transpose() # offsets that move the anchors from feature-map positions to real-image coordinates #(H*W, 4) # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors # 9 anchors K = shifts.shape[0] # 50*37, feature-map width times height all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) # broadcast over the spatial dimension, then add all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image # keep only the anchors that lie inside the image; drop those that cross the boundary inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] # anchors that lie inside the image (In, 4) if DEBUG: print('anchors.shape', anchors.shape) # at this point the anchors are ready # -------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # initialize all labels to -1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G # compute the overlap between every anchor and every gt box, used to label the anchors overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray( gt_boxes, dtype=np.float)) # with x anchors and y gt_boxes this returns an (x, y) array # holding the overlap between each anchor and each gt box argmax_overlaps = overlaps.argmax( axis=1) # (A) for each anchor, the gt box with the largest overlap max_overlaps = overlaps[np.arange( len(inds_inside) ), argmax_overlaps] # e.g. with 900 inside anchors this is (900,), the largest overlap value of each anchor gt_argmax_overlaps = overlaps.argmax( axis=0) # (G) for each gt box, the anchor with the largest overlap # (3) gt_max_overlaps = overlaps[ gt_argmax_overlaps, np.arange( overlaps.shape[1] )] # e.g. with 3 gt boxes this is (3,), the overlap values of the anchors found in the previous step gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[ 0] # (3,) the indices of the anchors that share the maximum overlap with a gt if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # label the background first: overlap below 0.3 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # the anchor with the largest overlap for each gt is foreground # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # overlaps above 0.7 are foreground if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 before_filter_labels = labels.copy() # labels before any filtering, kept for computing the hard negatives later all_bg_index = before_filter_labels == 0 if DEBUG: print('before filtering the sample counts:') print('positives: ' + str(len(np.where(labels == 1)[0]))) print('negatives: ' + str(len(np.where(labels == 0)[0]))) print('ignored: ' + str(len(np.where(labels == -1)[0]))) # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[ 0] > 0: # dontcare areas are not considered for now # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 # hard samples are not considered for now # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[ 0] > 0 and 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[ hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many # subsample the positives if there are too many, limiting them to at most 128 # TODO this may need to change later; with character fragments the number of positives can be large num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) # randomly drop some positives labels[disable_inds] = -1 # set them to -1 # subsample negative labels if we have too many # subsample the negatives if there are too many; # the batch holds RPN_BATCHSIZE samples with at most 128 positives, # and if there are fewer positives the remainder is filled with negatives num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 # print "was %s inds, disabling %s, now %s inds" % ( # len(bg_inds), len(disable_inds), np.sum(labels == 0)) if DEBUG: print("after balancing positives and negatives:") print('positives: ' + str(len(np.where(labels == 1)[0]))) print('negatives: ' + str(len(np.where(labels == 0)[0]))) print('ignored: ' + str(len(np.where(labels == -1)[0]))) # at this point the first-pass labels for this image are ready (a random draw of RPN_BATCHSIZE samples) # generate the remaining targets v_target, o_target = _compute_targets(anchors, gt_boxes[ argmax_overlaps, :]) # targets are the offsets between each anchor and its matched gt box # the loss also needs the j index and the k index, so compute both here and return them to help with the loss computation # j index, the valid indices: positive anchors, or anchors whose overlap with a gt exceeds 0.5 # positive anchors positive_index = np.where(labels == 1)[0] # shape (p,), p should be at most 128 # # ignore_index = np.where(labels==-1)[0] # shape (n,), n is large because many anchors are ignored keep_index = np.where(labels != -1)[0] _ = np.where( max_overlaps > 0.5)[0] # shape (c,), the indices of anchors with overlap above 0.5 remove_ignore = list() for i in range(_.shape[0]): if i in keep_index: remove_ignore.append(_[i]) remove_ignore = np.array(remove_ignore) effect_index = np.append(positive_index, remove_ignore) remove_repeat = np.array(list(set(list(effect_index)))) j_index = remove_repeat.astype(np.int32) j_index1 = np.zeros((len(inds_inside)), dtype=np.int32) j_index1[j_index] = 1 # k index: the side (edge) indices # first find all gt boxes that can be treated as edge boxes; here simply the edge box plus one neighbour on each side # ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False) ori_gt_box = gt_boxes.astype(np.float32, copy=False) # find the left/right boundary boxes; TODO implement with matrix operations list_left_index = list() list_right_index = list() for i in range(ori_gt_box.shape[0]): if ori_gt_box[i][2] - ori_gt_box[i][0] != 15: list_left_index.append(i) else: continue list_index1 = list_left_index + list_right_index # remove indices that do not belong to the gt set and remove duplicates list_index2 = list(set(list_index1)) list_index3 = sorted(list_index2) list_index4 = list() for index in list_index3: if index in range(ori_gt_box.shape[0]): list_index4.append(index) gt_side_index = np.array(list_index4).astype(np.int32) # indices of the boundary gt boxes # get the indices of the anchors with the largest overlap with these gt boxes; those are the anchors we care about gt_argmax_overlaps = overlaps.argmax(axis=0) anchor_side_index = gt_argmax_overlaps[ gt_side_index] # e.g. 143 anchor indices with the largest overlaps with the gt boxes # also drop anchors whose overlap with a boundary box is 0; they are not anchors we really care about, and keeping them would make o_loss abnormally large anchor_side_list = list() for i in range(anchor_side_index.shape[0]): anchor_index = anchor_side_index[i] gt_index = gt_side_index[i] overlap = overlaps[anchor_index, gt_index] if overlap > 0: anchor_side_list.append(anchor_index) anchor_side_index = np.array(anchor_side_list, dtype=np.int32) anchor_side_index1 = np.array( sorted(list(set(list(anchor_side_index))))).astype(np.int32) k_index = anchor_side_index1 # (s,) s side indices; these do not account for the out-of-image anchors removed earlier, hence the unmapping below k_index1 = np.zeros((len(inds_inside)), dtype=np.int32) k_index1[k_index] = 1 in_labels = labels.copy() # map up to original set of anchors # the anchors outside the image were dropped earlier; add them back here labels = _unmap(labels, total_anchors, inds_inside, fill=-1) # these anchors get label -1, i.e. dontcare v_target = _unmap(v_target, total_anchors, inds_inside, fill=0) # their targets are 0, i.e. no value o_target = _unmap(o_target, total_anchors, inds_inside, fill=0) j_index2 = _unmap(j_index1, total_anchors, inds_inside, fill=0).astype(np.int32) k_index2 = _unmap(k_index1, total_anchors, inds_inside, fill=0).astype(np.int32) # real_j_index = np.where(j_index2==1)[0] # real_k_index = np.where(k_index2==1)[0] global tmp_labels, tmp_all_bg_index, tmp_v_target, tmp_o_target, tmp_j_index2, tmp_k_index2, tmp_inds_inside tmp_labels = in_labels tmp_all_bg_index = all_bg_index tmp_v_target = v_target tmp_o_target = o_target tmp_j_index2 = j_index2 tmp_k_index2 = k_index2 tmp_inds_inside = inds_inside if DEBUG or SHOW_SOME: print('first pass over this image') print('positives: ' + str(len(np.where(labels == 1)[0]))) print('negatives: ' + str(len(np.where(labels == 0)[0]))) print('ignored: ' + str(len(np.where(labels == -1)[0]))) # print('saved tmp_labels') # print('positives: ' + str(len(np.where(tmp_labels == 1)[0]))) # print('negatives: ' + str(len(np.where(tmp_labels == 0)[0]))) # print('ignored: ' + str(len(np.where(tmp_labels == -1)[0]))) return labels, v_target, o_target, j_index2, k_index2 else: # the image has been seen before; only generate the hard negatives and add them in if DEBUG and SHOW_SOME: print('not the first pass') # first find the negative samples bg_index = tmp_all_bg_index inds_inside = tmp_inds_inside # find the anchors whose score is above a threshold rpn_cls_prob = np.reshape(rpn_cls_prob, [-1, 2]) rpn_cls_prob = rpn_cls_prob[inds_inside, :] fg_score = rpn_cls_prob[:, 1] high_score = fg_score > 0.5 # find the samples that are both negatives and highly scored assert bg_index.shape == high_score.shape # this gives the hard-negative mask hard_neg = bg_index * high_score if DEBUG: print('number of negatives: ' + str(len(np.where(bg_index == True)[0]))) print('number scoring above 0.5: ' + str(len(np.where(high_score == True)[0]))) print('number of hard negatives: ' + str(len(np.where(hard_neg == True)[0]))) # when training this image for the second time, the training samples are the randomly drawn positives/negatives from the first pass plus this pass's hard negatives labels = tmp_labels.copy() first_gen_index = labels != -1 hard_neg_index = hard_neg assert first_gen_index.shape == hard_neg_index.shape, 'line 282' diff = hard_neg_index * 1 - first_gen_index * 1 new_hard_index = diff == 1 assert labels.shape == new_hard_index.shape if DEBUG: print('number of negatives in the first-pass labels loaded here: ' + str(len(np.where(labels == 0)[0]))) print('number of hard negatives not already in the first-pass samples: ' + str(len(np.where(new_hard_index == True)[0]))) print('number of negatives before adding the hard samples: ' + str(len(np.where(labels == 0)[0]))) labels[new_hard_index] = 0 if DEBUG or SHOW_SOME: print('number of negatives after adding the hard samples: ' + str(len(np.where(labels == 0)[0]))) print('positives: ' + str(len(np.where(labels == 1)[0]))) print('negatives: ' + str(len(np.where(labels == 0)[0]))) print('ignored: ' + str(len(np.where(labels == -1)[0]))) # print('the negative count of the saved tmp_labels should be unchanged: ' + str(len(np.where(tmp_labels == 0)[0]))) # at this point the labels are ready # the other targets are unchanged; just load the ones saved on the first pass v_target = tmp_v_target o_target = tmp_o_target j_index2 = tmp_j_index2 k_index2 = tmp_k_index2 return labels, v_target, o_target, j_index2, k_index2
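# The second-pass branch above boils down to a simple mining rule. A condensed sketch
# of that rule as a hypothetical standalone helper, written against NumPy arrays shaped
# like the inside-anchor labels (first_pass_labels: the labels saved before unmapping,
# first_pass_bg_mask: the pre-subsampling background mask, fg_score: current RPN
# foreground probabilities for the same anchors):
import numpy as np

def add_hard_negatives(first_pass_labels, first_pass_bg_mask, fg_score, score_thresh=0.5):
    """Anchors that were background before subsampling, that the RPN now scores
    as foreground above score_thresh, and that were not kept in the first random
    draw, are added back as extra negatives."""
    hard_neg = first_pass_bg_mask & (fg_score > score_thresh)
    labels = first_pass_labels.copy()
    new_hard = hard_neg & (first_pass_labels == -1)   # not used on the first pass
    labels[new_hard] = 0
    return labels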
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): """Same as the anchor target layer in original Fast/er RCNN """ A = num_anchors total_anchors = all_anchors.shape[0] K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # pytorch (bs, c, h, w) height, width = rpn_cls_score.shape[2:4] # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] # keep only inside anchors anchors = all_anchors[inds_inside, :] # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) # only the positive ones have regression targets bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples # positive_weights = np.ones((1, 4)) # negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = 
_unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
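# This variant, like the others, leans on an anchor-level _compute_targets helper that
# is not reproduced here. The sketch below follows the conventional Faster R-CNN
# definition and is an assumption, not necessarily this project's exact code; it simply
# wraps the project's bbox_transform delta computation (unlike the RoI-level helper,
# it carries no class label column in its output).
import numpy as np

def _compute_targets(ex_rois, gt_rois):
    """Anchor-level regression targets: bbox_transform deltas from each anchor
    to its matched ground-truth box."""
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5
    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)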
def anchor_target_layer(rpn_cls_score_list, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): """Same as the anchor target layer in original Fast/er RCNN """ A_s = num_anchors total_anchors = all_anchors.shape[0] # K = total_anchors / num_anchors # allow boxes to sit over the edge by a small amount _allowed_border = 0 # pytorch (bs, c, h, w) heights = [rpn_cls_score.shape[2] for rpn_cls_score in rpn_cls_score_list] widths = [rpn_cls_score.shape[3] for rpn_cls_score in rpn_cls_score_list] # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('inds_inside', len(inds_inside)) print('total anchors', total_anchors) # keep only inside anchors anchors = all_anchors[inds_inside, :] # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # first set the negatives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # --------------------ignore handling---------------------------- #tmp = [gt for gt in gt_boxes if gt[4] == 1] tttinds = np.where(gt_boxes[:, 4] == 1)[0] if len(tttinds) > 0: tmp = gt_boxes[tttinds, :] # calculate overlaps between anchors and ignore regions overlaps2 = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(tmp, dtype=np.float)) # find max value argmax_overlaps2 = overlaps2.argmax(axis=1) max_overlaps2 = overlaps2[np.arange(len(inds_inside)), argmax_overlaps2] # ignore high overlaps by setting them to -1 (ignore) labels[max_overlaps2 >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 # --------------------ignore handling---------------------------- # import pdb; pdb.set_trace() # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) # only the positive ones have regression targets
bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples # positive_weights = np.ones((1, 4)) # negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) begin_cnt = 0 end_cnt = 0 begin_cnt_bbox = 0 end_cnt_bbox = 0 labels_list = list() bbox_targets_list = list() bbox_inside_weights_list = list() bbox_outside_weights_list = list() for height, width, A in zip(heights, widths, A_s): begin_cnt = end_cnt end_cnt += 1 * height * width * A labels_part = labels[begin_cnt:end_cnt] # labels labels_part = labels_part.reshape( (1, height, width, A)).transpose(0, 3, 1, 2) labels_part = labels_part.reshape((1, 1, A * height, width)).reshape( (-1, )) labels_list.append(labels_part) # begin_cnt_bbox = end_cnt_bbox # end_cnt_bbox += 1*height*width*A*4 # bbox_targets_part = bbox_targets[begin_cnt_bbox:end_cnt_bbox] # bbox_inside_weights_part = bbox_inside_weights[begin_cnt_bbox:end_cnt_bbox] # bbox_outside_weights_part = bbox_outside_weights[begin_cnt_bbox:end_cnt_bbox] # # # bbox_targets # bbox_targets_part = bbox_targets_part.reshape((1, height, width, A * 4)) # bbox_targets_list.append(bbox_targets_part) # # # bbox_inside_weights # bbox_inside_weights_part = bbox_inside_weights_part.reshape((1, height, width, A * 4)) # bbox_inside_weights_list.append(bbox_inside_weights_part) # # # bbox_outside_weights # bbox_outside_weights_part = bbox_outside_weights_part.reshape((1, height, width, A * 4)) # bbox_outside_weights_list.append(bbox_outside_weights_part) assert total_anchors == end_cnt labels = np.concatenate(labels_list, axis=0) # labels # labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) # labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets # bbox_targets = bbox_targets \ # .reshape((1, height, width, A * 4)) rpn_bbox_targets = bbox_targets # bbox_inside_weights # bbox_inside_weights = bbox_inside_weights \ # .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights # bbox_outside_weights = bbox_outside_weights \ # .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
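# Every layer in this file leans on the bbox_overlaps utility, which is normally shipped
# as a compiled extension and is not shown here. For reference, a pure-NumPy sketch with
# the same interface and the usual +1 pixel convention; it is an approximation for
# readability, not a performance-equivalent drop-in.
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """boxes: (N, 4), query_boxes: (K, 4); returns the (N, K) IoU matrix."""
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        q_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                  (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw <= 0:
                continue
            ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                  max(boxes[n, 1], query_boxes[k, 1]) + 1)
            if ih <= 0:
                continue
            union = ((boxes[n, 2] - boxes[n, 0] + 1) *
                     (boxes[n, 3] - boxes[n, 1] + 1) + q_area - iw * ih)
            overlaps[n, k] = iw * ih / union
    return overlaps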
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): """Generate a random sample of RoIs comprising foreground and background examples. """ # overlaps: (rois x gt_boxes) overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data) max_overlaps, gt_assignment = overlaps.max(1) labels = gt_boxes[gt_assignment, [4]] # Guard against the case when an image has fewer than fg_rois_per_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = ( (max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) #-----------------------ignore handling-------------------- # Select foreground RoIs as those with >= FG_THRESH overlap # import pdb; pdb.set_trace() # gt_boxes2 = [gt for gt in gt_boxes if gt[4].data[0]==15] # if len(gt_boxes2) == 0: # import pdb; pdb.set_trace() # then we choose positive regions # we only keep pedestrain regions # import pdb;pdb.set_trace() # # gt_boxes = [gt for gt in gt_boxes if gt[4].data[0]==15] # # gt_boxes = torch.stack(gt_boxes) tttinds = (gt_boxes[:, 4] == 15).nonzero().view(-1) gt_boxes = gt_boxes[tttinds] # import pdb; pdb.set_trace() overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data) max_overlaps, gt_assignment = overlaps.max(1) labels = gt_boxes[gt_assignment, [4]] fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) #-----------------------ignore handling-------------------- # print('fg_rois_per_image:',fg_rois_per_image,'fg_inds.numel():',fg_inds.numel()) # Small modification to the original version where we ensure a fixed number of regions are sampled if fg_inds.numel() > 0 and bg_inds.numel() > 0: ''' to_replace = fg_inds.numel() < fg_rois_per_image fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=to_replace)).long().cuda()] ''' fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] bg_rois_per_image = rois_per_image - fg_rois_per_image #print(bg_rois_per_image,fg_rois_per_image,'bg_rois_per_image,fg_rois_per_image proposal_target_layer.py(167)') to_replace = bg_inds.numel() < bg_rois_per_image if to_replace: bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] else: bg_inds = bg_inds[:int(bg_rois_per_image)] elif fg_inds.numel() > 0: to_replace = fg_inds.numel() < rois_per_image fg_inds = fg_inds[torch.from_numpy( npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] fg_rois_per_image = rois_per_image elif bg_inds.numel() > 0: to_replace = bg_inds.numel() < rois_per_image bg_inds = bg_inds[torch.from_numpy( npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] fg_rois_per_image = 0 else: import pdb pdb.set_trace() # The indices that we're selecting (both fg and bg) keep_inds = torch.cat([fg_inds, bg_inds], 0) # Select sampled values from various arrays: labels = labels[keep_inds].contiguous() # Clamp labels for the background RoIs to 0 # import pdb;pdb.set_trace() # print(fg_rois_per_image,len(labels)) labels[int(fg_rois_per_image):] = 0 rois = all_rois[keep_inds].contiguous() roi_scores = all_scores[keep_inds].contiguous() bbox_target_data = _compute_targets( rois[:, 1:5].data, 
gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) bbox_targets, bbox_inside_weights = \ _get_bbox_regression_labels(bbox_target_data, num_classes) return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
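# _sample_rois hands its (cls, dx, dy, dw, dh) rows to _get_bbox_regression_labels,
# which is not shown in this section. Below is a NumPy-flavoured sketch of the
# conventional per-class expansion that helper performs; the file's own version is
# presumably tensor-based, and the inside_weights default here is an assumption in
# place of the usual cfg.TRAIN.BBOX_INSIDE_WEIGHTS.
import numpy as np

def _get_bbox_regression_labels_np(bbox_target_data, num_classes,
                                   inside_weights=(1.0, 1.0, 1.0, 1.0)):
    """bbox_target_data: (N, 5) rows of [cls, dx, dy, dw, dh]; returns
    (N, 4*num_classes) targets with each row's deltas placed in its class slot,
    plus matching inside weights (non-zero only for foreground rows)."""
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    for ind in np.where(clss > 0)[0]:
        start = int(4 * clss[ind])
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = inside_weights
    return bbox_targets, bbox_inside_weights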
def rpn_target(anchors, inside_inds, gt_labels, gt_boxes): def box_transform(et_boxes, gt_boxes): et_ws = et_boxes[:, 2] - et_boxes[:, 0] + 1.0 et_hs = et_boxes[:, 3] - et_boxes[:, 1] + 1.0 et_cxs = et_boxes[:, 0] + 0.5 * et_ws et_cys = et_boxes[:, 1] + 0.5 * et_hs gt_ws = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0 gt_hs = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0 gt_cxs = gt_boxes[:, 0] + 0.5 * gt_ws gt_cys = gt_boxes[:, 1] + 0.5 * gt_hs dxs = (gt_cxs - et_cxs) / et_ws dys = (gt_cys - et_cys) / et_hs dws = np.log(gt_ws / et_ws) dhs = np.log(gt_hs / et_hs) deltas = np.vstack((dxs, dys, dws, dhs)).transpose() return deltas CFG = EasyDict() CFG.TRAIN = EasyDict() CFG.TRAIN.RPN_BATCHSIZE = 100 CFG.TRAIN.RPN_FG_FRACTION = 0.5 CFG.TRAIN.RPN_FG_THRESH_LO = 0.7 CFG.TRAIN.RPN_BG_THRESH_HI = 0.3 inside_anchors = anchors[inside_inds, :] # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inside_inds), ), dtype=np.int32) labels.fill(-1) # overlaps between the anchors and the gt process overlaps = bbox_overlaps( np.ascontiguousarray(inside_anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inside_inds)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] labels[max_overlaps < CFG.TRAIN.RPN_BG_THRESH_HI] = 0 # bg label labels[ gt_argmax_overlaps] = 1 # fg label: for each gt, anchor with highest overlap labels[max_overlaps >= CFG.TRAIN.RPN_FG_THRESH_LO] = 1 # fg label: above threshold IOU # subsample positive labels num_fg = int(CFG.TRAIN.RPN_FG_FRACTION * CFG.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels num_bg = CFG.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 idx_label = np.where(labels != -1)[0] idx_target = np.where(labels == 1)[0] pos_neg_inds = inside_inds[idx_label] labels = labels[idx_label] pos_inds = inside_inds[idx_target] pos_anchors = inside_anchors[idx_target] pos_gt_boxes = gt_boxes[argmax_overlaps][idx_target] targets = box_transform(pos_anchors, pos_gt_boxes) return pos_neg_inds, pos_inds, labels, targets
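# A hypothetical smoke test for rpn_target: three anchors and one ground-truth box
# (gt_labels is accepted but unused by the function, so None is passed). The expected
# behaviour described in the comments assumes the standard IoU semantics of
# bbox_overlaps used throughout this file.
import numpy as np

anchors = np.array([[0, 0, 15, 15],
                    [8, 8, 23, 23],
                    [100, 100, 131, 131]], dtype=np.float64)
inside_inds = np.arange(anchors.shape[0])
gt_boxes = np.array([[6, 6, 24, 24]], dtype=np.float64)

pos_neg_inds, pos_inds, labels, targets = rpn_target(anchors, inside_inds, None, gt_boxes)
# Expected: the second anchor (highest IoU with the gt) is labelled 1, the other two 0;
# targets holds one row of (dx, dy, dw, dh) deltas for that positive anchor.
print(pos_neg_inds, labels)
print(pos_inds, targets.shape)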