def decode(boxes, scores, all_anchors, ih, iw):
    """Decode network outputs into clipped boxes, classes and scores.

    Parameters
    ---------
    boxes: regression outputs, documented as an array of shape (1, h, w, Ax4)
    scores: classification outputs, documented as an array of shape
        (1, h, w, Ax2)
    all_anchors: anchors, documented as an array of shape (1, h, w, Ax4) in
        [x1, y1, x2, y2] order. When None, anchors are generated with
        `anchors_plane` using a power-of-two stride estimated from iw / w.
    ih: input image height, passed to `clip_boxes`
    iw: input image width, passed to `clip_boxes` and used for the stride
        estimate

    Returns
    --------
    final_boxes: of shape (R x 4), clipped by `clip_boxes` to (ih, iw)
    classes: of shape (R), argmax over the two score columns, so in {0, 1}
    scores: of shape (R), the second score column
        (presumably a probability in [0, 1] -- depends on the caller)
    """
    h, w = boxes.shape[1], boxes.shape[2]
    # `== None` on an ndarray compares element-wise; an identity test is meant.
    if all_anchors is None:
        stride = 2**int(round(np.log2((iw + 0.0) / w)))
        all_anchors = anchors_plane(h, w, stride=stride)

    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The row counts live in `.shape`; `.reshape` is a method and cannot be
    # subscripted.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    scores = scores[:, 1]

    if boxes.shape[1] == 4:
        # One box per anchor: the class-dependent column slice c:c + 4 with
        # c = 4 would be empty and could never be assigned, so the box is
        # taken as it is. astype() copies, so `boxes` is not modified below.
        final_boxes = boxes.astype(np.float64)
    else:
        # Per-class layout: pick columns [4*c, 4*c + 4) of every row at once.
        rows = np.arange(boxes.shape[0])[:, np.newaxis]
        cols = classes[:, np.newaxis] * 4 + np.arange(4)
        final_boxes = boxes[rows, cols].astype(np.float64)

    final_boxes = clip_boxes(final_boxes, (ih, iw))
    return final_boxes, classes, scores
def _build_anchors(self):
    """Fill ``self.ANCHORS`` with one anchor plane per entry of ``cfg.strides``.

    Nothing happens when ``self.ANCHORS`` already holds at least one element.
    Each plane is produced by ``anchors_plane`` for a feature map whose size
    is ``cfg.input_size`` divided by the stride and truncated to an integer.
    """
    if len(self.ANCHORS) != 0:
        return

    ih, iw = cfg.input_size
    planes = []
    for level, stride in enumerate(cfg.strides):
        height = int(ih / stride)
        width = int(iw / stride)
        # A list at this index means scales are configured per level;
        # otherwise every level shares the whole cfg.anchor_scales value.
        if isinstance(cfg.anchor_scales[level], list):
            scales = cfg.anchor_scales[level]
        else:
            scales = cfg.anchor_scales
        planes.append(
            anchors_plane(height, width, stride,
                          scales=scales,
                          ratios=cfg.anchor_ratios,
                          base=cfg.anchor_base))
    self.ANCHORS = planes
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match ground-truth boxes to anchors and encode learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4); generated with
        `anchors_plane(height, width, stride=stride)` when None
    height: height of feature
    width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]

    Returns
    --------
    labels: 1 for foreground, 0 for background, -1 for anchors that were
        sub-sampled away (anchors outside the image are filled by `_unmap`
        with fill=-1)
    bbox_targets: regression targets from `_compute_targets`
    bbox_inside_weights: 1 on the rows of foreground anchors, 0 elsewhere

    In the normal path the three arrays are reshaped to
    (1, height, width, -1).

    NOTE(review): when no ground-truth box is selected for this stride, the
    function returns early with *flat* all-zero arrays of shape (N,), (N, 4)
    and (N, 4), N being the total anchor count. That shape difference is
    kept as it was; callers have to cope with both layouts.
    """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    all_anchors = all_anchors.reshape((-1, 4))
    total_anchors = all_anchors.shape[0]
    # Keep only anchors lying inside the (width * stride, height * stride) area.
    inds_inside = np.where(
        (all_anchors[:, 0] >= 0) &
        (all_anchors[:, 1] >= 0) &
        (all_anchors[:, 2] < width * stride) &
        (all_anchors[:, 3] < height * stride))[0]
    anchors = all_anchors[inds_inside, :]

    # choose boxes to assign to this stride
    # TODO gt assignment outside
    areas = (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) * \
            (gt_boxes[:, 2] - gt_boxes[:, 0] + 1)
    ks = np.floor(4 + np.log2(np.sqrt(areas) / 224.0))
    K = int(np.log2(stride))
    inds = np.where((K == ks + 4))[0]
    if inds.size == 0:
        # No box belongs to this stride: everything is background, no targets.
        labels = np.zeros((total_anchors), dtype=np.float32)
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        return labels, bbox_targets, bbox_inside_weights
    gt_boxes = gt_boxes[inds]

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    # np.float was only an alias of the builtin float and no longer exists in
    # NumPy >= 1.24; np.float64 is the dtype it resolved to.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # (A)
    max_overlaps = overlaps[np.arange(len(inds_inside)), gt_assignment]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G

    # fg label: for each gt, assign anchor with highest overlap despite its overlaps
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1

    # subsample positive labels if there are too many
    num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_bg = cfg.FLAGS.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))
    return labels, bbox_targets, bbox_inside_weights
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match ground-truth boxes to anchors and encode learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]; may be empty
    all_anchors: an array of shape (h, w, A, 4); generated with
        `anchors_plane(height, width, stride=stride)` when None
    height: height of feature
    width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]

    Returns
    --------
    labels: 1 for foreground, 0 for background, -1 for ignored anchors
        (sub-sampled away, best match below cfg.FLAGS.bg_threshold, or
        outside the allowed border, which `_unmap` fills with -1)
    bbox_targets: regression targets from `_compute_targets`; all zeros when
        there is no ground truth
    bbox_inside_weights: 1 on the rows of foreground anchors, 0 elsewhere

    All three arrays are reshaped to (1, height, width, -1).
    """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    # Anchors may stick out of the image by at most `border` pixels.
    border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape((-1, 4))
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border) &
        (all_anchors[:, 1] >= -border) &
        (all_anchors[:, 2] < (width * stride) + border) &
        (all_anchors[:, 3] < (height * stride) + border))[0]
    anchors = all_anchors[inds_inside, :]
    total_anchors = all_anchors.shape[0]

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    if gt_boxes.size > 0:
        # np.float was only an alias of the builtin float and no longer exists
        # in NumPy >= 1.24; np.float64 is the dtype it resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(len(inds_inside)), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        # fg label: for each gt, hard-assign anchor with highest overlap
        # despite its overlaps
        labels[gt_argmax_overlaps] = 1
        # ... but ignore those best matches whose overlap is still below the
        # background threshold (added later)
        excludes = np.where(gt_max_overlaps < cfg.FLAGS.bg_threshold)[0]
        labels[gt_argmax_overlaps[excludes]] = -1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.FLAGS.bg_threshold:
                LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)'
                    % (min_ov, mean_ov, max_ov, stride, height, width))
                worst = gt_boxes[np.argmin(gt_max_overlaps)]
                anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4],
                       anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU (may re-enable an excluded anchor)
        labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1

        # subsample positive labels if there are too many
        max_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > max_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - max_fg), replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many: at most three
    # negatives per positive, never more than what is left of the batch,
    # but always at least 8
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))
    return labels, bbox_targets, bbox_inside_weights
def data_layer(img_name, bboxes, classes, masks, mask, is_training, ANCHORS=None):
    """Load one image and build its learning labels.

    1. resize image, boxes, masks, mask
    2. data augmentation
    3. build learning labels

    Parameters
    ---------
    img_name: path handed to `cv2.imread`
    bboxes, classes, masks, mask: annotations of the image; when training
        they are replaced by the outputs of `preprocess_train`
    is_training: selects `preprocess_train` plus target encoding, or
        `preprocess_test` without targets
    ANCHORS: optional list of per-stride anchor arrays. An empty list is
        filled in place so the caller can reuse it. When omitted (None), the
        anchors are cached on the function per image size (ih, iw), so
        repeated calls do not rebuild them and a size change cannot pick up
        stale anchors.

    Returns
    --------
    (im, TARGETS, masks, mask, ori_im, ANNOTATIONS) where TARGETS is
    [labels, label_weights, bbox_targets, bbox_inside_weights] when training
    and [] otherwise, masks and mask are [] when not training, and
    ANNOTATIONS is [bboxes, classes].

    Raises
    --------
    IOError: when the image cannot be read.
    """
    im = cv2.imread(img_name)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('data_layer: cannot read image %s' % img_name)
    im = im.astype(np.float32)
    if im.size == im.shape[0] * im.shape[1]:
        # single-channel image: expand to three channels
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

    strides = cfg.strides
    if is_training:
        im, bboxes, classes, masks, mask, ori_im = \
            preprocess_train(im, bboxes, classes, masks, mask,
                             cfg.input_size, cfg.min_size,
                             use_augment=cfg.use_augment,
                             training_scale=cfg.training_scale)
        gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
    else:
        im, ori_im = preprocess_test(im, cfg.input_size)
        masks, mask = [], []

    ih, iw = im.shape[0:2]
    ANNOTATIONS = [bboxes, classes]

    if ANCHORS is None:
        # Explicit replacement for the former mutable default argument:
        # one shared list per image size, stored on the function object.
        cache = getattr(data_layer, '_anchor_cache', None)
        if cache is None:
            cache = data_layer._anchor_cache = {}
        ANCHORS = cache.setdefault((ih, iw), [])

    if len(ANCHORS) == 0:
        for i, stride in enumerate(strides):
            height, width = int(ih / stride), int(iw / stride)
            # A list at this index means scales are configured per level.
            scales = cfg.anchor_scales[i] if isinstance(
                cfg.anchor_scales[i], list) else cfg.anchor_scales
            ANCHORS.append(
                anchors_plane(height, width, stride,
                              scales=scales,
                              ratios=cfg.anchor_ratios,
                              base=cfg.anchor_base))

    # Flatten every level to rows of 4 coordinates and stack them.
    all_anchors = np.vstack([level.reshape((-1, 4)) for level in ANCHORS])

    # building learning labels
    TARGETS = []
    if is_training:
        labels, label_weights, bbox_targets, bbox_inside_weights = \
            anchor.encode(gt_boxes, all_anchors)
        TARGETS = [labels, label_weights, bbox_targets, bbox_inside_weights]

    return im, TARGETS, masks, mask, ori_im, ANNOTATIONS
s = np.random.randint(20, 50, (50, 2)) s = boxes + s boxes = np.concatenate((boxes, s), axis=1) gt_boxes = np.hstack((boxes, classes)) # gt_boxes = boxes N = 100 rois = np.random.randint(10, 50, (N, 2)) s = np.random.randint(0, 20, (N, 2)) s = rois + s rois = np.concatenate((rois, s), axis=1) indexs = np.arange(N) all_anchors = anchors_plane(200, 300, stride=4, scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16) labels, bbox_targets, bbox_inside_weights = encode( gt_boxes, all_anchors=all_anchors, height=200, width=300, stride=4, indexs=indexs) all_anchors = anchors_plane(100, 150, stride=8, scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0],