def decode(mask_targets, rois, classes, ih, iw):
    """Paint the per-ROI class masks onto one image-sized canvas.

    Parameters
    ----------
    mask_targets : ndarray of shape (N, h, w, K)
        One mask per ROI and per class.
    rois : ndarray of shape (N, 4)
        Boxes as [x1, y1, x2, y2]; they are clipped to the image with
        ``clip_boxes`` before use.
    classes : ndarray of shape (N,) or (N, 1)
        Class id of each ROI; used to pick the channel of ``mask_targets``
        and as the value painted into the canvas.
    ih, iw : int
        Image height and width.

    Returns
    -------
    Mask : float32 ndarray of shape (ih, iw)
        Canvas where each ROI region holds its resized mask multiplied by
        the ROI's class id. ROIs are painted in order, so a later ROI
        overwrites an earlier one wherever they overlap (including with
        zeros).

    Raises
    ------
    AssertionError
        If the number of ROIs differs from the number of masks.
    """
    Mask = np.zeros((ih, iw), dtype=np.float32)
    assert rois.shape[0] == mask_targets.shape[0], \
        '%s rois vs %d masks' % (rois.shape[0], mask_targets.shape[0])
    num = rois.shape[0]
    rois = clip_boxes(rois, (ih, iw))

    # Slice bounds and the cv2.resize target size must be integers; float
    # ROIs are truncated here (coordinates are non-negative after clipping,
    # assuming clip_boxes clamps to the image -- TODO confirm).
    rois = np.asarray(rois).astype(np.int32)
    # Flatten so that both (N,) and (N, 1) class arrays yield scalar ids.
    class_ids = np.asarray(classes).reshape(-1).astype(np.int32)

    for i in range(num):
        k = int(class_ids[i])
        x, y = int(rois[i, 0]), int(rois[i, 1])
        w = int(rois[i, 2]) - x + 1
        h = int(rois[i, 3]) - y + 1
        mask = mask_targets[i, :, :, k]
        # cv2.resize takes the target size as (width, height).
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        mask *= k
        # paint
        Mask[y:y + h, x:x + w] = mask
    return Mask
def decode(boxes, scores, all_anchors, ih, iw, num_classes=None):
    """Decode anchor-relative predictions into clipped boxes.

    Parameters
    ----------
    boxes : ndarray
        Box regression output; reshaped to (-1, 4).
    scores : ndarray
        Class scores; reshaped to (-1, num_classes).
    all_anchors : ndarray
        Anchors as [x1, y1, x2, y2]; reshaped to (-1, 4).
    ih, iw : int
        Image height and width used for clipping.
    num_classes : int, optional
        Number of score columns per anchor. Defaults to ``cfg.num_classes``.

    Returns
    -------
    final_boxes : ndarray of shape (R, 4)
        Decoded boxes clipped to the image.
    classes : int32 ndarray of shape (R,)
        Index of the highest score of each row.
    scores : ndarray of shape (R, num_classes)
        The reshaped scores, returned unchanged.

    Raises
    ------
    AssertionError
        If scores, boxes and anchors do not have the same number of rows.

    Notes
    -----
    When ``cfg.rpn_box_encoding`` is neither ``'fastrcnn'`` nor ``'linear'``
    the regression output is not decoded and is only clipped.
    """
    num_classes = cfg.num_classes if num_classes is None else num_classes
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, num_classes))
    # The message must read `.shape[0]`: subscripting the bound method
    # `.reshape` raised TypeError and masked the real shape error.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    if cfg.rpn_box_encoding == 'fastrcnn':
        boxes = bbox_transform.bbox_transform_inv(all_anchors, boxes)
    elif cfg.rpn_box_encoding == 'linear':
        boxes = bbox_transform.bbox_transform_inv_linear(all_anchors, boxes)

    classes = np.argmax(scores, axis=1)
    final_boxes = boxes
    final_boxes = bbox_transform.clip_boxes(final_boxes, (ih, iw))
    classes = classes.astype(np.int32)
    return final_boxes, classes, scores
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode anchor-relative predictions into clipped boxes.

    Parameters
    ----------
    boxes : ndarray of shape (1, h, w, A*4)
        Box regression output; reshaped to (-1, 4).
    scores : ndarray of shape (1, h, w, A*2)
        Two scores per anchor; reshaped to (-1, 2).
    all_anchors : ndarray or None
        Anchors as [x1, y1, x2, y2]; reshaped to (-1, 4). When ``None`` they
        are generated with ``anchors_plane`` using a power-of-two stride
        estimated from ``iw`` and the feature-map width.
    ih, iw : int
        Image height and width.

    Returns
    -------
    final_boxes : float64 ndarray of shape (R, 4)
        Decoded boxes clipped to the image.
    classes : ndarray of shape (R,)
        Index (0 or 1) of the larger score of each row.
    scores : ndarray of shape (R,)
        Second score column of each row.

    Raises
    ------
    AssertionError
        If scores, boxes and anchors do not have the same number of rows.
    """
    h, w = boxes.shape[1], boxes.shape[2]
    # `is None`, not `== None`: comparing an ndarray with == is element-wise
    # and its truth value is ambiguous.
    if all_anchors is None:
        stride = 2 ** int(round(np.log2((iw + 0.0) / w)))
        all_anchors = anchors_plane(h, w, stride=stride)

    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # `.shape[0]`, not `.reshape[0]`: subscripting the bound method raised
    # TypeError instead of performing the row-count check.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    scores = scores[:, 1]

    final_boxes = np.zeros((boxes.shape[0], 4))
    if boxes.shape[1] == 4:
        # Class-agnostic boxes: one box per row. Offsetting by class * 4
        # selected an empty slice for class 1 and made the copy fail.
        final_boxes[:] = boxes
    else:
        # Wider layout: pick the 4 columns belonging to the winning class.
        for i in range(final_boxes.shape[0]):
            c = classes[i] * 4
            final_boxes[i, 0:4] = boxes[i, c:c + 4]

    final_boxes = clip_boxes(final_boxes, (ih, iw))
    return final_boxes, classes, scores
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode anchor-relative predictions into clipped boxes.

    Parameters
    ----------
    boxes : ndarray of shape (1, h, w, A*4)
        Box regression output; reshaped to (-1, 4).
    scores : ndarray of shape (1, h, w, A*2)
        Two scores per anchor; reshaped to (-1, 2).
    all_anchors : ndarray
        Anchors as [x1, y1, x2, y2]; reshaped to (-1, 4), so an array that
        is already (R, 4) is accepted as is. Must not be ``None``.
    ih, iw : int
        Image height and width used for clipping.

    Returns
    -------
    final_boxes : ndarray of shape (R, 4)
        Decoded boxes clipped to the image.
    classes : int32 ndarray of shape (R,)
        Index (0 or 1) of the larger score of each row.
    scores : ndarray of shape (R,)
        Second score column of each row (the foreground score).

    Raises
    ------
    AssertionError
        If scores, boxes and anchors do not have the same number of rows.
    """
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The message must read `.shape[0]`: subscripting the bound method
    # `.reshape` raised TypeError and masked the real shape error.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    # Column 0 is background, column 1 is foreground: keep the foreground
    # probability.
    scores = scores[:, 1]
    final_boxes = boxes
    # Clipping is not expected to change the number of ROIs.
    final_boxes = clip_boxes(final_boxes, (ih, iw))
    classes = classes.astype(np.int32)
    return final_boxes, classes, scores
def decode(mask_targets, rois, classes, ih, iw):
    """Paint the per-ROI class masks onto one image-sized canvas.

    Parameters
    ----------
    mask_targets : ndarray of shape (N, h, w, K)
        One mask per ROI and per class.
    rois : ndarray of shape (N, 4)
        Boxes as [x1, y1, x2, y2]; they are clipped to the image with
        ``clip_boxes`` before use.
    classes : ndarray of shape (N,) or (N, 1)
        Class id of each ROI; used to pick the channel of ``mask_targets``
        and as the value painted into the canvas.
    ih, iw : int
        Image height and width.

    Returns
    -------
    Mask : float32 ndarray of shape (ih, iw)
        Canvas where each ROI region holds its resized mask multiplied by
        the ROI's class id. ROIs are painted in order, so a later ROI
        overwrites an earlier one wherever they overlap (including with
        zeros).

    Raises
    ------
    AssertionError
        If the number of ROIs differs from the number of masks.
    """
    Mask = np.zeros((ih, iw), dtype=np.float32)
    assert rois.shape[0] == mask_targets.shape[0], \
        '%s rois vs %d masks' % (rois.shape[0], mask_targets.shape[0])
    num = rois.shape[0]
    rois = clip_boxes(rois, (ih, iw))

    # Slice bounds and the cv2.resize target size must be integers; float
    # ROIs are truncated here (coordinates are non-negative after clipping,
    # assuming clip_boxes clamps to the image -- TODO confirm).
    rois = np.asarray(rois).astype(np.int32)
    # Flatten so that both (N,) and (N, 1) class arrays yield scalar ids.
    class_ids = np.asarray(classes).reshape(-1).astype(np.int32)

    for i in range(num):
        k = int(class_ids[i])
        x, y = int(rois[i, 0]), int(rois[i, 1])
        w = int(rois[i, 2]) - x + 1
        h = int(rois[i, 3]) - y + 1
        mask = mask_targets[i, :, :, k]
        # cv2.resize takes the target size as (width, height).
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        mask *= k
        # paint
        Mask[y:y + h, x:x + w] = mask
    return Mask
def decode(boxes, scores, all_anchors, image_height, image_width):
    """Decode anchor-relative predictions into clipped boxes.

    Parameters
    ----------
    boxes : ndarray of shape (1, h, w, A*4)
        Box regression output; reshaped to (-1, 4).
    scores : ndarray of shape (1, h, w, A*2)
        Two scores per anchor; reshaped to (-1, 2).
    all_anchors : ndarray
        Anchors as [x1, y1, x2, y2]; reshaped to (-1, 4).
    image_height, image_width : int
        Image size used for clipping.

    Returns
    -------
    boxes : ndarray of shape (R, 4)
        Decoded boxes clipped to the image.
    classes : int32 ndarray of shape (R,)
        Index (0 or 1) of the larger score of each row.
    scores : ndarray of shape (R,)
        Second score column of each row.

    Raises
    ------
    AssertionError
        If scores, boxes and anchors do not have the same number of rows.
    """
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The message must read `.shape[0]`: subscripting the bound method
    # `.reshape` raised TypeError and masked the real shape error.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    boxes = clip_boxes(boxes, (image_height, image_width))
    classes = np.argmax(scores, axis=1).astype(np.int32)
    scores = scores[:, 1]
    return boxes, classes, scores
def _offset_boxes(boxes, im_shape, scale, offs, flip):
    """Scale, shift, clip and optionally mirror a set of boxes.

    Parameters
    ----------
    boxes : array-like of shape (N, 4)
        Boxes with x coordinates in the even columns and y coordinates in
        the odd columns.
    im_shape : sequence
        Target image shape; passed to ``clip_boxes`` and ``im_shape[1]`` is
        used as the width for mirroring.
    scale : float
        Factor applied to every coordinate before shifting.
    offs : sequence of two numbers
        ``offs[0]`` is subtracted from x coordinates, ``offs[1]`` from y
        coordinates.
    flip : bool
        When true, mirror the boxes horizontally after clipping.

    Returns
    -------
    ndarray of shape (N, 4), dtype float64
        The transformed boxes. An empty input is returned unchanged.
    """
    if len(boxes) == 0:
        return boxes

    # np.float was removed from NumPy; float64 is what it aliased. np.array
    # copies, so the in-place arithmetic below never touches the caller's
    # array (np.asarray returned the same object for float64 input).
    boxes = np.array(boxes, dtype=np.float64)
    boxes *= scale
    boxes[:, 0::2] -= offs[0]
    boxes[:, 1::2] -= offs[1]
    boxes = clip_boxes(boxes, im_shape)

    if flip:
        # Keep the old x1 before it is overwritten: mirrored x1 comes from
        # x2 and mirrored x2 from x1.
        boxes_x = np.copy(boxes[:, 0])
        boxes[:, 0] = im_shape[1] - boxes[:, 2]
        boxes[:, 2] = im_shape[1] - boxes_x

    return boxes
def decode(boxes, scores, rois, ih, iw):
    """Turn per-class regression targets into one box per ROI.

    For every ROI only the box of its highest-scoring class is kept.

    Parameters
    ----------
    boxes : ndarray of shape (R, K*4)
        Regression targets, four values per class, relative to ``rois``.
    scores : ndarray of shape (R, K)
        Per-class scores of each ROI.
    rois : ndarray of shape (R, 4)
        Reference boxes as [x1, y1, x2, y2].
    ih, iw : int
        Image height and width used for clipping.

    Returns
    -------
    final_boxes : ndarray of shape (R, 4)
        Box of the best class of each ROI, clipped to the image.
    classes : ndarray of shape (R,)
        Index of the best class of each ROI.
    scores : ndarray of shape (R,)
        Score of the best class of each ROI.
    """
    decoded = bbox_transform_inv(rois, deltas=boxes)
    best_class = np.argmax(scores, axis=1)
    best_score = np.max(scores, axis=1)

    num_rois = decoded.shape[0]
    picked = np.zeros((num_rois, 4))
    for row in range(num_rois):
        # The four coordinates of class c live in columns [4c, 4c + 4).
        start = best_class[row] * 4
        picked[row, 0:4] = decoded[row, start:start + 4]

    picked = clip_boxes(picked, (ih, iw))
    return picked, best_class, best_score
def decode(boxes, scores, rois, ih, iw):
    """Turn per-class regression targets into one box per ROI.

    For every ROI only the box of its highest-scoring class is kept.

    Parameters
    ----------
    boxes : ndarray of shape (R, K*4)
        Regression targets, four values per class, relative to ``rois``.
    scores : ndarray of shape (R, K)
        Per-class scores of each ROI.
    rois : ndarray of shape (R, 4)
        Reference boxes as [x1, y1, x2, y2].
    ih, iw : int
        Image height and width used for clipping.

    Returns
    -------
    final_boxes : ndarray of shape (R, 4)
        Box of the best class of each ROI, gathered into a float32 array
        and then clipped to the image.
    classes : int32 ndarray of shape (R,)
        Index of the best class of each ROI.
    scores : ndarray of shape (R,)
        Score of the best class of each ROI.
    """
    decoded = bbox_transform_inv(rois, deltas=boxes)
    best_class = np.argmax(scores, axis=1).astype(np.int32)
    best_score = np.max(scores, axis=1)

    num_rois = decoded.shape[0]
    picked = np.zeros((num_rois, 4), dtype=np.float32)
    for row in range(num_rois):
        # The four coordinates of class c live in columns [4c, 4c + 4).
        start = best_class[row] * 4
        picked[row, 0:4] = decoded[row, start:start + 4]

    picked = clip_boxes(picked, (ih, iw))
    return picked, best_class, best_score
def encode(gt_boxes, all_anchors, feature_height, feature_width, stride,
           image_height, image_width, ignore_cross_boundary=True):
    """Match anchors to ground truth and build sampled learning targets.

    Parameters
    ----------
    gt_boxes : ndarray of shape (G, 5)
        Ground-truth boxes as [x1, y1, x2, y2, class].
    all_anchors : ndarray
        Anchors; reshaped to (-1, 4).
    feature_height, feature_width : int
        Feature-map size used to reshape the outputs.
    stride : int
        Downscale factor w.r.t. the input size. Not used by this function.
    image_height, image_width : int
        Image size used to clip the jittered ground truth and to detect
        cross-boundary anchors.
    ignore_cross_boundary : bool, optional
        When exactly ``True``, anchors reported by ``_get_cross_boundary``
        are marked as ignored.

    Returns
    -------
    labels : int32 ndarray of shape (1, feature_height, feature_width, -1)
        1 for foreground, 0 for background, -1 for ignored anchors.
    bbox_targets : ndarray reshaped to (1, feature_height, feature_width, -1)
        Regression targets from ``_compute_targets`` (zeros when there is
        no ground truth).
    bbox_inside_weights : float32 ndarray, same leading shape
        1.0 on the four coordinates of foreground anchors, 0 elsewhere.

    Notes
    -----
    Sub-sampling uses ``np.random.choice``, so the result depends on the
    global NumPy random state.
    """
    # TODO: speedup this module
    allow_border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape([-1, 4])
    total_anchors = all_anchors.shape[0]

    # Every anchor starts as "ignored".
    labels = np.empty((total_anchors, ), dtype=np.int32)
    labels.fill(-1)

    jittered_gt_boxes = jitter_gt_boxes(gt_boxes[:, :4])
    clipped_gt_boxes = clip_boxes(jittered_gt_boxes, (image_height, image_width))

    if gt_boxes.size > 0:
        # np.float was removed from NumPy; float64 is what it aliased.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(clipped_gt_boxes, dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        # bg label: less than threshold IOU
        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # ignore cross-boundary anchors
        if ignore_cross_boundary is True:
            cb_inds = _get_cross_boundary(all_anchors, image_height,
                                          image_width, allow_border)
            labels[cb_inds] = -1

        # This is sensitive to boxes with little overlap, use with caution!
        # All anchors tying for the best overlap of some gt are selected.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        # fg label: for each gt, hard-assign the anchor with the highest
        # overlap regardless of its overlap value
        labels[gt_argmax_overlaps] = 1

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt, every anchor is background
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many: keep at most three
    # times the number of positives (and what the batch allows), but never
    # fewer than 8
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        # Targets are computed against the original (un-jittered) gt rows.
        bbox_targets = _compute_targets(all_anchors, gt_boxes[gt_assignment, :])

    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1.0  # 0.1

    labels = labels.reshape((1, feature_height, feature_width, -1))
    bbox_targets = bbox_targets.reshape((1, feature_height, feature_width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape(
        (1, feature_height, feature_width, -1))

    return labels, bbox_targets, bbox_inside_weights