def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build one roidb entry per image from a list of proposal boxes.

    Parameters
    ----------
    box_list : sequence of arrays
        ``box_list[i]`` holds the boxes of image ``i``; only ``shape[0]``
        and ``astype`` are used here.  Must have ``self.num_images`` items.
    gt_roidb : sequence of dicts or None
        When given, ``gt_roidb[i]['boxes']`` and ``gt_roidb[i]['gt_classes']``
        are used to fill the overlap matrix of image ``i``.

    Returns
    -------
    list of dict
        Each dict has the keys ``'boxes'``, ``'gt_classes'`` (all zeros),
        ``'gt_overlaps'`` (scipy CSR matrix of shape
        ``(num_boxes, self.num_classes)``), ``'flipped'`` (always False) and
        ``'seg_areas'`` (all zeros).
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    # `range` works on both Python 2 and 3 (the original `xrange` is Py2-only).
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 replaces the removed `np.float` alias (same dtype).
            gt_overlaps = cython_bbox.bbox_overlaps(
                boxes.astype(np.float64), gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only boxes that touch some gt box get a non-zero entry, stored
            # in the column of the best-matching gt box's class.
            touching = np.where(maxes > 0)[0]
            overlaps[touching, gt_classes[argmaxes[touching]]] = maxes[touching]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN outputs against the ground truth (debug-printing variant).

    The proposals are first reduced by ``sample_rpn_outputs``; the survivors
    are then split into foreground / background by their best overlap with
    ``gt_boxes`` and randomly sub-sampled according to ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes, scores, batch_inds)`` for the kept rois followed by
        ``(boxes, scores, batch_inds)`` for the rois selected for masks.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    # `boxes.size > 0` guards `overlaps.argmax(axis=0)`, which raises on a
    # matrix with zero rows; with no proposals we fall through to the
    # background-only branch and return empty selections.
    if gt_boxes.size > 0 and boxes.size > 0:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]

        if _DEBUG:
            matched_gt = np.argmax(overlaps[fg_inds], axis=1)
            # gt_boxes.size / 5.0 is the gt count assuming 5 columns per box
            # -- TODO confirm against the caller.
            if matched_gt.size < gt_boxes.size / 5.0:
                print("gt_size")
                print(gt_boxes)
                # Extents along columns (2 - 0) and (3 - 1); presumably the
                # x- and y-extent for [x1, y1, x2, y2] boxes.
                extent_02 = gt_boxes[:, 2] - gt_boxes[:, 0]
                extent_13 = gt_boxes[:, 3] - gt_boxes[:, 1]
                gt_dim = np.vstack((extent_02, extent_13))
                print(np.transpose(gt_dim))
                print('SAMPLE: %d after overlaps by %s' % (len(fg_inds), cfg.FLAGS.fg_threshold))
                print("detected object no.")
                print(matched_gt)
                print("total object")
                print(gt_boxes.size / 5.0)

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(
                mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # Always keep, for every gt box, the proposal overlapping it most.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 8.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # No ground truth (or no proposals): everything is background.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 8)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds], \
        boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into per-roi learning targets.

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois: the bounding boxes of shape (N, 4+)
    num_classes: K
    mask_height, mask_width: height and width of the output masks

    Returns
    -------
    labels: length-N array; the class of each roi's best-overlapping gt box
        (all -1 when there is no ground truth)
    mask_targets: int32 array of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of the same shape, set to 1 on the
        class channel of every sampled roi
    """
    total_masks = rois.shape[0]
    mask_targets = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
    mask_inside_weights = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    if gt_boxes.size == 0:
        # Without ground truth `overlaps.argmax(axis=1)` would raise; return
        # "ignore" labels and all-zero targets instead.
        labels = np.full((total_masks,), -1, dtype=np.float32)
        return labels, mask_targets, mask_inside_weights

    # B x G; np.float64 replaces the removed `np.float` alias (same dtype).
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N
    labels = gt_boxes[gt_assignment, 4]  # N

    # Sample positive rois whose overlap reaches the mask threshold.
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
            % (num_masks, rois.shape[0], gt_masks.shape[0]))

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        # Clamp the slice starts: a negative start would wrap around and
        # yield an empty (or wrong) crop.
        x1, y1 = max(int(roi[0]), 0), max(int(roi[1]), 0)
        x2, y2 = int(roi[2]), int(roi[3])
        cropped = gt_masks[gt_assignment[i], y1:y2 + 1, x1:x2 + 1]
        cropped = cv2.resize(cropped, (mask_width, mask_height),
                             interpolation=cv2.INTER_NEAREST)
        cls = int(labels[i])
        mask_targets[i, :, :, cls] = cropped
        mask_inside_weights[i, :, :, cls] = 1

    return labels, mask_targets, mask_inside_weights
def encode(gt_boxes, all_anchors):
    """Label anchors against ground-truth boxes (truncated variant).

    :param gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    :param all_anchors: an array of shape (h, w, A, 4)

    NOTE(review): as visible here the function stops after computing the
    intersections with ignored regions and implicitly returns ``None``; the
    labels / bbox_targets / bbox_inside_weights outputs that the original
    docstring promised are never produced.  The tail of the function appears
    to be missing -- confirm against the full implementation.
    """
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)

    if gt_boxes.size > 0:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        # (A) best gt box per anchor
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        # (G) best anchor per gt box
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Rows of every anchor that ties the per-gt maximum overlap.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        # 0 - background, >= 1 - foreground class, -1 - ignore
        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        # ignore rpn_bg_threshold <= max_overlaps < rpn_fg_threshold
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1
        labels[gt_argmax_overlaps] = gt_boxes[
            gt_assignment[gt_argmax_overlaps], 4]
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # The original called np.ascontiguousarray() with no arguments
            # (a TypeError); the operands below are an assumption --
            # TODO confirm against the complete implementation.
            intersecs = cython_bbox.bbox_intersections(
                np.ascontiguousarray(ignored_areas, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN outputs for the refinement stage using the ground truth.

    ``sample_rpn_outputs`` first reduces the proposals; the survivors are
    split into foreground / background by their best overlap with
    ``gt_boxes`` and randomly sub-sampled according to ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes, scores, batch_inds)`` for the kept rois followed by
        ``(boxes, scores, batch_inds)`` for the rois selected for masks.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    # `boxes.size > 0`: with zero proposals `overlaps.argmax(axis=0)` would
    # raise, so that case is routed to the background-only branch.
    if gt_boxes.size > 0 and boxes.size > 0:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(
                mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # For every gt box, force-keep the proposal that overlaps it most.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        fg_rois = int(
            min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 64.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # No ground truth (or no proposals): everything is background.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds], \
        boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
def encode(gt_boxes, rois, num_classes):
    """Match rois to ground-truth boxes and encode regression targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois: an array of shape (R x 4+), [x1, y1, x2, y2, ...]
    num_classes: forwarded to ``_compute_targets``

    Returns
    --------
    labels: length-R array holding the class of each roi's best-overlapping
        gt box, with rois below ``cfg.FLAGS.bg_threshold`` set to 0
    bbox_targets, bbox_inside_weights: outputs of ``_compute_targets`` for
        the sampled rois, passed through ``_unmap`` with ``num_rois``
        (presumably scattered back to R rows -- verify against ``_unmap``)
    """
    all_rois = rois
    num_rois = rois.shape[0]
    # R x G matrix; np.float64 replaces the removed `np.float` alias.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # R
    max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
    labels = gt_boxes[gt_assignment, 4]

    # Sample foreground rois up to the configured fraction of the batch.
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
    fg_rois = int(
        min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
    if fg_inds.size > 0:
        fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

    # The rest of the batch is filled with background rois.
    bg_rois = cfg.FLAGS.rois_per_image - fg_rois
    bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
    labels[bg_inds] = 0
    if bg_inds.size > 0 and bg_rois < bg_inds.size:
        bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    # NOTE(review): `labels` has R entries while the boxes passed alongside
    # it are restricted to `keep_inds`; `labels[keep_inds]` may be what
    # `_compute_targets` expects -- confirm against its implementation.
    bbox_targets, bbox_inside_weights = _compute_targets(
        rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4],
        labels, num_classes)
    bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
    bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)

    return labels, bbox_targets, bbox_inside_weights
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Pick the RPN proposals that are passed on for refinement and masks.

    After ``sample_rpn_outputs`` has reduced the proposals, each survivor is
    matched to its best-overlapping gt box; foreground, background and mask
    rois are then chosen by the thresholds and budgets in ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes, scores, batch_inds)`` of the kept rois, then
        ``(boxes, scores, batch_inds)`` of the mask rois.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    has_gt = gt_boxes.size > 0
    # An overlap matrix with zero rows cannot be arg-maxed along axis 0, so
    # an empty proposal set is treated like the no-ground-truth case.
    has_boxes = boxes.size > 0

    if has_gt and has_boxes:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B

        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(
                mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # The best proposal of each gt box counts as foreground regardless
        # of how small its overlap is.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        fg_rois = int(min(fg_inds.size,
                          cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # Background budget: <= 3x the foregrounds, floor of 64.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # Every proposal is background and no roi is used for masks.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds], \
        boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
def sample_rois(boxes, image_inds, gt_boxes_list, fg_overlap_threshold=0.5, rois_per_image=512, fg_fraction=0.25, ignore_threshold=0.2):
    """Filter out ignored areas and sample rois with a bounded fg share.

    Parameters
    ----------
    boxes, image_inds : torch tensors (or Variables)
        Converted to NumPy through ``.data``; ``image_inds`` must have one
        entry per box.
    gt_boxes_list : list of arrays
        One array per image; column 4 is read as the class, and rows whose
        class is ``<= 0`` are treated as ignored areas.
    fg_overlap_threshold : float
        Boxes whose best overlap is below this get label 0.
    rois_per_image, fg_fraction : int, float
        Batch budget is ``rois_per_image * len(gt_boxes_list)``; at most
        ``fg_fraction`` of it is foreground.
    ignore_threshold : float
        Forwarded to ``cython_bbox.bbox_exclude_ignored_areas``.

    Returns
    -------
    (sampled_boxes, sampled_labels, sampled_image_inds) as torch tensors,
    moved to CUDA when ``boxes`` is on CUDA.  If nothing was sampled, the
    first input box is returned with label -1.
    """
    boxes_np = boxes.data.cpu().numpy() if boxes.is_cuda else boxes.data.numpy()
    image_inds_np = (image_inds.data.cpu().numpy()
                     if image_inds.is_cuda else image_inds.data.numpy())

    num_boxes = boxes_np.shape[0]
    assert num_boxes == image_inds_np.size

    sampled_boxes = []
    sampled_labels = []
    sampled_image_inds = []
    batch_size = len(gt_boxes_list)
    for i, gt_boxes in enumerate(gt_boxes_list):
        in_image = image_inds_np == i
        boxes_im = boxes_np[in_image]
        image_inds_im = image_inds_np[in_image]
        keep_inds = filter_boxes(boxes_im)
        boxes_im = boxes_im[keep_inds]
        image_inds_im = image_inds_im[keep_inds]
        num_boxes_im = boxes_im.shape[0]
        # Default label is background; this is also the result when the
        # image has no gt boxes.
        labels = np.zeros((num_boxes_im,), dtype=np.int64)

        if gt_boxes.size > 0:
            # B x G; np.float64 replaces the removed `np.float` alias.
            overlaps = cython_bbox.bbox_overlaps(
                np.ascontiguousarray(boxes_im, dtype=np.float64),
                np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
            gt_assignment = overlaps.argmax(axis=1)  # (B)
            max_overlaps = overlaps[np.arange(num_boxes_im), gt_assignment]
            labels[:] = gt_boxes[gt_assignment, 4]
            labels[max_overlaps < fg_overlap_threshold] = 0

            # Boxes flagged by the ignored-area check get label -1 and are
            # dropped below.
            ignored_mask = gt_boxes[:, 4] <= 0
            if np.any(ignored_mask):
                ignored_areas = gt_boxes[ignored_mask]
                ignored = cython_bbox.bbox_exclude_ignored_areas(
                    np.ascontiguousarray(boxes_im, dtype=np.float64),
                    np.ascontiguousarray(ignored_areas[:, :4], dtype=np.float64),
                    ignore_threshold)
                labels[ignored == 1] = -1

            # Append jittered copies of the valid ground-truth boxes.
            valid_inds = np.where(gt_boxes[:, 4] > 0)[0]
            gb = gt_boxes[valid_inds][:, :4].astype(np.float32)
            gb = jitter_boxes(gb)
            cls = gt_boxes[valid_inds][:, 4].astype(np.int64)
            boxes_im = np.concatenate((boxes_im, gb), axis=0)
            labels = np.concatenate((labels, cls), axis=0)
            assert labels.shape[0] == boxes_im.shape[0]
            gn = gb.shape[0]
            new_inds = np.zeros((gn,), dtype=image_inds_im.dtype) + i
            image_inds_im = np.concatenate((image_inds_im, new_inds), axis=0)

        not_ignored = labels >= 0
        sampled_boxes.append(boxes_im[not_ignored])
        sampled_labels.append(labels[not_ignored])
        sampled_image_inds.append(image_inds_im[not_ignored])

    sampled_boxes = np.concatenate(sampled_boxes, axis=0)
    sampled_labels = np.concatenate(sampled_labels, axis=0).astype(np.int64)
    sampled_image_inds = np.concatenate(sampled_image_inds, axis=0).astype(np.int64)

    # Faster R-CNN style sampling: cap the foregrounds, fill with background.
    bg_inds = np.where(sampled_labels == 0)[0]
    fg_inds = np.where(sampled_labels > 0)[0]
    num_fg = min(fg_inds.size, int(fg_fraction * rois_per_image * batch_size))
    if num_fg > 0:
        fg_inds = np.random.choice(fg_inds, num_fg, replace=False)
    num_bg = rois_per_image * batch_size - num_fg
    num_bg = min(num_bg, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = np.random.choice(bg_inds, num_bg, replace=False)
    keep_inds = np.append(fg_inds, bg_inds)

    sampled_labels = sampled_labels[keep_inds]
    sampled_boxes = sampled_boxes[keep_inds]
    sampled_image_inds = sampled_image_inds[keep_inds]

    # Guard against the case of no sampled rois.
    if sampled_labels.size == 0:
        sampled_boxes = boxes_np[:1, :]
        sampled_labels = np.array([-1], dtype=np.int64)
        sampled_image_inds = image_inds_np[:1].astype(np.int64)

    if boxes.is_cuda:
        return torch.from_numpy(sampled_boxes).cuda(), \
            torch.from_numpy(sampled_labels).cuda(), \
            torch.from_numpy(sampled_image_inds).cuda()

    return torch.from_numpy(sampled_boxes), \
        torch.from_numpy(sampled_labels), \
        torch.from_numpy(sampled_image_inds)
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match anchors to ground truth and encode RPN learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4)
    height, width: feature-map size used to reshape the outputs
    stride: downscale factor w.r.t. the input size; only used in debug logs

    Returns
    --------
    labels: array reshaped to (1, height, width, -1) with values in
        {-1 (ignored), 0 (background), 1 (foreground)}
    bbox_targets: regression targets reshaped to (1, height, width, -1)
    bbox_inside_weights: same layout, 0.1 on foreground anchors, else 0
    """
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]

    # Every anchor starts out ignored (-1).
    labels = np.full((anchors.shape[0],), -1, dtype=np.float32)

    if gt_boxes.size > 0:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0

        # fg label: for each gt, hard-assign the anchor with the highest
        # overlap, however small that overlap is.
        labels[gt_argmax_overlaps] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.FLAGS.bg_threshold:
                LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)'
                    % (min_ov, mean_ov, max_ov, stride, height, width))
                worst = gt_boxes[np.argmin(gt_max_overlaps)]
                anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4],
                       anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # Subsample positive labels if there are too many.
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
    else:
        # No ground truth: every anchor is background.
        labels[:] = 0

    # TODO: mild hard negative mining
    # Subsample negative labels if there are too many: at most 3 per
    # foreground within the batch budget, but never fewer than 8.
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 0.1

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode multi-channel ground-truth masks into per-roi targets.

    Params
    ------
    gt_masks: {0, 1} masks; per the original author's note the shape is
        (G, imh, imw, 7)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois: the bounding boxes of shape (N, 4+); negative entries are clamped
        to 0 IN PLACE when ground truth is present
    num_classes: K (must be >= 7, since channels 0..6 are written)
    mask_height, mask_width: height and width of the output masks

    Returns
    -------
    labels: float32 array of length N; the gt class for sampled positive
        rois, -1 everywhere else
    mask_targets: int32 array of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of the same shape, 1 on channels
        0..6 of every sampled roi
    """
    total_masks = rois.shape[0]
    labels = np.full((total_masks,), -1, dtype=np.float32)
    mask_targets = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
    # The no-gt branch of the original allocated this with
    # (mask_height, mask_height); both branches now share the right shape.
    mask_inside_weights = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    if gt_boxes.size == 0:
        # There is no gt: all labels stay -1, all targets stay zero.
        return labels, mask_targets, mask_inside_weights

    # B x G; np.float64 replaces the removed `np.float` alias (same dtype).
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N

    # Sample positive rois whose overlap reaches the mask threshold.
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0 and num_masks < keep_inds.size:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
            % (num_masks, rois.shape[0], gt_masks.shape[0]))

    labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

    # Masks are only defined on positive rois: anything below the fg
    # threshold is ignored.
    ignore_inds = np.where(max_overlaps < cfg.FLAGS.fg_threshold)[0]
    labels[ignore_inds] = -1

    # Clamp negative coordinates so they cannot wrap around as slice
    # starts.  NOTE: this mutates the caller's array, as before.
    rois[rois < 0] = 0

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        y1, y2 = int(roi[1]), int(roi[3]) + 1
        x1, x2 = int(roi[0]), int(roi[2]) + 1
        # 7 is the hard-coded channel count of gt_masks in this variant.
        for x in range(7):
            cropped = gt_masks[gt_assignment[i], y1:y2, x1:x2, x]
            cropped = cv2.resize(cropped, (mask_width, mask_height),
                                 interpolation=cv2.INTER_NEAREST)
            mask_targets[i, :, :, x] = cropped
            mask_inside_weights[i, :, :, x] = 1

    return labels, mask_targets, mask_inside_weights
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into per-roi, per-class learning targets.

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois: the bounding boxes of shape (N, 4+); negative entries are clamped
        to 0 IN PLACE when ground truth is present
    num_classes: K
    mask_height, mask_width: height and width of the output masks

    Returns
    -------
    labels: float32 array of length N; the gt class for sampled positive
        rois, -1 everywhere else
    mask_targets: int32 array of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of the same shape, 1 on the class
        channel of every sampled roi that kept a non-negative label
    """
    total_masks = rois.shape[0]
    labels = np.full((total_masks,), -1, dtype=np.float32)
    mask_targets = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
    # The original no-gt branch used (mask_height, mask_height) here, which
    # disagreed with mask_targets whenever the mask is not square.
    mask_inside_weights = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    if gt_boxes.size > 0:
        # B x G; np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # shape is N
        max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N

        # Sample positive rois whose overlap reaches the mask threshold.
        keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
        if keep_inds.size > 0 and num_masks < keep_inds.size:
            keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
            LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
                % (num_masks, rois.shape[0], gt_masks.shape[0]))

        labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

        # Masks are only defined on positive rois: anything below the fg
        # threshold is ignored.
        ignore_inds = np.where(max_overlaps < cfg.FLAGS.fg_threshold)[0]
        labels[ignore_inds] = -1

        # Clamp negative coordinates so they cannot wrap around as slice
        # starts.  NOTE: this mutates the caller's array, as before.
        rois[rois < 0] = 0

        # TODO: speed bottleneck?
        for i in keep_inds:
            cls = int(labels[i])
            if cls < 0:
                # An ignored roi (label -1) would otherwise index channel
                # -1, i.e. silently write into the last class.
                continue
            roi = rois[i, :4]
            cropped = gt_masks[gt_assignment[i],
                               int(roi[1]):int(roi[3]) + 1,
                               int(roi[0]):int(roi[2]) + 1]
            cropped = cv2.resize(cropped, (mask_width, mask_height),
                                 interpolation=cv2.INTER_NEAREST)
            mask_targets[i, :, :, cls] = cropped
            mask_inside_weights[i, :, :, cls] = 1

    # With no gt: labels stay -1 and both target arrays stay zero.
    return labels, mask_targets, mask_inside_weights
def encode(gt_boxes, all_anchors):
    """Single-shot anchor labelling and target encoding.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]; boxes
        whose class is negative are treated as ignored regions
    all_anchors: an array of shape (h, w, A, 4)

    Returns
    --------
    labels: float32 array of length N in [-1, num_classes]; negative labels
        are ignored
    label_weights: float32 array of length N; only filled by the 'simple'
        sampling strategy, zero otherwise
    bbox_targets: N x 4 float32 regression targets
    bbox_inside_weights: N x 4 float32, ``cfg.bbweights`` on anchors flagged
        for regression and 0 elsewhere

    Raises
    ------
    ValueError: if ``cfg.rpn_sample_strategy`` is not one of
        'traditional', 'simple', 'advanced'.
    """
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)

    if gt_boxes.size > 0:
        # np.float64 replaces the removed `np.float` alias (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_best_anchor = overlaps.argmax(axis=0)  # (G)
        gt_max_overlaps = overlaps[gt_best_anchor,
                                   np.arange(overlaps.shape[1])]
        # Rows of every anchor that ties the per-gt maximum overlap.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        # Anchors between the bg and fg thresholds are ignored.
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1

        # fg label: for each gt, hard-assign the anchor(s) with the highest
        # overlap, however small that overlap is.
        labels[gt_argmax_overlaps] = gt_boxes[
            gt_assignment[gt_argmax_overlaps], 4]

        # If clobbering positives, low-overlap anchors go back to
        # background even when they were some gt box's best match.
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.rpn_bg_threshold:
                LOG('ANCHORSS: overlaps: (min %.3f mean:%.3f max:%.3f)' %
                    (min_ov, mean_ov, max_ov))
                worst_gt = np.argmin(gt_max_overlaps)
                worst = gt_boxes[worst_gt]
                # Index the per-gt argmax; the np.where row list above is
                # not aligned with gt indices.
                anc = anchors[gt_best_anchor[worst_gt], :]
                LOG('ANCHORSS: worst overlap:%.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4],
                       anc[0], anc[1], anc[2], anc[3]))

        # Handle ignored regions (the gt_class of crowd boxes is set to -1).
        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # intersecs shape is D x A
            intersecs = cython_bbox.bbox_intersections(
                np.ascontiguousarray(ignored_areas, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            intersecs_ = intersecs.sum(axis=0)  # A
            too_much = intersecs_ > cfg.ignored_area_intersection_fraction
            labels[too_much] = -1
            bbox_flags_[too_much] = 0
    else:
        # If there is no gt, every anchor is background.
        labels = np.zeros([total_anchors], dtype=np.float32)

    label_weights = np.zeros((total_anchors,), dtype=np.float32)
    if cfg.rpn_sample_strategy == 'traditional':
        # Subsample positive labels if there are too many (inherited from
        # Fast R-CNN).
        num_fg = int(cfg.rpn_fg_fraction * cfg.rpn_batch_size)
        fg_inds = np.where(labels >= 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
        else:
            num_fg = len(fg_inds)
        # Subsample negative labels if there are too many.
        num_bg = max(min(cfg.rpn_batch_size - num_fg, num_fg * 5), 128)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = np.random.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
    elif cfg.rpn_sample_strategy == 'simple':
        # Use label_weights to balance the example losses.
        fg_inds = np.where(labels >= 1)[0]
        num_fg = len(fg_inds)
        label_weights[fg_inds] = 1.0
        bg_inds = np.where(labels == 0)[0]
        num_bg = len(bg_inds)
        label_weights[bg_inds] = 3 * max(num_fg, 1.0) / max(
            max(num_bg, num_fg), 1.0)
    elif cfg.rpn_sample_strategy == 'advanced':
        # Not implemented yet; labels and weights are left untouched.
        pass
    else:
        raise ValueError(
            'RPN sample strategy %s has not been implemented yet' %
            cfg.rpn_sample_strategy)

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[bbox_flags_ == 1, :] = np.asarray(cfg.bbweights,
                                                          dtype=np.float32)

    labels = labels.reshape((-1,))
    bbox_targets = bbox_targets.reshape((-1, 4))
    bbox_inside_weights = bbox_inside_weights.reshape((-1, 4))

    return labels.astype(np.float32), label_weights, bbox_targets.astype(
        np.float32), bbox_inside_weights.astype(np.float32)
# Script fragment exercising RoITarget on CUDA.  `rois` and `roi_batch_inds`
# are expected to be defined earlier in the script (not visible here).
gt_boxes = [
    [121, 120, 140, 150, 0],
    [100, 100, 120, 130, 0],
    [121, 120, 140, 150, -1],
    [100, 100, 120, 130, -1],
    [1, 1, 8, 8, 3],
    [13, 10, 23, 20, 4],
]
gt_batch_inds = [0, 0, 1, 1, 1, 1]

rois = np.asarray(rois, dtype=np.float32)
gt_boxes = np.asarray(gt_boxes, dtype=np.float32)
gt_batch_inds = np.asarray(gt_batch_inds, dtype=np.float32)
roi_batch_inds = np.asarray(roi_batch_inds, dtype=np.float32)

# Reference overlaps computed on the CPU.  np.float64 replaces the removed
# `np.float` alias (same dtype).
overlaps = cython_bbox.bbox_overlaps(
    np.ascontiguousarray(rois[:, :4], dtype=np.float64),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
gt_assignment = overlaps.argmax(axis=1)  # (R)
max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]

rois = Variable(torch.from_numpy(rois)).cuda()
gt_boxes = Variable(torch.from_numpy(gt_boxes)).cuda()
gt_batch_inds = Variable(torch.from_numpy(gt_batch_inds)).cuda().long()
roi_batch_inds = Variable(torch.from_numpy(roi_batch_inds)).cuda().long()

roi_target = RoITarget(0.55, box_encoding='fastrcnn')
labels, deltas, bbwght = roi_target(rois, roi_batch_inds, gt_boxes,
                                    gt_batch_inds)
# Result is discarded; the call is kept as in the original.
labels.cpu()
print(labels.size(), deltas.size(), bbwght.size())
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN proposals for the box head and for the mask head.

    The proposals are first reduced by ``sample_rpn_outputs`` (always called
    with ``with_nms=True``) and then split into foreground / background by
    their overlap with the ground-truth boxes.

    Parameters
    ---------
    boxes, scores: RPN outputs, forwarded to ``sample_rpn_outputs``
    gt_boxes: ground-truth array; columns 0..3 are the box, may be empty
    is_training: forwarded to ``sample_rpn_outputs``
    only_positive: Flag to exclude bbox with RPN score less than 0.5
        (forwarded to ``sample_rpn_outputs``)

    Returns
    --------
    A 6-tuple: (boxes, scores, batch_inds) of the rois kept for the box
    head, followed by (boxes, scores, batch_inds) of the rois kept for the
    mask head.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(
        boxes, scores,
        is_training=is_training,
        only_positive=only_positive,
        with_nms=True)

    if gt_boxes.size > 0 and boxes.size > 0:
        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B

        # rcnn foreground: boxes with high overlap ...
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        # ... plus, for every gt, the box that overlaps it the most
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        # mask foreground: boxes with high overlap, capped per image
        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(
                mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # limit rcnn foreground boxes
        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # limit rcnn background boxes (at most 3x the foreground, at least 8)
        # TODO: sampling strategy
        bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
        bg_rois = int(
            max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8))
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)

        # quick fix for an empty mask foreground: fall back to the rcnn rois.
        # Compared by value; the original `size is 0` relied on int identity.
        if mask_fg_inds.size == 0:
            mask_fg_inds = keep_inds
    else:
        # No ground truth (or no proposals): everything is background.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = int(
            min(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction), 8))
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        keep_inds = bg_inds
        mask_fg_inds = bg_inds

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds], \
        boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
def encode(gt_boxes, all_anchors, feature_height, feature_width, stride, image_height, image_width, ignore_cross_boundary=True):
    """Matching and Encoding groundtruth into learning targets

    Sampling

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4),
    feature_height: height of feature
    feature_width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
        (not referenced in this function body)
    image_height: height of image
    image_width: width of image
    ignore_cross_boundary: when it is exactly ``True``, anchors reported by
        ``_get_cross_boundary`` are labelled -1 (ignored)

    Returns
    --------
    labels: int32 array reshaped to (1, feature_height, feature_width, -1);
        1 = foreground, 0 = background, -1 = ignored
    bbox_targets: regression targets, reshaped the same way
    bbox_inside_weights: 1.0 on foreground anchors and 0 elsewhere,
        reshaped the same way
    """
    # TODO: speedup this module
    allow_border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape([-1, 4])
    total_anchors = all_anchors.shape[0]
    labels = np.empty((total_anchors, ), dtype=np.int32)
    labels.fill(-1)

    # Matching is done against jittered, image-clipped copies of the gt
    # boxes; the regression targets further down use the original gt_boxes.
    jittered_gt_boxes = jitter_gt_boxes(gt_boxes[:, :4])
    clipped_gt_boxes = clip_boxes(jittered_gt_boxes, (image_height, image_width))

    if gt_boxes.size > 0:
        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(clipped_gt_boxes, dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        # bg label: less than threshold IOU
        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # ignore cross-boundary anchors
        if ignore_cross_boundary is True:
            cb_inds = _get_cross_boundary(all_anchors, image_height,
                                          image_width, allow_border)
            labels[cb_inds] = -1

        # NOTE(review): nesting recovered from a whitespace-flattened source;
        # the hard assignment below is assumed to run regardless of
        # ignore_cross_boundary -- confirm against the upstream file.
        # this is sensitive to boxes of little overlaps, use with caution!
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        # fg label: for each gt, hard-assign anchor with highest overlap
        # despite its overlaps
        labels[gt_argmax_overlaps] = 1

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    # (at most 3x the positives, but never fewer than 8)
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(all_anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1.0  # 0.1

    labels = labels.reshape((1, feature_height, feature_width, -1))
    bbox_targets = bbox_targets.reshape((1, feature_height, feature_width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape(
        (1, feature_height, feature_width, -1))
    return labels, bbox_targets, bbox_inside_weights
def encode(gt_boxes, rois, num_classes):
    """Matching and Encoding groundtruth boxes (gt_boxes) into learning targets to boxes

    Sampling

    Parameters
    ---------
    gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois an array of shape (R x 4), [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: float32 array of length R; the gt class for sampled foreground
        rois, 0 for sampled background rois and -1 for ignored rois
    bbox_targets: of shape (N, Kx4) regression targets
    bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned.
    """
    all_rois = rois
    num_rois = rois.shape[0]
    if gt_boxes.size > 0:
        # R x G matrix
        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # R
        max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]  # R

        # Start with everything ignored; sampled rois are relabelled below.
        labels = np.zeros([num_rois], dtype=np.float32)
        labels[:] = -1

        # sample rois as to 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        # ignore rois with overlaps between fg_threshold and bg_threshold
        ignore_inds = np.where((max_overlaps > cfg.FLAGS.bg_threshold) &
                               (max_overlaps < cfg.FLAGS.fg_threshold))[0]
        labels[ignore_inds] = -1

        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print('keep_inds')
            print(keep_inds)
            print('fg_inds')
            print(fg_inds)
            print('bg_inds')
            print(bg_inds)
            print('bg_rois:', bg_rois)
            print('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
            # NOTE(review): nesting recovered from a whitespace-flattened
            # source; LOG is assumed to belong to the debug block.
            LOG('ROIEncoder: %d positive rois, %d negative rois' %
                (len(fg_inds), len(bg_inds)))

        # Targets are computed for the sampled rois only and then scattered
        # back to all R rows (other rows are filled with 0).
        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds], num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
    else:
        # there is no gt: every roi is background, with a random subset
        # ignored so that at most bg_rois of them stay labelled 0
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < num_rois:
            bg_inds = np.arange(num_rois)
            ignore_inds = np.random.choice(
                bg_inds, size=num_rois - bg_rois, replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights
def matching_box(boxes, image_inds, gt_boxes_list, bg_overlap_threshold=0.5, fg_overlap_threshold=0.6):
    """Match every box to a ground-truth box of its own image.

    Parameters
    ---------
    boxes: torch tensor with one box per row (all columns are passed to the
        overlap routine, so 4 columns are expected)
    image_inds: torch tensor with one image index per box
    gt_boxes_list: list of np.ndarray, one per image; columns 0..3 are the
        box and column 4 is the class
    bg_overlap_threshold: boxes whose best overlap is below this value get
        match index -1
    fg_overlap_threshold: boxes whose best overlap is below this value get
        label 0

    Returns
    --------
    (match_labels, match_inds, match_boxes) as torch tensors (int64, int32,
    float32), moved to CUDA when ``boxes`` is on CUDA.

    The outputs are built image by image (image 0 first, then image 1, ...),
    so they line up with ``boxes`` only when ``boxes`` is already grouped by
    ascending image index.
    """
    boxes_np = boxes.data.cpu().numpy() if boxes.is_cuda else boxes.data.numpy()
    if image_inds.is_cuda:
        image_inds_np = image_inds.cpu().numpy()
    else:
        image_inds_np = image_inds.numpy()

    num_boxes = boxes_np.shape[0]
    assert num_boxes == image_inds_np.size

    match_labels = []
    match_inds = []
    match_boxes = []
    for i, gt_boxes in enumerate(gt_boxes_list):
        boxes_im = boxes_np[image_inds_np == i]
        num_boxes_im = boxes_im.shape[0]

        # Defaults for "nothing matched": index -1, label 0, zero box.
        match = np.zeros((num_boxes_im, ), dtype=np.int32) - 1
        labels = np.zeros((num_boxes_im, ), dtype=np.int64)
        match_box = np.zeros((num_boxes_im, 4), dtype=np.float32)

        if gt_boxes.size > 0 and boxes_im.size > 0:
            # B x G
            # np.float64 replaces the alias np.float, which was removed in
            # NumPy 1.24 (same dtype).
            overlaps = cython_bbox.bbox_overlaps(
                np.ascontiguousarray(boxes_im, dtype=np.float64),
                np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
            gt_assignment = overlaps.argmax(axis=1)  # (B)
            max_overlaps = overlaps[np.arange(num_boxes_im), gt_assignment]

            match[:] = gt_assignment[:]
            match[max_overlaps < bg_overlap_threshold] = -1
            # The best gt box is recorded even for boxes that end up
            # unmatched / background.
            match_box[:, :4] = gt_boxes[gt_assignment, :4]
            labels[:] = gt_boxes[gt_assignment, 4]
            labels[max_overlaps < fg_overlap_threshold] = 0

        match_labels.append(labels)
        match_inds.append(match)
        match_boxes.append(match_box)

    match_labels = np.concatenate(match_labels, axis=0)
    match_inds = np.concatenate(match_inds, axis=0)
    match_boxes = np.concatenate(match_boxes, axis=0)

    if boxes.is_cuda:
        return torch.from_numpy(match_labels).cuda(), \
            torch.from_numpy(match_inds).cuda(), \
            torch.from_numpy(match_boxes).cuda()
    return torch.from_numpy(match_labels), \
        torch.from_numpy(match_inds), \
        torch.from_numpy(match_boxes)
def encode(gt_boxes, all_anchors, height, width, stride):
    """Matching and Encoding groundtruth into learning targets

    Sampling

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4); when None the anchors are
        generated with ``anchors_plane(height, width, stride=stride)``
    height: height of feature
    width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]

    Returns
    --------
    labels: 1 = foreground, 0 = background, -1 = ignored / outside the image
    bbox_targets: regression targets
    bbox_inside_weights: 1 on foreground anchors, 0 elsewhere

    All three are reshaped to (1, height, width, -1), except when no gt box
    is selected for this stride: that early return yields flat, all-zero
    arrays of shape (total_anchors,), (total_anchors, 4), (total_anchors, 4).
    """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    # anchors, inds_inside, total_anchors
    all_anchors = all_anchors.reshape((-1, 4))
    # Keep only anchors that lie completely inside the image.
    inds_inside = np.where((all_anchors[:, 0] >= 0) &
                           (all_anchors[:, 1] >= 0) &
                           (all_anchors[:, 2] < width * stride) &
                           (all_anchors[:, 3] < height * stride))[0]
    anchors = all_anchors[inds_inside, :]
    total_anchors = all_anchors.shape[0]

    # choose boxes to assign to this stride
    # TODO gt assignment outside
    areas = (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) * \
            (gt_boxes[:, 2] - gt_boxes[:, 0] + 1)
    ks = np.floor(4 + np.log2(np.sqrt(areas) / 224.0))
    K = int(np.log2(stride))
    # NOTE(review): `ks` already contains the +4 offset and is offset by 4
    # again here -- confirm this is the intended level mapping.
    inds = np.where((K == ks + 4))[0]
    if inds.size > 0:
        gt_boxes = gt_boxes[inds]
    else:
        # No gt box belongs to this stride: all-zero targets, not reshaped.
        labels = np.zeros((total_anchors), dtype=np.float32)
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        return labels, bbox_targets, bbox_inside_weights

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    # np.float64 replaces the alias np.float, which was removed in
    # NumPy 1.24 (same dtype).
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # (A)
    max_overlaps = overlaps[np.arange(len(inds_inside)), gt_assignment]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]

    # NOTE(review): nesting recovered from a whitespace-flattened source;
    # only the tie-expanding np.where is assumed to be disabled by
    # `if False`, while the per-gt assignment below stays active -- confirm
    # against the upstream file.
    if False:
        # this is sensitive to boxes of little overlaps, no need!
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # fg label: for each gt, assign anchor with highest overlap despite its overlaps
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1

    # subsample positive labels if there are too many
    num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_bg = cfg.FLAGS.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs: anchors outside the image get label -1 and
    # zero targets / weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))
    return labels, bbox_targets, bbox_inside_weights
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, indexs):
    """Encode masks groundtruth into learnable targets

    Sample some examples

    Params
    ------
    gt_masks: image_height x image_width {0, 1} matrix, of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois: the bounding boxes of shape (N, 4); negative coordinates are
        clipped to 0 IN PLACE when ground truth is present
    num_classes: K
    mask_height, mask_width: height and width of output masks
    indexs: returned unchanged

    Returns
    -------
    labels: int32 class ids of shape (N,); -1 for rois that were not sampled
    mask_targets: learning targets of shape (N, mask_height, mask_width, K)
    mask_inside_weights: of shape (N, mask_height, mask_width, K) in {0, 1},
        indicating which mask is sampled
    mask_rois: the (clipped) first four columns of rois, or zeros of shape
        (N, 4) when there is no ground truth
    indexs: the input value, passed through
    """
    total_masks = rois.shape[0]
    if gt_boxes.size > 0:
        # B x G
        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # shape is N
        max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N

        labels = np.zeros((total_masks, ), np.int32)
        labels[:] = -1

        # sample positive rois whose overlap reaches the mask threshold
        keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
        if keep_inds.size > 0 and num_masks < keep_inds.size:
            keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

        mask_targets = np.zeros(
            (total_masks, mask_height, mask_width, num_classes),
            dtype=np.float32)
        mask_inside_weights = np.zeros(
            (total_masks, mask_height, mask_width, num_classes),
            dtype=np.float32)
        rois[rois < 0] = 0

        # Loop invariants: gt mask size and the 20x oversampled grid the gt
        # mask is resized to before cropping.
        gt_height = gt_masks.shape[1]
        gt_width = gt_masks.shape[2]
        enlarged_width = mask_width * 20
        enlarged_height = mask_height * 20
        # cv2.resize takes dsize as integer (width, height); plain ints work
        # for both Python ints and NumPy scalars.
        enlarged_size = (int(enlarged_width), int(enlarged_height))
        output_size = (int(mask_width), int(mask_height))

        # TODO: speed bottleneck?
        # TODO: mask ground truth accuracy check
        for i in keep_inds:
            roi = rois[i, :4]
            cropped = gt_masks[gt_assignment[i], :, :]
            cropped = cv2.resize(cropped.astype(np.float32), enlarged_size,
                                 interpolation=cv2.INTER_CUBIC)
            # Crop the roi in the coordinates of the enlarged mask.
            y1 = int(round(roi[1] * enlarged_height / float(gt_height)))
            y2 = int(round(roi[3] * enlarged_height / float(gt_height)))
            x1 = int(round(roi[0] * enlarged_width / float(gt_width)))
            x2 = int(round(roi[2] * enlarged_width / float(gt_width)))
            cropped = cropped[y1:y2, x1:x2]
            cropped = cv2.resize(cropped.astype(np.float32), output_size,
                                 interpolation=cv2.INTER_CUBIC)
            # Only the channel of the assigned class is filled.
            mask_targets[i, :, :, labels[i]] = cropped
            mask_inside_weights[i, :, :, labels[i]] = 1.0
        mask_rois = rois[:, :4]
    else:
        # there is no gt
        labels = np.zeros((total_masks, ), np.int32)
        labels[:] = -1
        mask_targets = np.zeros(
            (total_masks, mask_height, mask_width, num_classes),
            dtype=np.float32)
        # Same shape as mask_targets (the original used mask_height twice).
        mask_inside_weights = np.zeros(
            (total_masks, mask_height, mask_width, num_classes),
            dtype=np.float32)
        mask_rois = np.zeros((total_masks, 4), dtype=np.float32)

    return labels, mask_targets, mask_inside_weights, mask_rois, indexs
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: optional sequence with one box array per image;
            when None, the roidb boxes whose gt class is 0 are used
        thresholds: IoU thresholds to evaluate; defaults to 0.5:0.05:0.95
        area: key selecting the ground-truth area range ('all', 'small',
            'medium', 'large', '96-128', '128-256', '256-512', '512-inf')
        limit: if given, only the first `limit` boxes per image are used

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: if `area` is not a known key, or if an image has
            fewer usable proposals than ground-truth boxes (the greedy
            matching then finds no remaining overlap >= 0).
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [
        [0**2, 1e5**2],     # all
        [0**2, 32**2],      # small
        [32**2, 96**2],     # medium
        [96**2, 1e5**2],    # large
        [96**2, 128**2],    # 96-128
        [128**2, 256**2],   # 128-256
        [256**2, 512**2],   # 256-512
        [512**2, 1e5**2],   # 512-inf
    ]
    # `in` replaces dict.has_key, which no longer exists in Python 3.
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]

    per_image_overlaps = []
    num_pos = 0
    # range replaces xrange (Python 2 only).
    for i in range(self.num_images):
        entry = self.roidb[i]
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(boxes.astype(np.float64),
                                             gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: repeatedly take the best covered gt
        # box, record its IoU, and retire both that gt and its proposal.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert(gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert(_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # collect recorded iou coverage levels (joined once after the loop)
        per_image_overlaps.append(_gt_overlaps)

    if per_image_overlaps:
        gt_overlaps = np.concatenate(per_image_overlaps)
    else:
        gt_overlaps = np.zeros(0)
    gt_overlaps = np.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}
def encode(gt_boxes, rois, num_classes):
    """Matching and Encoding groundtruth boxes (gt_boxes) into learning targets to boxes

    Sampling

    Parameters
    ---------
    gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois an array of shape (R x 4), [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: float32 array of length R; the gt class for sampled foreground
        rois, 0 for sampled background rois and -1 for ignored rois
    bbox_targets: of shape (N, Kx4) regression targets
    bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned.
    """
    all_rois = rois
    num_rois = rois.shape[0]
    if gt_boxes.size > 0:
        # R x G matrix
        # np.float64 replaces the alias np.float, which was removed in
        # NumPy 1.24 (same dtype).
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # R
        max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]  # R

        # Start with everything ignored; sampled rois are relabelled below.
        labels = np.zeros([num_rois], dtype=np.float32)
        labels[:] = -1

        # sample rois as to 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(min(fg_inds.size,
                          cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        # ignore rois with overlaps between fg_threshold and bg_threshold
        ignore_inds = np.where((max_overlaps > cfg.FLAGS.bg_threshold) &
                               (max_overlaps < cfg.FLAGS.fg_threshold))[0]
        labels[ignore_inds] = -1

        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print ('keep_inds')
            print (keep_inds)
            print ('fg_inds')
            print (fg_inds)
            print ('bg_inds')
            print (bg_inds)
            print ('bg_rois:', bg_rois)
            print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
            # NOTE(review): nesting recovered from a whitespace-flattened
            # source; LOG is assumed to belong to the debug block.
            LOG('ROIEncoder: %d positive rois, %d negative rois' %
                (len(fg_inds), len(bg_inds)))

        # Targets are computed for the sampled rois only and then scattered
        # back to all R rows (other rows are filled with 0).
        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds], num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
    else:
        # there is no gt: every roi is background, with a random subset
        # ignored so that at most bg_rois of them stay labelled 0
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < num_rois:
            bg_inds = np.arange(num_rois)
            ignore_inds = np.random.choice(
                bg_inds, size=num_rois - bg_rois, replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights