def sample_xyhs_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                     labels=None, overlaps=None, dbbox_targets=None, gt_boxes=None):
    """
    Sample a fixed-size minibatch of foreground / background RoIs and build
    their regression targets.

    :param rois: al_rois [n, 4]; e2e [n, 5] with batch_index. Whenever the
        function has to compute overlaps or targets itself it uses
        ``rois[:, 1:]`` as the box coordinates, i.e. column 0 is skipped.
    :param fg_rois_per_image: upper bound on the number of foreground RoIs
    :param rois_per_image: total number of RoIs to return
    :param num_classes: forwarded to ``expand_bbox_regression_targets_base``
    :param cfg: config object; reads ``cfg.TRAIN.FG_THRESH``,
        ``cfg.TRAIN.BG_THRESH_HI``, ``cfg.TRAIN.BG_THRESH_LO``,
        ``cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED`` and, when that flag is
        set, ``cfg.TRAIN.BBOX_MEANS`` / ``cfg.TRAIN.BBOX_STDS``
    :param labels: optional precomputed label per RoI. When None, labels and
        overlaps are computed from ``gt_boxes``.
    :param overlaps: optional precomputed max overlap per RoI (only used when
        ``labels`` is given)
    :param dbbox_targets: optional precomputed regression targets, indexed by
        RoI. When None, targets are computed from ``gt_boxes``, which needs
        the gt assignment that is only available when ``labels`` is None.
    :param gt_boxes: optional for e2e [n, 9] (x1, y1, ..., x4, y4, cls)
    :return: tuple ``(rois, labels, bbox_targets, bbox_weights)`` for the
        sampled RoIs; ``bbox_targets`` and ``bbox_weights`` are whatever
        ``expand_bbox_regression_targets_base`` returns
    :raises ValueError: if ``labels`` is given but ``dbbox_targets`` is not,
        because the gt assignment needed to compute targets is unknown
    """
    gt_assignment = None
    if labels is None:
        # Convert the polygon ground truth to horizontal boxes; column 4 of
        # the result is used as the class label below.
        hgt_boxes = bbox_poly2hbb(gt_boxes)
        # rois: (xmin, ymin, xmax, ymax) after dropping column 0.
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 hgt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = hgt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                          & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size; padding is drawn from all
    # RoIs and, sitting after the foreground slots, is labelled background
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, so the caller's array is untouched)
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if dbbox_targets is not None:
        bbox_target_data = dbbox_targets[keep_indexes, :]
    else:
        if gt_assignment is None:
            raise ValueError(
                'dbbox_targets must be provided when labels is provided: '
                'the gt assignment is only computed when labels is None')
        targets = dbboxtransform3_warp(rois[:, 1:],
                                       gt_boxes[gt_assignment[keep_indexes], :8])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets_base(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def assign_pyramid_anchor_poly(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions over all pyramid levels

    :param feat_shapes: infer output shape; one entry per stride, where
        ``feat_shapes[i][0][-2:]`` is read as (feat_height, feat_width)
    :param gt_boxes: ground truth; converted with ``bbox_poly2hbb`` before
        matching, after which columns 0..3 are used as the box
    :param im_info: ``im_info[0]`` is used; its element 0 bounds the anchor
        y-coordinates and element 1 bounds the x-coordinates
    :param cfg: config object; reads ``cfg.TRAIN.RPN_CLOBBER_POSITIVES``,
        ``RPN_NEGATIVE_OVERLAP``, ``RPN_POSITIVE_OVERLAP``, ``RPN_BATCH_SIZE``,
        ``RPN_FG_FRACTION`` and ``RPN_BBOX_WEIGHTS``
    :param feat_strides: anchor position step, one per pyramid level
    :param scales: used to generate anchors, affects num_anchors (per
        location); 1-D (shared by all levels) or 2-D (one row per level)
    :param ratios: aspect ratios of generated anchors; same layout as scales
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid
        level to an equal share of the background budget
    :return: dict with, for A anchors per location and H x W cells per level,
        'label': shape (1, sum over levels of A * H * W); 1 is positive,
            0 is negative, -1 is dont care
        'bbox_target': shape (1, A * 4, sum over levels of H * W); not
            normalized by any mean / std here
        'bbox_weight': same shape as 'bbox_target'; RPN_BBOX_WEIGHTS on
            positive anchors, 0 elsewhere
        (the per-level arrays are concatenated along the last axis, so A has
        to be the same on every level)
    """

    def _unmap(data, count, inds, fill=0):
        """Scatter `data` into a float32 array of leading size `count`,
        placing it at `inds` and using `fill` everywhere else."""
        ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    # When True, subsampling takes the leading indices instead of a random
    # draw, which makes the result reproducible.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    # number of inside-image anchors per level; turned into offsets later
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id, stride in enumerate(feat_strides):
        # len(scales.shape) == 1 just for backward compatibility, will remove
        # in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=stride, ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=stride,
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate the grid of (x, y, x, y) shifts, one per feature cell
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    gt_boxes = bbox_poly2hbb(gt_boxes)
    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber
            # positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> [start, end) offsets into the stacked anchor arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division keeps the per-level budget an integer; true division
        # would hand npr.choice a float ``size`` on Python 3.
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(len(feat_strides)):
            bg_ind_scale = bg_inds[
                (bg_inds >= fpn_anchors_fid[feat_id])
                & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(
                    bg_ind_scale,
                    size=(len(bg_ind_scale) - num_bg_scale),
                    replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets only for positive anchors; everything else stays 0
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        positive = fpn_labels >= 1
        fpn_bbox_targets[positive, :] = bbox_transform(
            fpn_anchors[positive, :],
            gt_boxes[argmax_overlaps[positive], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id, (feat_height, feat_width, A, total_anchors) in enumerate(fpn_args):
        start = fpn_anchors_fid[feat_id]
        end = fpn_anchors_fid[feat_id + 1]
        inds_inside = fpn_inds_inside[feat_id]

        # map up to original set of anchors
        labels = _unmap(fpn_labels[start:end], total_anchors, inds_inside,
                        fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:end], total_anchors,
                              inds_inside, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:end], total_anchors,
                              inds_inside, fill=0)

        # (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, A*H*W)
        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        # (K*A, 4) -> (1, H, W, A*4) -> (1, A*4, H, W) -> (1, A*4, H*W)
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2),
    }
    return label