def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets have the format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (
        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
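
# Illustrative sketch (not used by the pipeline): the standard Faster R-CNN
# (tx, ty, tw, th) encoding that box_utils.bbox_transform_inv is assumed to
# implement above. The function name and default weights here are
# hypothetical; the real helper may fold in cfg.MODEL.BBOX_REG_WEIGHTS
# differently.
def _demo_bbox_transform_inv(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    import numpy as np
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = weights
    # One (tx, ty, tw, th) row per example roi
    return np.vstack((wx * (gt_cx - ex_cx) / ex_w,
                      wy * (gt_cy - ex_cy) / ex_h,
                      ww * np.log(gt_w / ex_w),
                      wh * np.log(gt_h / ex_h))).transpose()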
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)
        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(
            entry['boxes'],
            boxes.astype(entry['boxes'].dtype, copy=False),
            axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))
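
# Illustrative sketch (not used by the pipeline): a minimal NumPy pairwise
# IoU of the kind box_utils.bbox_overlaps is assumed to compute above, using
# the same inclusive-pixel (+1) convention as the rest of this file. Boxes
# are (x1, y1, x2, y2); the function name is hypothetical.
def _demo_bbox_overlaps(boxes, gt_boxes):
    import numpy as np
    x1 = np.maximum(boxes[:, None, 0], gt_boxes[None, :, 0])
    y1 = np.maximum(boxes[:, None, 1], gt_boxes[None, :, 1])
    x2 = np.minimum(boxes[:, None, 2], gt_boxes[None, :, 2])
    y2 = np.minimum(boxes[:, None, 3], gt_boxes[None, :, 3])
    inter = (np.clip(x2 - x1 + 1, 0, None) *
             np.clip(y2 - y1 + 1, 0, None))
    area_b = ((boxes[:, 2] - boxes[:, 0] + 1) *
              (boxes[:, 3] - boxes[:, 1] + 1))[:, None]
    area_g = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1) *
              (gt_boxes[:, 3] - gt_boxes[:, 1] + 1))[None, :]
    # Shape (num_boxes, num_gt); argmax/max along axis 1 give the fields
    # recorded into gt_overlaps and box_to_gt_ind_map above
    return inter / (area_b + area_g - inter)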
def add_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add DensePose (UV) R-CNN specific blobs to the input blob dictionary."""
    M = cfg.UVRCNN.HEATMAP_SIZE
    IsFlipped = roidb['flipped']

    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    if boxes_from_polys:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0):
        # Keep only fg rois that overlap a densepose-annotated gt box with
        # IoU > 0.7
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0):
        for jj in fg_inds:
            roi_has_mask[jj] = 1

        # Create blobs for densepose supervision.
        # The mask
        All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        # The points
        X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196),
                                            int32=False)

        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]

            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])

            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])
            GT_weights = np.ones(GT_I.shape).astype(np.float32)

            # Flip the densepose annotation if the image is flipped
            if IsFlipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = \
                    DP.get_symmetric_densepose(
                        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]

            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]

            x_targets = ((np.arange(x1, x2, (x2 - x1) / M) - x1_source) *
                         (256. / (x2_source - x1_source)))
            y_targets = ((np.arange(y1, y2, (y2 - y1) / M) - y1_source) *
                         (256. / (y2_source - y1_source)))
            # np.arange can occasionally return M + 1 samples, so make sure
            # the size is exactly M
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))

            All_L = New_Index
            All_W = np.ones(New_Index.shape)

            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source

            # Move points from the 256x256 annotation grid of the gt box to
            # the M x M heatmap grid of the fg roi
            GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (
                M / (y2 - y1))
            GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (
                M / (x2 - x1))

            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0

            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_weights = GT_weights[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V
            Uv_point_weights[i, 0:len(GT_weights)] = GT_weights

            All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2)
    else:
        # If there are no fg rois with densepose annotations (it does
        # happen): the network cannot handle empty blobs, so provide a dummy
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        Uv_point_weights = blob_utils.zeros((1, 196), int32=False)

        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)

    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    K = cfg.UVRCNN.NUM_PATCHES

    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)

    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * I_points.shape[1]:(jjj + 1) *
                         I_points.shape[1]] = (I_points == jjj).astype(
                             np.float32)

    # Update blobs dict with DensePose R-CNN blobs
    blobs['uv_rois'] = np.array(rois_fg)
    blobs['roi_has_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['uv_X_points'] = X_points.astype(np.float32)
    blobs['uv_Y_points'] = Y_points.astype(np.float32)
    blobs['uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['uv_I_points'] = I_points.astype(np.float32)
    # VERY IMPORTANT: these are switched here
    blobs['uv_U_points'] = U_points.astype(np.float32)
    blobs['uv_V_points'] = V_points.astype(np.float32)
    blobs['uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
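
# Illustrative sketch (not used by the pipeline): the coordinate change
# applied to the dp_x/dp_y annotations above. DensePose points live on a
# 256x256 grid inside the annotated gt box; they are first moved to absolute
# image coordinates and then onto the M x M heatmap grid of the fg roi. The
# function name is hypothetical and M = 56 is only an assumed default.
def _demo_points_to_heatmap_grid(gt_x, gt_y, roi_gt, roi_fg, M=56):
    import numpy as np
    x1s, y1s, x2s, y2s = roi_gt  # source (gt) box
    x1, y1, x2, y2 = roi_fg      # target (fg) roi
    abs_x = gt_x / 256. * (x2s - x1s) + x1s  # image coordinates
    abs_y = gt_y / 256. * (y2s - y1s) + y1s
    hm_x = (abs_x - x1) * (M / (x2 - x1))    # heatmap grid coordinates
    hm_y = (abs_y - y1) * (M / (y2 - y1))
    # Points outside [0, M-1] are discarded by the caller via GT_I = 0
    return np.asarray(hm_x), np.asarray(hm_y)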
def add_parsing_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Parsing R-CNN specific blobs to the input blob dictionary."""
    # Prepare the parsing targets by associating one gt parsing to each
    # training roi that has a fg (non-bg) class label.
    M = cfg.PRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    parsing_gt = [roidb['parsing'][i] for i in polys_gt_inds]
    boxes_from_png = parsing_utils.parsing_to_boxes(parsing_gt,
                                                    roidb['flipped'])

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    if fg_inds.shape[0] > 0:
        if cfg.PRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.PRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)
        parsings = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_png.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the parsing with highest
        # overlap (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            parsing_gt_fg = parsing_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the parsing map within the given fg
            # roi to an M x M label image
            parsing = parsing_utils.parsing_wrt_box(parsing_gt_fg, roi_fg, M,
                                                    roidb['flipped'])
            parsings[i, :] = parsing
        weights = blob_utils.ones((rois_fg.shape[0], M**2))
    else:
        # If there are no fg parsings (it does happen): the network cannot
        # handle empty blobs, so we must provide one
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # Give it an all-zero parsing target
        parsings = blob_utils.zeros((1, M**2), int32=True)
        # and all-zero loss weights so it does not contribute to the loss
        weights = blob_utils.zeros((1, M**2))

    parsings = np.reshape(parsings, (-1, 1))
    weights = np.reshape(weights, (-1, 1))

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Parsing R-CNN blobs
    blobs['parsing_rois'] = rois_fg
    blobs['parsing_weights'] = weights
    blobs['parsing_int32'] = parsings
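
# Illustrative sketch (not used by the pipeline): a plausible crop-and-resize
# that parsing_utils.parsing_wrt_box could reduce to when `parsing` is a
# dense HxW int array of part labels and `box` is (x1, y1, x2, y2). This is
# an assumption for illustration only; the real helper also handles flipping
# and annotation loading, and the function name here is hypothetical.
def _demo_parsing_wrt_box(parsing, box, M):
    import numpy as np
    import cv2
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    crop = parsing[max(y1, 0):y2 + 1, max(x1, 0):x2 + 1]
    # Nearest-neighbor interpolation keeps the integer part labels intact
    resized = cv2.resize(crop.astype(np.uint8), (M, M),
                         interpolation=cv2.INTER_NEAREST)
    return resized.reshape(M**2).astype(np.int32)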
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training
    # roi that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds]

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        if cfg.MRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.MRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen): the network cannot
        # handle empty blobs, so we must provide a mask. We simply take the
        # first bg roi, give it an all -1's mask (ignore label), and label
        # it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
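
# Illustrative sketch (not used by the pipeline): how a class-specific mask
# target expansion like _expand_to_class_specific_mask_targets (defined
# elsewhere) can work. Each M^2 mask is written into the channel of its
# class; every other channel stays -1 (ignore label), so the loss only
# touches the predicted mask of the gt class. The function name and argument
# layout here are assumptions for illustration.
def _demo_expand_class_specific(masks, mask_class_labels, num_classes, M):
    import numpy as np
    targets = -np.ones((masks.shape[0], num_classes * M**2), dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start = M**2 * cls
        if cls > 0:  # background rois keep all -1 (ignored by the loss)
            targets[i, start:start + M**2] = masks[i, :]
    return targets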
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width,
                         im_height):
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting retinanet blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # All anchors are kept (no straddle filtering for RetinaNet)
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes;
        # shape (num_anchors, num_gt)
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IoU; positives take the class of their
        # best-matching gt box (values in 1..num_classes)
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside,
                                    fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # Data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        M = len(inds_4d[0])  # number of positive locations
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        # Note: no zero padding is added here even when the targets are
        # empty. This is inside the per-foa loop, and we don't want every
        # anchor field to carry padding; instead, all anchor fields of an
        # FPN level are first summed over the image and only then checked
        # for emptiness.
        blobs_out.append(
            dict(retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                 retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                 retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(
                     np.float32)))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
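
# Illustrative sketch (not used by the pipeline): the two RetinaNet
# assignment rules above on a toy (num_anchors, num_gt) overlap matrix.
# Rule 1: each gt claims its best-overlapping anchors (including ties);
# rule 2: any anchor with IoU >= the positive threshold takes the class of
# its best gt; anchors below the negative threshold become background last.
# The function name and default thresholds are assumptions for illustration.
def _demo_retinanet_labels(overlaps, gt_classes, pos_thresh=0.5,
                           neg_thresh=0.4):
    import numpy as np
    labels = -np.ones(overlaps.shape[0], dtype=np.int32)  # -1 = ignore
    anchor_to_gt_argmax = overlaps.argmax(axis=1)
    anchor_to_gt_max = overlaps[np.arange(overlaps.shape[0]),
                                anchor_to_gt_argmax]
    # Rule 1: per-gt argmax anchors (ties included)
    ties = np.where(overlaps == overlaps.max(axis=0))[0]
    labels[ties] = gt_classes[anchor_to_gt_argmax[ties]]
    # Rule 2: threshold positives
    pos = anchor_to_gt_max >= pos_thresh
    labels[pos] = gt_classes[anchor_to_gt_argmax[pos]]
    # Background assignment happens last, mirroring the order above
    labels[anchor_to_gt_max < neg_thresh] = 0
    return labels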
def evaluate_box_proposals(json_dataset, roidb, thresholds=None, area='all',
                           limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code.
    However, it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]   # 512-inf
    ]
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos
    }
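
# Illustrative sketch (not used by the pipeline): the greedy one-to-one
# matching loop above on a toy (num_proposals, num_gt) overlap matrix. Each
# round picks the best-covered gt, records its IoU, and retires both the gt
# and its matched proposal. The function name is hypothetical.
def _demo_greedy_match(overlaps):
    import numpy as np
    overlaps = overlaps.copy()
    recorded = []
    for _ in range(min(overlaps.shape)):
        gt_ind = overlaps.max(axis=0).argmax()  # best-covered gt box
        box_ind = overlaps[:, gt_ind].argmax()  # proposal covering it
        recorded.append(overlaps[box_ind, gt_ind])
        overlaps[box_ind, :] = -1               # retire the proposal
        overlaps[:, gt_ind] = -1                # retire the gt box
    return np.array(recorded)

# e.g. _demo_greedy_match(np.array([[0.9, 0.2], [0.6, 0.7], [0.1, 0.3]]))
# returns array([0.9, 0.7])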
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH
    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh))[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: %d', total_anchors)
    logger.debug('inds_inside: %d', num_inside)
    logger.debug('anchors.shape: %s', str(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IoU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside,
                                    fill=0)
    bbox_inside_weights = data_utils.unmap(bbox_inside_weights, total_anchors,
                                           inds_inside, fill=0)
    bbox_outside_weights = data_utils.unmap(bbox_outside_weights,
                                            total_anchors, inds_inside,
                                            fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(rpn_labels_int32_wide=_labels,
                 rpn_bbox_targets_wide=_bbox_targets,
                 rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                 rpn_bbox_outside_weights_wide=_bbox_outside_weights))
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
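
# Illustrative sketch (not used by the pipeline): the behavior assumed of
# data_utils.unmap above, scattering per-inside-anchor values back into the
# full anchor set, with `fill` for the anchors discarded by the straddle
# filter. The function name here is hypothetical.
def _demo_unmap(data, count, inds, fill=0):
    import numpy as np
    if data.ndim == 1:
        ret = np.full((count, ), fill, dtype=data.dtype)
        ret[inds] = data
    else:
        ret = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
        ret[inds, :] = data
    return ret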