def _expand_bbox_targets(bbox_target_data):
    """Bounding-box regression targets are stored in a compact form in the
    roidb. This function expands those targets into the 4-of-4*K
    representation used by the network (i.e. only one class has non-zero
    targets). The loss weights are similarly expanded.

    Returns:
        bbox_targets (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2  # bg and fg
    clss = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
    inds = np.where(clss > 0)[0]
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
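
# Illustrative, standalone sketch of the 4-of-4*K expansion performed above,
# using plain NumPy in place of cfg/blob_utils (all values are invented):
def _example_expand_bbox_targets():
    import numpy as np
    compact = np.array([
        [0, 0.0, 0.0, 0.0, 0.0],    # bg roi: class 0, targets stay zero
        [2, 0.1, -0.2, 0.3, 0.05],  # fg roi of (hypothetical) class 2
    ], dtype=np.float32)
    num_classes = 3  # hypothetical K
    targets = np.zeros((2, 4 * num_classes), dtype=np.float32)
    inside_w = np.zeros_like(targets)
    for ind in np.where(compact[:, 0] > 0)[0]:
        cls = int(compact[ind, 0])
        targets[ind, 4 * cls:4 * cls + 4] = compact[ind, 1:]
        inside_w[ind, 4 * cls:4 * cls + 4] = 1.0
    # targets[1, 8:12] now holds the class-2 targets; everything else is 0.
    return targets, inside_w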
def add_refine_keypoints_blobs_gaussian(blobs, roidb, fg_rois_per_image,
                                        fg_inds, im_scale, batch_idx, data):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py, so we avoid the issue
    # of mismatched keypoint_rois and refined_keypoint_rois, which would cause
    # a big problem for training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale
        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(
            pad_fg_rois, pad_img_h, pad_img_w)

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0
        heats, weights = keypoint_utils.keypoints_to_gaussian_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE)
    else:
        # If there are no fg keypoint rois (it does happen).
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # pad_fg_rois is actually one random roi, but that's ok because ...
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it a 0's blob
        M = cfg.REFINENET.KRCNN.HEATMAP_SIZE
        heats = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, M, M))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))

    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_heatmaps'] = heats
    blobs['refined_keypoint_weights'] = weights
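
# `box_utils.expand_boxes` scales each (x1, y1, x2, y2) box about its own
# center; a minimal sketch of that behavior for reference (this is an
# approximation written here, not the repo's implementation):
def _expand_boxes_sketch(boxes, scale):
    import numpy as np
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
    out = np.zeros_like(boxes)
    out[:, 0], out[:, 2] = x_c - w_half, x_c + w_half
    out[:, 1], out[:, 3] = y_c - h_half, y_c + h_half
    return out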
def keypoints_to_heatmap_labels(keypoints, rois):
    """Generate the heatmap location label for each roi and each keypoint:
    the flattened xy location of the keypoint, for use in SoftmaxWithLoss
    across space.

    rgirdhar: Don't modify for tubes; the modification was done in
    roi_data/keypoint_rcnn.py
    """
    # Maps keypoints from the half-open interval [x1, x2) on continuous image
    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete
    # image coordinates. We use the continuous <-> discrete conversion from
    # Heckbert 1990 ("What is the coordinate of a pixel?"): d = floor(c) and
    # c = d + 0.5, where d is a discrete coordinate and c is a continuous
    # coordinate.
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS
    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros(shape)

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    # +1 added by rgirdhar, to avoid divides by 0
    scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0] + 1)
    scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1] + 1)

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # Since we use floor below, if a keypoint is exactly on the roi's
        # right or bottom boundary, we shift it in by eps (conceptually) to
        # keep it in the ground truth heatmap.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1
        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        valid_loc = np.logical_and(
            np.logical_and(x >= 0, y >= 0),
            np.logical_and(x < cfg.KRCNN.HEATMAP_SIZE,
                           y < cfg.KRCNN.HEATMAP_SIZE))
        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)

        lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x
        heatmaps[:, kp] = lin_ind * valid
        weights[:, kp] = valid
    return heatmaps, weights
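
# Quick numeric check of the mapping above (the roi and the keypoint x-values
# are invented). A keypoint exactly on the roi's right edge would otherwise
# floor to HEATMAP_SIZE, so it is clamped to HEATMAP_SIZE - 1:
def _example_heatmap_coordinate_mapping():
    import numpy as np
    HEATMAP_SIZE = 56
    roi = np.array([10.0, 20.0, 110.0, 220.0])  # x1, y1, x2, y2
    scale_x = HEATMAP_SIZE / (roi[2] - roi[0] + 1)
    x = np.array([10.0, 60.0, 110.0])  # left edge, interior, right edge
    hx = np.floor((x - roi[0]) * scale_x)
    hx[x == roi[2]] = HEATMAP_SIZE - 1  # boundary shift
    return hx  # -> [0., 27., 55.]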
def keypoints_to_gaussian_heatmap_labels(keypoints, rois, M=56):
    """Encode keypoint location in the target heatmap for use in MSELoss."""
    # Maps keypoints from the half-open interval [x1, x2) on continuous image
    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete
    # image coordinates. We use the continuous <-> discrete conversion from
    # Heckbert 1990 ("What is the coordinate of a pixel?"): d = floor(c) and
    # c = d + 0.5, where d is a discrete coordinate and c is a continuous
    # coordinate.
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS
    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS, M, M)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros((len(rois), cfg.KRCNN.NUM_KEYPOINTS))

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = M / (rois[:, 2] - rois[:, 0])
    scale_y = M / (rois[:, 3] - rois[:, 1])

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # Since we use floor below, if a keypoint is exactly on the roi's
        # right or bottom boundary, we shift it in by eps (conceptually) to
        # keep it in the ground truth heatmap.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = M - 1
        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = M - 1

        valid_loc = np.logical_and(
            np.logical_and(x >= 0, y >= 0), np.logical_and(x < M, y < M))
        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)
        weights[:, kp] = valid

        for i in range(len(rois)):
            if valid[i] > 0:
                heatmaps[i, kp] = draw_gaussian_heatmap(
                    heatmaps[i, kp], (x[i], y[i]), sigma=1)
    return heatmaps, weights
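
# `draw_gaussian_heatmap` is assumed above; a minimal sketch consistent with
# its call site (an M x M heatmap, an (x, y) center, and a sigma) might be:
def _draw_gaussian_heatmap_sketch(heatmap, center, sigma=1):
    import numpy as np
    h, w = heatmap.shape
    ys, xs = np.mgrid[0:h, 0:w].astype(np.float32)
    cx, cy = center
    g = np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2.0 * sigma ** 2))
    return np.maximum(heatmap, g)  # keep any previously drawn peaks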
def _gen_blobs(entry, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    M = cfg.MRCNN.RESOLUTION
    selected_inds = np.where(entry['gt_classes'] > 0)[0]
    polys = [entry['segms'][i] for i in selected_inds]

    # Class labels and bounding boxes for the polys
    mask_class_labels = entry['gt_classes'][selected_inds]
    mask_rois = np.array(entry['boxes'][selected_inds], dtype='float32')

    # add mask polys
    masks = blob_utils.zeros((selected_inds.shape[0], M**2), int32=True)
    for i in range(len(polys)):
        # Rasterize the polygon mask to an M x M class labels image
        poly_gt = polys[i]
        mask_roi = mask_rois[i]
        mask_class_label = mask_class_labels[i]
        mask = segm_utils.polys_to_mask_wrt_box(poly_gt, mask_roi, M)
        mask = mask_class_label * np.array(mask > 0, dtype=np.int32)
        masks[i, :] = np.reshape(mask, M**2)

    blob_dict = {}
    blob_dict['masks_int32'] = masks
    return blob_dict
def add_prn_blobs(blobs_out, blobs_in):
    """Add PRN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    num_cls = cfg.MODEL.NUM_CLASSES
    iou_thres = cfg.PRN.IOU_THRESHOLD
    fg_inds = np.where(blobs_in['labels_int32'] > 0)[0]
    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        fg_labels = blobs_in['labels_int32'][fg_inds]
        # If below the threshold, set the label to 1, otherwise 0
        prn_labels = (blobs_in['mask_ious'] < iou_thres).astype(np.int32)
        # and set roi_needs_refine the same as prn_labels
        roi_needs_refine = (blobs_in['mask_ious'] < iou_thres).astype(np.int32)
        # calculate the refine ratio
        refine_ratio = np.sum(roi_needs_refine,
                              keepdims=True).astype(np.float32)
        refine_ratio /= fg_inds.shape[0]
        # Sometimes prn_labels might be all false, but we still need a
        # non-all-false roi_needs_refine, so set the first one to True
        if np.sum(roi_needs_refine) == 0:
            roi_needs_refine[0] = 1
    else:
        # If there are no fg masks (it does happen).
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs_in['labels_int32'] == 0)[0]
        # We give it a -1's blob (ignore label)
        prn_labels = -blob_utils.ones((1, ), int32=True)
        # We label it with class = 0 (background)
        fg_labels = blob_utils.zeros((1, ))
        # and set roi_needs_refine to 1
        roi_needs_refine = blob_utils.ones((1, ), int32=True)
        # set refine_ratio to 0
        refine_ratio = blob_utils.zeros((1, ))
    if cfg.PRN.CLS_SPECIFIC_LABEL:
        prn_labels = _expand_to_class_specific_prn_targets(prn_labels,
                                                           fg_labels)
    blobs_out['prn_labels_int32'] = prn_labels
    blobs_out['roi_needs_refine_int32'] = roi_needs_refine
    blobs_out['refine_ratio'] = refine_ratio
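
# Toy walk-through of the PRN labeling rule above (the IoU values and the
# threshold are invented): rois whose predicted-mask IoU falls below the
# threshold are flagged for refinement.
def _example_prn_labels():
    import numpy as np
    mask_ious = np.array([0.9, 0.4, 0.75], dtype=np.float32)
    iou_thres = 0.5  # stand-in for cfg.PRN.IOU_THRESHOLD
    prn_labels = (mask_ious < iou_thres).astype(np.int32)  # -> [0, 1, 0]
    refine_ratio = prn_labels.sum(dtype=np.float32) / len(prn_labels)  # 1/3
    return prn_labels, refine_ratio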
def add_semantic_segms_blobs(blobs, roidb, im_scale, batch_idx, data):
    """Add Semantic Segmentation Net specific blobs to the input blob
    dictionary. Draw all gt polygons into the label image.
    """
    num_cls = cfg.MODEL.NUM_CLASSES
    rescale_factor = cfg.SEMANTIC_NET.RESCALE_FACTOR
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * rescale_factor), int(inp_w * rescale_factor)

    if polys_gt_inds.shape[0] > 0:
        # class labels for the masks
        gt_class_labels = roidb['gt_classes'][polys_gt_inds]
        semantic_segms = blob_utils.zeros((num_cls, out_h, out_w), int32=True)
        # narrow scale and size
        scale = im_scale * rescale_factor
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)
        # add
        for i in range(polys_gt_inds.shape[0]):
            cls_label = gt_class_labels[i]
            poly_gt = polys_gt[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(poly_gt, im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            semantic_segms[cls_label, 0:im_label_h, 0:im_label_w] = np.maximum(
                semantic_segms[cls_label, 0:im_label_h, 0:im_label_w], mask,
                dtype=np.int32)
        semantic_segms = np.reshape(semantic_segms,
                                    (1, num_cls * out_h * out_w))
    else:
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        # We give it a -1's blob (ignore label)
        semantic_segms = -blob_utils.ones((1, num_cls * out_h * out_w),
                                          int32=True)

    blobs['semantic_segms_int32'] = semantic_segms
    blobs['img_rois'] = np.array([batch_idx, 0, 0, inp_w - 1, inp_h - 1],
                                 dtype=np.float32)[np.newaxis, :]
def forward(self, inputs, outputs):
    data = inputs[0].data
    keypoint_probs = inputs[1].data
    keypoint_rois = inputs[2].data
    # output indicator resolution
    M = self.resolution
    up_scale = self.up_scale
    num_rois = keypoint_rois.shape[0]
    num_keypoints = keypoint_probs.shape[1]

    # First expand the keypoint rois
    height, width = data.shape[2], data.shape[3]
    pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
    pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

    # Get keypoint predictions and their probs.
    # Output shape is (#rois, 3, #keypoints), where 3 means (x, y, prob)
    pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs,
                                                  keypoint_rois)
    # Map keypoint positions to the pad_rois.
    # Output shape is (#rois, #keypoints); locations are flattened out
    locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
        pred_rois, pad_rois, M)
    locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

    # And now generate the keypoint indicators
    keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
    for i in range(num_rois):
        locations = locations_on_pad_rois[i]  # shape (#keypoints, )
        for k in range(num_keypoints):
            keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]
    # and reshape to 4 dimensions
    keypoint_indicators = keypoint_indicators.reshape(
        (num_rois, num_keypoints, M, M))
    outputs[0].reshape(keypoint_indicators.shape)
    outputs[0].data[...] = keypoint_indicators
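
# The flattened location consumed above encodes (x, y) as y * M + x; a toy
# round-trip (M and the coordinates are illustrative):
def _example_flattened_heatmap_index():
    M = 56
    x, y = 12, 30
    lin_ind = y * M + x  # 1692, as emitted by keypoints_to_heatmap_labels
    assert (lin_ind // M, lin_ind % M) == (y, x)
    return lin_ind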
def im_classify_bbox(model, im, box_proposals, timers=None):
    """Classify the given box proposals on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Create the rois from the scaled box proposals
    sampled_rois = box_proposals * inputs['im_info'][0, 2]
    repeated_batch_idx = blob_utils.zeros((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))
    inputs['rois'] = sampled_rois
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(inputs)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    if cfg.MODEL.TYPE == 'region_classification':
        cls_prob = core.ScopedName('cls_prob')
    elif cfg.MODEL.TYPE == 'region_memory':
        cls_prob = core.ScopedName('final/cls_prob')
    else:
        raise NotImplementedError
    cls_scores = workspace.FetchBlob(cls_prob)
    timers['im_detect_bbox'].toc()

    timers['misc_bbox'].tic()
    timers['misc_bbox'].toc()

    return cls_scores
def dp_annot_process(ann, heatmap_size, crop_res, center, scale, IsFlipped):
    bb_xywh = np.array(ann['bbox'])
    bbox_gt = [bb_xywh[0], bb_xywh[1],
               bb_xywh[0] + bb_xywh[2], bb_xywh[1] + bb_xywh[3]]
    # Cropped upper-left point
    crop_ul = np.array(
        transform([1, 1], center, scale, [crop_res] * 2, invert=1)) - 1
    # Cropped bottom-right point
    crop_br = np.array(
        transform([crop_res + 1] * 2, center, scale, [crop_res] * 2,
                  invert=1)) - 1
    bbox_crop = np.concatenate([crop_ul, crop_br])

    dp_dict = {}
    M = heatmap_size
    # Create blobs for densepose supervision.
    # The mask
    All_labels = blob_utils.zeros(M ** 2, int32=True)
    All_Weights = blob_utils.zeros(M ** 2, int32=True)
    # The points
    X_points = blob_utils.zeros(196, int32=False)
    Y_points = blob_utils.zeros(196, int32=False)
    Ind_points = blob_utils.zeros(196, int32=True)
    I_points = blob_utils.zeros(196, int32=True)
    U_points = blob_utils.zeros(196, int32=False)
    V_points = blob_utils.zeros(196, int32=False)
    Uv_point_weights = blob_utils.zeros(196, int32=False)

    Ilabel = segm_utils.GetDensePoseMask(ann['dp_masks'])
    GT_I = np.array(ann['dp_I'])
    GT_U = np.array(ann['dp_U'])
    GT_V = np.array(ann['dp_V'])
    GT_x = np.array(ann['dp_x'])
    GT_y = np.array(ann['dp_y'])
    GT_weights = np.ones(GT_I.shape).astype(np.float32)

    # Do the flipping of the densepose annotation!
    if IsFlipped:
        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose(
            GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)

    roi_fg = bbox_crop
    roi_gt = bbox_gt

    x1 = roi_fg[0]
    x2 = roi_fg[2]
    y1 = roi_fg[1]
    y2 = roi_fg[3]
    x1_source = roi_gt[0]
    x2_source = roi_gt[2]
    y1_source = roi_gt[1]
    y2_source = roi_gt[3]

    x_targets = ((np.arange(x1, x2, (x2 - x1) / float(M)) - x1_source) *
                 (255. / (x2_source - x1_source)))
    y_targets = ((np.arange(y1, y2, (y2 - y1) / float(M)) - y1_source) *
                 (255. / (y2_source - y1_source)))
    # Strangely, sometimes the size can be M + 1, so make sure it is M!
    x_targets = x_targets[0:M]
    y_targets = y_targets[0:M]

    [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets)
    New_Index = cv2.remap(
        Ilabel, X_targets.astype(np.float32), Y_targets.astype(np.float32),
        interpolation=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0))

    All_L = np.zeros(New_Index.shape)
    All_W = np.ones(New_Index.shape)
    All_L = New_Index

    gt_length_x = x2_source - x1_source
    gt_length_y = y2_source - y1_source

    GT_y = ((GT_y / 255. * gt_length_y) + y1_source - y1) * \
        (float(M) / (y2 - y1))
    GT_x = ((GT_x / 255. * gt_length_x) + x1_source - x1) * \
        (float(M) / (x2 - x1))

    GT_I[GT_y < 0] = 0
    GT_I[GT_y > (M - 1)] = 0
    GT_I[GT_x < 0] = 0
    GT_I[GT_x > (M - 1)] = 0

    points_inside = GT_I > 0
    GT_U = GT_U[points_inside]
    GT_V = GT_V[points_inside]
    GT_x = GT_x[points_inside]
    GT_y = GT_y[points_inside]
    GT_weights = GT_weights[points_inside]
    GT_I = GT_I[points_inside]

    X_points[0:len(GT_x)] = GT_x
    Y_points[0:len(GT_y)] = GT_y
    # Only one annotation is processed here, so the roi index is 0. (The
    # batched version indexes Ind_points per roi, i.e. Ind_points[i, ...] = i.)
    Ind_points[0:len(GT_I)] = 0
    I_points[0:len(GT_I)] = GT_I
    U_points[0:len(GT_U)] = GT_U
    V_points[0:len(GT_V)] = GT_V
    Uv_point_weights[0:len(GT_weights)] = GT_weights

    All_labels[:] = np.reshape(All_L.astype(np.int32), M ** 2)
    All_Weights[:] = np.reshape(All_W.astype(np.int32), M ** 2)
    # K = cfg.BODY_UV_RCNN.NUM_PATCHES
    K = 24

    U_points = np.tile(U_points, [K + 1])
    V_points = np.tile(V_points, [K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)

    for jjj in range(1, K + 1):
        Uv_Weight_Points[
            jjj * I_points.shape[0]:(jjj + 1) * I_points.shape[0]
        ] = (I_points == jjj).astype(np.float32)

    dp_dict['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    dp_dict['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    dp_dict['body_uv_X_points'] = X_points.astype(np.float32)
    dp_dict['body_uv_Y_points'] = Y_points.astype(np.float32)
    dp_dict['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    dp_dict['body_uv_I_points'] = I_points.astype(np.float32)
    # VERY IMPORTANT: U and V are switched here:
    dp_dict['body_uv_U_points'] = U_points.astype(np.float32)
    dp_dict['body_uv_V_points'] = V_points.astype(np.float32)
    dp_dict['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
    return dp_dict
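
# Toy illustration of the patch-specific weight layout built above: the U/V
# point vectors are tiled K + 1 times, and the weights select the block that
# matches each point's patch index I (sizes shrunk for readability):
def _example_uv_weight_layout():
    import numpy as np
    K = 3  # stand-in for the 24 DensePose patches
    I_points = np.array([1, 3, 0], dtype=np.int32)
    U_points = np.tile(np.array([0.2, 0.5, 0.0], dtype=np.float32), K + 1)
    w = np.zeros(U_points.shape, dtype=np.float32)
    for j in range(1, K + 1):
        w[j * len(I_points):(j + 1) * len(I_points)] = (I_points == j)
    # Block j of w is 1 only where I_points == j, so the UV loss reads the
    # U/V prediction from the correct patch channel.
    return w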
def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    if 'bbox_targets' not in roidb:
        gt_inds = np.where(roidb['gt_classes'] > 0)[0]
        gt_boxes = roidb['boxes'][gt_inds, :]
        if not len(gt_boxes):
            num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
            clss = sampled_labels
            bbox_targets = blob_utils.zeros(
                (clss.size, 4 * num_bbox_reg_classes))
            bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
        else:
            gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]]
            bbox_targets = _compute_targets(
                sampled_boxes, gt_boxes[gt_assignments, :], sampled_labels)
            bbox_targets, bbox_inside_weights = _expand_bbox_targets(
                bbox_targets)
    else:
        bbox_targets, bbox_inside_weights = _expand_bbox_targets(
            roidb['bbox_targets'][keep_inds, :])
    bbox_outside_weights = np.array(bbox_inside_weights > 0,
                                    dtype=bbox_inside_weights.dtype)

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(
        labels_int32=sampled_labels.astype(np.int32, copy=False),
        rois=sampled_rois,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights)

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON:
        roi_data.mask_rcnn.add_mask_rcnn_blobs(blob_dict, sampled_boxes,
                                               roidb, im_scale, batch_idx)

    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON:
        roi_data.keypoint_rcnn.add_keypoint_rcnn_blobs(
            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx)

    return blob_dict
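
# Toy walk-through of the fg/bg sampling arithmetic above (the cfg values are
# invented stand-ins):
def _example_roi_sampling_counts():
    import numpy as np
    rois_per_image = 512      # stand-in for cfg.TRAIN.BATCH_SIZE_PER_IM
    fg_fraction = 0.25        # stand-in for cfg.TRAIN.FG_FRACTION
    fg_rois_per_image = int(np.round(fg_fraction * rois_per_image))  # 128
    num_fg_available = 40     # suppose the image has only 40 fg candidates
    fg_this_image = min(fg_rois_per_image, num_fg_available)         # 40
    bg_this_image = rois_per_image - fg_this_image                   # 472
    return fg_this_image, bg_this_image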
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    # Keep only a subset of classes (set A in the paper) for mask training
    if cfg.TRAIN.MRCNN_FILTER_LABELS:
        keep_label_set = set(cfg.TRAIN.MRCNN_LABELS_TO_KEEP)
        labels_int32 = blobs['labels_int32']
        labels_int32_keep = np.array(
            [(l if l in keep_label_set else 0) for l in labels_int32],
            dtype=labels_int32.dtype)
    else:
        labels_int32_keep = blobs['labels_int32']
    fg_inds = np.where(labels_int32_keep > 0)[0]
    roi_has_mask = labels_int32_keep.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen).
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
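
# `_expand_to_class_specific_mask_targets` is used but not shown in this
# section; a sketch consistent with its call site (N x M**2 masks expanded to
# N x (num_cls * M**2), with -1 everywhere outside the roi's class slot):
def _expand_to_class_specific_mask_targets_sketch(masks, mask_class_labels,
                                                  num_cls, M):
    import numpy as np
    expanded = -np.ones((masks.shape[0], num_cls * M**2), dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        if cls > 0:
            start = M**2 * cls
            expanded[i, start:start + M**2] = masks[i, :]
    return expanded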
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (
                cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (entry['is_crowd'] == 0))[0]
        # Added to ignore anchors that have overlap with crowd area
        ignore_inds = np.where(entry['is_crowd'][gt_inds] == 1)[0]
        if len(ignore_inds) == 0:
            ignore_inds = None
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois, ignore_inds)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois, ignore_inds)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

        if cfg.TRAIN.VIS_ANCHOR:
            im = blobs['data'][0, :, :, :].squeeze() + np.array(
                cfg.PIXEL_MEANS).transpose((2, 0, 1))
            idx = np.where(vis_labels == 1)[0]
            anchor_bboxes = vis_anchors[idx, :]
            if not osp.exists(cfg.TRAIN.VIS_ANCHOR_DIR):
                os.makedirs(cfg.TRAIN.VIS_ANCHOR_DIR)
            save_path = osp.join(
                cfg.TRAIN.VIS_ANCHOR_DIR,
                osp.splitext(os.path.basename(entry['image']))[0])
            vis2d_utils.draw_pred_and_gt_tensor(im, gt_rois, save_path,
                                                anchor_bboxes)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if (cfg.LESION.USE_POSITION or cfg.LESION.POSITION_RCNN or
            cfg.LESION.SHALLOW_POSITION or cfg.LESION.MM_POS):
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints', 'z_position'
        ]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints'
        ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def test_net(weights_file, dataset_name, proposal_file, output_dir,
             ind_range=None, gpu_id=0):
    """Run inference on all images in a dataset or over an index range of
    images in a dataset using a single GPU.
    """
    roidb, dataset, start_ind, end_ind, total_num_images = \
        get_roidb_and_dataset(dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_scores = empty_results(num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        # just get the ground truth boxes
        box_proposals = entry['boxes'][entry['gt_classes'] > 0]
        if len(box_proposals) == 0:
            cls_scores_i = blob_utils.zeros((0, cfg.MODEL.NUM_CLASSES))
            extend_results(i, all_scores, cls_scores_i)
            continue

        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_scores_i = im_classify_bbox(model, im, box_proposals, timers)
        extend_results(i, all_scores, cls_scores_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = timers['im_classify_bbox'].average_time
            misc_time = timers['misc_bbox'].average_time
            logger.info(
                ('im_detect: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                    start_ind + 1, end_ind, total_num_images,
                    start_ind + i + 1, start_ind + num_images,
                    det_time, misc_time, eta))

        # if cfg.VIS:
        #     im_name = os.path.splitext(os.path.basename(entry['image']))[0]
        #     vis_utils.vis_one_image(
        #         im[:, :, ::-1],
        #         '{:d}_{:s}'.format(i, im_name),
        #         os.path.join(output_dir, 'vis'),
        #         cls_boxes_i,
        #         segms=cls_segms_i,
        #         keypoints=cls_keyps_i,
        #         thresh=cfg.VIS_TH,
        #         box_alpha=0.8,
        #         dataset=dataset,
        #         show_class=True
        #     )

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = cfg.CFG_FILE + '_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(dict(all_scores=all_scores, cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_scores
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h,
                                                    pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # If it overlaps multiple instances, use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the
                    # given fg roi to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(poly_gt,
                                                            pad_roi_fg, M)
                    mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                    masks[i, :] = np.reshape(mask, M**2)
                else:
                    # Only one instance, so set the label to -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                masks[i, :] = np.reshape(mask, M**2)
                # And now determine the weight for this roi. If any other
                # instance of the same class falls inside the RoI, we expect
                # it to be a hard sample and assign a larger weight to it.
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:
                        # only the same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # and check if any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop
        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen).
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale pad_rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
def add_charmask_rcnn_blobs(blobs, sampled_boxes, gt_boxes, gt_inds, roidb,
                            im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    is_e2e = cfg.MRCNN.IS_E2E
    M_HEIGHT = cfg.MRCNN.RESOLUTION_H
    M_WIDTH = cfg.MRCNN.RESOLUTION_W
    mask_rois_per_this_image = cfg.MRCNN.MASK_BATCH_SIZE_PER_IM
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    chars_gts = roidb['charboxes']
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    if DEBUG:
        img_path = roidb['image']
        img = Image.open(img_path)
        # img = blobs['data'][0]
        # img = img.transpose((1, 2, 0))
        # img += cfg.PIXEL_MEANS
        # img = img.astype(np.int8)
        # img = Image.fromarray(img)
    if is_e2e:
        fg_inds = np.where(blobs['labels_int32'] > 0)[0]
        if fg_inds.size > mask_rois_per_this_image:
            fg_inds = npr.choice(fg_inds, size=mask_rois_per_this_image,
                                 replace=False)
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)
        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = blobs['labels_int32'][fg_inds]
            masks = blob_utils.zeros(
                (fg_inds.shape[0], 2, M_HEIGHT * M_WIDTH), int32=True)
            mask_weights = np.zeros((fg_inds.shape[0], M_HEIGHT * M_WIDTH),
                                    dtype=np.float32)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4),
                                  dtype=np.float32)
            char_boxes_inside_weight = np.zeros(
                (fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros(
                (fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4), dtype=np.float32)

            # Find overlap between all foreground rois and the bounding boxes
            # enclosing each segmentation
            rois_fg = sampled_boxes[fg_inds]
            overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
                rois_fg.astype(np.float32, copy=False),
                boxes_from_polys.astype(np.float32, copy=False))
            # Map from each fg roi to the index of the mask with highest
            # overlap (measured by bbox overlap)
            fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(
                    chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M_HEIGHT x M_WIDTH binary image
                mask, mask_weight, char_box, char_box_inside_weight = \
                    segm_utils.polys_to_mask_wrt_box_rec(
                        chars_gt.copy(), poly_gt, roi_fg.copy(), M_HEIGHT,
                        M_WIDTH, weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    draw = ImageDraw.Draw(img)
                    draw.rectangle([(roi_fg[0], roi_fg[1]),
                                    (roi_fg[2], roi_fg[3])])
                    img.save('./tests/image.jpg')
                    _visu_global_map(mask[0, :, :].copy(),
                                     './tests/proposals_visu_global.jpg')
                    _visu_char_map(mask[1, :, :].copy(),
                                   './tests/proposals_visu_char.jpg')
                    _visu_char_box(char_box, char_box_inside_weight,
                                   './tests/char_box.jpg', M_HEIGHT, M_WIDTH)

                masks[i, 0, :] = np.reshape(mask[0, :, :], M_HEIGHT * M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1, :, :], M_HEIGHT * M_WIDTH)
                mask_weights[i, :] = np.reshape(mask_weight,
                                                M_HEIGHT * M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box,
                                                 (M_HEIGHT * M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(
                    char_box_inside_weight, (M_HEIGHT * M_WIDTH, 4))
        else:
            # If there are no fg masks (it does happen).
            # The network cannot handle empty blobs, so we must provide a
            # mask. We simply take the first bg roi, give it an all -1's mask
            # (ignore label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it a -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT * M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT * M_WIDTH),
                                            int32=True)
            # Also define char_boxes here (it was missing on this path, which
            # would leave the blob assignments below with an unbound name):
            char_boxes = -np.ones((1, M_HEIGHT * M_WIDTH, 4),
                                  dtype=np.float32)
            char_boxes_inside_weight = np.zeros((1, M_HEIGHT * M_WIDTH, 4),
                                                dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1
    else:
        fg_inds = gt_inds
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)
        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = np.ones((fg_inds.shape[0], ), dtype=np.int32)
            masks = blob_utils.zeros(
                (fg_inds.shape[0], 2, M_HEIGHT * M_WIDTH), int32=True)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4),
                                  dtype=np.float32)
            char_boxes_inside_weight = np.zeros(
                (fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros(
                (fg_inds.shape[0], M_HEIGHT * M_WIDTH, 4), dtype=np.float32)
            # Restored (was commented out) so masks_char_weight is defined on
            # this path as well:
            mask_weights = blob_utils.zeros(
                (fg_inds.shape[0], 2, M_HEIGHT * M_WIDTH), int32=True)
            rois_fg = gt_boxes

            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(
                    chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M_HEIGHT x M_WIDTH binary image
                mask, char_box, char_box_inside_weight = \
                    segm_utils.polys_to_mask_wrt_box_rec(
                        chars_gt, poly_gt, roi_fg, M_HEIGHT, M_WIDTH,
                        weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    _visu_char_box(char_box, char_box_inside_weight,
                                   './tests/char_box.jpg', M_HEIGHT, M_WIDTH)
                mask = np.array(mask, dtype=np.int32)  # Ensure it's binary
                masks[i, 0, :] = np.reshape(mask[0, :, :], M_HEIGHT * M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1, :, :], M_HEIGHT * M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box,
                                                 (M_HEIGHT * M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(
                    char_box_inside_weight, (M_HEIGHT * M_WIDTH, 4))
        else:
            # If there are no fg masks (it does happen).
            # The network cannot handle empty blobs, so we must provide a
            # mask. We simply take the first bg roi, give it an all -1's mask
            # (ignore label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it a -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT * M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT * M_WIDTH),
                                            int32=True)
            # Shapes must be tuples (the original passed them as separate
            # positional arguments, which raises a TypeError):
            char_boxes = -np.ones((1, M_HEIGHT * M_WIDTH, 4), dtype=np.int32)
            char_boxes_inside_weight = np.zeros((1, M_HEIGHT * M_WIDTH, 4),
                                                dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    char_boxes_outside_weight = np.array(
        char_boxes_inside_weight > 0, dtype=char_boxes_inside_weight.dtype)

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_global_int32'] = masks[:, 0, :]
    blobs['masks_char_int32'] = masks[:, 1, :].reshape((-1, M_HEIGHT, M_WIDTH))
    blobs['masks_char_weight'] = mask_weights
    blobs['char_bbox_targets'] = char_boxes.reshape((-1, 4))
    blobs['char_bbox_inside_weights'] = char_boxes_inside_weight.reshape(
        (-1, 4))
    blobs['char_bbox_outside_weights'] = char_boxes_outside_weight.reshape(
        (-1, 4))
def add_boundary_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add boundary specific blobs to the input blob dictionary."""
    # Prepare the boundary targets by associating one gt boundary to each
    # training roi that has a fg (non-bg) class label.
    M = cfg.BOUNDARY.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_boundary = blobs['labels_int32'].copy()
    roi_has_boundary[roi_has_boundary > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        boundary_class_labels = blobs['labels_int32'][fg_inds]
        boundarys = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the boundary with highest
        # overlap (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon boundary within the given
            # fg roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            boundary = get_boundary(mask)
            boundarys[i, :] = np.reshape(boundary, M**2)
    else:
        # If there are no fg boundaries (it does happen).
        # The network cannot handle empty blobs, so we must provide a
        # boundary. We simply take the first bg roi, give it an all -1's
        # boundary (ignore label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        boundarys = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        boundary_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a boundary
        roi_has_boundary[0] = 1

    if cfg.BOUNDARY.CLS_SPECIFIC_MASK:
        boundarys = _expand_to_class_specific_boundary_targets(
            boundarys, boundary_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with boundary blobs
    blobs['boundary_rois'] = rois_fg
    blobs['roi_has_boundary_int32'] = roi_has_boundary
    blobs['boundary_int32'] = boundarys
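
# `get_boundary` is assumed above; a minimal sketch that extracts a one-pixel
# boundary from a binary mask via morphological erosion (the scipy dependency
# is an assumption here, not necessarily what the repo uses):
def _get_boundary_sketch(mask):
    import numpy as np
    from scipy.ndimage import binary_erosion
    m = mask.astype(bool)
    return (m & ~binary_erosion(m)).astype(np.int32)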
def add_keypoint_rcnn_blobs_sigmoid(blobs, roidb, fg_rois_per_image, fg_inds,
                                    im_scale, batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']
    M = cfg.KRCNN.HEATMAP_SIZE

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False)

    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(sampled_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_sigmoid_heatmap_labels(
            sampled_keypoints, sampled_fg_rois, M=cfg.KRCNN.HEATMAP_SIZE)
        shape = sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS
        heats = heats.reshape((shape, M**2))
        weights = weights.reshape((shape, 1))
    else:
        # If there are no fg keypoint rois (it does happen).
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all -1's heatmap (ignore
        # label), and set its weights to zero.
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # sampled_fg_rois is actually one random roi, but that's ok because...
        sampled_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        heats = (-1) * blob_utils.ones((1 * cfg.KRCNN.NUM_KEYPOINTS, M**2))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights
    # Since this function may randomly sample a subset of bboxes as the rois,
    # we need to make sure the refined_keypoint_rois use the same subset, so
    # we pass out the inds of the subset too.
    blobs['keypoint_fg_inds'] = kp_fg_inds.astype(np.int32, copy=False)
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # field_stride = 2.**lvl
            anchor_sizes = (
                cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            field_stride = min(16., 2.**lvl)
            # anchor_sizes = (min(128., cfg.FPN.RPN_ANCHOR_START_SIZE *
            #                     2.**(lvl - k_min)), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_refine_global_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                 batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    dst_scale = cfg.REFINENET.SPATIAL_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * dst_scale), int(inp_w * dst_scale)

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], out_h, out_w), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # narrow scale and size
        scale = im_scale * dst_scale
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(poly_gt, im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, 0:im_label_h, 0:im_label_w] = mask
        masks = np.reshape(masks, (-1, out_h * out_w))
    else:
        # If there are no fg masks (it does happen).
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, out_h * out_w), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks