def _gen_blobs(entry, im_scale, batch_idx):
    """Generate Mask R-CNN mask blobs (class-labeled M x M targets) for all
    foreground ground-truth instances in a roidb entry."""
    M = cfg.MRCNN.RESOLUTION
    selected_inds = np.where(entry['gt_classes'] > 0)[0]
    polys = [entry['segms'][i] for i in selected_inds]

    # Class labels and bounding boxes for the polys
    mask_class_labels = entry['gt_classes'][selected_inds]
    mask_rois = np.array(entry['boxes'][selected_inds], dtype='float32')

    # Add mask polys
    masks = blob_utils.zeros((selected_inds.shape[0], M**2), int32=True)
    for i in range(len(polys)):
        # Rasterize the polygon mask to an M x M class-labels image
        poly_gt = polys[i]
        mask_roi = mask_rois[i]
        mask_class_label = mask_class_labels[i]
        mask = segm_utils.polys_to_mask_wrt_box(poly_gt, mask_roi, M)
        mask = mask_class_label * np.array(mask > 0, dtype=np.int32)
        masks[i, :] = np.reshape(mask, M**2)

    blob_dict = {'masks_int32': masks}
    return blob_dict
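# For reference, a minimal sketch of what segm_utils.polys_to_mask_wrt_box is
# assumed to do: rasterize COCO-style polygons into an M x M binary mask whose
# coordinate frame is the given box. This is an illustrative sketch (assuming
# pycocotools is available), not the repository's implementation; the name is
# suffixed with _sketch to avoid shadowing the real helper.
import pycocotools.mask as mask_util


def _polys_to_mask_wrt_box_sketch(polygons, box, M):
    """Rasterize `polygons` (a list of flat [x1, y1, x2, y2, ...] lists) into
    an M x M binary mask, rescaling coordinates so that `box` maps onto the
    full M x M grid."""
    w = np.maximum(box[2] - box[0], 1)
    h = np.maximum(box[3] - box[1], 1)
    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w  # x coords -> [0, M)
        p[1::2] = (p[1::2] - box[1]) * M / h  # y coords -> [0, M)
        polygons_norm.append(p)
    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = mask_util.decode(rle)  # (M, M, num_polygons)
    # Union of all polygon parts, as a binary float mask
    return np.array(np.sum(mask, axis=2) > 0, dtype=np.float32)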
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    # Keep only a subset of classes (set A in the paper) for mask training
    if cfg.TRAIN.MRCNN_FILTER_LABELS:
        keep_label_set = set(cfg.TRAIN.MRCNN_LABELS_TO_KEEP)
        labels_int32 = blobs['labels_int32']
        labels_int32_keep = np.array(
            [(l if l in keep_label_set else 0) for l in labels_int32],
            dtype=labels_int32.dtype
        )
    else:
        labels_int32_keep = blobs['labels_int32']
    fg_inds = np.where(labels_int32_keep > 0)[0]
    roi_has_mask = labels_int32_keep.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map each fg roi to the index of the mask with the highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen), the network cannot handle
        # empty blobs, so we must provide a mask. We simply take the first bg
        # roi, give it an all -1 mask (ignore label), and label it with class
        # zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
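# _expand_to_class_specific_mask_targets is defined elsewhere in the repo.
# The sketch below shows what it is assumed to do, following the usual
# Detectron convention: expand (#rois, M**2) targets to
# (#rois, num_classes * M**2), writing each mask into the slice owned by its
# class and filling everything else with the -1 ignore label. Illustrative
# only; cfg.MODEL.NUM_CLASSES is assumed to hold the number of classes.
def _expand_to_class_specific_mask_targets_sketch(masks, mask_class_labels):
    num_classes = cfg.MODEL.NUM_CLASSES
    M_sq = masks.shape[1]  # M**2, inferred from the roi-level targets
    mask_targets = -np.ones(
        (masks.shape[0], num_classes * M_sq), dtype=np.int32
    )
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        if cls > 0:  # class 0 (bg) keeps an all -1 (ignored) target
            mask_targets[i, cls * M_sq:(cls + 1) * M_sq] = masks[i, :]
    return mask_targets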
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map each fg roi to the index of the mask with the highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(
            pad_rois_fg, pad_img_h, pad_img_w
        )

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # Multiple instances overlap this roi: use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the
                    # given fg roi to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M
                    )
                    mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                    masks[i, :] = np.reshape(mask, M**2)
                else:
                    # Only one instance, so set the label to -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                masks[i, :] = np.reshape(mask, M**2)
                # Now determine the weight for each roi. If any other instance
                # of the same class as the RoI falls inside its box, we expect
                # it to be a hard sample and assign it a larger weight.
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:  # only same class counts
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M
                        )
                        # Check whether any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop
        else:
            # Add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure binary
                masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen), the network cannot handle
        # empty blobs, so we must provide a mask. We simply take the first bg
        # roi, give it an all -1 mask (ignore label), and label it with class
        # zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because ...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1
        if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            # Keep loss_weights defined so the blob below can always be filled
            loss_weights = blob_utils.ones((1, ))

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale pad_rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = pad_rois_fg.astype(np.float32) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with RefineNet blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
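# Hedged sketches of the two box_utils helpers used for the padded RoIs above.
# They are assumed to follow the usual Detectron convention: expand_boxes
# scales each (x1, y1, x2, y2) box about its center, and clip_boxes_to_image
# clamps coordinates to [0, dim - 1]. These are illustrative, not the repo's
# actual implementations.
def _expand_boxes_sketch(boxes, scale):
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    boxes_exp = np.zeros(boxes.shape, dtype=boxes.dtype)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp


def _clip_boxes_to_image_sketch(boxes, height, width):
    boxes[:, [0, 2]] = np.minimum(width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]))
    boxes[:, [1, 3]] = np.minimum(height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]))
    return boxes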
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds]

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map each fg roi to the index of the mask with the highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen), the network cannot handle
        # empty blobs, so we must provide a mask. We simply take the first bg
        # roi, give it an all -1 mask (ignore label), and label it with class
        # zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
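# box_utils.bbox_overlaps is a compiled helper in Detectron-style codebases.
# The pure-NumPy sketch below shows the IoU matrix it is assumed to compute:
# entry (i, j) is the intersection-over-union of rois[i] and gt_boxes[j], with
# boxes given as (x1, y1, x2, y2) and widths/heights measured inclusively (+1).
# Illustrative only; not the repository's implementation.
def _bbox_overlaps_sketch(rois, gt_boxes):
    ix1 = np.maximum(rois[:, None, 0], gt_boxes[None, :, 0])
    iy1 = np.maximum(rois[:, None, 1], gt_boxes[None, :, 1])
    ix2 = np.minimum(rois[:, None, 2], gt_boxes[None, :, 2])
    iy2 = np.minimum(rois[:, None, 3], gt_boxes[None, :, 3])
    iw = np.maximum(ix2 - ix1 + 1, 0)
    ih = np.maximum(iy2 - iy1 + 1, 0)
    inter = iw * ih  # (num_rois, num_gt) intersection areas
    area_r = (rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)
    area_g = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
             (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
    union = area_r[:, None] + area_g[None, :] - inter
    return inter / np.maximum(union, 1e-12)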
def add_refined_mask_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add RefineNet specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    dst_scale = cfg.REFINENET.SPATIAL_SCALE
    # NOTE: M, G_M and scale_factor were referenced but never defined in the
    # original code; the values below are assumptions chosen to keep this
    # function consistent with the other RefineNet blob functions in this file.
    M = cfg.REFINENET.RESOLUTION  # assumed roi-level mask resolution
    G_M = M  # assumed global mask resolution (same as the roi resolution)
    # assumed (h, w) scales from image coordinates into the destination map
    scale_factor = (im_scale * dst_scale, im_scale * dst_scale)
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define the global bbox covering the whole (rescaled) image
    global_width = np.floor(roidb['width'] * scale_factor[1])
    global_height = np.floor(roidb['height'] * scale_factor[0])
    global_bbox = np.array(
        (0, 0, global_width - 1, global_height - 1), dtype=np.float32
    )

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        global_masks = blob_utils.zeros((fg_inds.shape[0], G_M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map each fg roi to the index of the mask with the highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
            # Generate the global mask w.r.t. the whole-image box
            global_mask = segm_utils.polys_to_mask_wrt_box(
                poly_gt, global_bbox, G_M
            )
            global_mask = np.array(global_mask > 0, dtype=np.int32)
            global_masks[i, :] = np.reshape(global_mask, G_M**2)
    else:
        # If there are no fg masks (it does happen), the network cannot handle
        # empty blobs, so we must provide a mask. We simply take the first bg
        # roi, give it an all -1 mask (ignore label), and label it with class
        # zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        global_masks = -blob_utils.ones((1, G_M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)
        global_masks = _expand_to_class_specific_mask_targets(
            global_masks, mask_class_labels
        )

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
    blobs['global_masks_int32'] = global_masks
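# A hedged sketch of segm_utils.polys_to_boxes, which is used throughout this
# file to get a tight (x1, y1, x2, y2) box around each instance's polygon
# list; the real helper lives elsewhere in the repo, so this is illustrative
# only.
def _polys_to_boxes_sketch(polys):
    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
    for i, poly in enumerate(polys):
        # Each instance is a list of flat [x1, y1, x2, y2, ...] polygons
        x0 = min(min(p[0::2]) for p in poly)
        x1 = max(max(p[0::2]) for p in poly)
        y0 = min(min(p[1::2]) for p in poly)
        y1 = max(max(p[1::2]) for p in poly)
        boxes_from_polys[i, :] = [x0, y0, x1, y1]
    return boxes_from_polys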
def add_boundary_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Boundary specific blobs to the input blob dictionary."""
    # Prepare the boundary targets by associating one gt boundary to each
    # training roi that has a fg (non-bg) class label.
    M = cfg.BOUNDARY.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_boundary = blobs['labels_int32'].copy()
    roi_has_boundary[roi_has_boundary > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        boundary_class_labels = blobs['labels_int32'][fg_inds]
        boundarys = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map each fg roi to the index of the boundary with the highest
        # overlap (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image, then extract its boundary
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            boundary = get_boundary(mask)
            boundarys[i, :] = np.reshape(boundary, M**2)
    else:
        # If there are no fg boundaries (it does happen), the network cannot
        # handle empty blobs, so we must provide a boundary. We simply take
        # the first bg roi, give it an all -1 boundary (ignore label), and
        # label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        boundarys = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        boundary_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a boundary
        roi_has_boundary[0] = 1

    if cfg.BOUNDARY.CLS_SPECIFIC_MASK:
        boundarys = _expand_to_class_specific_boundary_targets(
            boundarys, boundary_class_labels
        )

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with boundary blobs
    blobs['boundary_rois'] = rois_fg
    blobs['roi_has_boundary_int32'] = roi_has_boundary
    blobs['boundary_int32'] = boundarys
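# get_boundary is defined elsewhere in the repo and its implementation is not
# shown here. A plausible (assumed, hypothetical) version extracts a thin
# contour by subtracting a binary erosion of the mask from the mask itself;
# this sketch uses scipy and is illustrative only.
from scipy.ndimage import binary_erosion


def _get_boundary_sketch(mask, width=1):
    """Return an int32 binary image marking pixels within `width` of the mask
    border (mask minus its erosion)."""
    fg = mask > 0
    eroded = binary_erosion(fg, iterations=width, border_value=0)
    return np.array(fg & ~eroded, dtype=np.int32)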