def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info, init_state0):
    """Reorder all RoIs as foreground-then-background and build their targets.

    No RoI is dropped: RoIs whose best overlap with a ground-truth box is
    >= cfg.TRAIN.BBOX_THRESH (foreground) are placed first, all remaining
    RoIs (background) follow.

    Args:
        all_rois: 2-D array; columns 1:5 hold the box coordinates
            (column 0 is presumably the batch index -- confirm with caller).
        gt_boxes: 2-D array; columns 0:4 hold the box coordinates and
            column 4 the class label.
        im_scale: scale factor; RoIs and ground-truth boxes are divided by
            it before the mask targets are computed.
        gt_masks: ground-truth masks, indexed by ground-truth index.
        mask_info: per-ground-truth entry whose first two values bound the
            rows and columns kept from the matching entry of gt_masks.
        init_state0: passed through unchanged as the last returned value.

    Returns:
        Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
        bbox_targets, bbox_inside_weights, init_states)``. ``labels`` and
        ``rois`` follow the order given by ``keep_inds``. Each foreground row
        of ``top_mask_info`` holds
        [gt index, mask_info[0], mask_info[1], label, ex_box (4), gt_box (4)];
        background rows are filled with -1.
    """
    # overlaps: (rois x gt_boxes). The builtin float replaces the np.float
    # alias, which no longer exists in recent NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Split (not subsample) the RoIs by their best overlap.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
    num_fg = len(fg_inds)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)

    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[num_fg:] = 0
    rois = all_rois[keep_inds]
    # NOTE: unlike rois/labels, init_state0 is NOT reordered by keep_inds.
    init_states = init_state0

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    # The number of classes is hard-coded to 21 here.
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, 21)

    scaled_rois = rois[:, 1:5] / float(im_scale)
    scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

    pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    top_mask_info = np.zeros((len(keep_inds), 12))
    top_mask_info[num_fg:, :] = -1

    for i, val in enumerate(fg_inds):
        gt_ind = gt_assignment[val]
        gt_box = np.around(scaled_gt_boxes[gt_ind]).astype(int)
        # Foreground RoIs occupy the first rows of `rois`, so row i of
        # scaled_rois is the RoI with original index `val`.
        ex_box = np.around(scaled_rois[i]).astype(int)
        gt_mask_info = mask_info[gt_ind]
        gt_mask = gt_masks[gt_ind][0:gt_mask_info[0], 0:gt_mask_info[1]]
        # regression targets is the intersection of bounding box and gt mask
        ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
        pos_masks[i, ...] = ex_mask
        top_mask_info[i, 0] = gt_ind
        top_mask_info[i, 1] = gt_mask_info[0]
        top_mask_info[i, 2] = gt_mask_info[1]
        top_mask_info[i, 3] = labels[i]
        top_mask_info[i, 4:8] = ex_box
        top_mask_info[i, 8:12] = gt_box

    return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
            bbox_targets, bbox_inside_weights, init_states)
def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info):
    """Reorder all RoIs as foreground-then-background and build their targets.

    No RoI is dropped: RoIs whose best overlap with a ground-truth box is
    >= cfg.TRAIN.BBOX_THRESH (foreground) are placed first, all remaining
    RoIs (background) follow.

    Args:
        all_rois: 2-D array; columns 1:5 hold the box coordinates
            (column 0 is presumably the batch index -- confirm with caller).
        gt_boxes: 2-D array; columns 0:4 hold the box coordinates and
            column 4 the class label.
        im_scale: scale factor; RoIs and ground-truth boxes are divided by
            it before the mask targets are computed.
        gt_masks: ground-truth masks, indexed by ground-truth index.
        mask_info: per-ground-truth entry whose first two values bound the
            rows and columns kept from the matching entry of gt_masks.

    Returns:
        Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
        bbox_targets, bbox_inside_weights)``. ``labels`` and ``rois`` follow
        the order given by ``keep_inds``. Each foreground row of
        ``top_mask_info`` holds
        [gt index, mask_info[0], mask_info[1], label, ex_box (4), gt_box (4)];
        background rows are filled with -1.
    """
    # overlaps: (rois x gt_boxes). The builtin float replaces the np.float
    # alias, which no longer exists in recent NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Split (not subsample) the RoIs by their best overlap.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
    num_fg = len(fg_inds)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)

    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[num_fg:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    # The number of classes is hard-coded to 21 here.
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, 21)

    scaled_rois = rois[:, 1:5] / float(im_scale)
    scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

    pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    top_mask_info = np.zeros((len(keep_inds), 12))
    top_mask_info[num_fg:, :] = -1

    for i, val in enumerate(fg_inds):
        gt_ind = gt_assignment[val]
        gt_box = np.around(scaled_gt_boxes[gt_ind]).astype(int)
        # Foreground RoIs occupy the first rows of `rois`, so row i of
        # scaled_rois is the RoI with original index `val`.
        ex_box = np.around(scaled_rois[i]).astype(int)
        gt_mask_info = mask_info[gt_ind]
        gt_mask = gt_masks[gt_ind][0:gt_mask_info[0], 0:gt_mask_info[1]]
        # regression targets is the intersection of bounding box and gt mask
        ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
        pos_masks[i, ...] = ex_mask
        top_mask_info[i, 0] = gt_ind
        top_mask_info[i, 1] = gt_mask_info[0]
        top_mask_info[i, 2] = gt_mask_info[1]
        top_mask_info[i, 3] = labels[i]
        top_mask_info[i, 4:8] = ex_box
        top_mask_info[i, 8:12] = gt_box

    return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
            bbox_targets, bbox_inside_weights)
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks, im_scale, mask_info):
    """
    Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array; columns 1:5 hold the box coordinates
            (column 0 is presumably the batch index -- confirm with caller).
        gt_boxes: 2-D array; columns 0:4 hold the box coordinates and
            column 4 the class label.
        rois_per_image: sampling budget; each foreground band takes at most
            ``rois_per_image * cfg.TRAIN.FG_FRACTION[i]`` RoIs and the
            background bands share what the foreground left over.
        num_classes: forwarded to get_bbox_regression_label.
        gt_masks: ground-truth masks, indexed by ground-truth index
            (only read when cfg.MNC_MODE is set).
        im_scale: scale factor; boxes are divided by it to map them to the
            original image space before mask targets are computed.
        mask_info: per-ground-truth entry whose first two values bound the
            rows and columns kept from the matching entry of gt_masks.

    Returns:
        Tuple ``(blobs, fg_inds, bg_inds, keep_inds)``. ``blobs`` is a dict
        with keys 'rois', 'labels', 'bbox_targets', 'bbox_inside_weights',
        'bbox_outside_weights' and, when cfg.MNC_MODE is set,
        'mask_targets', 'mask_weight' and 'gt_masks_info'. The three index
        arrays are integer arrays; ``keep_inds`` is ``fg_inds`` followed by
        ``bg_inds``.
    """
    # overlaps: (rois x gt_boxes). The builtin float replaces the np.float
    # alias, which no longer exists in recent NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    def _sample_from_bands(lows, highs, fractions, budget):
        """Randomly pick RoI indices from each [low, high] overlap band."""
        # Seed with an empty *integer* array: starting from a plain list made
        # the stacked result float64, which cannot be used as an index.
        picked = [np.empty(0, dtype=int)]
        for i in range(len(fractions)):
            cur_inds = np.where((max_overlaps >= lows[i]) &
                                (max_overlaps <= highs[i]))[0]
            # np.round returns a float; the sample size must be an integer.
            quota = int(min(cur_inds.size, np.round(budget * fractions[i])))
            if cur_inds.size > 0:
                cur_inds = npr.choice(cur_inds, size=quota, replace=False)
            picked.append(cur_inds)
        # Bands may overlap, so drop duplicates (this also sorts the indices).
        return np.unique(np.concatenate(picked)).astype(int)

    # Sample foreground indexes
    fg_inds = _sample_from_bands(cfg.TRAIN.FG_THRESH_LO, cfg.TRAIN.FG_THRESH_HI,
                                 cfg.TRAIN.FG_FRACTION, rois_per_image)
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = _sample_from_bands(cfg.TRAIN.BG_THRESH_LO, cfg.TRAIN.BG_THRESH_HI,
                                 cfg.TRAIN.BG_FRACTION, bg_rois_per_this_image)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        # map to original image space
        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[len(fg_inds):, :] = -1

        for i, val in enumerate(fg_inds):
            gt_ind = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_ind]).astype(int)
            # Foreground RoIs occupy the first rows of `rois`, so row i of
            # scaled_rois is the RoI with original index `val`.
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_ind]
            gt_mask = gt_masks[gt_ind][0:gt_mask_info[0], 0:gt_mask_info[1]]
            # calculate mask regression targets
            # (intersection of bounding box and gt mask)
            ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
            pos_masks[i, ...] = ex_mask
            top_mask_info[i, 0] = gt_ind
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros((rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # only assign box-level foreground as positive mask regression
        mask_weight[0:len(fg_inds), :, :, :] = 1

        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds
def process_roidb(file_start, file_end, db):
    """Convert MCG proposal files into cached roidb ``.mat`` files.

    Processes ``file_list[file_start:file_end]``. For every file the 'labels'
    and 'superpixels' entries are loaded, each proposal is turned into a
    tight [x1, y1, x2, y2] box plus a ``mask_size`` x ``mask_size`` mask, and
    the result is written to ``output_dir``. Files whose cache already exists
    are skipped.

    Relies on module-level names: file_list, input_dir, output_dir,
    mask_size, top_k, gt_roidbs, gt_maskdbs and cfg.

    Args:
        file_start: index of the first file to process (inclusive).
        file_end: index one past the last file to process.
        db: 'val' stores only the proposal masks and boxes; any other value
            additionally stores ground-truth boxes/masks, box and mask
            overlaps, mask targets and class labels.
    """
    for cnt in range(file_start, file_end):
        f = file_list[cnt]
        full_file = os.path.join(input_dir, f)
        output_cache = os.path.join(output_dir, f.split('.')[0] + '.mat')
        timer_tic = time.time()
        if os.path.exists(output_cache):
            continue

        mcg_mat = sio.loadmat(full_file)
        mcg_mask_label = mcg_mat['labels']
        mcg_superpixels = mcg_mat['superpixels']
        num_proposal = len(mcg_mask_label)
        mcg_boxes = np.zeros((num_proposal, 4))
        # Builtin bool/float replace the np.bool/np.float aliases, which no
        # longer exist in recent NumPy releases.
        mcg_masks = np.zeros((num_proposal, mask_size, mask_size), dtype=bool)

        for ind_proposal in range(num_proposal):
            label = mcg_mask_label[ind_proposal][0][0]
            # Pixels whose superpixel id is listed in `label` form the proposal.
            proposal = np.in1d(mcg_superpixels, label).reshape(mcg_superpixels.shape)
            r, c = np.where(proposal == 1)
            y1 = np.min(r)
            x1 = np.min(c)
            y2 = np.max(r)
            x2 = np.max(c)
            box = np.array([x1, y1, x2, y2])
            # Crop to the tight box, then resize to the fixed mask size.
            proposal = proposal[y1:y2 + 1, x1:x2 + 1]
            proposal = cv2.resize(proposal.astype(float), (mask_size, mask_size),
                                  interpolation=cv2.INTER_NEAREST)
            mcg_masks[ind_proposal, :, :] = proposal
            mcg_boxes[ind_proposal, :] = box

        if top_k != -1:
            mcg_boxes = mcg_boxes[:top_k, :]
            mcg_masks = mcg_masks[:top_k, :]

        if db == 'val':
            # if we prepare validation data, we only need its masks and boxes
            roidb = {
                'masks': (mcg_masks >= cfg.BINARIZE_THRESH).astype(bool),
                'boxes': mcg_boxes
            }
        else:
            # Otherwise we need to prepare other information like overlaps
            num_mcg = mcg_boxes.shape[0]
            gt_roidb = gt_roidbs[cnt]
            gt_maskdb = gt_maskdbs[cnt]
            gt_boxes = gt_roidb['boxes']
            gt_masks = gt_maskdb['gt_masks']
            gt_classes = gt_roidb['gt_classes']
            num_gt = gt_boxes.shape[0]
            num_all = num_gt + num_mcg

            # define output structure
            seg_overlaps = np.zeros((num_all, 1))
            mask_targets = np.zeros((num_all, mask_size, mask_size))

            # Ground-truth entries come first, MCG proposals after them.
            all_boxes = np.vstack((gt_boxes[:, :4], mcg_boxes)).astype(int)
            all_masks = np.zeros((num_all, mask_size, mask_size))
            for i in range(num_gt):
                all_masks[i, :, :] = cv2.resize(gt_masks[i].astype(float),
                                                (mask_size, mask_size))
            assert all_masks[num_gt:, :, :].shape == mcg_masks.shape
            all_masks[num_gt:, :, :] = mcg_masks

            # record bounding box overlaps
            cur_overlap = bbox_overlaps(all_boxes.astype(float),
                                        gt_boxes.astype(float))
            seg_assignment = cur_overlap.argmax(axis=1)
            det_overlaps = cur_overlap.max(axis=1)
            # Boxes touching no ground truth get no assignment.
            seg_assignment[det_overlaps == 0] = -1

            # record mask region overlaps
            seg_overlaps[:num_gt] = 1.0
            for i in range(num_gt, num_all):
                # Resize the fixed-size mask back to its box's width x height.
                box_w = int(all_boxes[i, 2] - all_boxes[i, 0] + 1)
                box_h = int(all_boxes[i, 3] - all_boxes[i, 1] + 1)
                cur_mask = cv2.resize(all_masks[i, :, :].astype(float),
                                      (box_w, box_h)) >= cfg.BINARIZE_THRESH
                for mask_ind in range(len(gt_masks)):
                    gt_mask = gt_masks[mask_ind]
                    gt_roi = gt_roidb['boxes'][mask_ind]
                    cur_ov = mask_overlap(all_boxes[i, :], gt_roi, cur_mask, gt_mask)
                    seg_overlaps[i] = max(seg_overlaps[i], cur_ov)

            output_label = np.zeros((num_all, 1))
            for i in range(num_all):
                cur_ind = seg_assignment[i]
                if cur_ind == -1:
                    continue
                output_label[i] = gt_classes[cur_ind]
                mask_targets[i, :, :] = intersect_mask(
                    all_boxes[i, :], gt_roidb['boxes'][cur_ind], gt_masks[cur_ind])

            # Some of the array need to insert a new axis to be consistent of savemat method
            roidb = {
                'masks': (all_masks >= cfg.BINARIZE_THRESH).astype(bool),
                'boxes': all_boxes,
                'det_overlap': det_overlaps[:, np.newaxis],
                'seg_overlap': seg_overlaps,
                'mask_targets': (mask_targets >= cfg.BINARIZE_THRESH).astype(bool),
                'gt_classes': gt_classes[:, np.newaxis],
                'output_label': output_label,
                'gt_assignment': seg_assignment[:, np.newaxis],
                'Flip': False
            }

        sio.savemat(output_cache, roidb)
        use_time = time.time() - timer_tic
        # Single-argument print call: same output on Python 2 and Python 3.
        print('%d/%d use time %f' % (cnt, len(file_list), use_time))
def process_roidb(file_start, file_end, db):
    """Convert MCG proposal files into cached roidb ``.mat`` files.

    Processes ``file_list[file_start:file_end]``. For every file the 'labels'
    and 'superpixels' entries are loaded, each proposal is turned into a
    tight [x1, y1, x2, y2] box plus a ``mask_size`` x ``mask_size`` mask, and
    the result is written to ``output_dir``. Files whose cache already exists
    are skipped.

    Relies on module-level names: file_list, input_dir, output_dir,
    mask_size, top_k, gt_roidbs, gt_maskdbs and cfg.

    Args:
        file_start: index of the first file to process (inclusive).
        file_end: index one past the last file to process.
        db: 'val' stores only the proposal masks and boxes; any other value
            additionally stores ground-truth boxes/masks, box and mask
            overlaps, mask targets and class labels.
    """
    for cnt in range(file_start, file_end):
        f = file_list[cnt]
        full_file = os.path.join(input_dir, f)
        output_cache = os.path.join(output_dir, f.split('.')[0] + '.mat')
        timer_tic = time.time()
        if os.path.exists(output_cache):
            continue

        mcg_mat = sio.loadmat(full_file)
        mcg_mask_label = mcg_mat['labels']
        mcg_superpixels = mcg_mat['superpixels']
        num_proposal = len(mcg_mask_label)
        mcg_boxes = np.zeros((num_proposal, 4))
        # Builtin bool/float replace the np.bool/np.float aliases, which no
        # longer exist in recent NumPy releases.
        mcg_masks = np.zeros((num_proposal, mask_size, mask_size), dtype=bool)

        for ind_proposal in range(num_proposal):
            label = mcg_mask_label[ind_proposal][0][0]
            # Pixels whose superpixel id is listed in `label` form the proposal.
            proposal = np.in1d(mcg_superpixels, label).reshape(mcg_superpixels.shape)
            r, c = np.where(proposal == 1)
            y1 = np.min(r)
            x1 = np.min(c)
            y2 = np.max(r)
            x2 = np.max(c)
            box = np.array([x1, y1, x2, y2])
            # Crop to the tight box, then resize to the fixed mask size.
            proposal = proposal[y1:y2 + 1, x1:x2 + 1]
            proposal = cv2.resize(proposal.astype(float), (mask_size, mask_size),
                                  interpolation=cv2.INTER_NEAREST)
            mcg_masks[ind_proposal, :, :] = proposal
            mcg_boxes[ind_proposal, :] = box

        if top_k != -1:
            mcg_boxes = mcg_boxes[:top_k, :]
            mcg_masks = mcg_masks[:top_k, :]

        if db == 'val':
            # if we prepare validation data, we only need its masks and boxes
            roidb = {
                'masks': (mcg_masks >= cfg.BINARIZE_THRESH).astype(bool),
                'boxes': mcg_boxes
            }
        else:
            # Otherwise we need to prepare other information like overlaps
            num_mcg = mcg_boxes.shape[0]
            gt_roidb = gt_roidbs[cnt]
            gt_maskdb = gt_maskdbs[cnt]
            gt_boxes = gt_roidb['boxes']
            gt_masks = gt_maskdb['gt_masks']
            gt_classes = gt_roidb['gt_classes']
            num_gt = gt_boxes.shape[0]
            num_all = num_gt + num_mcg

            # define output structure
            seg_overlaps = np.zeros((num_all, 1))
            mask_targets = np.zeros((num_all, mask_size, mask_size))

            # Ground-truth entries come first, MCG proposals after them.
            all_boxes = np.vstack((gt_boxes[:, :4], mcg_boxes)).astype(int)
            all_masks = np.zeros((num_all, mask_size, mask_size))
            for i in range(num_gt):
                all_masks[i, :, :] = cv2.resize(gt_masks[i].astype(float),
                                                (mask_size, mask_size))
            assert all_masks[num_gt:, :, :].shape == mcg_masks.shape
            all_masks[num_gt:, :, :] = mcg_masks

            # record bounding box overlaps
            cur_overlap = bbox_overlaps(all_boxes.astype(float),
                                        gt_boxes.astype(float))
            seg_assignment = cur_overlap.argmax(axis=1)
            det_overlaps = cur_overlap.max(axis=1)
            # Boxes touching no ground truth get no assignment.
            seg_assignment[det_overlaps == 0] = -1

            # record mask region overlaps
            seg_overlaps[:num_gt] = 1.0
            for i in range(num_gt, num_all):
                # Resize the fixed-size mask back to its box's width x height.
                box_w = int(all_boxes[i, 2] - all_boxes[i, 0] + 1)
                box_h = int(all_boxes[i, 3] - all_boxes[i, 1] + 1)
                cur_mask = cv2.resize(all_masks[i, :, :].astype(float),
                                      (box_w, box_h)) >= cfg.BINARIZE_THRESH
                for mask_ind in range(len(gt_masks)):
                    gt_mask = gt_masks[mask_ind]
                    gt_roi = gt_roidb['boxes'][mask_ind]
                    cur_ov = mask_overlap(all_boxes[i, :], gt_roi, cur_mask, gt_mask)
                    seg_overlaps[i] = max(seg_overlaps[i], cur_ov)

            output_label = np.zeros((num_all, 1))
            for i in range(num_all):
                cur_ind = seg_assignment[i]
                if cur_ind == -1:
                    continue
                output_label[i] = gt_classes[cur_ind]
                mask_targets[i, :, :] = intersect_mask(
                    all_boxes[i, :], gt_roidb['boxes'][cur_ind], gt_masks[cur_ind])

            # Some of the array need to insert a new axis to be consistent of savemat method
            roidb = {
                'masks': (all_masks >= cfg.BINARIZE_THRESH).astype(bool),
                'boxes': all_boxes,
                'det_overlap': det_overlaps[:, np.newaxis],
                'seg_overlap': seg_overlaps,
                'mask_targets': (mask_targets >= cfg.BINARIZE_THRESH).astype(bool),
                'gt_classes': gt_classes[:, np.newaxis],
                'output_label': output_label,
                'gt_assignment': seg_assignment[:, np.newaxis],
                'Flip': False
            }

        sio.savemat(output_cache, roidb)
        use_time = time.time() - timer_tic
        # Single-argument print call: same output on Python 2 and Python 3.
        print('%d/%d use time %f' % (cnt, len(file_list), use_time))
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks, im_scale, mask_info):
    """
    Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array; columns 1:5 hold the box coordinates
            (column 0 is presumably the batch index -- confirm with caller).
        gt_boxes: 2-D array; columns 0:4 hold the box coordinates and
            column 4 the class label.
        rois_per_image: sampling budget; each foreground band takes at most
            ``rois_per_image * cfg.TRAIN.FG_FRACTION[i]`` RoIs and the
            background bands share what the foreground left over.
        num_classes: forwarded to get_bbox_regression_label.
        gt_masks: ground-truth masks, indexed by ground-truth index
            (only read when cfg.MNC_MODE is set).
        im_scale: scale factor; boxes are divided by it to map them to the
            original image space before mask targets are computed.
        mask_info: per-ground-truth entry whose first two values bound the
            rows and columns kept from the matching entry of gt_masks.

    Returns:
        Tuple ``(blobs, fg_inds, bg_inds, keep_inds)``. ``blobs`` is a dict
        with keys 'rois', 'labels', 'bbox_targets', 'bbox_inside_weights',
        'bbox_outside_weights' and, when cfg.MNC_MODE is set,
        'mask_targets', 'mask_weight' and 'gt_masks_info'. The three index
        arrays are integer arrays; ``keep_inds`` is ``fg_inds`` followed by
        ``bg_inds``.
    """
    # overlaps: (rois x gt_boxes). The builtin float replaces the np.float
    # alias, which no longer exists in recent NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    def _sample_from_bands(lows, highs, fractions, budget):
        """Randomly pick RoI indices from each [low, high] overlap band."""
        # Seed with an empty *integer* array: starting from a plain list made
        # the stacked result float64, which cannot be used as an index.
        picked = [np.empty(0, dtype=int)]
        for i in range(len(fractions)):
            cur_inds = np.where((max_overlaps >= lows[i]) &
                                (max_overlaps <= highs[i]))[0]
            # np.round returns a float; the sample size must be an integer.
            quota = int(min(cur_inds.size, np.round(budget * fractions[i])))
            if cur_inds.size > 0:
                cur_inds = npr.choice(cur_inds, size=quota, replace=False)
            picked.append(cur_inds)
        # Bands may overlap, so drop duplicates (this also sorts the indices).
        return np.unique(np.concatenate(picked)).astype(int)

    # Sample foreground indexes
    fg_inds = _sample_from_bands(cfg.TRAIN.FG_THRESH_LO, cfg.TRAIN.FG_THRESH_HI,
                                 cfg.TRAIN.FG_FRACTION, rois_per_image)
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = _sample_from_bands(cfg.TRAIN.BG_THRESH_LO, cfg.TRAIN.BG_THRESH_HI,
                                 cfg.TRAIN.BG_FRACTION, bg_rois_per_this_image)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        # map to original image space
        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[len(fg_inds):, :] = -1

        for i, val in enumerate(fg_inds):
            gt_ind = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_ind]).astype(int)
            # Foreground RoIs occupy the first rows of `rois`, so row i of
            # scaled_rois is the RoI with original index `val`.
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_ind]
            gt_mask = gt_masks[gt_ind][0:gt_mask_info[0], 0:gt_mask_info[1]]
            # calculate mask regression targets
            # (intersection of bounding box and gt mask)
            ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
            pos_masks[i, ...] = ex_mask
            top_mask_info[i, 0] = gt_ind
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros((rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # only assign box-level foreground as positive mask regression
        mask_weight[0:len(fg_inds), :, :, :] = 1

        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds