def get_proposal_gt(self, points_batch):
    """Return RPN proposal ground truth for a batch of point sets.

    Currently a stub: it iterates over ``points_batch`` without processing
    and returns ``(None, None)``.

    Args:
        points_batch: iterable of per-image point sets.

    Returns:
        Tuple ``(rpn_match, rpn_bbox_deltas)`` — both ``None`` for now.
    """
    # BUGFIX: the original body called
    #   build_rpn_targets(image_shape=None, anchors, gt_class_ids, gt_boxes, config)
    # which is a SyntaxError (positional argument follows keyword argument)
    # and referenced names not defined in this scope. The broken call is
    # removed; the observable result (None, None) is unchanged.
    for pts in points_batch:
        pass
    return None, None
def display_rpn_targets():
    """Visualize the RPN training targets for a single image.

    Loads the ground truth for the module-level ``image_id``, builds the RPN
    match/delta targets over the model's anchors, logs the positive/negative/
    neutral anchor sets, and draws the positive anchors before refinement
    (dotted) and after refinement (solid).

    NOTE(review): relies on module-level globals (``dataset``, ``config``,
    ``image_id``, ``model``, ``modellib``, ``utils``, ``visualize``, ``log``,
    ``get_ax``, ``np``, ``plt``).
    """
    # Generate RPN training targets
    resized_image, image_meta, gt_class_ids, gt_bboxes, gt_masks = \
        modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)
    image_info = dataset.image_info[image_id]
    # Note: image_info["id"] is the image's filename.
    print("Image ID: {}.{} ({}) {}".format(image_info["source"],
                                           image_info["id"], image_id,
                                           dataset.image_reference(image_id)))

    # get_anchors assigns pixel-coordinate anchors to model.anchors as a side
    # effect; the returned anchors are in normalized coordinates.
    normalized_anchors = model.get_anchors(resized_image.shape)
    anchors = model.anchors

    # target_rpn_match is 1 for positive anchors, -1 for negative anchors
    # and 0 for neutral anchors.
    # BUGFIX: the other call sites of build_rpn_targets in this file pass the
    # image shape as the first argument; it was missing here, shifting every
    # argument one position to the left.
    target_rpn_match, target_rpn_deltas = modellib.build_rpn_targets(
        resized_image.shape, anchors, gt_class_ids, gt_bboxes, model.config)
    log("target_rpn_match", target_rpn_match)
    log("target_rpn_deltas", target_rpn_deltas)

    positive_anchor_ix = np.where(target_rpn_match[:] == 1)[0]
    negative_anchor_ix = np.where(target_rpn_match[:] == -1)[0]
    neutral_anchor_ix = np.where(target_rpn_match[:] == 0)[0]
    positive_anchors = model.anchors[positive_anchor_ix]
    negative_anchors = model.anchors[negative_anchor_ix]
    neutral_anchors = model.anchors[neutral_anchor_ix]
    log("positive_anchors", positive_anchors)
    log("negative_anchors", negative_anchors)
    log("neutral anchors", neutral_anchors)

    # Apply refinement deltas to positive anchors (deltas are stored
    # normalized by RPN_BBOX_STD_DEV).
    refined_anchors = utils.apply_box_deltas(
        positive_anchors,
        target_rpn_deltas[:positive_anchors.shape[0]] *
        model.config.RPN_BBOX_STD_DEV)
    log("refined_anchors", refined_anchors)

    # Display positive anchors before refinement (dotted) and
    # after refinement (solid).
    visualize.draw_boxes(resized_image, boxes=positive_anchors,
                         refined_boxes=refined_anchors, ax=get_ax())
    plt.show()
# # The Region Proposal Network (RPN) runs a lightweight binary classifier on a
# lot of boxes (anchors) over the image and returns object/no-object scores.
# Anchors with a high *objectness* score (positive anchors) are passed to stage
# two to be classified.
#
# Often, even positive anchors don't cover objects fully. So the RPN also
# regresses a refinement (a delta in location and size) to be applied to the
# anchors to shift and resize them a bit to the correct boundaries of the
# object.
#%% [markdown]
# ### 1.a RPN Targets
#
# The RPN targets are the training values for the RPN. To generate the targets,
# we start with a grid of anchors that cover the full image at different
# scales, and then we compute the IoU of the anchors with ground truth objects.
# Positive anchors are those that have an IoU >= 0.7 with any ground truth
# object, and negative anchors are those that don't cover any object by more
# than 0.3 IoU. Anchors in between (i.e. cover an object by IoU >= 0.3 but
# < 0.7) are considered neutral and excluded from training.
#
# To train the RPN regressor, we also compute the shift and resizing needed to
# make the anchor cover the ground truth object completely.
#%%
# Generate RPN training targets
# target_rpn_match is 1 for positive anchors, -1 for negative anchors
# and 0 for neutral anchors.
# NOTE(review): notebook cell — uses module-level globals (image, model,
# modellib, utils, np, log, gt_class_id, gt_bbox).
# target_rpn_match is 1 for positive anchors, -1 for negative, 0 for neutral.
target_rpn_match, target_rpn_bbox = modellib.build_rpn_targets(
    image.shape, model.anchors, gt_class_id, gt_bbox, model.config)
log("target_rpn_match", target_rpn_match)
log("target_rpn_bbox", target_rpn_bbox)
# Split anchors into positive/negative/neutral sets by their match value.
positive_anchor_ix = np.where(target_rpn_match[:] == 1)[0]
negative_anchor_ix = np.where(target_rpn_match[:] == -1)[0]
neutral_anchor_ix = np.where(target_rpn_match[:] == 0)[0]
positive_anchors = model.anchors[positive_anchor_ix]
negative_anchors = model.anchors[negative_anchor_ix]
neutral_anchors = model.anchors[neutral_anchor_ix]
log("positive_anchors", positive_anchors)
log("negative_anchors", negative_anchors)
log("neutral anchors", neutral_anchors)
# Apply refinement deltas to positive anchors
# NOTE(review): this call is truncated in this chunk of the file — its
# argument list continues past the visible source.
refined_anchors = utils.apply_box_deltas(
def siamese_data_generator(dataset, config, shuffle=True,
                           augmentation=imgaug.augmenters.Fliplr(0.5),
                           random_rois=0, batch_size=1,
                           detection_targets=False, diverse=0):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augmentation: imgaug augmentation applied to images (default:
        horizontal flips with probability 0.5)
    random_rois: If > 0 then generate proposals to be used to train the
                 network classifier and mask heads. Useful if training
                 the Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs,
        bbox deltas, and masks). Typically for debugging or visualizations
        because in training detection targets are generated by
        DetectionTargetLayer.
    diverse: Float in [0,1] indicating probability to draw a target from
        any random class instead of one from the image classes.
        NOTE(review): currently unused in this implementation.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative,
                 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and
                width are those of the image unless use_mini_mask is True,
                in which case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if
        detection_targets is True then the outputs list contains target
        class_ids, bbox deltas, and masks.
    """
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)
    error_count = 0

    # Anchors: [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = modellib.compute_backbone_shapes(config,
                                                       config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    # Keras requires a generator to run indefinitely.
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start
            # of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]
            image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                modellib.load_image_gt(dataset, config, image_id,
                                       augmentation=augmentation,
                                       use_mini_mask=config.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # Use only positive class_ids
            categories = np.unique(gt_class_ids)
            _idx = categories > 0
            categories = categories[_idx]
            # Use only active classes
            active_categories = []
            for c in categories:
                if any(c == dataset.ACTIVE_CLASSES):
                    active_categories.append(c)

            # Skip image if it contains no instance of any active class.
            if not np.any(np.array(active_categories) > 0):
                continue

            # Randomly select category
            category = np.random.choice(active_categories)

            # Generate siamese target crop
            target = get_one_target(category, dataset, config,
                                    augmentation=augmentation)
            if target is None:
                # fix until a better ADE20K metadata is built
                print('skip target')
                continue
            target_class_id = category
            target_class_ids = np.array([target_class_id])

            # Keep only the ground truth belonging to the target class and
            # relabel it as 1 (foreground) for the siamese binary head.
            idx = gt_class_ids == target_class_id
            siamese_class_ids = idx.astype('int8')
            siamese_class_ids = siamese_class_ids[idx]
            gt_class_ids = gt_class_ids[idx]
            gt_boxes = gt_boxes[idx, :]
            gt_masks = gt_masks[:, :, idx]
            image_meta = image_meta[:14]

            # RPN Targets
            rpn_match, rpn_bbox = modellib.build_rpn_targets(
                image.shape, anchors, gt_class_ids, gt_boxes, config)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = modellib.generate_random_rois(
                    image.shape, random_rois, gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = \
                        modellib.build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks,
                            config)

            # Init batch arrays
            if b == 0:
                batch_image_meta = np.zeros(
                    (batch_size, ) + image_meta.shape,
                    dtype=image_meta.dtype)
                batch_rpn_match = np.zeros(
                    [batch_size, anchors.shape[0], 1],
                    dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4],
                    dtype=rpn_bbox.dtype)
                batch_images = np.zeros(
                    (batch_size, ) + image.shape, dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES, 4),
                    dtype=np.int32)
                batch_targets = np.zeros(
                    (batch_size, ) + target.shape, dtype=np.float32)
                # batch_target_class_ids = np.zeros(
                #     (batch_size, config.MAX_TARGET_INSTANCES),
                #     dtype=np.int32)
                if config.USE_MINI_MASK:
                    batch_gt_masks = np.zeros(
                        (batch_size, config.MINI_MASK_SHAPE[0],
                         config.MINI_MASK_SHAPE[1],
                         config.MAX_GT_INSTANCES))
                else:
                    batch_gt_masks = np.zeros(
                        (batch_size, image.shape[0], image.shape[1],
                         config.MAX_GT_INSTANCES))
                if random_rois:
                    batch_rpn_rois = np.zeros(
                        (batch_size, rpn_rois.shape[0], 4),
                        dtype=rpn_rois.dtype)
                    if detection_targets:
                        batch_rois = np.zeros(
                            (batch_size, ) + rois.shape, dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size, ) + mrcnn_class_ids.shape,
                            dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size, ) + mrcnn_bbox.shape,
                            dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size, ) + mrcnn_mask.shape,
                            dtype=mrcnn_mask.dtype)

            # If more instances than fit in the array, sub-sample from them.
            if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
                ids = np.random.choice(np.arange(gt_boxes.shape[0]),
                                       config.MAX_GT_INSTANCES,
                                       replace=False)
                gt_class_ids = gt_class_ids[ids]
                siamese_class_ids = siamese_class_ids[ids]
                gt_boxes = gt_boxes[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch
            batch_image_meta[b] = image_meta
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = modellib.mold_image(
                image.astype(np.float32), config)
            batch_targets[b] = modellib.mold_image(
                target.astype(np.float32), config)
            batch_gt_class_ids[
                b, :siamese_class_ids.shape[0]] = siamese_class_ids
            # batch_target_class_ids[b, :target_class_ids.shape[0]] = target_class_ids
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask
            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [
                    batch_images, batch_image_meta, batch_targets,
                    batch_rpn_match, batch_rpn_bbox, batch_gt_class_ids,
                    batch_gt_boxes, batch_gt_masks
                ]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the
                        # same number of dimensions.
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend([
                            batch_mrcnn_class_ids, batch_mrcnn_bbox,
                            batch_mrcnn_mask
                        ])

                yield inputs, outputs

                # start a new batch
                b = 0
        except (GeneratorExit, KeyboardInterrupt):
            raise
        except Exception:
            # BUGFIX: was a bare `except:`, which would also swallow
            # SystemExit. Log the failing image and skip it; give up after
            # repeated errors.
            modellib.logging.exception("Error processing image {}".format(
                dataset.image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise