def prepare_data(roidb, aug, aug_type="default", is_unlabled=False):
    """Load one image and build its (augmented) detection training inputs.

    NOTE(review): this function reads ``self`` although it is not a
    parameter, so it presumably lives inside a method and closes over the
    enclosing ``self`` -- confirm against the surrounding file.

    Args:
        roidb: mapping providing "file_name", "boxes", "class", "is_crowd"
            and "image_id". "boxes" must be a float32 array of shape (N, 4);
            columns 0/2 are scaled by the image width and columns 1/3 by the
            image height when ``self.cfg.DATA.ABSOLUTE_COORD`` is false.
        aug: augmentation object. For labeled data with
            ``aug_type == "default"`` its ``get_transform(image)`` is used;
            for any other ``aug_type`` it is called as
            ``aug(images=[...], bounding_boxes=[...], n_real_box=...)``.
        aug_type: "default" applies a single transform; any other value
            applies ``self.resize`` followed by the strong augmentation.
        is_unlabled: when true, boxes and labels are taken from
            ``self.get_pseudo_gt(image_id)`` instead of ``roidb`` and all
            crowd flags are set to 0.

    Returns:
        dict with "image", the RPN anchor entries ("anchor_labels_lvl{k}" /
        "anchor_boxes_lvl{k}" for k starting at 2 when
        ``self.cfg.MODE_FPN`` is set, otherwise "anchor_labels" /
        "anchor_boxes"), "gt_boxes", "gt_labels" and, for unlabeled input,
        "proposals_boxes".

    Raises:
        AssertionError: if the boxes have the wrong shape or dtype, the image
            cannot be read, or no usable pseudo target exists.
    """
    fname = roidb["file_name"]
    boxes = roidb["boxes"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]
    img_id = roidb["image_id"]

    assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
    boxes = np.copy(boxes)  # never scale the caller's array in place
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"
    if not self.cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    pseudo_target = None
    if is_unlabled:
        # Unlabeled data: replace the roidb annotations with pseudo targets.
        pseudo_target = self.get_pseudo_gt(img_id)
        assert pseudo_target is not None
        boxes = pseudo_target["boxes"]
        klass = pseudo_target["labels"].astype(np.int32)
        assert len(boxes) > 0, "boxes after thresholding becomes to zero"
        # Pseudo targets carry no crowd annotations.
        is_crowd = np.array([0] * len(klass))

    use_strong_aug = aug_type != "default"
    if use_strong_aug:
        # Strong augmentation operates on the resized image.
        transform_source = self.resize
    elif is_unlabled:
        # Default augmentation on unlabeled data uses the instance-level aug.
        transform_source = self.aug
    else:
        transform_source = aug

    tfms = transform_source.get_transform(im)
    im = tfms.apply_image(im)
    points = box_to_point8(boxes)
    points = tfms.apply_coords(points)
    boxes = point8_to_box(points)

    if use_strong_aug:
        boxes_backup = boxes.copy()
        h, w = im.shape[:2]
        try:
            assert len(boxes) > 0, "boxes after resizing becomes to zero"
            assert np.sum(np_area(boxes)) > 0, "boxes are all zero area!"
            bbs = array_to_bb(boxes)
            images_aug, bbs_aug, _ = aug(
                images=[im], bounding_boxes=[bbs], n_real_box=len(bbs))
            # Convert back to a box array and clamp it to the image extent.
            aug_boxes = bb_to_array(bbs_aug[0])
            aug_boxes[:, [0, 2]] = np.clip(aug_boxes[:, [0, 2]], 0, w)
            aug_boxes[:, [1, 3]] = np.clip(aug_boxes[:, [1, 3]], 0, h)
            # After affine transforms some boxes can have zero area; drop
            # them together with their labels and crowd flags.
            aug_boxes, mask = remove_empty_boxes(aug_boxes)
            aug_klass = klass[mask]
            aug_is_crowd = is_crowd[mask]
            assert len(
                aug_klass
            ) > 0, "Empty boxes and kclass after removing empty ones"
            assert aug_klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(aug_klass.max())
            assert np.min(np_area(aug_boxes)) > 0, "Some boxes have zero area!"
        except Exception:
            # Augmentation failed or emptied the sample: fall back to the
            # non-augmented data. Results were staged in temporaries, so
            # klass and is_crowd still match the restored boxes.
            boxes = boxes_backup
        else:
            # Commit image, boxes, labels and crowd flags together so they
            # can never get out of sync.
            im = images_aug[0]
            boxes, klass, is_crowd = aug_boxes, aug_klass, aug_is_crowd

    ret = {"image": im}
    # Add rpn data to dataflow:
    if self.cfg.MODE_FPN:
        multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
            im, boxes, is_crowd)
        for level, (anchor_labels, anchor_boxes) in enumerate(
                multilevel_anchor_inputs, start=2):
            ret["anchor_labels_lvl{}".format(level)] = anchor_labels
            ret["anchor_boxes_lvl{}".format(level)] = anchor_boxes
    else:
        ret["anchor_labels"], ret["anchor_boxes"] = self.get_rpn_anchor_input(
            im, boxes, is_crowd)

    # Skip crowd boxes in the training target.
    not_crowd = is_crowd == 0
    ret["gt_boxes"] = boxes[not_crowd]
    ret["gt_labels"] = klass[not_crowd]
    if is_unlabled:
        ret["proposals_boxes"] = pseudo_target["proposals_boxes"]
    return ret
def remove_empty_boxes(boxes):
    """Keep only the boxes whose area is strictly positive.

    Args:
        boxes: box array accepted by ``np_area`` (defined elsewhere in the
            project; presumably yields one area per box -- confirm).

    Returns:
        Tuple ``(kept_boxes, keep)`` where ``keep`` is the boolean result of
        ``np_area(boxes) > 0`` and ``kept_boxes`` is ``boxes[keep]``. The
        mask is returned so callers can filter per-box data the same way.
    """
    keep = np_area(boxes) > 0
    kept_boxes = boxes[keep]
    return kept_boxes, keep
def __call__(self, roidb):
    """Turn one roidb record into training inputs, optionally strongly augmented.

    Args:
        roidb: mapping providing "file_name", "boxes", "class" and
            "is_crowd". "boxes" must be a float32 array of shape (N, 4);
            columns 0/2 are scaled by the image width and columns 1/3 by the
            image height when ``self.cfg.DATA.ABSOLUTE_COORD`` is false.

    Returns:
        dict with "image", the RPN anchor entries ("anchor_labels_lvl{k}" /
        "anchor_boxes_lvl{k}" for k starting at 2 when
        ``self.cfg.MODE_FPN`` is set, otherwise "anchor_labels" /
        "anchor_boxes"), "gt_boxes" and "gt_labels"; or ``None`` when
        building the anchor / ground-truth entries raises, in which case the
        input is reported via ``log_once`` and filtered out.

    Raises:
        AssertionError: if the boxes have the wrong shape or dtype, or the
            image cannot be read.
    """
    fname = roidb["file_name"]
    boxes = roidb["boxes"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]

    assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
    boxes = np.copy(boxes)  # never scale the caller's array in place
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"
    if not self.cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # The weak augmentation is always applied first.
    tfms = self.aug_weak.get_transform(im)
    im = tfms.apply_image(im)
    points = box_to_point8(boxes)
    points = tfms.apply_coords(points)
    boxes = point8_to_box(points)

    if self.aug_type != "default":
        h, w = im.shape[:2]
        boxes_backup = boxes.copy()
        try:
            assert len(boxes) > 0, "boxes after resizing becomes to zero"
            assert np.sum(np_area(boxes)) > 0, "boxes are all zero area!"
            bbs = array_to_bb(boxes)
            images_aug, bbs_aug, _ = self.aug_strong(
                images=[im], bounding_boxes=[bbs], n_real_box=len(bbs))
            # Convert back to a box array and clamp it to the image extent.
            aug_boxes = bb_to_array(bbs_aug[0])
            aug_boxes[:, [0, 2]] = np.clip(aug_boxes[:, [0, 2]], 0, w)
            aug_boxes[:, [1, 3]] = np.clip(aug_boxes[:, [1, 3]], 0, h)
            # After affine transforms some boxes can have zero area; drop
            # them together with their labels.
            aug_boxes, mask = remove_empty_boxes(aug_boxes)
            aug_klass = klass[mask]
            assert len(
                aug_klass
            ) > 0, "Empty boxes and kclass after removing empty ones"
            # The strongly augmented sample carries no crowd annotations.
            aug_is_crowd = np.array([0] * len(aug_klass))
            assert aug_klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(aug_klass.max())
            assert np.min(np_area(aug_boxes)) > 0, "Some boxes have zero area!"
        except Exception as e:
            logger.warn("Error catched " + str(e) +
                        "\n Use non-augmented data.")
            # Results were staged in temporaries, so klass and is_crowd are
            # untouched and still match the restored boxes.
            boxes = boxes_backup
        else:
            # Commit image, boxes, labels and crowd flags together so they
            # can never get out of sync.
            im = images_aug[0]
            boxes, klass, is_crowd = aug_boxes, aug_klass, aug_is_crowd

    ret = {"image": im}
    try:
        # Add rpn data to dataflow:
        if self.cfg.MODE_FPN:
            multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
                im, boxes, is_crowd)
            for level, (anchor_labels, anchor_boxes) in enumerate(
                    multilevel_anchor_inputs, start=2):
                ret["anchor_labels_lvl{}".format(level)] = anchor_labels
                ret["anchor_boxes_lvl{}".format(level)] = anchor_boxes
        else:
            ret["anchor_labels"], ret[
                "anchor_boxes"] = self.get_rpn_anchor_input(
                    im, boxes, is_crowd)

        # Skip crowd boxes in the training target.
        not_crowd = is_crowd == 0
        ret["gt_boxes"] = boxes[not_crowd]
        ret["gt_labels"] = klass[not_crowd]
    except Exception as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            "warn")
        return None
    return ret