def test_bbox_dataset_to_prediction_roundtrip(self):
    """Simulate the process of reading a ground-truth box from a dataset,
    making predictions from proposals, converting the predictions back to
    the dataset format, and then using the COCO API to compute IoU overlap
    between the gt box and the predictions. These should have IoU of 1.
    """
    weights = (5, 5, 10, 10)
    # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
    gt_xywh_box = [10, 20, 100, 150]
    # 2/ convert it to our internal (x1, y1, x2, y2) format
    gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
    # 3/ consider nearby proposal boxes
    prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
    # 4/ compute proposal-to-gt transformation deltas
    deltas = box_utils.bbox_transform_inv(
        prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
    )
    # 5/ use deltas to transform proposals to xyxy predicted box
    pred_xyxy_boxes = box_utils.bbox_transform(
        prop_xyxy_boxes, deltas, weights=weights
    )
    # 6/ convert xyxy predicted box to xywh predicted box
    pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
    # 7/ use COCO API to compute IoU
    not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
    ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
    np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
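# The test above relies on a random_boxes helper (not shown here) to generate
# proposals near the ground-truth box. A minimal sketch, assuming uniform
# jitter of up to `stdev` pixels around the reference box; the exact helper in
# the test module is authoritative.
import numpy as np

def random_boxes(mean_box, stdev, N):
    """Generate N boxes by adding uniform noise (up to `stdev` pixels)
    to a reference box given as (x1, y1, x2, y2)."""
    boxes = np.random.rand(N, 4) * stdev + mean_box
    return boxes.astype(dtype=np.float32)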
def load_detections(detection_file, dataset, thresholds):
    with open(detection_file) as f:
        detections = json.load(f)
    roidb = dataset.get_roidb()
    classes = dataset.classes
    # apply class thresholds
    thres_dets = [
        det for det in detections
        if det['score'] > thresholds[classes[det['category_id']]]
    ]
    # save thresholded detections to file
    output_dir = os.path.abspath(get_output_dir(dataset.name, training=False))
    res_file = os.path.join(
        output_dir, 'bbox_' + dataset.name + '_results_thresh.json'
    )
    with open(res_file, 'w') as out_file:
        json.dump(thres_dets, out_file)
    # get per-image detections
    detections_per_image = []
    for entry in roidb:
        # get all detections for image
        im_dets = [det for det in thres_dets if det['image_id'] == entry['id']]
        # convert bbox from xywh to xyxy format
        for det in im_dets:
            det["bbox"] = box_utils.xywh_to_xyxy(det["bbox"])
        detections_per_image.append(im_dets)
    return detections_per_image, res_file
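# A minimal usage sketch for load_detections. The dataset name, the flat
# 0.5-per-class thresholds, and the JsonDataset import path are illustrative
# assumptions; substitute the project's own dataset wrapper and tuned
# per-class thresholds.
def _example_load_detections(detection_file='detections.json'):
    from detectron.datasets.json_dataset import JsonDataset  # assumed import path
    dataset = JsonDataset('coco_2014_minival')  # hypothetical dataset name
    thresholds = {cls: 0.5 for cls in dataset.classes}
    dets_per_image, res_file = load_detections(detection_file, dataset, thresholds)
    return dets_per_image, res_file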
def _toPersonMask(anns, coco):
    # modify ann['segmentation'] by reference
    for ann in anns:
        if ann['ignore']:
            continue
        # binarize the box-level DensePose part mask to foreground/background
        rle = segm_utils.GetDensePoseMask(ann['dp_masks'])
        per_mask = (rle > 0).astype(np.uint8)
        ref_box = boxes.xywh_to_xyxy(ann['bbox'])
        ref_box = np.array(ref_box).astype(np.int32)
        # resize the box-level mask to the ground-truth box size
        w = ref_box[2] - ref_box[0] + 1
        h = ref_box[3] - ref_box[1] + 1
        w = np.maximum(w, 1)
        h = np.maximum(h, 1)
        per_mask = cv2.resize(per_mask, (w, h))
        img = coco.loadImgs(ann['image_id'])[0]
        im_h = img['height']
        im_w = img['width']
        # paste the box mask into a full-image canvas, clipping to image bounds
        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, im_w)
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, im_h)
        im_mask[y_0:y_1, x_0:x_1] = per_mask[
            (y_0 - ref_box[1]):(y_1 - ref_box[1]),
            (x_0 - ref_box[0]):(x_1 - ref_box[0])
        ]
        # store the full-image mask as COCO RLE
        rle = maskUtils.encode(
            np.array(im_mask[:, :, np.newaxis], order='F')
        )[0]
        ann['segmentation'] = rle
        rle = coco.annToRLE(ann)
        ann['person_mask'] = rle
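# A short usage sketch: after _toPersonMask has run, each annotation carries a
# COCO-format RLE under 'person_mask'. Decoding it back to a binary mask uses
# the standard pycocotools call; the helper name below is illustrative.
from pycocotools import mask as maskUtils

def person_mask_to_binary(ann):
    """Decode ann['person_mask'] (COCO RLE) into a uint8 HxW array of {0, 1}."""
    return maskUtils.decode(ann['person_mask'])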
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry."""
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # crowd regions are RLE encoded and stored as dicts
        if isinstance(obj['segmentation'], list):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            obj['segmentation'] = [
                p for p in obj['segmentation'] if len(p) >= 6
            ]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and more than 1x1 box size
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])
    num_valid_objs = len(valid_objs)

    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros(
        (num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
    )
    if self.keypoints is not None:
        gt_keypoints = np.zeros(
            (num_valid_objs, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )

    im_has_visible_keypoints = False
    for ix, obj in enumerate(valid_objs):
        cls = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    # To match the original implementation:
    # entry['boxes'] = np.append(
    #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(
        entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
    )
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
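# The box helpers used above are defined elsewhere in box_utils. A minimal
# sketch of the two conversions, assuming Detectron's inclusive-pixel
# convention where x2 = x1 + w - 1 (an assumption; the project's box_utils is
# authoritative). Names carry a _single suffix to mark them as illustrations.
def xywh_to_xyxy_single(xywh):
    """Convert one (x1, y1, w, h) box to (x1, y1, x2, y2)."""
    x1, y1, w, h = xywh
    return (x1, y1, x1 + max(0., w - 1.), y1 + max(0., h - 1.))

def clip_xyxy_to_image_single(x1, y1, x2, y2, height, width):
    """Clip an (x1, y1, x2, y2) box to lie inside a height x width image."""
    x1 = min(max(x1, 0.), width - 1.)
    y1 = min(max(y1, 0.), height - 1.)
    x2 = min(max(x2, 0.), width - 1.)
    y2 = min(max(y2, 0.), height - 1.)
    return x1, y1, x2, y2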
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry."""
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    ####
    valid_dp_x = []
    valid_dp_y = []
    valid_dp_I = []
    valid_dp_U = []
    valid_dp_V = []
    valid_dp_masks = []
    ####
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # crowd regions are RLE encoded and stored as dicts
        if isinstance(obj['segmentation'], list):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            obj['segmentation'] = [
                p for p in obj['segmentation'] if len(p) >= 6
            ]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and more than 1x1 box size
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])
            ###
            if 'dp_x' in obj.keys():
                valid_dp_x.append(obj['dp_x'])
                valid_dp_y.append(obj['dp_y'])
                valid_dp_I.append(obj['dp_I'])
                valid_dp_U.append(obj['dp_U'])
                valid_dp_V.append(obj['dp_V'])
                valid_dp_masks.append(obj['dp_masks'])
            else:
                valid_dp_x.append([])
                valid_dp_y.append([])
                valid_dp_I.append([])
                valid_dp_U.append([])
                valid_dp_V.append([])
                valid_dp_masks.append([])
            ###
    num_valid_objs = len(valid_objs)

    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros(
        (num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
    )
    if self.keypoints is not None:
        gt_keypoints = np.zeros(
            (num_valid_objs, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )
    if cfg.MODEL.BODY_UV_ON:
        ignore_UV_body = np.zeros((num_valid_objs))
        # Box_image_body = [None] * num_valid_objs

    im_has_visible_keypoints = False
    im_has_any_body_uv = False
    for ix, obj in enumerate(valid_objs):
        cls = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        if cfg.MODEL.BODY_UV_ON:
            if 'dp_x' in obj:
                ignore_UV_body[ix] = False
                im_has_any_body_uv = True
            else:
                ignore_UV_body[ix] = True
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    entry['dp_x'].extend(valid_dp_x)
    entry['dp_y'].extend(valid_dp_y)
    entry['dp_I'].extend(valid_dp_I)
    entry['dp_U'].extend(valid_dp_U)
    entry['dp_V'].extend(valid_dp_V)
    entry['dp_masks'].extend(valid_dp_masks)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(
        entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
    )
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
    if cfg.MODEL.BODY_UV_ON:
        entry['ignore_UV_body'] = np.append(
            entry['ignore_UV_body'], ignore_UV_body
        )
        # entry['Box_image_links_body'].extend(Box_image_body)
        entry['has_body_uv'] = im_has_any_body_uv
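# For reference, _add_gt_annotations assumes the entry was pre-initialized with
# empty per-image fields (in Detectron/DensePose this is done by a
# _prep_roidb_entry-style helper). A minimal sketch of the fields touched by
# the DensePose variant above; the dtypes are assumptions, not the upstream
# definitions.
import numpy as np
import scipy.sparse

def init_densepose_entry_fields(entry, num_classes):
    entry['boxes'] = np.empty((0, 4), dtype=np.float32)
    entry['segms'] = []
    for k in ('dp_x', 'dp_y', 'dp_I', 'dp_U', 'dp_V', 'dp_masks'):
        entry[k] = []
    entry['gt_classes'] = np.empty((0), dtype=np.int32)
    entry['seg_areas'] = np.empty((0), dtype=np.float32)
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(
        np.empty((0, num_classes), dtype=np.float32)
    )
    entry['is_crowd'] = np.empty((0), dtype=bool)
    entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
    entry['ignore_UV_body'] = np.empty((0), dtype=bool)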
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry."""
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    ####
    valid_dp_x = []
    valid_dp_y = []
    valid_dp_I = []
    valid_dp_U = []
    valid_dp_V = []
    valid_dp_masks = []
    ####
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # crowd regions are RLE encoded
        if segm_utils.is_poly(obj['segmentation']):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            obj['segmentation'] = [
                p for p in obj['segmentation'] if len(p) >= 6
            ]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and more than 1x1 box size
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])
            ###
            if 'dp_x' in obj:
                valid_dp_x.append(obj['dp_x'])
                valid_dp_y.append(obj['dp_y'])
                valid_dp_I.append(obj['dp_I'])
                valid_dp_U.append(obj['dp_U'])
                valid_dp_V.append(obj['dp_V'])
                valid_dp_masks.append(obj['dp_masks'])
            else:
                valid_dp_x.append([])
                valid_dp_y.append([])
                valid_dp_I.append([])
                valid_dp_U.append([])
                valid_dp_V.append([])
                valid_dp_masks.append([])
            ###
    num_valid_objs = len(valid_objs)

    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros(
        (num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
    )
    if self.keypoints is not None:
        gt_keypoints = np.zeros(
            (num_valid_objs, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )
    if cfg.MODEL.BODY_UV_ON:
        ignore_UV_body = np.zeros(
            (num_valid_objs), dtype=entry['ignore_UV_body'].dtype
        )
        # Box_image_body = [None] * num_valid_objs

    im_has_visible_keypoints = False
    im_has_any_body_uv = False
    for ix, obj in enumerate(valid_objs):
        cls = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        if cfg.MODEL.BODY_UV_ON:
            if 'dp_x' in obj:
                ignore_UV_body[ix] = False
                im_has_any_body_uv = True
            else:
                ignore_UV_body[ix] = True
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    entry['dp_x'].extend(valid_dp_x)
    entry['dp_y'].extend(valid_dp_y)
    entry['dp_I'].extend(valid_dp_I)
    entry['dp_U'].extend(valid_dp_U)
    entry['dp_V'].extend(valid_dp_V)
    entry['dp_masks'].extend(valid_dp_masks)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(
        entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
    )
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
    if cfg.MODEL.BODY_UV_ON:
        entry['ignore_UV_body'] = np.append(
            entry['ignore_UV_body'], ignore_UV_body
        )
        # entry['Box_image_links_body'].extend(Box_image_body)
        entry['has_body_uv'] = im_has_any_body_uv
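# The only substantive difference from the variant above is the polygon test.
# A minimal sketch of what segm_utils.is_poly checks (COCO stores polygon
# segmentations as lists of coordinate lists and crowd regions as RLE dicts);
# treat this as an illustration rather than the project's exact helper.
def is_poly(segm):
    """Return True if `segm` is a polygon (list of coordinate lists),
    False if it is an RLE-encoded crowd region (dict)."""
    assert isinstance(segm, (list, dict))
    return isinstance(segm, list)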
def _add_gt_annotations(self, entry):
    """Add ground truth annotation metadata to an roidb entry."""
    ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
    objs = self.COCO.loadAnns(ann_ids)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    valid_segms = []
    width = entry['width']
    height = entry['height']
    for obj in objs:
        # crowd regions are RLE encoded
        if segm_utils.is_poly(obj['segmentation']):
            # Valid polygons have >= 3 points, so require >= 6 coordinates
            obj['segmentation'] = [
                p for p in obj['segmentation'] if len(p) >= 6
            ]
        if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
            continue
        if 'ignore' in obj and obj['ignore'] == 1:
            continue
        # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
        x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
        x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
            x1, y1, x2, y2, height, width
        )
        # Require non-zero seg area and more than 1x1 box size
        if obj['area'] > 0 and x2 > x1 and y2 > y1:
            obj['clean_bbox'] = [x1, y1, x2, y2]
            valid_objs.append(obj)
            valid_segms.append(obj['segmentation'])
    num_valid_objs = len(valid_objs)

    boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
    gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype)
    gt_overlaps = np.zeros(
        (num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype
    )
    seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
    is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
    box_to_gt_ind_map = np.zeros(
        (num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype
    )
    if self.keypoints is not None:
        gt_keypoints = np.zeros(
            (num_valid_objs, 3, self.num_keypoints),
            dtype=entry['gt_keypoints'].dtype
        )

    im_has_visible_keypoints = False
    for ix, obj in enumerate(valid_objs):
        cls = self.json_category_id_to_contiguous_id[obj['category_id']]
        boxes[ix, :] = obj['clean_bbox']
        gt_classes[ix] = cls
        seg_areas[ix] = obj['area']
        is_crowd[ix] = obj['iscrowd']
        box_to_gt_ind_map[ix] = ix
        if self.keypoints is not None:
            gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
            if np.sum(gt_keypoints[ix, 2, :]) > 0:
                im_has_visible_keypoints = True
        if obj['iscrowd']:
            # Set overlap to -1 for all classes for crowd objects
            # so they will be excluded during training
            gt_overlaps[ix, :] = -1.0
        else:
            gt_overlaps[ix, cls] = 1.0

    entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
    entry['segms'].extend(valid_segms)
    # To match the original implementation:
    # entry['boxes'] = np.append(
    #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
    entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
    entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
    entry['gt_overlaps'] = np.append(
        entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
    )
    entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
    entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
    entry['box_to_gt_ind_map'] = np.append(
        entry['box_to_gt_ind_map'], box_to_gt_ind_map
    )
    if self.keypoints is not None:
        entry['gt_keypoints'] = np.append(
            entry['gt_keypoints'], gt_keypoints, axis=0
        )
        entry['has_visible_keypoints'] = im_has_visible_keypoints
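# Downstream consumers typically read the sparse gt_overlaps back as a dense
# array and take per-box maxima, so a crowd box (whose row is set to -1 above)
# never wins a foreground class assignment. A small hedged sketch of that
# read-back pattern; the helper name is illustrative.
def max_class_assignments(entry):
    """Return (max_overlaps, max_classes) per box from the sparse gt_overlaps."""
    gt_overlaps = entry['gt_overlaps'].toarray()
    max_overlaps = gt_overlaps.max(axis=1)
    max_classes = gt_overlaps.argmax(axis=1)
    return max_overlaps, max_classes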