def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support arrays with negative strides.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
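# Usage sketch (illustrative, not from the source): a minimal call to
# annotations_to_instances with one polygon instance. Assumes the
# detectron2-style import context used throughout this file; the coordinate
# values are made up for the example.
from detectron2.structures import BoxMode

annos = [
    {
        "bbox": [10.0, 10.0, 50.0, 60.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 0,
        "segmentation": [[10.0, 10.0, 60.0, 10.0, 60.0, 70.0, 10.0, 70.0]],
    }
]
target = annotations_to_instances(annos, image_size=(480, 640))
assert len(target) == 1
print(target.gt_boxes.tensor)  # XYXY_ABS: tensor([[10., 10., 60., 70.]])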
def __getitem__(self, index):
    ann = self.coco[index]
    # bbox transform.
    bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)  # x1y1x2y2
    bbox = Boxes(bbox)
    # mask transform.
    mask = PolygonMasks([ann["segmentation"]])
    mask = mask.crop_and_resize(bbox.tensor, self.size).float()
    return mask
def __getitem__(self, index):
    ann = self.all_annotations[index]
    # bbox transform: the annotation is already in xmin, ymin, xmax, ymax.
    bbox = np.array([ann["bbox"]])
    bbox = Boxes(bbox)
    # mask transform.
    mask = PolygonMasks([ann["segmentation"]])
    mask = mask.crop_and_resize(bbox.tensor, self.size).float()
    return mask
def annotations_to_instances_rotated(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Compared to `annotations_to_instances`, this function is for rotated boxes only.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            Containing fields "gt_boxes", "gt_classes", "gt_masks",
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [obj["bbox"] for obj in annos]
    masks = [obj["segmentation"] for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    masks = target.gt_masks = PolygonMasks(masks)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    return target
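# Usage sketch (illustrative, not from the source): RotatedBoxes in
# detectron2 expects (cx, cy, w, h, angle) rows, with the angle in degrees,
# counter-clockwise. All values below are made up for the example.
annos = [
    {
        "bbox": [100.0, 100.0, 40.0, 20.0, 30.0],  # XYWHA_ABS
        "category_id": 2,
        "segmentation": [[80.0, 90.0, 120.0, 90.0, 120.0, 110.0, 80.0, 110.0]],
    }
]
target = annotations_to_instances_rotated(annos, image_size=(480, 640))
print(target.gt_boxes)  # RotatedBoxes with one (cx, cy, w, h, angle) row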
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "viewpoint" in annos[0]:
        viewpoints = np.array([obj["viewpoint"] for obj in annos])
        viewpoints_class = torch.tensor(viewpoints[:, 0], dtype=torch.int64)
        target.gt_viewpoint = viewpoints_class
        if len(annos[0]["viewpoint"]) == 2:
            viewpoints_rads = torch.tensor(viewpoints[:, 1], dtype=torch.float32)
            target.gt_viewpoint_rads = viewpoints_rads

    if len(annos) and "bbox3D" in annos[0]:
        bbox3D = [obj["bbox3D"] for obj in annos]
        bbox3D = torch.tensor(bbox3D, dtype=torch.float)
        target.gt_bbox3D = bbox3D

    if len(annos) and "height" in annos[0]:
        height = [obj["height"] for obj in annos]
        height = torch.tensor(height, dtype=torch.float)
        target.gt_height = height

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
def dota_annotations_to_instances(annos, image_size):
    target = Instances(image_size)

    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    obb_boxes.clip(image_size)

    # Derive horizontal boxes, inscribed boxes and polygons from each
    # rotated box.
    pt_hbb, pt_inbox, polygons = [], [], []
    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])
    target.gt_pt_inbox_boxes = Boxes(pt_inbox)
    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    masks = PolygonMasks(polygons)
    target.gt_masks = masks

    # Cap the number of instances per image at 2000 by random sampling.
    if len(target) > 2000:
        mask = random.sample(list(range(0, len(target))), 2000)
        target = target[mask]
    return target
def add_pseudo_label(self, targets, image_path, flip):
    new_targets = []
    if self.pseudo_gt is None:
        return targets
    if len(targets) > 0 and targets[0].gt_boxes.tensor.device != self.pseudo_gt.device:
        self.pseudo_gt = self.pseudo_gt.to(targets[0].gt_boxes.tensor.device)
    for i, (targets_per_image, path) in enumerate(zip(targets, image_path)):
        H, W = targets_per_image._image_size
        gt_boxes = targets_per_image.gt_boxes
        gt_classes = targets_per_image.gt_classes
        p = int(path.split('/')[-1].split('.')[0])
        data = self.pseudo_gt[self.pseudo_gt[:, 0] == p]
        ld = len(data)
        if ld == 0:
            new_targets.append(targets_per_image)
            continue
        label = data[:, 1].long()
        boxes = data[:, 2:].clone()
        if flip[i] == 1:
            # Horizontal flip of normalized coordinates: mirror x0 and x1,
            # then swap them so that x0 <= x1 still holds.
            boxes[:, 0] = 1 - boxes[:, 0]
            boxes[:, 2] = 1 - boxes[:, 2]
            boxes = torch.index_select(
                boxes, -1, torch.as_tensor([2, 1, 0, 3], device=boxes.device))
        boxes = Boxes(boxes)
        boxes.scale(scale_x=W, scale_y=H)
        new_gt_boxes = gt_boxes.cat([gt_boxes, boxes])
        # Pseudo boxes carry no mask annotation; pad with empty polygons.
        new_gt_masks = PolygonMasks([[]])
        if hasattr(targets_per_image, 'gt_masks'):
            gt_masks = targets_per_image.gt_masks
            new_gt_masks = new_gt_masks.cat([gt_masks] + [new_gt_masks] * ld)
        else:
            new_gt_masks = new_gt_masks.cat([new_gt_masks] * ld)
        new_gt_classes = torch.cat((gt_classes, label))
        new_target = Instances((H, W))
        new_target.gt_classes = new_gt_classes
        new_target.gt_masks = new_gt_masks
        new_target.gt_boxes = new_gt_boxes
        new_targets.append(new_target)
    return new_targets
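# Toy check (illustrative, not from the source) of the flip-and-swap logic
# above: mirroring normalized x-coordinates reverses the roles of x0 and x1,
# so the index_select restores x0 <= x1.
import torch

boxes = torch.tensor([[0.1, 0.2, 0.4, 0.6]])  # x0, y0, x1, y1 in [0, 1]
boxes[:, 0] = 1 - boxes[:, 0]  # 0.9
boxes[:, 2] = 1 - boxes[:, 2]  # 0.6
boxes = torch.index_select(boxes, -1, torch.as_tensor([2, 1, 0, 3]))
print(boxes)  # tensor([[0.6000, 0.2000, 0.9000, 0.6000]])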
def valid_dct_source(coco, dct_dim, mask_size):
    dct_mask_encoding = DctMaskEncoding(dct_dim, mask_size)
    mIoU = []
    Number = 0
    for ann in coco:
        Number += 1
        bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
        w, h = bbox[0][2], bbox[0][3]
        w, h = round(w), round(h)
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)  # x1y1x2y2
        bbox = Boxes(bbox)
        # mask transform.
        mask = PolygonMasks([ann["segmentation"]])
        mask_source = rasterize_polygons_within_box_for_arbitrary_shape(
            mask.polygons[0], bbox.tensor[0].numpy(), h, w)
        mask_source = mask_source.numpy()  # numpy [h, w] binary
        # tensor [1, mask_size, mask_size], all 0 or 1
        mask_k = mask.crop_and_resize(bbox.tensor, mask_size).float()
        mask_k = mask_k.view([mask_size, mask_size])
        # Encode to DCT coefficients, decode back, and compare against the
        # full-resolution rasterization.
        dct_code = dct_mask_encoding.encode(mask_k)
        mask_re = dct_mask_encoding.decode(dct_code).numpy().squeeze()
        res = cv2.resize(
            mask_re.astype('float'),
            dsize=(mask_source.shape[1], mask_source.shape[0]),
            interpolation=cv2.INTER_LINEAR)
        res = np.where(res >= 0.5, 1, 0)
        res = np.reshape(res, [1, -1])
        mask_source = np.reshape(mask_source, [1, -1])
        res = res.astype(int)
        IoUevaluate = IOUMetric(2)
        IoUevaluate.add_batch(res, mask_source)
        _, _, _, mean_iu, _ = IoUevaluate.evaluate()
        mIoU.append(mean_iu)
        if Number % 1000 == 1:
            print(np.mean(mIoU))
    return np.mean(mIoU)
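# Cross-check sketch (illustrative, not from the source): the IoU of two
# flattened binary masks computed directly with numpy, for sanity-checking
# the IOUMetric result above on small inputs.
import numpy as np

def binary_mask_iou(a, b):
    """IoU of two binary masks given as 0/1 numpy arrays of equal shape."""
    inter = np.logical_and(a, b).sum()
    union = np.logical_or(a, b).sum()
    return inter / union if union > 0 else 1.0

a = np.array([[0, 1, 1], [0, 1, 0]])
b = np.array([[0, 1, 0], [0, 1, 1]])
print(binary_mask_iou(a, b))  # 2 / 4 = 0.5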
def __getitem__(self, index):
    ann = self.coco[index]
    # bbox transform.
    bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)  # x1y1x2y2
    bbox = Boxes(bbox)
    # mask transform.
    mask = PolygonMasks([ann["segmentation"]])
    mask = mask.crop_and_resize(bbox.tensor, self.size).float()
    if self.transform:
        if torch.rand(1) < 0.5:
            mask = mask.flip(2)
        # Inject noise into the binary mask.
        noise_matrix = VALUE_NOISE * torch.rand(mask.shape)
        mask = torch.where(mask > noise_matrix, mask - noise_matrix, noise_matrix)
    return mask
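# Toy illustration (not from the source) of the noise injection above:
# foreground pixels (1.0) are pulled down by the noise while background
# pixels (0.0) are lifted up to the noise value, so values stay in (0, 1).
import torch

VALUE_NOISE = 0.1  # assumed magnitude; the real constant is defined elsewhere
mask = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
noise = VALUE_NOISE * torch.rand(mask.shape)
noisy = torch.where(mask > noise, mask - noise, noise)
print(noisy)  # entries near 1 for foreground, near 0 for background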
def process_annotation(self, ann, mask_side_len=28):
    # Parse annotation data
    img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
    height, width = img_info["height"], img_info["width"]
    gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
    gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

    # Run rasterize ..
    torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
    box_bitmasks = {
        "polygon": PolygonMasks([gt_polygons]).crop_and_resize(
            torch_gt_bbox, mask_side_len)[0],
        "gridsample": rasterize_polygons_with_grid_sample(
            gt_bit_mask, gt_bbox, mask_side_len),
        "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
            torch_gt_bbox, mask_side_len)[0],
    }

    # Run paste ..
    results = defaultdict(dict)
    for k, box_bitmask in box_bitmasks.items():
        padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
        scaled_boxes = scale_boxes(torch_gt_bbox, scale)

        r = results[k]
        r["old"] = paste_mask_in_image_old(
            padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5)
        r["aligned"] = paste_masks_in_image(
            box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width))[0]

    table = []
    for rasterize_method, r in results.items():
        for paste_method, mask in r.items():
            mask = np.asarray(mask)
            iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
            table.append((rasterize_method, paste_method, iou))
    return table
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    pan_ids = torch.tensor([obj["pan_id"] for obj in annos])
    target.pan_id = pan_ids

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        poly_masks = PolygonMasks(segms)
        masks = []
        for segm in segms:
            if isinstance(segm, list):
                # polygon
                masks.append(polygons_to_bitmask(segm, *image_size))
            elif isinstance(segm, dict):
                # COCO RLE
                masks.append(mask_util.decode(segm))
            elif isinstance(segm, np.ndarray):
                assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                    segm.ndim
                )
                # mask array
                masks.append(segm)
            else:
                raise ValueError(
                    "Cannot convert segmentation of type '{}' to BitMasks!"
                    "Supported types are: polygons as list[list[float] or ndarray],"
                    " COCO-style RLE as a dict, or a full-image segmentation mask "
                    "as a 2D ndarray.".format(type(segm))
                )
        # torch.from_numpy does not support array with negative stride.
        bit_masks = BitMasks(
            torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
        )
        # Keep both representations; the requested format becomes gt_masks.
        if mask_format == "polygon":
            target.gt_masks = poly_masks
            target.bit_masks = bit_masks
        else:
            target.gt_masks = bit_masks
            target.poly_masks = poly_masks
    return target
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)

    # Guard against empty `annos` before probing the first element.
    if len(annos) and "light" in annos[0]:
        light = [BoxMode.convert(obj["light"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
        light = target.gt_light = Boxes(light)
        light.clip(image_size)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
def test_polygon_area(self):
    # Square polygons
    for d in [5.0, 10.0, 1000.0]:
        polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
        area = polygon.area()[0]
        target = d ** 2
        self.assertEqual(area, target)

    # Right-triangle polygons
    for d in [5.0, 10.0, 1000.0]:
        polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
        area = polygon.area()[0]
        target = d ** 2 / 2
        self.assertEqual(area, target)
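# Shoelace-formula cross-check (illustrative, not from the source): the area
# PolygonMasks computes for a flat [x0, y0, x1, y1, ...] polygon should match
# the classic formula |sum(x_i * y_{i+1} - x_{i+1} * y_i)| / 2.
def shoelace_area(coords):
    xs, ys = coords[0::2], coords[1::2]
    n = len(xs)
    s = sum(xs[i] * ys[(i + 1) % n] - xs[(i + 1) % n] * ys[i] for i in range(n))
    return abs(s) / 2.0

print(shoelace_area([0, 0, 0, 5.0, 5.0, 5.0, 5.0, 0]))  # 25.0 (square)
print(shoelace_area([0, 0, 0, 5.0, 5.0, 5.0]))          # 12.5 (triangle)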
def filter_mask_size(self, min_thresh=100, max_thresh=100000, to_rle=False):
    """
    Remove instances with mask areas outside of the interval
    (min_thresh, max_thresh). Useful for removing small instances
    (e.g. 1 or even 0 pixels in the segmentation mask) or abnormally large
    outliers (e.g. many instances merged into a giant blob). Note that this
    does not modify the InstanceSet in place and returns an Instances object.

    Parameters
    -----------
    min_thresh, max_thresh: int, float or None
        Only instances with mask areas greater than min_thresh and smaller
        than max_thresh are kept. If either threshold is None, it is not
        applied (i.e. if both min_thresh and max_thresh are None, all masks
        are kept).
    to_rle: bool
        If True, masks are converted to RLE before filtering and the inlier
        masks are returned as RLE. Otherwise, the mask format is preserved.

    Returns
    ----------
    instances_filtered: detectron2.structures.Instances object
        Instances object only containing instances with mask areas in the
        threshold range.
    """
    masks = self.instances.masks
    if to_rle:
        masks = RLEMasks(masks_to_rle(masks, self.instances.image_size))
    masktype = type(masks)

    # determine which instances contain inlier masks
    areas = mask_areas(masks)
    if min_thresh is None:
        inlier_min = np.ones(areas.shape, bool)
    else:
        inlier_min = areas > min_thresh
    if max_thresh is None:
        inlier_max = np.ones(areas.shape, bool)
    else:
        inlier_max = areas < max_thresh
    inliers_bool = np.logical_and(inlier_min, inlier_max)

    # PolygonMasks can't be indexed with a boolean array,
    # so that case must be handled separately.
    if masktype == PolygonMasks:
        polygons = [p for p, b in zip(masks.polygons, inliers_bool) if b]
        masks = PolygonMasks(polygons)
    else:
        masks = masks[inliers_bool]

    new_instance_fields = {}
    for key, value in self.instances._fields.items():
        if key == 'masks':
            new_instance_fields[key] = masks
        else:
            new_instance_fields[key] = value[inliers_bool]
    instances_filtered = Instances(self.instances.image_size, **new_instance_fields)
    return instances_filtered
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format
    """
    dataset_dicts = DatasetCatalog.get(dataset_name)
    categories = [
        {"id": id, "name": name}
        for id, name in enumerate(MetadataCatalog.get(dataset_name).thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_dict in dataset_dicts:
        coco_image = {
            "id": image_dict["image_id"],
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes (Boxes expects XYXY)
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = image_dict["image_id"]
            coco_annotation["bbox"] = bbox
            coco_annotation["area"] = area
            coco_annotation["category_id"] = annotation["category_id"]
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
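# Usage sketch (illustrative, not from the source): serializing the returned
# dict to a COCO-style json file with the standard library. The dataset name
# and output path are hypothetical placeholders.
import json

coco_dict = convert_to_coco_dict("my_dataset_train")  # hypothetical dataset name
with open("my_dataset_train_coco.json", "w") as f:
    json.dump(coco_dict, f)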
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        # Either representation may be a bitmask instead of a polygon list.
        segm = [obj["segmentation"] for obj in annos]
        visible_segm = [obj["visible_mask"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segm)
            if not isinstance(visible_segm[0], list):
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_segm]))
            else:
                visible_masks = PolygonMasks(visible_segm)
        else:
            assert mask_format == "bitmask", mask_format
            if not isinstance(segm[0], list):
                masks = BitMasks(torch.stack([torch.from_numpy(x) for x in segm]))
            else:
                masks = BitMasks.from_polygon_masks(segm, *image_size)
            if not isinstance(visible_segm[0], list):
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_segm]))
            else:
                visible_masks = BitMasks.from_polygon_masks(visible_segm, *image_size)
        target.gt_masks = masks
        target.gt_visible_masks = visible_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
def convert_to_coco_dict_from_detdict(dataset_dicts, metadata):
    """
    See `convert_to_coco_dict`.
    """
    # dataset_dicts = DatasetCatalog.get(dataset_name)
    # metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
def convert_to_coco_dict(dataset_name):
    """
    Convert a generic dataset into COCO json format.

    Generic dataset description can be found here:
    https://github.com/facebookresearch/detectron2/blob/master/docs/tutorials/datasets.md#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name: name of the source dataset
    Returns:
        coco_dict: serializable dict in COCO json format
    """
    dataset_dicts = DatasetCatalog.get(dataset_name)
    categories = [
        {"id": id, "name": name}
        for id, name in enumerate(MetadataCatalog.get(dataset_name).thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    images = []
    annotations = []

    # just for logging purposes
    _annotation_keys = Counter()

    for image_dict in dataset_dicts:
        image = {
            "id": image_dict["image_id"],
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        images.append(image)

        # deep-copying various annotations from the original format
        # can be bbox, segmentation, keypoint, etc.
        anns_per_image = deepcopy(image_dict["annotations"])
        for annotation in anns_per_image:
            # COCO requirement: linking annotations to images
            annotation["id"] = len(annotations) + 1
            annotation["image_id"] = image_dict["image_id"]

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)
            del annotation["bbox_mode"]  # TODO: make BBOX_MODE serializable, otherwise remove it
            annotation["bbox"] = bbox
            annotation["iscrowd"] = 0

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0]
            else:
                # Computing areas using bounding boxes (Boxes expects XYXY)
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0]
            annotation["area"] = float(area)

            # Keeping track of fields present in instances
            _annotation_keys.update(annotation.keys())

            annotations.append(annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(images)}, num annotations: {len(annotations)}")
    logger.info(f"Annotation fields: {_annotation_keys}")
    # If some field appears in only a subset of annotations, the key counts
    # will differ from one another.
    if len(set(_annotation_keys.values())) > 1:
        logger.warning("Annotation fields are not homogeneous between instances")

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": images,
        "annotations": annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
def read_from_ddict(self, ddict, inplace=True):
    """
    Read ground truth annotations from a data dict and store the information
    as attributes of the InstanceSet object. The descriptions of the
    attributes are provided in the documentation for self.__init__().

    Parameters
    -----------
    ddict: dict
        Data dict in the format described below in Notes.
    inplace: bool
        If True, the object is modified in place. Otherwise, the InstanceSet
        object is returned.

    Returns
    -----------
    self (optional): InstanceSet
        Only returned if inplace == False.

    Notes
    ------
    Data dicts should have the following format:
        -'file_name': str or Path object
            path to image corresponding to annotations
        -'mask_format': str
            'polygonmask' if segmentation masks are lists of XY coordinates, or
            'bitmask' if segmentation masks are RLE encoded segmentation masks
        -'height': int
            image height in pixels
        -'width': int
            image width in pixels
        -'annotations': list(dict)
            list of annotations. See the annotation format below.
        -'num_instances': int
            equal to len(annotations) - number of instances present in the image

    The dictionary format for the annotation dictionaries is as follows:
        -'category_id': int
            numeric class label for the instance.
        -'bbox_mode': detectron2.structures.BoxMode object
            describes the format of the bounding box coordinates.
            The default is BoxMode.XYXY_ABS.
        -'bbox': list(int)
            4-element list of bbox coordinates
        -'segmentation': list
            list containing:
              - a list of polygon coordinates (mask format is polygonmasks)
              - dictionaries of RLE mask encodings (mask format is bitmasks)
    """
    # default values - always set
    self.pred_or_gt = 'gt'  # ddict assumed to be ground truth labels from get_ddict function

    # required values - the function will error out if these are not set
    self.filepath = Path(ddict['file_name'])
    self.mask_format = ddict['mask_format']
    image_size = (ddict['height'], ddict['width'])

    class_idx = np.asarray([anno['category_id'] for anno in ddict['annotations']], int)
    bbox = np.stack([anno['bbox'] for anno in ddict['annotations']])
    segs = [anno['segmentation'] for anno in ddict['annotations']]
    segtype = type(segs[0])
    if segtype == dict:
        # RLE encoded mask
        masks = RLEMasks(segs)
    elif segtype == np.ndarray and segs[0].dtype == bool:
        # bitmask
        masks = BitMasks(np.stack(segs))
    else:
        # list of (list or array) of coords in format [x0, y0, x1, y1, ..., xn, yn]
        masks = PolygonMasks(segs)

    instances = Instances(
        image_size,
        **{'masks': masks, 'boxes': bbox, 'class_idx': class_idx})
    self.instances = instances
    self.instances.colors = visualize.random_colors(len(instances), self.randomstate)

    # optional values - default to None if not in ddict
    self.dataset_class = ddict.get('dataset_class', None)

    HFW = ddict.get('HFW', None)
    HFW_units = None
    if HFW is not None:
        try:
            HFW = float(HFW)
        except ValueError:
            split = HFW.split(' ')
            if len(split) == 2:
                HFW = float(split[0])
                HFW_units = split[1]
    self.HFW = HFW
    self.HFW_units = HFW_units

    if not inplace:
        return self
    return
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        visible = [obj["visible_mask"] for obj in annos]
        invisible = []
        for obj in annos:
            if "invisible_mask" in obj:
                invisible.append(obj["invisible_mask"])
            else:
                invisible.append([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
        if mask_format == "polygon":
            # gt amodal masks per image
            a_masks = PolygonMasks(segms)
            # gt visible masks per image
            v_masks = PolygonMasks(visible)
            # gt invisible masks per image
            i_masks = PolygonMasks(invisible)
        else:
            assert mask_format == "bitmask", mask_format
            a_masks = []
            v_masks = []
            i_masks = []
            # Convert each (amodal, visible, invisible) triple together.
            for segm, vis, inv in zip(segms, visible, invisible):
                if isinstance(segm, list):
                    # polygon
                    a_masks.append(polygons_to_bitmask(segm, *image_size))
                    v_masks.append(polygons_to_bitmask(vis, *image_size))
                    i_masks.append(polygons_to_bitmask(inv, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    a_masks.append(mask_util.decode(segm))
                    v_masks.append(mask_util.decode(vis))
                    i_masks.append(mask_util.decode(inv))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    a_masks.append(segm)
                    v_masks.append(vis)
                    i_masks.append(inv)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            a_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in a_masks]))
            v_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in v_masks]))
            i_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in i_masks]))
        # the original mask head is now the amodal mask head
        target.gt_masks = a_masks
        target.gt_v_masks = v_masks
        target.gt_i_masks = i_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extend the function annotations_to_instances() to support attributes.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones((len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
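# Toy illustration (not from the source) of the attribute padding above:
# each instance gets a fixed-width row, filled with its attribute ids and
# padded with -1.
import torch

annos = [{"attribute_ids": [3, 7]}, {"attribute_ids": []}]
max_attr_per_ins = 4
attributes = -torch.ones((len(annos), max_attr_per_ins), dtype=torch.int64)
for idx, anno in enumerate(annos):
    for jdx, attr_id in enumerate(anno["attribute_ids"]):
        attributes[idx, jdx] = attr_id
print(attributes)  # tensor([[ 3,  7, -1, -1], [-1, -1, -1, -1]])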
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format
    """
    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """
    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    # categories = [
    #     {"id": reverse_id_mapper(id), "name": name}
    #     for id, name in enumerate(metadata.thing_classes)
    # ]
    categories = [{"id": 1, "name": "lesion"}]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": int(image_dict["width"]),
            "height": int(image_dict["height"]),
            "file_name": str(image_dict["file_name"]),
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format for axis-aligned and XYWHA for rotated
            bbox = annotation["bbox"]
            if isinstance(bbox, np.ndarray):
                if bbox.ndim != 1:
                    raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.")
                bbox = bbox.tolist()
            if len(bbox) not in [4, 5]:
                raise ValueError(f"bbox has to have length 4 or 5. Got {bbox}.")
            from_bbox_mode = annotation["bbox_mode"]
            to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS
            bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                if to_bbox_mode == BoxMode.XYWH_ABS:
                    bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS)
                    area = Boxes([bbox_xy]).area()[0].item()
                else:
                    area = RotatedBoxes([bbox]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0))
            coco_annotation["category_id"] = int(reverse_id_mapper(annotation["category_id"]))

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(seg, dict):  # RLE
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # make it json-serializable
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None}
    if len(coco_annotations) > 0:
        coco_dict["annotations"] = coco_annotations
    return coco_dict
def annotations_to_instances(annos, image_size, mask_format="polygon", max_num_planes=20):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of annotations, one per instance.
        image_size (tuple): height, width

    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], BoxMode(obj["bbox_mode"]), BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]))
        target.gt_masks = masks

    if len(annos) and "plane" in annos[0]:
        plane = [torch.tensor(obj["plane"]) for obj in annos]
        plane_idx = [torch.tensor([i]) for i in range(len(plane))]
        target.gt_planes = torch.stack(plane, dim=0)
        target.gt_plane_idx = torch.stack(plane_idx, dim=0)
    return target