def create_instances(predictions, image_size):
    ret = Instances(image_size)

    score = np.asarray([x["score"] for x in predictions])
    chosen = (score > args.conf_threshold).nonzero()[0]
    score = score[chosen]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)

    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])

    ret.scores = score
    ret.pred_boxes = Boxes(bbox)
    ret.pred_classes = labels

    try:
        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
    except KeyError:
        pass
    return ret
def boxlist_to_tensor(boxlist, output_box_dim):
    if type(boxlist) == np.ndarray:
        box_tensor = torch.from_numpy(boxlist)
    elif type(boxlist) == list:
        if boxlist == []:
            return torch.zeros((0, output_box_dim), dtype=torch.float32)
        else:
            box_tensor = torch.FloatTensor(boxlist)
    else:
        raise Exception("Unrecognized boxlist type")

    input_box_dim = box_tensor.shape[1]
    if input_box_dim != output_box_dim:
        if input_box_dim == 4 and output_box_dim == 5:
            box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
        else:
            raise Exception(
                "Unable to convert from {}-dim box to {}-dim box".format(
                    input_box_dim, output_box_dim
                )
            )
    return box_tensor
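# A minimal usage sketch for `boxlist_to_tensor` above, with hypothetical box values
# (assumes numpy, torch and detectron2's BoxMode are importable, as the function itself does).
# A plain Python list of 4-dim XYWH boxes is promoted to a 5-dim rotated-box tensor;
# the XYWH_ABS -> XYWHA_ABS conversion moves (x, y) to the box center and appends an angle of 0.
boxes_xywh = [[10.0, 20.0, 30.0, 40.0], [0.0, 0.0, 50.0, 60.0]]
rotated = boxlist_to_tensor(boxes_xywh, output_box_dim=5)
# rotated: float tensor of shape (2, 5) with rows (center_x, center_y, w, h, angle=0)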
def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instances (list[dict]): annotation dicts of the instances in the image,
            in Detectron2's dataset format; one is picked at random.
    """
    instance = np.random.choice(instances)
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (
        image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
    ), "The annotation bounding box is outside of the image!"
    assert (
        image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
    ), "Crop size is larger than image size!"

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)

    # if some instance is cropped, extend the crop box
    if not crop_box:
        modified = True
        while modified:
            modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances)

    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
def instances_to_json(instances, img_id=None):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    has_mask = instances.has("pred_masks_rle")
    if has_mask:
        rles = instances.pred_masks_rle

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        if img_id:
            result["image_id"] = img_id
        if has_mask:
            result["segmentation"] = rles[k]
        if has_keypoints:
            # In COCO annotations, keypoint coordinates are pixel indices.
            # However, our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of the data loading logic in `datasets/coco.py`.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
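# A minimal sketch of how the `instances_to_json` above might be called (hypothetical
# values; assumes torch and detectron2 are installed). One predicted box in XYXY_ABS
# is dumped as a COCO-style XYWH_ABS record.
import torch
from detectron2.structures import Boxes, Instances

inst = Instances((480, 640))
inst.pred_boxes = Boxes(torch.tensor([[10.0, 10.0, 30.0, 40.0]]))
inst.scores = torch.tensor([0.9])
inst.pred_classes = torch.tensor([2])
records = instances_to_json(inst, img_id=42)
# records[0] -> {"category_id": 2, "bbox": [10.0, 10.0, 20.0, 30.0],
#                "score": ~0.9, "image_id": 42}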
def prediction_to_dict(instances, img_id):
    """
    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO

    Returns:
        list[dict]: the results in densepose evaluation format
    """
    scores = instances.scores.tolist()
    segmentations = ToMaskConverter.convert(
        instances.pred_densepose, instances.pred_boxes, instances.image_size
    )
    raw_boxes_xywh = BoxMode.convert(
        instances.pred_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    )

    results = []
    for k in range(len(instances)):
        densepose_results_quantized = quantize_densepose_chart_result(
            ToChartResultConverter.convert(instances.pred_densepose[k], instances.pred_boxes[k])
        )
        densepose_results_quantized.labels_uv_uint8 = (
            densepose_results_quantized.labels_uv_uint8.cpu()
        )
        segmentation = segmentations.tensor[k]
        segmentation_encoded = mask_utils.encode(
            np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
        )
        segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
        result = {
            "image_id": img_id,
            "category_id": 1,  # densepose only has one class
            "bbox": raw_boxes_xywh[k].tolist(),
            "score": scores[k],
            "densepose": densepose_results_quantized,
            "segmentation": segmentation_encoded,
        }
        results.append(result)
    return results
def draw_dataset_dict(self, dic):
    """
    Draw annotations/segmentations in Detectron2 Dataset format.

    Args:
        dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

    Returns:
        output (VisImage): image object with visualizations.
    """
    annos = dic.get("annotations", None)
    if annos:
        boxes = [
            BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
            if x["bbox_mode"] != BoxMode.XYWHA_ABS
            else x["bbox"]
            for x in annos
        ]
        labels = [x["category_id"] for x in annos]
        colors = None
        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels
            ]
        names = self.metadata.get("thing_classes", None)
        if names:
            labels = [names[i] for i in labels]
        labels = [
            "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
            for i, a in zip(labels, annos)
        ]
        self.overlay_instances(labels=labels, boxes=boxes, assigned_colors=colors)
    return self.output
def draw_dataset_dict(self, dic):
    """
    Draw annotations/segmentations in Detectron2 Dataset format.

    Args:
        dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

    Returns:
        output (VisImage): image object with visualizations.
    """
    annos = dic.get("annotations", None)
    if annos:
        if "segmentation" in annos[0]:
            masks = [x["segmentation"] for x in annos]
        else:
            masks = None
        if "keypoints" in annos[0]:
            keypts = [x["keypoints"] for x in annos]
            keypts = np.array(keypts).reshape(len(annos), -1, 3)
        else:
            keypts = None

        boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos]

        labels = [x["category_id"] for x in annos]
        names = self.metadata.get("thing_classes", None)
        if names:
            labels = [names[i] for i in labels]
        labels = [
            "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
            for i, a in zip(labels, annos)
        ]
        self.overlay_instances(labels=labels, boxes=boxes, masks=masks, keypoints=keypts)

    sem_seg = dic.get("sem_seg", None)
    if sem_seg is None and "sem_seg_file_name" in dic:
        sem_seg = cv2.imread(dic["sem_seg_file_name"], cv2.IMREAD_GRAYSCALE)
    if sem_seg is not None:
        self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
    return self.output
def transform_instance_annotations(annotation, transforms, image_size, *, add_meta_infos=False):
    """
    Apply transforms to box and meta_infos annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        add_meta_infos (bool): whether to add a "meta_infos" field.

    Returns:
        dict: the same input dict with fields "bbox", "meta_infos"
        transformed according to `transforms`.
        The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # add meta_infos
    if add_meta_infos:
        meta_infos = dict()
        meta_infos = transforms.apply_meta_infos(meta_infos)
        annotation["meta_infos"] = meta_infos

    return annotation
def transform_proposals(dataset_dict, image_shape, transforms, min_box_side_len, proposal_topk):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        min_box_side_len (int): keep proposals with at least this size
        proposal_topk (int): only keep top-K scoring proposals

    The input dict is modified in-place, with the above-mentioned keys removed.
    A new key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            )
        )
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32")
        )

        boxes.clip(image_shape)
        keep = boxes.nonempty(threshold=min_box_side_len)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
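# A minimal sketch of `transform_proposals` above on hypothetical precomputed proposals
# (assumes numpy and detectron2 are installed). A no-op transform is used, so the
# proposals are only converted to XYXY_ABS, clipped, and wrapped into an Instances
# object stored under the new "proposals" key.
import numpy as np
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

dataset_dict = {
    "proposal_boxes": np.array([[10.0, 10.0, 20.0, 30.0]]),  # XYWH_ABS
    "proposal_bbox_mode": BoxMode.XYWH_ABS,
    "proposal_objectness_logits": np.array([2.5]),
}
transform_proposals(
    dataset_dict, (480, 640), T.TransformList([T.NoOpTransform()]),
    min_box_side_len=0, proposal_topk=100,
)
# dataset_dict["proposals"] is an Instances with fields "proposal_boxes" (XYXY_ABS)
# and "objectness_logits"; the original "proposal_*" keys have been popped.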
def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to box, segmentation and keypoints of annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict: the same input dict with fields "bbox", "segmentation", "keypoints"
        transformed according to `transforms`.
        The "bbox_mode" field will be set to XYXY_ABS.
    """
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = transforms.apply_box([bbox])[0]
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        polygons = [np.asarray(p).reshape(-1, 2) for p in annotation["segmentation"]]
        annotation["segmentation"] = [
            p.reshape(-1) for p in transforms.apply_polygons(polygons)
        ]

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    return annotation
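# A minimal sketch of the `transform_instance_annotations` above with a hypothetical
# annotation (assumes detectron2 is installed). A 100x100 image is resized to 200x200,
# so the box and polygon coordinates are scaled by 2x and the bbox is rewritten in XYXY_ABS.
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

anno = {
    "bbox": [10.0, 10.0, 20.0, 30.0],  # XYWH_ABS
    "bbox_mode": BoxMode.XYWH_ABS,
    "segmentation": [[10.0, 10.0, 30.0, 10.0, 30.0, 40.0, 10.0, 40.0]],
}
tfms = T.TransformList([T.ResizeTransform(100, 100, 200, 200)])
out = transform_instance_annotations(anno, tfms, image_size=(200, 200))
# out["bbox"] -> [20.0, 20.0, 60.0, 80.0] in XYXY_ABS; the polygon is scaled the same way.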
def process_annotation(self, ann, mask_side_len=28):
    # Parse annotation data
    img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
    height, width = img_info["height"], img_info["width"]
    gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
    gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

    # Run rasterize ..
    torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
    box_bitmasks = {
        "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
        "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len),
        "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
            torch_gt_bbox, mask_side_len
        )[0],
    }

    # Run paste ..
    results = defaultdict(dict)
    for k, box_bitmask in box_bitmasks.items():
        padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
        scaled_boxes = scale_boxes(torch_gt_bbox, scale)

        r = results[k]
        r["old"] = paste_mask_in_image_old(
            padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5
        )
        r["aligned"] = paste_masks_in_image(
            box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width)
        )[0]

    table = []
    for rasterize_method, r in results.items():
        for paste_method, mask in r.items():
            mask = np.asarray(mask)
            iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
            table.append((rasterize_method, paste_method, iou))
    return table
def __call__(self, instances: Instances) -> DensePoseList:
    """
    Convert DensePose predictions (an instance of `DensePoseOutput`)
    into DensePose annotations data (an instance of `DensePoseList`)
    """
    boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    dp_datas = []
    for i, box_xywh in enumerate(boxes_xywh_abs):
        labels_i, result_i = resample_output_to_bbox(
            instances.pred_densepose[i], box_xywh, self._confidence_channels()
        )
        annotation_i = self._sample(labels_i.cpu(), result_i.cpu(), box_xywh)
        annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask(
            instances.pred_densepose[i]
        )
        dp_datas.append(DensePoseDataRelative(annotation_i))
    # create densepose annotations on CPU
    dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size)
    return dp_list
def __getitem__(self, index):
    ann = self.coco[index]

    # bbox transform.
    bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)  # x1y1x2y2
    bbox = Boxes(bbox)

    # mask transform.
    mask = PolygonMasks([ann["segmentation"]])
    mask = mask.crop_and_resize(bbox.tensor, self.size).float()

    if self.transform:
        if torch.rand(1) < 0.5:
            mask = mask.flip(2)
        # add some noise.
        noise_matrix = VALUE_NOISE * torch.rand(mask.shape)
        mask = torch.where(mask > noise_matrix, mask - noise_matrix, noise_matrix)

    return mask
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes" and
            "gt_attributes". This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    # attributes = [obj["attributes"] for obj in annos]
    attributes = []
    for obj in annos:
        if "attributes" in obj.keys():
            attributes.append(obj["attributes"])
        else:
            attributes.append([-1] * 16)
    attributes = torch.tensor(attributes, dtype=torch.int64)
    target.gt_attributes = attributes

    return target
def instances_to_json(instances):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.gt_boxes.tensor.numpy()
    if boxes.shape[1] == 4:
        boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    # scores = instances.scores.tolist()
    classes = instances.gt_classes.tolist()

    results = []
    for k in range(num_instance):
        result = {
            "category_id": classes[k],
            "bbox": boxes[k],
            "bbox_mode": BoxMode.XYWH_ABS,
        }
        results.append(result)
    return results
def _add_densepose_masks_as_segmentation(
    self, annotations: List[Dict[str, Any]], image_shape_hw: Tuple[int, int]
):
    for obj in annotations:
        if ("densepose" not in obj) or ("segmentation" in obj):
            continue
        # DP segmentation: torch.Tensor [S, S] of float32, S=256
        segm_dp = torch.zeros_like(obj["densepose"].segm)
        segm_dp[obj["densepose"].segm > 0] = 1
        segm_h, segm_w = segm_dp.shape
        bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32)
        # image bbox
        x0, y0, x1, y1 = (
            v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        )
        segm_aligned = (
            ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True)
            .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp)
            .squeeze()
        )
        image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
        image_mask[y0:y1, x0:x1] = segm_aligned
        # segmentation for BitMask: np.array [H, W] of np.bool
        obj["segmentation"] = image_mask >= 0.5
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instance (dict): an annotation dict of one instance, in Detectron2's
            dataset format.
    """
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
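# A minimal sketch of `gen_crop_transform_with_instance` above with hypothetical
# values (assumes numpy and detectron2 are installed). A 50x50 crop of a 100x120
# (h, w) image is chosen at random so that the window still contains the center
# of the annotated box.
from detectron2.structures import BoxMode

instance = {"bbox": [30.0, 20.0, 70.0, 60.0], "bbox_mode": BoxMode.XYXY_ABS}
crop = gen_crop_transform_with_instance((50, 50), (100, 120), instance)
# crop is a T.CropTransform(x0, y0, 50, 50) whose window contains the box center (y=40, x=50).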
def annotations_to_instances(bboxes, bbox_classes, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        bboxes (ndarray): numpy array of shape (K, 4), where K is the number of
            objects in the image and each row holds the 4 bounding box coordinates
        bbox_classes (ndarray): numpy array of shape (K,) holding dummy values for
            the class label, where K is the number of objects in the image
        image_size (tuple): height, width

    Returns:
        Instances: It will contain fields "bboxes", "classes".
            This is the format that builtin Detectron models expect.
    """
    boxes = [BoxMode.convert(obj, BoxMode.XYXY_ABS, BoxMode.XYXY_ABS) for obj in bboxes]
    target = Instances(image_size)
    target.bboxes = Boxes(boxes)

    classes = [int(obj) for obj in bbox_classes]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.classes = classes

    return target
def create_instances(prediction, image_size):
    ret = Instances(image_size)

    scores = []
    pred_boxes = []
    pred_classes = []
    for instance in prediction["instances"]:
        scores.append(instance["score"])
        pred_boxes.append(instance["bbox"])
        pred_classes.append(instance["category_id"])

    scores = np.asarray(scores)
    pred_boxes = np.asarray(pred_boxes).reshape(-1, 4)
    pred_boxes = BoxMode.convert(pred_boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    labels = np.asarray(pred_classes)

    ret.scores = scores
    ret.pred_boxes = Boxes(pred_boxes)
    ret.pred_classes = labels
    return ret
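# A minimal sketch of the prediction format expected by the `create_instances` above
# (hypothetical values; assumes numpy and detectron2 are installed). Boxes come in as
# COCO-style XYWH_ABS and leave as XYXY_ABS inside a Boxes object.
prediction = {
    "instances": [
        {"score": 0.8, "bbox": [10.0, 10.0, 20.0, 30.0], "category_id": 5},
    ]
}
inst = create_instances(prediction, image_size=(480, 640))
# inst.pred_boxes -> Boxes([[10., 10., 30., 40.]]); inst.scores and inst.pred_classes
# are numpy arrays ([0.8] and [5]).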
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
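# A minimal sketch of the input expected by the `annotations_to_instances` above
# (hypothetical annotation; assumes detectron2 is installed). The box is given in
# XYWH_ABS and the segmentation as one polygon in flattened (x, y) order.
from detectron2.structures import BoxMode

annos = [
    {
        "bbox": [10.0, 10.0, 20.0, 30.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 3,
        "segmentation": [[10.0, 10.0, 30.0, 10.0, 30.0, 40.0, 10.0, 40.0]],
    }
]
target = annotations_to_instances(annos, image_size=(480, 640))
# target.gt_boxes: one XYXY_ABS box clipped to the image; target.gt_classes: tensor([3]);
# target.gt_masks: PolygonMasks holding the single polygon.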
def annotations_to_instances(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_isactive", "gt_actions" if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "isactive" in annos[0]:
        isactive = [obj["isactive"] for obj in annos]
        isactive = torch.tensor(isactive, dtype=torch.int64)
        target.gt_isactive = isactive

    if len(annos) and "actions" in annos[0]:
        actions = np.stack([obj.get("actions", []) for obj in annos], axis=0)
        target.gt_actions = Interactions(actions)

    return target
def instances_to_json(self, instances, img_id):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    if boxes.shape[1] == 4:
        boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        results.append(result)
    return results
def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W are the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
         unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W are the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg

    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        masks[i, y : y + h, x : x + w] = labels_i > 0
    return BitMasks(masks)
def _add_densepose_body_semantics(
    self, annotations: List[Any], image_shape_hw: Tuple[int, int]
):
    body_semantics = torch.zeros(
        [1, 1, image_shape_hw[0], image_shape_hw[1]], dtype=torch.int
    )
    for obj in annotations:
        if ("densepose" not in obj) or obj["densepose"] is None:
            continue
        segm_dp = obj["densepose"].segm
        # image bbox
        x0, y0, x1, y1 = (
            v.round().astype(int).item()
            for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        )
        y1 = min(y1, image_shape_hw[0] - 1)
        x1 = min(x1, image_shape_hw[1] - 1)
        # resize the DensePose segmentation to the (clipped) box size
        segm_dp = F.interpolate(segm_dp.view(1, 1, *segm_dp.shape), size=(y1 - y0, x1 - x0))
        # paste into the image-sized canvas; pixels already claimed by earlier
        # instances take precedence
        body_semantics_tmp = torch.zeros_like(body_semantics)
        body_semantics_tmp[:, :, y0:y1, x0:x1] = segm_dp
        body_semantics_tmp *= (body_semantics == 0).int()
        body_semantics += body_semantics_tmp
    return body_semantics.squeeze()
def get_transform(self, image: np.ndarray, annotations: List[Any]) -> Transform:
    """
    This function will modify instances to set the iscrowd flag to 1 for
    annotations not picked. It relies on the dataset mapper to filter those
    items out.
    """
    assert isinstance(annotations, (list, tuple)), annotations
    assert all("bbox" in x for x in annotations), annotations
    assert all("bbox_mode" in x for x in annotations), annotations

    image_size = image.shape[:2]

    # filter out iscrowd
    annotations = [x for x in annotations if x.get("iscrowd", 0) == 0]
    if len(annotations) == 0:
        return NoOpTransform()

    sel_index = np.random.randint(len(annotations))
    # set iscrowd flag of other annotations to 1 so that they will be
    # filtered out by the dataset mapper (https://fburl.com/diffusion/fg64cb4h)
    for idx, instance in enumerate(annotations):
        if idx != sel_index:
            instance["iscrowd"] = 1
    instance = annotations[sel_index]

    bbox_xywh = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYWH_ABS)

    scale = np.random.uniform(*self.crop_scale)
    bbox_xywh = bu.scale_bbox_center(bbox_xywh, scale)
    bbox_xywh = bu.clip_box_xywh(bbox_xywh, image_size).int()

    return CropTransform(*bbox_xywh.tolist(), orig_h=image_size[0], orig_w=image_size[1])
def prediction_to_json(instances, img_id):
    """
    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO

    Returns:
        list[dict]: the results in densepose evaluation format
    """
    scores = instances.scores.tolist()
    segmentations = densepose_to_mask(instances)

    boxes = instances.pred_boxes.tensor.clone()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    instances.pred_densepose = instances.pred_densepose.to_result(boxes)

    results = []
    for k in range(len(instances)):
        densepose = instances.pred_densepose[k]
        segmentation = segmentations.tensor[k]
        segmentation_encoded = mask_utils.encode(
            np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
        )
        segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
        result = {
            "image_id": img_id,
            "category_id": 1,  # densepose only has one class
            "bbox": densepose[1],
            "score": scores[k],
            "densepose": densepose,
            "segmentation": segmentation_encoded,
        }
        results.append(result)
    return results
def aug_gt_instances_to_coco_json(instances, img_id, output_height, output_width):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    # 1. scale boxes to the output size
    img_size = instances.image_size  # h, w
    scale_x, scale_y = (output_width / img_size[1], output_height / img_size[0])
    results = Instances((output_height, output_width), **instances.get_fields())

    output_boxes = instances.gt_boxes
    output_boxes.scale(scale_x, scale_y)  # xyxy
    output_boxes.clip(results.image_size)
    instances = results[output_boxes.nonempty()]
    # recompute the count: clipping may have removed empty boxes
    num_instance = len(instances)

    # 2. convert to coco
    boxes = instances.gt_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    classes = instances.gt_classes.tolist()

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k] + 1,
            "bbox": boxes[k],
            "area": boxes[k][2] * boxes[k][3],
            "iscrowd": 0,
        }
        results.append(result)
    return results
def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format, plus the number of
            instances and the per-class counts for the first four categories.
    """
    num_instance = len(instances)
    if num_instance == 0:
        print("no predictions")
        return [], 0, [0, 0, 0, 0]

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    results = []
    # per-class instance counts for the first four categories
    id_1, id_2, id_3, id_4 = 0, 0, 0, 0
    for k in range(num_instance):
        if classes[k] == 0:
            id_1 += 1
        if classes[k] == 1:
            id_2 += 1
        if classes[k] == 2:
            id_3 += 1
        if classes[k] == 3:
            id_4 += 1
        result = {
            "image_id": img_id,
            "category_id": classes[k] + 1,
            "bbox": boxes[k],
            "score": scores[k],
        }
        results.append(result)
    cid = [id_1, id_2, id_3, id_4]
    return results, num_instance, cid
def transform_instance_annotations(annotation, transforms, image_size):
    """
    Apply transforms to the box of annotations of a single instance.

    It will use `transforms.apply_box` for the box.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image

    Returns:
        dict: the same input dict with the field "bbox" transformed according
        to `transforms`. The "bbox_mode" field will be set to XYXY_ABS.
    """
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = transforms.apply_box([bbox])[0]
    annotation["bbox_mode"] = BoxMode.XYXY_ABS
    return annotation
def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
        anno = coco_api.loadAnns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
            if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = (
        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
    )
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
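# A minimal sketch of how `_evaluate_box_proposals` above is typically driven
# (hypothetical variable names): `dataset_predictions` is a list of dicts, each with
# an "image_id" and a "proposals" Instances for that image, and `coco_api` is a
# pycocotools COCO object holding the ground truth.
stats = _evaluate_box_proposals(dataset_predictions, coco_api, area="small", limit=100)
print("AR@100 (small): {:.4f}".format(stats["ar"].item()))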