def load_input_dataset(self, dataset):
    """
    Index the registered two-view dataset by its pair of image ids.

    Every record returned by ``DatasetCatalog.get(dataset)`` is expected to
    hold two views under the keys ``"0"`` and ``"1"``. The ``bbox_mode`` of
    each annotation in both views is converted in place to a ``BoxMode``
    member, and the record is stored in ``self.dataset_dict`` under the key
    ``"<image_id of view 0>__<image_id of view 1>"``.

    Args:
        dataset (str): name passed to ``DatasetCatalog.get``.
    """
    pair_index = {}
    for pair in list(DatasetCatalog.get(dataset)):
        # Build the key first so a malformed record fails before it is mutated.
        pair_key = pair["0"]["image_id"] + "__" + pair["1"]["image_id"]
        for view in ("0", "1"):
            for annotation in pair[view]["annotations"]:
                annotation["bbox_mode"] = BoxMode(annotation["bbox_mode"])
        pair_index[pair_key] = pair
    self.dataset_dict = pair_index
def test_json_deserializable(self):
    """Check that a box mode decoded from JSON can be turned into a BoxMode."""
    decoded = json.loads('{"box_mode": 2}')
    raw_mode = decoded["box_mode"]
    try:
        decoded["box_mode"] = BoxMode(raw_mode)
    except Exception:
        # Any failure to build the enum member is reported as a test failure.
        self.fail("JSON deserialization failed")
def get_coco_dicts(data_dir, image_root=None):
    """
    Build Detectron2-style records for every COCO image that contains a person.

    Images are selected through the 'person' category, but the annotations of
    each selected image are fetched by image id without a category filter.

    Args:
        data_dir (str): path of the COCO annotation JSON file. Unless
            ``image_root`` is given, it must contain 'train' or 'val' so the
            image directory can be chosen.
        image_root (str, optional): directory holding the image files. When
            omitted, the directory is derived from ``data_dir``.

    Returns:
        list[dict]: one dict per image with the keys 'file_name', 'height',
        'width', 'image_id' and 'annotations'. Each annotation carries
        'iscrowd', 'bbox', 'category_id', 'segmentation' and 'bbox_mode'.

    Raises:
        ValueError: if ``image_root`` is not given and ``data_dir`` contains
            neither 'train' nor 'val'.
    """
    if image_root is not None:
        file_path = image_root
    elif 'train' in data_dir:
        file_path = '/media/tangyp/Data/coco/train2014'
    elif 'val' in data_dir:
        file_path = '/media/tangyp/Data/coco/val2014'
    else:
        # Fail early with a clear message instead of hitting an unbound
        # local variable once the first image is processed.
        raise ValueError(
            "Cannot infer the image directory: expected 'train' or 'val' "
            "in {!r}".format(data_dir))

    coco = COCO(data_dir)
    person_cat_ids = coco.getCatIds(catNms=['person'])
    imgs = coco.loadImgs(coco.getImgIds(catIds=person_cat_ids))

    dataset_dicts = []
    for img in imgs:
        anns = coco.loadAnns(ids=coco.getAnnIds(imgIds=img['id']))
        dataset_dicts.append({
            'file_name': os.path.join(file_path, img['file_name']),
            'height': img['height'],
            'width': img['width'],
            'image_id': img['id'],
            'annotations': [
                {
                    'iscrowd': ann['iscrowd'],
                    'bbox': ann['bbox'],
                    'category_id': ann['category_id'],
                    'segmentation': ann['segmentation'],
                    # COCO boxes are [x, y, w, h]; XYWH_ABS is BoxMode(1).
                    'bbox_mode': BoxMode.XYWH_ABS,
                }
                for ann in anns
            ],
        })
    return dataset_dicts
def get_custom_dicts(data_dir, image_root=None):
    """
    Build Detectron2-style records for every image listed in a COCO-format file.

    The image list is read from the 'images' entry of the JSON file, and the
    annotations of each image are looked up through the COCO API.

    Args:
        data_dir (str): path of the COCO-format annotation JSON file. Unless
            ``image_root`` is given, it must contain 'train' or 'val' so the
            image directory can be chosen.
        image_root (str, optional): directory holding the image files. When
            omitted, the directory is derived from ``data_dir``.

    Returns:
        list[dict]: one dict per image with the keys 'file_name', 'height',
        'width', 'image_id' and 'annotations'. Each annotation carries
        'iscrowd', 'bbox', 'category_id', 'segmentation' and 'bbox_mode'.

    Raises:
        ValueError: if ``image_root`` is not given and ``data_dir`` contains
            neither 'train' nor 'val'.
    """
    if image_root is not None:
        file_path = image_root
    elif 'train' in data_dir:
        file_path = '/media/tangyp/Data/coco/train2014'
    elif 'val' in data_dir:
        file_path = '/media/tangyp/Data/coco/val2014'
    else:
        # Fail early with a clear message instead of hitting an unbound
        # local variable once the first image is processed.
        raise ValueError(
            "Cannot infer the image directory: expected 'train' or 'val' "
            "in {!r}".format(data_dir))

    json_file = data_dir
    coco = COCO(json_file)
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for img in imgs_anns['images']:
        anns = coco.loadAnns(ids=coco.getAnnIds(imgIds=img['id']))
        dataset_dicts.append({
            'file_name': os.path.join(file_path, img['file_name']),
            'height': img['height'],
            'width': img['width'],
            'image_id': img['id'],
            'annotations': [
                {
                    'iscrowd': ann['iscrowd'],
                    'bbox': ann['bbox'],
                    'category_id': ann['category_id'],
                    'segmentation': ann['segmentation'],
                    # COCO boxes are [x, y, w, h]; XYWH_ABS is BoxMode(1).
                    'bbox_mode': BoxMode.XYWH_ABS,
                }
                for ann in anns
            ],
        })
    return dataset_dicts
def load_proposals_into_dataset(dataset_dicts, proposal_file):
    """
    Load precomputed object proposals into the dataset.

    The proposal file should be a pickled dict with the following keys:

    - "ids": list[int] or list[str], the image ids
    - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding
      to the image id
    - "objectness_logits": list[np.ndarray], each is an N sized array of
      objectness scores corresponding to the boxes.
    - "bbox_mode": the BoxMode of the boxes array. Defaults to
      ``BoxMode.XYXY_ABS``.

    The legacy key names "indexes" and "scores" are accepted and renamed to
    "ids" and "objectness_logits".

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same list as ``dataset_dicts``; every record gains the
        fields "proposal_boxes", "proposal_objectness_logits" and
        "proposal_bbox_mode".

    Raises:
        KeyError: if a record's image id has no entry in the proposal file.
    """
    logger = logging.getLogger(__name__)
    logger.info("Loading proposals from: {}".format(proposal_file))

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with PathManager.open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
    for old_key, new_key in rename_keys.items():
        if old_key in proposals:
            proposals[new_key] = proposals.pop(old_key)

    # Fetch the indexes of all proposals that are in the dataset.
    # Convert image_id to str since they could be int.
    img_ids = {str(record["image_id"]) for record in dataset_dicts}
    id_to_index = {
        str(proposal_id): index
        for index, proposal_id in enumerate(proposals["ids"])
        if str(proposal_id) in img_ids
    }

    # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
    if "bbox_mode" in proposals:
        bbox_mode = BoxMode(proposals["bbox_mode"])
    else:
        bbox_mode = BoxMode.XYXY_ABS

    for record in dataset_dicts:
        image_id = str(record["image_id"])
        try:
            index = id_to_index[image_id]
        except KeyError:
            raise KeyError(
                "No proposals found for image id {!r} in {}".format(
                    image_id, proposal_file)) from None

        boxes = proposals["boxes"][index]
        objectness_logits = proposals["objectness_logits"][index]
        # Sort the proposals in descending order of the scores
        inds = objectness_logits.argsort()[::-1]
        record["proposal_boxes"] = boxes[inds]
        record["proposal_objectness_logits"] = objectness_logits[inds]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts
def load_proposals_into_dataset(dataset_dicts, proposal_file):
    """
    Load precomputed object proposals into the dataset.

    The proposal file is a pickled dict holding "ids", "boxes" and
    "objectness_logits" (the legacy names "indexes" and "scores" are accepted
    and renamed), plus an optional "bbox_mode" that defaults to
    ``BoxMode.XYXY_ABS``.

    Proposals are matched to records by image id, so the proposal file does
    not have to list the images in the same order as ``dataset_dicts`` and may
    contain images that are not part of the dataset.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same list as ``dataset_dicts``; every record gains the
        fields "proposal_boxes", "proposal_objectness_logits" and
        "proposal_bbox_mode".

    Raises:
        KeyError: if a record's image id has no entry in the proposal file.
    """
    logger = logging.getLogger(__name__)
    logger.info("Loading proposals from: {}".format(proposal_file))

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with PathManager.open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
    for old_key, new_key in rename_keys.items():
        if old_key in proposals:
            proposals[new_key] = proposals.pop(old_key)

    # Map each dataset image id to the position of its proposals. Ids are
    # compared as strings because they may be stored as int on one side and
    # str on the other. Looking proposals up by id (rather than by position)
    # is what keeps them aligned with the dataset order.
    img_ids = {str(entry["image_id"]) for entry in dataset_dicts}
    id_to_index = {
        str(proposal_id): index
        for index, proposal_id in enumerate(proposals["ids"])
        if str(proposal_id) in img_ids
    }

    # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
    if "bbox_mode" in proposals:
        bbox_mode = BoxMode(proposals["bbox_mode"])
    else:
        bbox_mode = BoxMode.XYXY_ABS

    for record in dataset_dicts:
        image_id = str(record["image_id"])
        try:
            index = id_to_index[image_id]
        except KeyError:
            raise KeyError(
                "No proposals found for image id {!r} in {}".format(
                    image_id, proposal_file)) from None

        boxes = proposals["boxes"][index]
        objectness_logits = proposals["objectness_logits"][index]
        # Sort the proposals in descending order of the scores
        inds = objectness_logits.argsort()[::-1]
        record["proposal_boxes"] = boxes[inds]
        record["proposal_objectness_logits"] = objectness_logits[inds]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts
def bbox_convert(bboxes):
    """
    Convert boxes from ``BoxMode.XYXY_ABS`` to ``BoxMode.XYWH_ABS``.

    Args:
        bboxes: boxes in XYXY_ABS format, in any container accepted by
            ``BoxMode.convert``.

    Returns:
        The converted boxes, as returned by ``BoxMode.convert``.
    """
    from detectron2.structures import BoxMode

    # ``convert`` is a static method, so call it on the class instead of
    # building a throwaway ``BoxMode(0)`` member first.
    return BoxMode.convert(bboxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
def process(self, inputs, outputs):
    """
    Collect the predictions of a batch of image pairs into ``self._predictions``.

    Args:
        inputs: the inputs to a model. It is a list of dicts, one per image
            pair. Each dict holds the two views under the keys "0" and "1"
            (each with "image_id", "file_name", "depth" and optionally
            "annotations"), and may hold "gt_corrs" and "rel_pose".
        outputs: the outputs of a model. It is a list of dicts, one per image
            pair. Each dict holds the per-view results under "0" and "1"
            (optionally with "instances" and "proposals"), a "depth" dict
            keyed by view, and may hold "pred_aff", "geo_aff", "emb_aff" and
            "camera".
    """
    for pair_in, pair_out in zip(inputs, outputs):
        cpu = self._cpu_device
        prediction = {"0": {}, "1": {}}
        tmp_instances = {"0": {}, "1": {}}

        for view in ("0", "1"):
            view_in = pair_in[view]
            view_out = pair_out[view]
            view_pred = prediction[view]
            view_tmp = tmp_instances[view]

            view_pred["image_id"] = view_in["image_id"]
            view_pred["file_name"] = view_in["file_name"]

            has_instances = "instances" in view_out
            if has_instances:
                instances = view_out["instances"].to(cpu)
                view_pred["instances"] = instances_to_coco_json(
                    instances, view_in["image_id"])
                view_tmp["embeddingbox"] = {
                    "pred_boxes": instances.pred_boxes,
                    "scores": instances.scores,
                }

            if "proposals" in view_out:
                view_pred["proposals"] = view_out["proposals"].to(cpu)

            if "annotations" in view_in:
                annotations = view_in["annotations"]
                gt_bbox = [ann["bbox"] for ann in annotations]
                if len(annotations) > 0:
                    gt_bbox = np.array(gt_bbox).reshape(-1, 4)  # xywh from coco
                    # The mode of the first annotation is applied to all boxes.
                    original_mode = annotations[0]["bbox_mode"]
                    gt_bbox = BoxMode.convert(
                        gt_bbox,
                        BoxMode(original_mode),
                        BoxMode.XYXY_ABS,
                    )
                view_tmp["gt_bbox"] = gt_bbox

            # Only look for predicted planes when instances exist; an
            # unguarded lookup raises KeyError for outputs without instances.
            if has_instances and hasattr(view_out["instances"], "pred_plane"):
                view_pred["pred_plane"] = view_out["instances"].pred_plane.to(
                    cpu)

            depth = pair_out["depth"][view]
            if depth is not None:
                view_pred["pred_depth"] = depth.to(cpu)
                xyz = self.depth2XYZ(depth)
                # override_offset returns the prediction dict to use from now
                # on, so rebind both the local name and the stored entry.
                view_pred = self.override_offset(xyz, view_pred, view_out)
                prediction[view] = view_pred
                depth_rst = get_depth_err(
                    depth, view_in["depth"].to(self._device))
                view_pred["depth_l1_dist"] = depth_rst.to(cpu)

        for aff_key in ("pred_aff", "geo_aff", "emb_aff"):
            if aff_key in pair_out:
                tmp_instances[aff_key] = pair_out[aff_key].to(cpu)
        if "gt_corrs" in pair_in:
            tmp_instances["gt_corrs"] = pair_in["gt_corrs"]
        prediction["corrs"] = tmp_instances

        if "embedding" in self._plane_tasks:
            if self._eval_gt_box:
                aff_rst = get_affinity_label_score(
                    tmp_instances,
                    filter_iou=self._filter_iou,
                    filter_score=self._filter_score,
                    device=self._device,
                )
            else:
                aff_rst = get_affinity_label_score(
                    tmp_instances,
                    hungarian_threshold=[],
                    filter_iou=self._filter_iou,
                    filter_score=self._filter_score,
                    device=self._device,
                )
            prediction.update(aff_rst)

        if "camera" in self._plane_tasks:
            rel_pose = pair_in["rel_pose"]
            prediction["camera"] = {
                "logits": {
                    "tran": pair_out["camera"]["tran"].to(cpu),
                    "rot": pair_out["camera"]["rot"].to(cpu),
                },
                "gts": {
                    "tran": rel_pose["position"],
                    "rot": rel_pose["rotation"],
                    "tran_cls": rel_pose["tran_cls"],
                    "rot_cls": rel_pose["rot_cls"],
                },
            }

        self._predictions.append(prediction)
def annotations_to_instances(annos, image_size, mask_format="polygon", max_num_planes=20):
    """
    Build an :class:`Instances` object for the model from the instance
    annotations of one image.

    Args:
        annos (list[dict]): a list of annotations, one per instance. Each
            needs "bbox", "bbox_mode" and "category_id"; "segmentation" and
            "plane" are used when the first annotation has them.
        image_size (tuple): height, width
        mask_format (str): "polygon" or "bitmask"; selects how the
            segmentations are stored.
        max_num_planes (int): not used by this function.

    Returns:
        Instances: holds "gt_boxes" and "gt_classes", plus "gt_masks" when
        segmentations are present and "gt_planes" / "gt_plane_idx" when
        planes are present.
    """

    def _as_bitmask(segm):
        # Turn one segmentation (polygon list, COCO RLE dict or 2D array)
        # into a mask array.
        if isinstance(segm, list):
            # polygon
            return polygons_to_bitmask(segm, *image_size)
        if isinstance(segm, dict):
            # COCO RLE
            return mask_util.decode(segm)
        if isinstance(segm, np.ndarray):
            assert (
                segm.ndim == 2
            ), "Expect segmentation of 2 dimensions, got {}.".format(
                segm.ndim)
            # mask array
            return segm
        raise ValueError(
            "Cannot convert segmentation of type '{}' to BitMasks!"
            "Supported types are: polygons as list[list[float] or ndarray],"
            " COCO-style RLE as a dict, or a full-image segmentation mask "
            "as a 2D ndarray.".format(type(segm)))

    xyxy_boxes = []
    for anno in annos:
        xyxy_boxes.append(
            BoxMode.convert(anno["bbox"], BoxMode(anno["bbox_mode"]),
                            BoxMode.XYXY_ABS))

    target = Instances(image_size)
    gt_boxes = Boxes(xyxy_boxes)
    target.gt_boxes = gt_boxes
    gt_boxes.clip(image_size)

    category_ids = [anno["category_id"] for anno in annos]
    target.gt_classes = torch.tensor(category_ids, dtype=torch.int64)

    if len(annos) > 0 and "segmentation" in annos[0]:
        segmentations = [anno["segmentation"] for anno in annos]
        if mask_format == "polygon":
            gt_masks = PolygonMasks(segmentations)
        else:
            assert mask_format == "bitmask", mask_format
            mask_arrays = [_as_bitmask(segm) for segm in segmentations]
            # torch.from_numpy does not support array with negative stride.
            mask_tensors = [
                torch.from_numpy(np.ascontiguousarray(mask))
                for mask in mask_arrays
            ]
            gt_masks = BitMasks(torch.stack(mask_tensors))
        target.gt_masks = gt_masks

    if len(annos) > 0 and "plane" in annos[0]:
        plane_tensors = [torch.tensor(anno["plane"]) for anno in annos]
        index_tensors = [
            torch.tensor([position]) for position in range(len(plane_tensors))
        ]
        target.gt_planes = torch.stack(plane_tensors, dim=0)
        target.gt_plane_idx = torch.stack(index_tensors, dim=0)

    return target