def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        anno = d["annotations"][0]  # only one instance per image
        imH, imW = img.shape[:2]
        mask = cocosegm2mask(anno["segmentation"], imH, imW)
        bbox = anno["bbox"]
        bbox_mode = anno["bbox_mode"]
        bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
        kpt3d = anno["bbox3d_and_center"]
        quat = anno["quat"]
        trans = anno["trans"]
        R = quat2mat(quat)
        # 0-based label
        cat_id = anno["category_id"]
        K = d["cam"]
        kpt_2d = misc.project_pts(kpt3d, K, R, trans)
        # # TODO: visualize pose and keypoints
        label = objs[cat_id]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
        img_vis_kpt2d = img.copy()
        img_vis_kpt2d = misc.draw_projected_box3d(
            img_vis_kpt2d, kpt_2d, middle_color=None, bottom_color=(128, 128, 128)
        )

        xyz_info = mmcv.load(anno["xyz_path"])
        xyz = np.zeros((imH, imW, 3), dtype=np.float32)
        xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
        x1, y1, x2, y2 = xyz_info["xyxy"]
        xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
        xyz_show = get_emb_show(xyz)

        grid_show(
            [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_kpt2d[:, :, [2, 1, 0]], depth, xyz_show],
            ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
            row=2,
            col=3,
        )
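# Hedged reference sketch (illustrative, NOT the repo's implementation): what
# `misc.project_pts(kpt3d, K, R, trans)` is expected to compute under a standard
# pinhole model, i.e. x = K (R p + t) followed by perspective division. Useful
# for sanity-checking the 2d keypoints drawn above.
def project_pts_ref(pts, K, R, t):
    """pts: (N, 3) model points; K: (3, 3); R: (3, 3); t: (3,).
    Returns (N, 2) pixel coordinates."""
    pts_cam = pts @ R.T + t.reshape(1, 3)  # transform model points to camera frame
    uvw = pts_cam @ K.T                    # apply intrinsics -> homogeneous pixels
    return uvw[:, :2] / uvw[:, 2:3]        # perspective division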
def transform_instance_annotations(annotation, transforms, image_size, *, keypoint_hflip_indices=None):
    """
    NOTE: Adapted from detection_utils.
    Apply transforms to box, segmentation, keypoints, etc. of annotations of a single instance.

    It will use `transforms.apply_box` for the box, and `transforms.apply_coords`
    for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict: the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    im_H, im_W = image_size
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = np.array(transforms.apply_box([bbox])[0])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # NOTE: here we transform segms to binary masks (interp is nearest by default)
        mask = transforms.apply_segmentation(cocosegm2mask(annotation["segmentation"], h=im_H, w=im_W))
        annotation["segmentation"] = mask

    if "keypoints" in annotation:
        keypoints = utils.transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    if "centroid_2d" in annotation:
        annotation["centroid_2d"] = transforms.apply_coords(
            np.array(annotation["centroid_2d"]).reshape(1, 2)
        ).flatten()

    return annotation
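# Hedged usage sketch (not part of the original pipeline): how
# `transform_instance_annotations` is typically driven with detectron2's
# augmentation API. The annotation values below are dummies; only the call
# pattern is the point.
def _example_transform_instance_annotations():
    from detectron2.data import transforms as T

    image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image
    augs = [T.ResizeShortestEdge(short_edge_length=240, max_size=320)]
    image, transforms = T.apply_augmentations(augs, image)
    anno = {
        "bbox": [10.0, 20.0, 50.0, 80.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "centroid_2d": [35.0, 60.0],
    }
    anno = transform_instance_annotations(anno, transforms, image.shape[:2])
    # anno["bbox"] is now XYXY_ABS in the resized image frame;
    # anno["centroid_2d"] has been mapped through the same transforms.
    return anno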
def read_data(self, dataset_dict):
    """load image and annos; random shift & scale bbox; crop, rescale."""
    cfg = self.cfg
    r_head_cfg = cfg.MODEL.CDPN.ROT_HEAD
    pnp_net_cfg = cfg.MODEL.CDPN.PNP_NET

    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    dataset_name = dataset_dict["dataset_name"]

    image = read_image_cv2(dataset_dict["file_name"], format=self.img_format)
    # should be consistent with the size in dataset_dict
    utils.check_image_size(dataset_dict, image)
    im_H_ori, im_W_ori = image.shape[:2]

    # currently only replace bg for train ###############################
    if self.split == "train":
        # some synthetic data already has bg; img_type should be real or something else, but not syn
        img_type = dataset_dict.get("img_type", "real")
        if img_type == "syn":
            log_first_n(logging.WARNING, "replace bg", n=10)
            assert "segmentation" in dataset_dict["inst_infos"]
            mask = cocosegm2mask(dataset_dict["inst_infos"]["segmentation"], im_H_ori, im_W_ori)
            image, mask_trunc = self.replace_bg(image.copy(), mask, return_mask=True)
        else:  # real image
            if np.random.rand() < cfg.INPUT.CHANGE_BG_PROB:
                log_first_n(logging.WARNING, "replace bg for real", n=10)
                assert "segmentation" in dataset_dict["inst_infos"]
                mask = cocosegm2mask(dataset_dict["inst_infos"]["segmentation"], im_H_ori, im_W_ori)
                image, mask_trunc = self.replace_bg(image.copy(), mask, return_mask=True)
            else:
                mask_trunc = None

    # NOTE: maybe add or change color augment here ===================================
    if self.split == "train" and self.color_aug_prob > 0 and self.color_augmentor is not None:
        if np.random.rand() < self.color_aug_prob:
            if cfg.INPUT.COLOR_AUG_SYN_ONLY and img_type not in ["real"]:
                image = self._color_aug(image, self.color_aug_type)
            else:
                image = self._color_aug(image, self.color_aug_type)

    # other transforms (mainly geometric ones);
    # for the 6d pose task, flip is not allowed in general, except for some 2d keypoint methods
    image, transforms = T.apply_augmentations(self.augmentation, image)
    im_H, im_W = image_shape = image.shape[:2]  # h, w

    # NOTE: scale camera intrinsics if necessary ================================
    scale_x = im_W / im_W_ori
    scale_y = im_H / im_H_ori  # NOTE: generally scale_x should be equal to scale_y
    if "cam" in dataset_dict:
        if im_W != im_W_ori or im_H != im_H_ori:
            dataset_dict["cam"][0] *= scale_x
            dataset_dict["cam"][1] *= scale_y
        K = dataset_dict["cam"].astype("float32")
        dataset_dict["cam"] = torch.as_tensor(K)

    input_res = cfg.MODEL.CDPN.BACKBONE.INPUT_RES
    out_res = cfg.MODEL.CDPN.BACKBONE.OUTPUT_RES

    # CHW -> HWC
    coord_2d = get_2d_coord_np(im_W, im_H, low=0, high=1).transpose(1, 2, 0)

    #################################################################################
    if self.split != "train":
        # don't load annotations at test time
        test_bbox_type = cfg.TEST.TEST_BBOX_TYPE
        if test_bbox_type == "gt":
            bbox_key = "bbox"
        else:
            bbox_key = f"bbox_{test_bbox_type}"
        assert not self.flatten, "Do not use flattened dicts for test!"
        # here get batched rois
        roi_infos = {}
        # yapf: disable
        roi_keys = ["scene_im_id", "file_name", "cam", "im_H", "im_W",
                    "roi_img", "inst_id", "roi_coord_2d", "roi_cls", "score", "roi_extent",
                    bbox_key, "bbox_mode", "bbox_center", "roi_wh",
                    "scale", "resize_ratio", "model_info",
                    ]
        for _key in roi_keys:
            roi_infos[_key] = []
        # yapf: enable
        # TODO: how to handle image without detections
        #   filter those when load annotations or detections, implement a function for this
        # "annotations" means detections
        for inst_i, inst_infos in enumerate(dataset_dict["annotations"]):
            # inherent image-level infos
            roi_infos["scene_im_id"].append(dataset_dict["scene_im_id"])
            roi_infos["file_name"].append(dataset_dict["file_name"])
            roi_infos["im_H"].append(im_H)
            roi_infos["im_W"].append(im_W)
            roi_infos["cam"].append(dataset_dict["cam"].cpu().numpy())

            # roi-level infos
            roi_infos["inst_id"].append(inst_i)
            roi_infos["model_info"].append(inst_infos["model_info"])

            roi_cls = inst_infos["category_id"]
            roi_infos["roi_cls"].append(roi_cls)
            roi_infos["score"].append(inst_infos["score"])

            # extent
            roi_extent = self._get_extents(dataset_name)[roi_cls]
            roi_infos["roi_extent"].append(roi_extent)

            bbox = BoxMode.convert(inst_infos[bbox_key], inst_infos["bbox_mode"], BoxMode.XYXY_ABS)
            bbox = np.array(transforms.apply_box([bbox])[0])
            roi_infos[bbox_key].append(bbox)
            roi_infos["bbox_mode"].append(BoxMode.XYXY_ABS)
            x1, y1, x2, y2 = bbox
            bbox_center = np.array([0.5 * (x1 + x2), 0.5 * (y1 + y2)])
            bw = max(x2 - x1, 1)
            bh = max(y2 - y1, 1)
            scale = max(bh, bw) * cfg.INPUT.DZI_PAD_SCALE
            scale = min(scale, max(im_H, im_W)) * 1.0
            roi_infos["bbox_center"].append(bbox_center.astype("float32"))
            roi_infos["scale"].append(scale)
            roi_infos["roi_wh"].append(np.array([bw, bh], dtype=np.float32))
            roi_infos["resize_ratio"].append(out_res / scale)

            # CHW, float32 tensor
            # roi_image
            roi_img = crop_resize_by_warp_affine(
                image, bbox_center, scale, input_res, interpolation=cv2.INTER_LINEAR
            ).transpose(2, 0, 1)
            roi_img = self.normalize_image(cfg, roi_img)
            roi_infos["roi_img"].append(roi_img.astype("float32"))

            # roi_coord_2d
            roi_coord_2d = crop_resize_by_warp_affine(
                coord_2d, bbox_center, scale, out_res, interpolation=cv2.INTER_LINEAR
            ).transpose(2, 0, 1)  # HWC -> CHW
            roi_infos["roi_coord_2d"].append(roi_coord_2d.astype("float32"))

        for _key in roi_keys:
            if _key in ["roi_img", "roi_coord_2d"]:
                dataset_dict[_key] = torch.as_tensor(roi_infos[_key]).contiguous()
            elif _key in ["model_info", "scene_im_id", "file_name"]:
                # can not convert to tensor
                dataset_dict[_key] = roi_infos[_key]
            else:
                dataset_dict[_key] = torch.tensor(roi_infos[_key])

        return dataset_dict
    #######################################################################################
    # NOTE: currently assume flattened dicts for train
    assert self.flatten, "Only support flattened dicts for train now"
    inst_infos = dataset_dict.pop("inst_infos")
    dataset_dict["roi_cls"] = roi_cls = inst_infos["category_id"]

    # extent
    roi_extent = self._get_extents(dataset_name)[roi_cls]
    dataset_dict["roi_extent"] = torch.tensor(roi_extent, dtype=torch.float32)

    # load xyz =======================================================
    xyz_info = mmcv.load(inst_infos["xyz_path"])
    x1, y1, x2, y2 = xyz_info["xyxy"]
    # float16 does not affect performance (classification/regression)
    xyz_crop = xyz_info["xyz_crop"]
    xyz = np.zeros((im_H, im_W, 3), dtype=np.float32)
    xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
    # NOTE: full mask
    mask_obj = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype(bool).astype(np.float32)
    if cfg.INPUT.SMOOTH_XYZ:
        xyz = self.smooth_xyz(xyz)

    if cfg.TRAIN.VIS:
        xyz = self.smooth_xyz(xyz)

    # override bbox info using xyz_infos
    inst_infos["bbox"] = [x1, y1, x2, y2]
    inst_infos["bbox_mode"] = BoxMode.XYXY_ABS

    # USER: Implement additional transformations if you have other types of data
    # inst_infos.pop("segmentation")  # NOTE: use mask from xyz
    anno = transform_instance_annotations(inst_infos, transforms, image_shape, keypoint_hflip_indices=None)

    # augment bbox ===================================================
    bbox_xyxy = anno["bbox"]
    bbox_center, scale = self.aug_bbox(cfg, bbox_xyxy, im_H, im_W)
    bw = max(bbox_xyxy[2] - bbox_xyxy[0], 1)
    bh = max(bbox_xyxy[3] - bbox_xyxy[1], 1)

    # CHW, float32 tensor
    # roi_image ------------------------------------
    roi_img = crop_resize_by_warp_affine(
        image, bbox_center, scale, input_res, interpolation=cv2.INTER_LINEAR
    ).transpose(2, 0, 1)
    roi_img = self.normalize_image(cfg, roi_img)

    # roi_coord_2d ----------------------------------------------------
    roi_coord_2d = crop_resize_by_warp_affine(
        coord_2d, bbox_center, scale, out_res, interpolation=cv2.INTER_LINEAR
    ).transpose(2, 0, 1)

    # roi_mask ---------------------------------------
    # (mask_trunc < mask_visib < mask_obj)
    mask_visib = anno["segmentation"].astype("float32") * mask_obj
    if mask_trunc is None:
        mask_trunc = mask_visib
    else:
        mask_trunc = mask_visib * mask_trunc.astype("float32")

    if cfg.TRAIN.VIS:
        mask_xyz_interp = cv2.INTER_LINEAR
    else:
        mask_xyz_interp = cv2.INTER_NEAREST

    # maybe truncated mask (true mask for rgb)
    roi_mask_trunc = crop_resize_by_warp_affine(
        mask_trunc[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    # use original visible mask to calculate xyz loss (try full obj mask?)
    roi_mask_visib = crop_resize_by_warp_affine(
        mask_visib[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    roi_mask_obj = crop_resize_by_warp_affine(
        mask_obj[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    # roi_xyz ----------------------------------------------------
    roi_xyz = crop_resize_by_warp_affine(xyz, bbox_center, scale, out_res, interpolation=mask_xyz_interp)

    # region label
    if r_head_cfg.NUM_REGIONS > 1:
        fps_points = self._get_fps_points(dataset_name)[roi_cls]
        roi_region = xyz_to_region(roi_xyz, fps_points)  # HW
        dataset_dict["roi_region"] = torch.as_tensor(roi_region.astype(np.int32)).contiguous()

    roi_xyz = roi_xyz.transpose(2, 0, 1)  # HWC --> CHW

    # normalize xyz to [0, 1] using extent
    roi_xyz[0] = roi_xyz[0] / roi_extent[0] + 0.5
    roi_xyz[1] = roi_xyz[1] / roi_extent[1] + 0.5
    roi_xyz[2] = roi_xyz[2] / roi_extent[2] + 0.5

    if ("CE" in r_head_cfg.XYZ_LOSS_TYPE) or ("cls" in cfg.MODEL.CDPN.NAME):  # convert target to int for cls
        # assume roi_xyz has been normalized to [0, 1]
        roi_xyz_bin = np.zeros_like(roi_xyz)
        roi_x_norm = roi_xyz[0]
        roi_x_norm[roi_x_norm < 0] = 0  # clip
        roi_x_norm[roi_x_norm > 0.999999] = 0.999999
        # [0, BIN-1]
        roi_xyz_bin[0] = np.asarray(roi_x_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)

        roi_y_norm = roi_xyz[1]
        roi_y_norm[roi_y_norm < 0] = 0
        roi_y_norm[roi_y_norm > 0.999999] = 0.999999
        roi_xyz_bin[1] = np.asarray(roi_y_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)

        roi_z_norm = roi_xyz[2]
        roi_z_norm[roi_z_norm < 0] = 0
        roi_z_norm[roi_z_norm > 0.999999] = 0.999999
        roi_xyz_bin[2] = np.asarray(roi_z_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)

        # the last bin is for bg
        roi_masks = {"trunc": roi_mask_trunc, "visib": roi_mask_visib, "obj": roi_mask_obj}
        roi_mask_xyz = roi_masks[r_head_cfg.XYZ_LOSS_MASK_GT]
        roi_xyz_bin[0][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
        roi_xyz_bin[1][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
        roi_xyz_bin[2][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN

        if "CE" in r_head_cfg.XYZ_LOSS_TYPE:
            dataset_dict["roi_xyz_bin"] = torch.as_tensor(roi_xyz_bin.astype("uint8")).contiguous()
        if "/" in r_head_cfg.XYZ_LOSS_TYPE and len(r_head_cfg.XYZ_LOSS_TYPE.split("/")[1]) > 0:
            dataset_dict["roi_xyz"] = torch.as_tensor(roi_xyz.astype("float32")).contiguous()
    else:
        dataset_dict["roi_xyz"] = torch.as_tensor(roi_xyz.astype("float32")).contiguous()

    # pose targets ----------------------------------------------------------------------
    pose = inst_infos["pose"]
    allo_pose = egocentric_to_allocentric(pose)
    quat = inst_infos["quat"]
    allo_quat = mat2quat(allo_pose[:3, :3])  # ====== actually not needed ==========

    if pnp_net_cfg.ROT_TYPE == "allo_quat":
        dataset_dict["allo_quat"] = torch.as_tensor(allo_quat.astype("float32"))
    elif pnp_net_cfg.ROT_TYPE == "ego_quat":
        dataset_dict["ego_quat"] = torch.as_tensor(quat.astype("float32"))
    # rot6d
    elif pnp_net_cfg.ROT_TYPE == "ego_rot6d":
        dataset_dict["ego_rot6d"] = torch.as_tensor(mat_to_ortho6d_np(pose[:3, :3].astype("float32")))
    elif pnp_net_cfg.ROT_TYPE == "allo_rot6d":
        dataset_dict["allo_rot6d"] = torch.as_tensor(mat_to_ortho6d_np(allo_pose[:3, :3].astype("float32")))
    # log quat
    elif pnp_net_cfg.ROT_TYPE == "ego_log_quat":
        dataset_dict["ego_log_quat"] = quaternion_lf.qlog(torch.as_tensor(quat.astype("float32"))[None])[0]
    elif pnp_net_cfg.ROT_TYPE == "allo_log_quat":
        dataset_dict["allo_log_quat"] = quaternion_lf.qlog(torch.as_tensor(allo_quat.astype("float32"))[None])[0]
    # lie vec
    elif pnp_net_cfg.ROT_TYPE == "ego_lie_vec":
        dataset_dict["ego_lie_vec"] = lie_algebra.rot_to_lie_vec(
            torch.as_tensor(pose[:3, :3].astype("float32")[None])
        )[0]
    elif pnp_net_cfg.ROT_TYPE == "allo_lie_vec":
        dataset_dict["allo_lie_vec"] = lie_algebra.rot_to_lie_vec(
            torch.as_tensor(allo_pose[:3, :3].astype("float32"))[None]
        )[0]
    else:
        raise ValueError(f"Unknown rot type: {pnp_net_cfg.ROT_TYPE}")

    dataset_dict["ego_rot"] = torch.as_tensor(pose[:3, :3].astype("float32"))
    dataset_dict["trans"] = torch.as_tensor(inst_infos["trans"].astype("float32"))

    dataset_dict["roi_points"] = torch.as_tensor(self._get_model_points(dataset_name)[roi_cls].astype("float32"))
    dataset_dict["sym_info"] = self._get_sym_infos(dataset_name)[roi_cls]

    dataset_dict["roi_img"] = torch.as_tensor(roi_img.astype("float32")).contiguous()
    dataset_dict["roi_coord_2d"] = torch.as_tensor(roi_coord_2d.astype("float32")).contiguous()

    dataset_dict["roi_mask_trunc"] = torch.as_tensor(roi_mask_trunc.astype("float32")).contiguous()
    dataset_dict["roi_mask_visib"] = torch.as_tensor(roi_mask_visib.astype("float32")).contiguous()
    dataset_dict["roi_mask_obj"] = torch.as_tensor(roi_mask_obj.astype("float32")).contiguous()

    dataset_dict["bbox_center"] = torch.as_tensor(bbox_center, dtype=torch.float32)
    dataset_dict["scale"] = scale
    dataset_dict["bbox"] = anno["bbox"]  # NOTE: original bbox
    dataset_dict["roi_wh"] = torch.as_tensor(np.array([bw, bh], dtype=np.float32))
    dataset_dict["resize_ratio"] = resize_ratio = out_res / scale
    z_ratio = inst_infos["trans"][2] / resize_ratio
    obj_center = anno["centroid_2d"]
    delta_c = obj_center - bbox_center
    dataset_dict["trans_ratio"] = torch.as_tensor([delta_c[0] / bw, delta_c[1] / bh, z_ratio]).to(torch.float32)

    return dataset_dict
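# Hedged sketch (not from the original file): how the "trans_ratio" target built
# above can be inverted at test time. Given predicted ratios, the bbox used for
# the crop, and the camera intrinsics K, the translation is recovered by
# back-projecting the estimated 2d object center at the estimated depth.
# The function name and argument layout are illustrative.
def recover_trans_from_ratio(trans_ratio, bbox_center, bw, bh, resize_ratio, K):
    """Invert the scale-invariant translation encoding used in read_data."""
    cx = bbox_center[0] + trans_ratio[0] * bw  # obj center x in the full image
    cy = bbox_center[1] + trans_ratio[1] * bh  # obj center y in the full image
    tz = trans_ratio[2] * resize_ratio         # depth (z_ratio = tz / resize_ratio)
    # back-project the 2d center at depth tz with the pinhole model
    tx = (cx - K[0, 2]) * tz / K[0, 0]
    ty = (cy - K[1, 2]) * tz / K[1, 1]
    return np.array([tx, ty, tz], dtype=np.float32)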
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array(
            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
        )
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
        # # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        for _i in range(len(annos)):
            img_vis = vis_image_mask_bbox_cv2(
                img, masks[_i : _i + 1], bboxes=bboxes_xyxy[_i : _i + 1], labels=labels[_i : _i + 1]
            )
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
            if "test" not in dset_name:
                xyz_path = annos[_i]["xyz_path"]
                xyz_info = mmcv.load(xyz_path)
                x1, y1, x2, y2 = xyz_info["xyxy"]
                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
                xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
                xyz_show = get_emb_show(xyz)
                xyz_crop_show = get_emb_show(xyz_crop)
                img_xyz = img.copy() / 255.0
                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
                fg_idx = np.where(mask_xyz != 0)
                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
                img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
                img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
                # diff mask
                diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]

                grid_show(
                    [
                        img[:, :, [2, 1, 0]],
                        img_vis[:, :, [2, 1, 0]],
                        img_vis_kpts2d[:, :, [2, 1, 0]],
                        depth,
                        # xyz_show,
                        diff_mask_xyz,
                        xyz_crop_show,
                        img_xyz[:, :, [2, 1, 0]],
                        img_xyz_crop[:, :, [2, 1, 0]],
                        img_vis_crop,
                    ],
                    [
                        "img",
                        "vis_img",
                        "img_vis_kpts2d",
                        "depth",
                        "diff_mask_xyz",
                        "xyz_crop_show",
                        "img_xyz",
                        "img_xyz_crop",
                        "img_vis_crop",
                    ],
                    row=3,
                    col=3,
                )
            else:
                grid_show(
                    [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_kpts2d[:, :, [2, 1, 0]], depth],
                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
                    row=2,
                    col=2,
                )
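# Hedged reference sketch (assumed behavior, not the repo's code): the
# `cocosegm2mask(segm, h, w)` helper used throughout is expected to decode a
# COCO-style segmentation (polygons or RLE) into an HxW binary mask, roughly
# equivalent to the standard pycocotools recipe below.
def cocosegm2mask_ref(segm, h, w):
    import pycocotools.mask as cocomask

    if isinstance(segm, list):  # list of polygons
        rles = cocomask.frPyObjects(segm, h, w)
        rle = cocomask.merge(rles)
    elif isinstance(segm["counts"], list):  # uncompressed RLE dict
        rle = cocomask.frPyObjects(segm, h, w)
    else:  # already a compressed RLE dict
        rle = segm
    return cocomask.decode(rle)  # HxW uint8 mask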
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array(
            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
        )
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
        # # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
        img_vis_kpts2d = img.copy()
        for anno_i in range(len(annos)):
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i])
        grid_show(
            [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_kpts2d[:, :, [2, 1, 0]], depth],
            [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )
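# Hedged entry-point sketch (assumed, not copied from the repo): `test_vis`
# reads the dataset name from `sys.argv[1]`, so these loaders are typically
# run as scripts. Whatever registers the dataset into detectron2's
# DatasetCatalog (a repo-specific step, omitted here) must run before the call.
if __name__ == "__main__":
    # e.g. `python this_file.py <registered_dataset_name>`
    test_vis()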