def proj(R_est, t_est, R_gt, t_gt, K, pts):
    """Mean pixel distance between model points projected with two poses.

    Projection error [px] by Brachmann et al. (CVPR'16): the model points are
    projected once with the estimated pose and once with the ground-truth
    pose, and the per-point 2D distances are averaged.

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :return: The calculated error.
    """
    uv_est = misc.project_pts(pts, K, R_est, t_est)
    uv_gt = misc.project_pts(pts, K, R_gt, t_gt)

    # One Euclidean distance per model point, then the average over points.
    per_point_dist = np.linalg.norm(uv_est - uv_gt, axis=1)
    return per_point_dist.mean()
def test_vis():
    """Show every sample of the dataset named by ``sys.argv[1]``.

    Only the first annotation of each image is used. For each sample a 2x3
    grid is shown with the image, the mask/bbox overlay, the projected 3D
    box, the depth map and the xyz embedding pasted into a full-size canvas.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    obj_names = meta.objs

    tic = time.perf_counter()
    records = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(records), time.perf_counter() - tic))

    os.makedirs("output/{}-data-vis".format(dset_name), exist_ok=True)

    def _swap_rb(image):
        # Images are read as BGR; reverse the channel order for display.
        return image[:, :, [2, 1, 0]]

    for record in records:
        img = read_image_cv2(record["file_name"], format="BGR")
        # presumably converts millimetres to metres - confirm with the dataset
        depth = mmcv.imread(record["depth_file"], "unchanged") / 1000.0

        # only one instance per image
        anno = record["annotations"][0]
        height, width = img.shape[:2]

        mask = cocosegm2mask(anno["segmentation"], height, width)
        box_xyxy = np.array(BoxMode.convert(anno["bbox"], anno["bbox_mode"], BoxMode.XYXY_ABS))

        # Project the annotated 3D box corners/center with the annotated pose.
        rot = quat2mat(anno["quat"])
        box3d_2d = misc.project_pts(anno["bbox3d_and_center"], record["cam"], rot, anno["trans"])

        # category_id is a 0-based label
        label = obj_names[anno["category_id"]]

        img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[box_xyxy], labels=[label])
        img_vis_kpt2d = misc.draw_projected_box3d(
            img.copy(), box3d_2d, middle_color=None, bottom_color=(128, 128, 128)
        )

        # Paste the stored xyz crop back into a zero-filled full-size map.
        xyz_info = mmcv.load(anno["xyz_path"])
        xyz_full = np.zeros((height, width, 3), dtype=np.float32)
        crop = xyz_info["xyz_crop"].astype(np.float32)
        x1, y1, x2, y2 = xyz_info["xyxy"]
        xyz_full[y1 : y2 + 1, x1 : x2 + 1, :] = crop
        xyz_show = get_emb_show(xyz_full)

        panels = [_swap_rb(img), _swap_rb(img_vis), _swap_rb(img_vis_kpt2d), depth, xyz_show]
        titles = ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"]
        grid_show(panels, titles, row=2, col=3)
def proj_sym(R_est, t_est, R_gt, t_gt, K, pts, syms):
    """Symmetry-aware average distance of projections of model vertices [px].

    Same measure as the projection error by Brachmann et al. (CVPR'16), but
    evaluated against every symmetric variant of the ground-truth pose; the
    smallest mean distance is returned.

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :param syms: Iterable of symmetry transformations, each a dictionary with
        an 'R' entry (rotation) and a 't' entry (translation).
    :return: The calculated error.
    """
    uv_est = misc.project_pts(pts, K, R_est, t_est)

    def _mean_dist_to(sym):
        # Compose the ground-truth pose with the symmetry transformation.
        R_sym = R_gt.dot(sym["R"])
        t_sym = R_gt.dot(sym["t"]) + t_gt
        uv_sym = misc.project_pts(pts, K, R_sym, t_sym)
        return np.linalg.norm(uv_est - uv_sym, axis=1).mean()

    return min([_mean_dist_to(sym) for sym in syms])
def mspd(R_est, t_est, R_gt, t_gt, K, pts, syms):
    """Maximum Symmetry-Aware Projection Distance (MSPD).

    For every symmetry transformation the largest 2D distance between the
    projections under the estimated pose and under the symmetric ground-truth
    pose is taken; the result is the minimum of these maxima.

    See: http://bop.felk.cvut.cz/challenges/bop-challenge-2019/

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with the intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :param syms: Set of symmetry transformations, each given by a dictionary with:
        - 'R': 3x3 ndarray with the rotation matrix.
        - 't': 3x1 ndarray with the translation vector.
    :return: The calculated error.
    """
    uv_est = misc.project_pts(pts, K, R_est, t_est)

    worst_per_sym = [
        np.linalg.norm(
            uv_est - misc.project_pts(pts, K, R_gt.dot(sym["R"]), R_gt.dot(sym["t"]) + t_gt),
            axis=1,
        ).max()
        for sym in syms
    ]
    return min(worst_per_sym)
def test_vis():
    """Show every annotated instance of the dataset named by ``sys.argv[1]``.

    One figure is shown per instance. When the dataset name does not contain
    "test", the instance's xyz file is loaded and a 3x3 grid with xyz
    overlays and a mask difference is shown; otherwise a 2x2 grid with the
    image, the mask/bbox overlay, the projected 3D box and the depth map.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    obj_names = meta.objs

    tic = time.perf_counter()
    records = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(records), time.perf_counter() - tic))

    os.makedirs("output/{}-data-vis".format(dset_name), exist_ok=True)

    # xyz files are only read when the dataset name does not contain "test".
    show_xyz = "test" not in dset_name

    def _swap_rb(image):
        # Images are read as BGR; reverse the channel order for display.
        return image[:, :, [2, 1, 0]]

    for record in records:
        img = read_image_cv2(record["file_name"], format="BGR")
        # presumably converts millimetres to metres - confirm with the dataset
        depth = mmcv.imread(record["depth_file"], "unchanged") / 1000.0
        height, width = img.shape[:2]
        annos = record["annotations"]
        cam = record["cam"]

        masks = [cocosegm2mask(a["segmentation"], height, width) for a in annos]
        boxes_xyxy = np.array([BoxMode.convert(a["bbox"], a["bbox_mode"], BoxMode.XYXY_ABS) for a in annos])
        rotations = [quat2mat(a["quat"]) for a in annos]
        boxes3d_2d = [
            misc.project_pts(a["bbox3d_and_center"], cam, rot, a["trans"])
            for a, rot in zip(annos, rotations)
        ]
        # category_id is a 0-based label
        labels = [obj_names[a["category_id"]] for a in annos]

        for idx, anno in enumerate(annos):
            img_vis = vis_image_mask_bbox_cv2(
                img, [masks[idx]], bboxes=boxes_xyxy[idx : idx + 1], labels=[labels[idx]]
            )
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), boxes3d_2d[idx])

            if not show_xyz:
                grid_show(
                    [_swap_rb(img), _swap_rb(img_vis), _swap_rb(img_vis_kpts2d), depth],
                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
                    row=2,
                    col=2,
                )
                continue

            xyz_info = mmcv.load(anno["xyz_path"])
            x1, y1, x2, y2 = xyz_info["xyxy"]
            rows = slice(y1, y2 + 1)
            cols = slice(x1, x2 + 1)

            # Paste the stored xyz crop back into a zero-filled full-size map.
            xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
            xyz_full = np.zeros((height, width, 3), dtype=np.float32)
            xyz_full[rows, cols, :] = xyz_crop
            xyz_show = get_emb_show(xyz_full)
            xyz_crop_show = get_emb_show(xyz_crop)

            # A pixel is foreground when any of its three xyz channels is non-zero.
            mask_xyz = (xyz_full != 0).any(axis=2).astype("uint8")
            fg_rows, fg_cols = np.nonzero(mask_xyz)

            # Overlay the xyz visualisation on the [0, 1]-scaled image.
            img_xyz = img.copy() / 255.0
            img_xyz[fg_rows, fg_cols, :] = xyz_show[fg_rows, fg_cols, :3]
            img_xyz_crop = img_xyz[rows, cols, :]
            img_vis_crop = img_vis[rows, cols, :]

            # diff mask
            diff_mask_xyz = np.abs(masks[idx] - mask_xyz)[rows, cols]

            panels = [
                _swap_rb(img),
                _swap_rb(img_vis),
                _swap_rb(img_vis_kpts2d),
                depth,
                diff_mask_xyz,
                xyz_crop_show,
                _swap_rb(img_xyz),
                _swap_rb(img_xyz_crop),
                img_vis_crop,
            ]
            titles = [
                "img",
                "vis_img",
                "img_vis_kpts2d",
                "depth",
                "diff_mask_xyz",
                "xyz_crop_show",
                "img_xyz",
                "img_xyz_crop",
                "img_vis_crop",
            ]
            grid_show(panels, titles, row=3, col=3)
def process(self, inputs, outputs, out_dict):
    """Store the pose predictions of one batch in ``self._predictions``.

    When ``cfg.TEST.USE_PNP`` is set, the work is delegated to the PnP
    variant selected by ``cfg.TEST.PNP_TYPE`` and its result is returned.
    Otherwise the rotations/translations in ``out_dict`` are used directly
    and one result dict per instance is written to
    ``self._predictions[cls_name][file_name]``.

    Args:
        inputs: the inputs to a model. It is a list of dict. Each dict
            corresponds to an image; this method reads the per-instance
            entries "roi_img", "file_name", "scene_im_id", "cam",
            "roi_cls" and "score" (plus "bbox_est" when ``cfg.DEBUG``).
        outputs: list of dict, iterated in lockstep with ``inputs``. The
            "time" entry of each dict is increased by the processing time
            and copied into the stored results.
        out_dict: dict with the tensors "rot" and "trans". They are
            assumed to hold one row per instance, in the order in which
            the instances of ``inputs`` are iterated (all instances of
            ``inputs[0]``, then ``inputs[1]``, ...).

    Raises:
        NotImplementedError: if ``cfg.TEST.USE_PNP`` is set and
            ``cfg.TEST.PNP_TYPE`` is not one of the supported values.
    """
    cfg = self.cfg
    if cfg.TEST.USE_PNP:
        pnp_type = cfg.TEST.PNP_TYPE.lower()
        if pnp_type == "ransac_pnp":
            return self.process_pnp_ransac(inputs, outputs, out_dict)
        elif pnp_type == "net_iter_pnp":
            return self.process_net_and_pnp(inputs, outputs, out_dict, pnp_type="iter")
        elif pnp_type == "net_ransac_pnp":
            return self.process_net_and_pnp(inputs, outputs, out_dict, pnp_type="ransac")
        elif pnp_type == "net_ransac_pnp_rot":
            # use rot from PnP/RANSAC and translation from Net
            return self.process_net_and_pnp(inputs, outputs, out_dict, pnp_type="ransac_rot")
        else:
            raise NotImplementedError(f"Unsupported cfg.TEST.PNP_TYPE: {cfg.TEST.PNP_TYPE}")

    out_rots = out_dict["rot"].detach().to(self._cpu_device).numpy()
    out_transes = out_dict["trans"].detach().to(self._cpu_device).numpy()

    # Running index into out_rots/out_transes over all instances of the batch.
    out_i = -1
    for _input, output in zip(inputs, outputs):
        start_process_time = time.perf_counter()
        for inst_i in range(len(_input["roi_img"])):
            # Advance before any `continue` so skipped instances keep the
            # counter aligned with the rows of the network output.
            out_i += 1
            file_name = _input["file_name"][inst_i]

            scene_im_id_split = _input["scene_im_id"][inst_i].split("/")
            K = _input["cam"][inst_i].cpu().numpy().copy()

            roi_label = _input["roi_cls"][inst_i]  # 0-based label
            score = _input["score"][inst_i]
            roi_label, cls_name = self._maybe_adapt_label_cls_name(roi_label)
            if cls_name is None:
                continue

            scene_id = scene_im_id_split[0]
            im_id = int(scene_im_id_split[1])

            # get pose: inst_i restarts for every image, so the batch-wide
            # counter must be used to pick the matching output row.
            rot_est = out_rots[out_i]
            trans_est = out_transes[out_i]

            if cfg.DEBUG:
                # visualize pose (hard-coded to a single scene/image)
                pose_est = np.hstack([rot_est, trans_est.reshape(3, 1)])
                if f"{int(scene_id)}/{im_id}" != "9/499":
                    continue

                im_ori = mmcv.imread(file_name, "color")
                bbox = _input["bbox_est"][inst_i].cpu().numpy().copy()

                test_label = _input["roi_cls"][inst_i]
                kpt_3d = self.kpts_3d[test_label]
                kpt_2d = misc.project_pts(kpt_3d, K, rot_est, trans_est)

                gt_dict = self.gts[cls_name][file_name]
                gt_rot = gt_dict["R"]
                gt_trans = gt_dict["t"]
                kpt_2d_gt = misc.project_pts(kpt_3d, K, gt_rot, gt_trans)

                # Extents covering both the estimated and the GT keypoints
                # (initial values as in the original: 0 / 1000).
                maxx, maxy, minx, miny = 0, 0, 1000, 1000
                for kpt_i in range(len(kpt_2d)):
                    maxx, maxy, minx, miny = (
                        max(maxx, kpt_2d[kpt_i][0]),
                        max(maxy, kpt_2d[kpt_i][1]),
                        min(minx, kpt_2d[kpt_i][0]),
                        min(miny, kpt_2d[kpt_i][1]),
                    )
                    maxx, maxy, minx, miny = (
                        max(maxx, kpt_2d_gt[kpt_i][0]),
                        max(maxy, kpt_2d_gt[kpt_i][1]),
                        min(minx, kpt_2d_gt[kpt_i][0]),
                        min(miny, kpt_2d_gt[kpt_i][1]),
                    )
                center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
                scale = max(maxx - minx, maxy - miny) + 5

                out_size = 256
                zoomed_im = crop_resize_by_warp_affine(im_ori, center, scale, out_size)
                save_path = osp.join(
                    cfg.OUTPUT_DIR, "vis", "{}_{}_{:06d}_no_bbox.png".format(cls_name, scene_id, im_id)
                )
                mmcv.mkdir_or_exist(osp.dirname(save_path))
                mmcv.imwrite(zoomed_im, save_path)

                # Map the keypoints into the coordinates of the zoomed crop.
                kpt_2d = np.array(
                    [
                        [
                            (x - (center[0] - scale / 2)) * out_size / scale,
                            (y - (center[1] - scale / 2)) * out_size / scale,
                        ]
                        for [x, y] in kpt_2d
                    ]
                )
                kpt_2d_gt = np.array(
                    [
                        [
                            (x - (center[0] - scale / 2)) * out_size / scale,
                            (y - (center[1] - scale / 2)) * out_size / scale,
                        ]
                        for [x, y] in kpt_2d_gt
                    ]
                )

                # draw est bbox
                linewidth = 3
                visualizer = MyVisualizer(zoomed_im[:, :, ::-1], self._metadata)
                zoomed_im_vis = visualizer.draw_bbox3d_and_center(
                    kpt_2d, top_color=_GREEN, bottom_color=_GREY, linewidth=linewidth, draw_center=True
                )
                save_path = osp.join(
                    cfg.OUTPUT_DIR, "vis", "{}_{}_{:06d}_gt_est.png".format(cls_name, scene_id, im_id)
                )
                mmcv.mkdir_or_exist(osp.dirname(save_path))
                zoomed_im_vis.save(save_path)
                print("zoomed_in_vis saved to:", save_path)

                im_vis = vis_image_bboxes_cv2(im_ori, [bbox], [f"{cls_name}_{score}"])

                self.ren.clear()
                self.ren.draw_background(mmcv.bgr2gray(im_ori, keepdim=True))
                self.ren.draw_model(self.ren_models[self.data_ref.objects.index(cls_name)], pose_est)
                ren_im, _ = self.ren.finish()
                grid_show(
                    [ren_im[:, :, ::-1], im_vis[:, :, ::-1]],
                    [f"ren_im_{cls_name}", f"{scene_id}/{im_id}_{score}"],
                    row=1,
                    col=2,
                )

            output["time"] += time.perf_counter() - start_process_time

            if cls_name not in self._predictions:
                self._predictions[cls_name] = OrderedDict()

            result = {"score": score, "R": rot_est, "t": trans_est, "time": output["time"]}
            self._predictions[cls_name][file_name] = result
def test_vis():
    """Show every image of the dataset named by ``sys.argv[1]``.

    All annotated instances of an image are drawn together. For each image a
    2x2 grid is shown with the image, the mask/bbox overlay, the projected 3D
    boxes and the depth map.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    obj_names = meta.objs

    tic = time.perf_counter()
    records = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(records), time.perf_counter() - tic))

    os.makedirs("output/{}-data-vis".format(dset_name), exist_ok=True)

    def _swap_rb(image):
        # Images are read as BGR; reverse the channel order for display.
        return image[:, :, [2, 1, 0]]

    for record in records:
        img = read_image_cv2(record["file_name"], format="BGR")
        # presumably converts millimetres to metres - confirm with the dataset
        depth = mmcv.imread(record["depth_file"], "unchanged") / 1000.0
        height, width = img.shape[:2]
        annos = record["annotations"]
        cam = record["cam"]

        masks = [cocosegm2mask(a["segmentation"], height, width) for a in annos]
        boxes_xyxy = np.array([BoxMode.convert(a["bbox"], a["bbox_mode"], BoxMode.XYXY_ABS) for a in annos])
        rotations = [quat2mat(a["quat"]) for a in annos]
        boxes3d_2d = [
            misc.project_pts(a["bbox3d_and_center"], cam, rot, a["trans"])
            for a, rot in zip(annos, rotations)
        ]
        # category_id is a 0-based label
        labels = [obj_names[a["category_id"]] for a in annos]

        img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=boxes_xyxy, labels=labels)

        # Draw the projected 3D box of every instance onto one copy of the image.
        img_vis_kpts2d = img.copy()
        for box3d_2d in boxes3d_2d:
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, box3d_2d)

        grid_show(
            [_swap_rb(img), _swap_rb(img_vis), _swap_rb(img_vis_kpts2d), depth],
            [f"img:{record['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )