# Load models of objects that appear in the current scene
obj_ids = {gt['obj_id'] for gts in scene_gt.values() for gt in gts}
models = {}
for obj_id in obj_ids:
    models[obj_id] = inout.load_ply(par['model_mpath'].format(obj_id))

# Visualize GT poses in the selected images. When `im_ids` is non-empty it
# restricts the images; the intersection is re-sorted because a set has no
# defined iteration order.
im_ids_curr = sorted(scene_info)
if im_ids:
    im_ids_curr = sorted(set(im_ids_curr).intersection(im_ids))

for im_id in im_ids_curr:
    print('scene: {}, im: {}'.format(scene_id, im_id))

    # Load the images
    rgb = inout.read_im(par['test_rgb_mpath'].format(scene_id, im_id))
    depth = inout.read_depth(
        par['test_depth_mpath'].format(scene_id, im_id))
    # Cast to float before the in-place scaling below. `np.float` was only an
    # alias of the builtin float (float64) and no longer exists in NumPy.
    depth = depth.astype(np.float64)
    depth *= par['cam']['depth_scale']  # to [mm]

    # Render the objects at the ground truth poses
    # (columns, rows) of the depth image, i.e. width x height.
    im_size = (depth.shape[1], depth.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.float64)
    ren_depth = np.zeros(depth.shape, np.float64)

    # When `gt_ids` is non-empty it restricts which GT poses of this image
    # are used; ascending index order is kept, as in the unfiltered case.
    gt_ids_curr = range(len(scene_gt[im_id]))
    if gt_ids:
        gt_ids_curr = sorted(set(gt_ids_curr).intersection(gt_ids))

    for gt_id in gt_ids_curr:
        gt = scene_gt[im_id][gt_id]
        model = models[gt['obj_id']]
# Process one result file (pose estimates for one image/object pair) at a time
for res_path in res_paths:
    # Parse image ID and object ID from the file name, which has the form
    # "<im_id>_<obj_id>.<ext>"
    filename = os.path.basename(res_path).split('.')[0]
    # NOTE(review): `im_id` must already be bound before the first iteration
    # for this assignment to work - confirm it is initialised ahead of the
    # loop.
    im_id_prev = im_id
    im_id, obj_id = map(int, filename.split('_'))

    print('Calculating {} error - scene: {}, im: {}, obj: {}'.format(
        error_type, scene_id, im_id, obj_id))

    # Load depth image if VSD is selected. The image is re-read only when the
    # image ID changes; otherwise `depth_im` from the previous iteration is
    # reused.
    if error_type == 'vsd' and im_id != im_id_prev:
        depth_path = dp['test_depth_mpath'].format(scene_id, im_id)
        depth_im = inout.read_depth(depth_path)
        # NOTE(review): the in-place multiply needs a floating-point array -
        # confirm that inout.read_depth returns one.
        depth_im *= dp['cam']['depth_scale']  # to [mm]

    # Load camera matrix
    if error_type in ('vsd', 'cou'):
        K = scene_info[im_id]['cam_K']

    # Load pose estimates
    ests = inout.load_poses(res_path)

    # Sort the estimates by score (in descending order). Each element is an
    # (original_index, estimate) pair, so the position in `ests` is kept.
    ests_sorted = sorted(enumerate(ests),
                         key=lambda x: x[1]['score'],
                         reverse=True)

    # Consider only the top N estimated poses
obj_id = scene_id # The object id is the same as scene id for this dataset model = inout.load_ply(model_mpath.format(obj_id)) # Transformation which was applied to the object models (its inverse will # be applied to the GT poses): # 1) Translate the bounding box center to the origin t_model = bbox_cens[obj_id - 1, :].reshape((3, 1)) im_id_out = 0 for im_id in im_ids: # if im_id % 10 == 0: print('scene,view: ' + str(scene_id) + ',' + str(im_id)) # Load the RGB and depth image rgb = inout.read_im(rgb_in_mpath.format(scene_id, im_id)) depth = inout.read_depth(depth_in_mpath.format(scene_id, im_id)) depth *= 10.0 # Convert depth map to [100um] # Save the RGB and depth image inout.write_im(rgb_out_mpath.format(scene_id, im_id_out), rgb) inout.write_depth(depth_out_mpath.format(scene_id, im_id_out), depth) scene_info[im_id_out] = { 'cam_K': par['cam']['K'].flatten().tolist() } # Process the GT poses poses = load_tejani_poses(pose_mpath.format(scene_id, im_id)) scene_gt[im_id_out] = [] for pose in poses: