def get_model_poses(self, scene_id, ann_id):
    '''
    **Input:**

    - scene_id: int of the scene index.

    - ann_id: int of the annotation index.

    **Output:**

    - obj_list: list of int of object indices.

    - pose_list: list of 4x4 matrices of object poses.

    - camera_pose: 4x4 matrix of the camera pose relative to the first frame.

    - align_mat: 4x4 matrix of the first camera frame relative to the table.
    '''
    scene_dir = os.path.join(self.root, 'scenes')
    camera_poses_path = os.path.join(self.root, 'scenes',
                                     get_scene_name(scene_id), self.camera,
                                     'camera_poses.npy')
    camera_poses = np.load(camera_poses_path)
    camera_pose = camera_poses[ann_id]
    align_mat_path = os.path.join(self.root, 'scenes',
                                  get_scene_name(scene_id), self.camera,
                                  'cam0_wrt_table.npy')
    align_mat = np.load(align_mat_path)
    scene_reader = xmlReader(
        os.path.join(scene_dir, get_scene_name(scene_id), self.camera,
                     'annotations', '%04d.xml' % (ann_id, )))
    posevectors = scene_reader.getposevectorlist()
    obj_list = []
    pose_list = []
    for posevector in posevectors:
        obj_idx, mat = parse_posevector(posevector)
        obj_list.append(obj_idx)
        pose_list.append(mat)
    return obj_list, pose_list, camera_pose, align_mat
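# Illustrative sketch (not part of the original API; `evaluator` stands for a
# hypothetical instance of this evaluation class): how the outputs of
# get_model_poses compose. `pose_list[i]` is an object pose in the camera
# frame of `ann_id`; chaining it with `camera_pose` (camera of ann_id w.r.t.
# camera 0) and `align_mat` (camera 0 w.r.t. the table) gives the pose in the
# table-aligned frame, mirroring how eval_scene builds `table_trans` below.
def _example_object_poses_in_table_frame(evaluator, scene_id, ann_id):
    obj_list, pose_list, camera_pose, align_mat = evaluator.get_model_poses(
        scene_id, ann_id)
    cam_to_table = np.matmul(align_mat, camera_pose)
    table_frame_poses = [np.matmul(cam_to_table, pose) for pose in pose_list]
    return dict(zip(obj_list, table_frame_poses))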
def continuous_eval_scene(self,
                          scene_id,
                          annid_offset,
                          views,
                          grasp_group,
                          init=False,
                          test_time=False):
    scene_id = get_sceneId4det(scene_id)
    eval_dir = os.path.join(self.root, 'scenes', get_scene_name(scene_id),
                            self.camera, 'eval', self.offset(annid_offset))
    if not os.path.exists(eval_dir):
        os.mkdir(eval_dir)
    eval_path = os.path.join(eval_dir, self.views2filename(views))
    if not os.path.exists(eval_path):
        scene_accuracy = self.eval_scene(scene_id, grasp_group=grasp_group)[0]
        log_str(
            f'Scene: {scene_id:04} +{annid_offset:02} ({eval_path[-20:]}) '
            f'Accuracy: {np.mean(scene_accuracy):.2f} {views}')
        np.save(eval_path, scene_accuracy)
    # elif test_time:
    #     scene_accuracy = self.eval_scene(scene_id, grasp_group=grasp_group)[0]
    if init:
        return not os.path.exists(eval_path)
    else:
        scene_accuracy = np.load(eval_path)
        return scene_accuracy
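# Illustrative sketch (assumption, not from the original code; `evaluator` is
# a hypothetical instance of this class): averaging the cached results over a
# few annotation offsets for one scene. Each non-init call returns the stacked
# accuracy arrays that eval_scene saved; np.mean over them is the AP-style
# number printed by the log line above.
def _example_scene_ap_over_offsets(evaluator, scene_id, views, grasp_group,
                                   offsets=(0, 1, 2)):
    per_offset = [
        evaluator.continuous_eval_scene(scene_id, off, views, grasp_group)
        for off in offsets
    ]
    return float(np.mean([np.mean(acc) for acc in per_offset]))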
def get_scene_models(self, scene_id, ann_id):
    '''
    Return the point clouds and dex-net models of the objects in the scene,
    expressed in model (object) coordinates.
    '''
    model_dir = os.path.join(self.root, 'models')
    scene_reader = xmlReader(
        os.path.join(self.root, 'scenes', get_scene_name(scene_id),
                     self.camera, 'annotations', '%04d.xml' % (ann_id, )))
    posevectors = scene_reader.getposevectorlist()
    obj_list = []
    model_list = []
    dexmodel_list = []
    for posevector in posevectors:
        obj_idx, _ = parse_posevector(posevector)
        obj_list.append(obj_idx)
    for obj_idx in obj_list:
        model = o3d.io.read_point_cloud(
            os.path.join(model_dir, '%03d' % obj_idx, 'nontextured.ply'))
        dex_cache_path = os.path.join(self.root, 'dex_models',
                                      '%03d.pkl' % obj_idx)
        if os.path.exists(dex_cache_path):
            with open(dex_cache_path, 'rb') as f:
                dexmodel = pickle.load(f)
        else:
            dexmodel = load_dexnet_model(
                os.path.join(model_dir, '%03d' % obj_idx, 'textured'))
        points = np.array(model.points)
        model_list.append(points)
        dexmodel_list.append(dexmodel)
    return model_list, dexmodel_list, obj_list
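# Illustrative sketch (assumption, not part of the original code): pre-building
# the pickle cache under `dex_models/` that get_scene_models reads. Loading a
# dex-net model from the textured mesh is slow, so caching it once per object
# id avoids repeating the work for every scene.
def _example_build_dex_cache(root, obj_idx):
    cache_dir = os.path.join(root, 'dex_models')
    os.makedirs(cache_dir, exist_ok=True)
    cache_path = os.path.join(cache_dir, '%03d.pkl' % obj_idx)
    if not os.path.exists(cache_path):
        dexmodel = load_dexnet_model(
            os.path.join(root, 'models', '%03d' % obj_idx, 'textured'))
        with open(cache_path, 'wb') as f:
            pickle.dump(dexmodel, f)
    return cache_path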
def get_grasp_group(self, scene_id, ann_id):
    camera_poses_path = os.path.join(self.dataset_root, 'scenes',
                                     get_scene_name(scene_id), self.camera,
                                     'camera_poses.npy')
    if self.quick:
        # use cached grasp files
        npy_file_path = os.path.join(self.dump_dir, get_scene_name(scene_id),
                                     self.camera, '%04d.npy' % (ann_id, ))
        gg = AroundViewGraspGroup().from_npy(npy_file_path, camera_poses_path)
    else:
        data_idx = scene_id * 256 + ann_id
        data = self.dataset.__getitem__(data_idx)
        data['point_clouds'] = to_var(data['point_clouds']).unsqueeze(0)
        data['cloud_colors'] = to_var(data['cloud_colors']).unsqueeze(0)
        # Forward pass
        with torch.no_grad():
            end_points = self.net(data)
            grasp_preds = pred_decode(end_points)
        preds = grasp_preds[0].detach().cpu().numpy()
        gg = GraspGroup(preds)
        # collision detection
        if self.collision_thresh > 0:
            cloud, _ = self.dataset.get_data(data_idx, return_raw_cloud=True)
            mfcdetector = ModelFreeCollisionDetector(
                cloud, voxel_size=self.voxel_size)
            collision_mask = mfcdetector.detect(
                gg,
                approach_dist=0.05,
                collision_thresh=self.collision_thresh)
            gg = gg[~collision_mask]
        gg = AroundViewGraspGroup(np.array([ann_id] * len(gg)),
                                  np.load(camera_poses_path),
                                  gg.grasp_group_array)
    return gg
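# Illustrative sketch (assumption, not part of the original code): the same
# model-free collision filtering used above, as a standalone step. `cloud` is
# an (N, 3) scene point cloud and `gg` a GraspGroup; grasps whose gripper
# volume intersects the cloud are dropped before evaluation.
def _example_filter_collisions(cloud, gg, voxel_size=0.01,
                               collision_thresh=0.01):
    detector = ModelFreeCollisionDetector(cloud, voxel_size=voxel_size)
    collision_mask = detector.detect(gg,
                                     approach_dist=0.05,
                                     collision_thresh=collision_thresh)
    return gg[~collision_mask]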
def eval_scene(self, scene_id, dump_folder, TOP_K=50, return_list=False,
               vis=False, max_width=0.1):
    '''
    **Input:**

    - scene_id: int of the scene index.

    - dump_folder: string of the folder that saves the dumped npy files.

    - TOP_K: int of the number of top grasps to evaluate.

    - return_list: bool of whether to also return the per-view result lists.

    - vis: bool of whether to visualize the result.

    - max_width: float of the maximum gripper width in evaluation.

    **Output:**

    - scene_accuracy: list of np.array of shape (TOP_K, 6), one accuracy
      matrix per evaluated annotation view.
    '''
    config = get_config()
    table = create_table_points(1.0, 1.0, 0.05, dx=-0.5, dy=-0.5, dz=-0.05,
                                grid_size=0.008)

    list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]

    model_list, dexmodel_list, _ = self.get_scene_models(scene_id, ann_id=0)

    model_sampled_list = list()
    for model in model_list:
        model_sampled = voxel_sample_points(model, 0.008)
        model_sampled_list.append(model_sampled)

    scene_accuracy = []
    grasp_list_list = []
    score_list_list = []
    collision_list_list = []

    # load the grasps predicted at ann_id == 0 and transform them to every
    # evaluated view
    camera_poses_path = os.path.join(self.root, 'scenes',
                                     get_scene_name(scene_id), self.camera,
                                     'camera_poses.npy')
    grasp_group_0 = AroundViewGraspGroup().from_npy(
        os.path.join(dump_folder, get_scene_name(scene_id), self.camera,
                     '%04d.npy' % (0, )), camera_poses_path)
    for ann_id in range(0, 10):
        grasp_group = grasp_group_0.to_view(ann_id)
        _, pose_list, camera_pose, align_mat = self.get_model_poses(
            scene_id, ann_id)
        table_trans = transform_points(
            table, np.linalg.inv(np.matmul(align_mat, camera_pose)))

        # clip width to [0, max_width]
        gg_array = grasp_group.grasp_group_array
        min_width_mask = (gg_array[:, 1] < 0)
        max_width_mask = (gg_array[:, 1] > max_width)
        gg_array[min_width_mask, 1] = 0
        gg_array[max_width_mask, 1] = max_width
        grasp_group.grasp_group_array = gg_array

        grasp_list, score_list, collision_mask_list = eval_grasp(
            grasp_group, model_sampled_list, dexmodel_list, pose_list,
            config, table=table_trans, voxel_size=0.008, TOP_K=TOP_K)

        # remove empty entries
        grasp_list = [x for x in grasp_list if len(x) != 0]
        score_list = [x for x in score_list if len(x) != 0]
        collision_mask_list = [
            x for x in collision_mask_list if len(x) != 0
        ]

        if len(grasp_list) == 0:
            grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
            scene_accuracy.append(grasp_accuracy)
            grasp_list_list.append([])
            score_list_list.append([])
            collision_list_list.append([])
            print('\rMean Accuracy for scene:{} ann:{}='.format(
                scene_id, ann_id),
                  np.mean(grasp_accuracy[:, :]),
                  end='')
            continue

        # concat into scene level
        grasp_list, score_list, collision_mask_list = np.concatenate(
            grasp_list), np.concatenate(score_list), np.concatenate(
                collision_mask_list)

        if vis:
            t = o3d.geometry.PointCloud()
            t.points = o3d.utility.Vector3dVector(table_trans)
            model_list = generate_scene_model(self.root,
                                              'scene_%04d' % scene_id,
                                              ann_id,
                                              return_poses=False,
                                              align=False,
                                              camera=self.camera)
            import copy
            gg = AroundViewGraspGroup(copy.deepcopy(grasp_list))
            scores = np.array(score_list)
            scores = scores / 2 + 0.5  # -1 -> 0, 0 -> 0.5, 1 -> 1
            scores[collision_mask_list] = 0.3
            gg.scores = scores
            gg.widths = 0.1 * np.ones((len(gg)), dtype=np.float32)
            grasps_geometry = gg.to_open3d_geometry_list()
            pcd = self.loadScenePointCloud(scene_id, self.camera, ann_id)
            o3d.visualization.draw_geometries([pcd, *grasps_geometry])
            # o3d.visualization.draw_geometries([pcd, *grasps_geometry, *model_list])
            # o3d.visualization.draw_geometries([*grasps_geometry, *model_list, t])

        # sort in scene level
        grasp_confidence = grasp_list[:, 0]
        indices = np.argsort(-grasp_confidence)
        grasp_list, score_list, collision_mask_list = grasp_list[
            indices], score_list[indices], collision_mask_list[indices]

        grasp_list_list.append(grasp_list)
        score_list_list.append(score_list)
        collision_list_list.append(collision_mask_list)

        # calculate AP
        grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
        for fric_idx, fric in enumerate(list_coe_of_friction):
            for k in range(0, TOP_K):
                if k + 1 > len(score_list):
                    grasp_accuracy[k, fric_idx] = np.sum(
                        ((score_list <= fric) &
                         (score_list > 0)).astype(int)) / (k + 1)
                else:
                    grasp_accuracy[k, fric_idx] = np.sum(
                        ((score_list[0:k + 1] <= fric) &
                         (score_list[0:k + 1] > 0)).astype(int)) / (k + 1)

        print('\rMean Accuracy for scene:%04d ann:%04d = %.3f' %
              (scene_id, ann_id, 100.0 * np.mean(grasp_accuracy[:, :])),
              end='',
              flush=True)
        scene_accuracy.append(grasp_accuracy)
    if not return_list:
        return scene_accuracy
    else:
        return scene_accuracy, grasp_list_list, score_list_list, collision_list_list
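# Illustrative sketch (assumption, not part of the original code): reducing
# the per-view accuracy matrices returned by eval_scene to a single scene AP,
# i.e. the mean over views, the top-K ranks and the six friction coefficients,
# matching the per-view percentage printed inside the loop above.
def _example_scene_ap(scene_accuracy):
    return 100.0 * float(np.mean(np.stack(scene_accuracy, axis=0)))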