# Requires module-level imports: pathlib, numpy as np, and lib.pc_utils as pc_utils.
def visualize_predictions(self, datum, output, iteration):
  # FIXME: Refactor visualization code.
  coords = datum['coords'].numpy()
  batch_size = coords[:, 0].max() + 1
  output_path = pathlib.Path(self.config.visualize_path)
  output_path.mkdir(exist_ok=True)
  for i in range(batch_size):
    # Select the points of the i-th sample (column 0 of the batched
    # coordinates is the batch index).
    coords_mask = coords[:, 0] == i
    coords_b = coords[coords_mask, 1:]
    # Visualize RGB input. NOTE: the per-sample mask must be applied to the
    # features as well; the original code used the full batch here, which
    # breaks the hstack below whenever batch_size > 1.
    input_b = datum['input'][coords_mask]
    if input_b.shape[1] > 3:
      # First three feature channels are RGB normalized to [-0.5, 0.5].
      rgb_b = ((input_b[:, :3] + 0.5) * 255).numpy()
    else:
      # Single-channel features: min-max normalize and replicate to RGB.
      rgb_b = ((input_b.repeat(1, 3) - input_b.min()) /
               (input_b.max() - input_b.min()) * 255).numpy()
    ptc = np.hstack((coords_b, rgb_b))
    # Keep only detections above the confidence threshold.
    bbox_scores = output['detection'][i][:, -1]
    bbox_mask = bbox_scores > self.config.visualize_min_confidence
    detection = output['detection'][i][bbox_mask]
    if self.dataset.IS_ROTATION_BBOX:
      # Rotated boxes: (x1, y1, z1, x2, y2, z2, yaw) + class label.
      bboxes = pc_utils.visualize_bboxes(detection[:, :7], detection[:, 7],
                                         bbox_param='xyzxyzr')
    else:
      # Axis-aligned boxes: (x1, y1, z1, x2, y2, z2) + class label.
      bboxes = pc_utils.visualize_bboxes(detection[:, :6], detection[:, 6])
    pc_utils.visualize_pcd(ptc, bboxes)
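# A minimal, self-contained sketch (not part of the pipeline) of the batched
# sparse-coordinate convention the loop above relies on: column 0 holds the
# batch index and columns 1: hold the voxel xyz, so masking on column 0
# splits the batch back into per-sample point sets. The toy array below is
# invented for illustration.
import numpy as np

coords = np.array([
    [0, 1, 2, 3],  # sample 0
    [0, 4, 5, 6],  # sample 0
    [1, 7, 8, 9],  # sample 1
])
for b in range(coords[:, 0].max() + 1):
  sample_xyz = coords[coords[:, 0] == b, 1:]
  print(f'sample {b}: {sample_xyz.shape[0]} points')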
# Per-scene loop body of the ScanNet visualization script. The names
# SCANNET_ALIGNMENT_PATH, OURS_PRED_PATH, VOTENET_PRED_PATH, NUM_BBOX_POINTS,
# filename, mesh, and i are defined earlier in the script.
scene_f = SCANNET_ALIGNMENT_PATH % (filename, filename)
# Parse the 4x4 axis-alignment matrix from the ScanNet scene metadata
# (slicing at 16 drops the 'axisAlignment = ' prefix).
alignment_txt = [
    l for l in read_txt(scene_f) if l.startswith('axisAlignment = ')
][0]
rot = np.array([float(x) for x in alignment_txt[16:].split()]).reshape(4, 4)
mesh.transform(rot)
# Load our predictions and render ground truth, input, and our results with
# a shared camera pose.
pred_ours = np.load(os.path.join(OURS_PRED_PATH, 'out_%03d.npy.npz' % i))
gt = pc_utils.visualize_bboxes(pred_ours['gt'][:, :6], pred_ours['gt'][:, 6],
                               num_points=NUM_BBOX_POINTS)
pred_ours = pc_utils.visualize_bboxes(pred_ours['pred'][:, :6],
                                      pred_ours['pred'][:, 6],
                                      num_points=NUM_BBOX_POINTS)
params = pc_utils.visualize_pcd(gt, mesh, save_image=f'viz_{filename}_gt.png')
pc_utils.visualize_pcd(mesh, camera_params=params,
                       save_image=f'viz_{filename}_input.png')
pc_utils.visualize_pcd(pred_ours, mesh, camera_params=params,
                       save_image=f'viz_{filename}_ours.png')
# Load VoteNet predictions and reorder box corner axes from (x, y, z) to
# (x, z, -y) to match the mesh coordinate frame.
pred_votenet = np.load(os.path.join(VOTENET_PRED_PATH, f'{filename}.npy'),
                       allow_pickle=True)[0]
votenet_preds = []
for pred_cls, pred_bbox, pred_score in pred_votenet:
  pred_bbox = pred_bbox[:, (0, 2, 1)]
  pred_bbox[:, -1] *= -1
  # ASSUMPTION: the original line is truncated after `append(`; collapsing
  # the corner points to an (xmin, ymin, zmin, xmax, ymax, zmax, cls) row is
  # a plausible completion consistent with visualize_bboxes above.
  votenet_preds.append(np.hstack((pred_bbox.min(0), pred_bbox.max(0),
                                  [pred_cls])))
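# A hedged sketch of the corner-to-box conversion assumed in the completion
# above: if each VoteNet prediction stores an (8, 3) array of box corners,
# collapsing it with min/max yields the (xmin, ymin, zmin, xmax, ymax, zmax)
# parametrization used elsewhere in this script. `corners_to_minmax_box` is
# a hypothetical helper, not part of the codebase.
import numpy as np

def corners_to_minmax_box(corners, cls_id):
  """Collapse (8, 3) corner points to a 7-vector (min xyz, max xyz, class)."""
  return np.hstack((corners.min(0), corners.max(0), [cls_id]))

# Usage on a unit cube centered at the origin:
cube = np.array([[x, y, z] for x in (-.5, .5) for y in (-.5, .5)
                 for z in (-.5, .5)])
print(corners_to_minmax_box(cube, 3))  # -> [-0.5 -0.5 -0.5  0.5  0.5  0.5  3.]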
import os

import numpy as np

import lib.pc_utils as pc_utils
from lib.utils import read_txt

OURS_PRED_PATH = 'outputs/visualization/ours_stanford'
STANFORD_PATH = '/cvgl/group/Stanford3dDataset_v1.2/Area_5'
NUM_BBOX_POINTS = 1000

files = sorted(read_txt('/scr/jgwak/Datasets/stanford3d/test.txt'))
for i, fn in enumerate(files):
  # Recover the area name (e.g. 'office_34') from the preprocessed filename.
  area_name = os.path.splitext(fn.split(os.sep)[-1])[0]
  area_name = '_'.join(area_name.split('_')[1:])[:-2]
  # Debug override for visualizing a single scene:
  # area_name = 'office_34'
  # i = [i for i, fn in enumerate(files) if area_name in fn][0]
  if area_name.startswith('WC_') or area_name.startswith('hallway_'):
    continue
  # Load the raw Stanford point cloud for this area.
  ptc_fn = os.path.join(STANFORD_PATH, area_name, f'{area_name}.txt')
  ptc = np.array([l.split() for l in read_txt(ptc_fn)]).astype(float)
  # Render ground-truth and predicted boxes with a shared camera pose.
  pred_ours = np.load(os.path.join(OURS_PRED_PATH, 'out_%03d.npy.npz' % i))
  gt = pc_utils.visualize_bboxes(pred_ours['gt'][:, :6], pred_ours['gt'][:, 6],
                                 num_points=NUM_BBOX_POINTS)
  pred_ours = pc_utils.visualize_bboxes(pred_ours['pred'][:, :6],
                                        pred_ours['pred'][:, 6],
                                        num_points=NUM_BBOX_POINTS)
  params = pc_utils.visualize_pcd(gt, ptc,
                                  save_image=f'viz_Area5_{area_name}_gt.png')
  pc_utils.visualize_pcd(ptc, camera_params=params,
                         save_image=f'viz_Area5_{area_name}_input.png')
  pc_utils.visualize_pcd(pred_ours, ptc, camera_params=params,
                         save_image=f'viz_Area5_{area_name}_ours.png')
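# The script above assumes a specific .npz layout, inferred from the
# indexing: arrays 'gt' and 'pred' of shape (N, 7), with columns 0-5 the
# axis-aligned box (xmin, ymin, zmin, xmax, ymax, zmax) and column 6 the
# class label. A minimal sketch that writes a compatible dummy file; note
# np.savez appends '.npz' to a filename that lacks it, which explains the
# 'out_%03d.npy.npz' names the script loads.
import numpy as np

dummy = np.array([[0., 0., 0., 1., 2., 1., 8.]])  # one box of class 8
np.savez('out_000.npy', gt=dummy, pred=dummy)  # written as out_000.npy.npz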
# Requires module-level imports: time, numpy as np, torch, open3d as o3d,
# MinkowskiEngine as ME, lib.pc_utils as pc_utils, plus the project's
# Voxelizer, get_config, load_dataset, initialize_data_loader, and
# load_pipeline helpers, and the INPUT_PCD, INPUT_MESH, LOCFEAT_IDX, and
# MIN_CONF constants.
def main():
  # Read the input point cloud and report its size.
  pcd = o3d.io.read_point_cloud(INPUT_PCD)
  pcd_xyz, pcd_feats = np.asarray(pcd.points), np.asarray(pcd.colors)
  print(f'Finished reading {INPUT_PCD}:')
  print(f'# points: {pcd_xyz.shape[0]}')
  print(f'volume: {np.prod(pcd_xyz.max(0) - pcd_xyz.min(0))} m^3')
  # Append height above the floor as an extra feature channel; the 0.99th
  # percentile serves as a robust floor estimate.
  sparse_voxelizer = Voxelizer(voxel_size=0.05)
  height = pcd_xyz[:, LOCFEAT_IDX].copy()
  height -= np.percentile(height, 0.99)
  pcd_feats = np.hstack((pcd_feats, height[:, None]))
  # Time voxelization over 7 runs, discarding the first two as warm-up.
  preprocess = []
  for i in range(7):
    start = time.time()
    coords, feats, labels, transformation = sparse_voxelizer.voxelize(
        pcd_xyz, pcd_feats, None)
    preprocess.append(time.time() - start)
  print('Voxelization time average: ', np.mean(preprocess[2:]))
  coords = ME.utils.batched_coordinates([torch.from_numpy(coords).int()])
  feats = torch.from_numpy(feats.astype(np.float32)).to('cuda')
  # Build the data loader and load the pretrained detection pipeline.
  config = get_config()
  DatasetClass = load_dataset(config.dataset)
  dataloader = initialize_data_loader(
      DatasetClass,
      config,
      threads=config.threads,
      phase=config.test_phase,
      augment_data=False,
      shuffle=False,
      repeat=False,
      batch_size=config.test_batch_size,
      limit_numpoints=False)
  pipeline_model = load_pipeline(config, dataloader.dataset)
  if config.weights.lower() != 'none':
    state = torch.load(config.weights)
    pipeline_model.load_state_dict(
        state['state_dict'], strict=(not config.lenient_weight_loading))
  pipeline_model.eval()
  # Time network inference over 7 runs, again discarding two warm-up runs.
  evaltime = []
  for i in range(7):
    start = time.time()
    sinput = ME.SparseTensor(feats, coords).to('cuda')
    datum = {'sinput': sinput, 'anchor_match_coords': None}
    outputs = pipeline_model(datum, False)
    evaltime.append(time.time() - start)
  print('Network runtime average: ', np.mean(evaltime[2:]))
  # Keep detections above the confidence threshold.
  pred = outputs['detection'][0]
  pred_mask = pred[:, -1] > MIN_CONF
  pred = pred[pred_mask]
  print(f'Detected {pred.shape[0]} instances')
  # Shift boxes by half a voxel, shrink each side by another half voxel, and
  # enforce a minimum extent of 0.1 voxel.
  bbox_xyz = pred[:, :6]
  bbox_xyz += 0.5
  bbox_xyz[:, :3] += 0.5
  bbox_xyz[:, 3:] -= 0.5
  bbox_xyz[:, 3:] = np.maximum(bbox_xyz[:, 3:], bbox_xyz[:, :3] + 0.1)
  # Map box corners from voxel coordinates back to world coordinates by
  # inverting the 4x4 voxelization transform in homogeneous coordinates.
  bbox_xyz = bbox_xyz.reshape(-1, 3)
  bbox_xyz1 = np.hstack((bbox_xyz, np.ones((bbox_xyz.shape[0], 1))))
  bbox_xyz = np.linalg.solve(transformation.reshape(4, 4),
                             bbox_xyz1.T).T[:, :3].reshape(-1, 6)
  pred = np.hstack((bbox_xyz, pred[:, 6:]))
  # Render the predicted boxes over the input mesh.
  pred_pcd = pc_utils.visualize_bboxes(pred[:, :6], pred[:, 6],
                                       num_points=1000)
  mesh = o3d.io.read_triangle_mesh(INPUT_MESH)
  mesh.compute_vertex_normals()
  pc_utils.visualize_pcd(mesh, pred_pcd)
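# A standalone sketch of the voxel-to-world mapping used at the end of
# main(): the voxelizer returns a 4x4 transform T taking world points to
# voxel coordinates, so world points are recovered by solving
# T @ x_world = x_voxel in homogeneous coordinates. The transform and point
# below are invented for the round-trip check.
import numpy as np

def voxel_to_world(points, transformation):
  """Map (N, 3) voxel-space points back to world space."""
  points_h = np.hstack((points, np.ones((points.shape[0], 1))))
  return np.linalg.solve(transformation.reshape(4, 4), points_h.T).T[:, :3]

T = np.diag([20., 20., 20., 1.])  # e.g. world-to-voxel scaling at 0.05 m voxels
world = np.array([[1.0, 2.0, 3.0]])
voxel = (T @ np.hstack((world, [[1.]])).T).T[:, :3]
print(np.allclose(voxel_to_world(voxel, T), world))  # True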