def forward(self, point_clouds, target_transl, target_rot, transl_err, rot_err):
    """
    Points Distance Error
    Args:
        point_clouds: list of B Point Clouds, each in the relative GT frame
        target_transl: ground-truth translations
        target_rot: ground-truth rotations
        transl_err: network estimate of the translations
        rot_err: network estimate of the rotations

    Returns:
        The mean distance between 3D points
    """
    # start = time.time()
    total_loss = torch.tensor([0.0]).to(transl_err.device)
    for i in range(len(point_clouds)):
        point_cloud_gt = point_clouds[i].to(transl_err.device)
        point_cloud_out = point_clouds[i].clone()

        R_target = quat2mat(target_rot[i])
        T_target = tvector2mat(target_transl[i])
        RT_target = torch.mm(T_target, R_target)

        R_predicted = quat2mat(rot_err[i])
        T_predicted = tvector2mat(transl_err[i])
        RT_predicted = torch.mm(T_predicted, R_predicted)

        RT_total = torch.mm(RT_target.inverse(), RT_predicted)

        point_cloud_out = rotate_forward(point_cloud_out, RT_total)
        error = (point_cloud_out - point_cloud_gt).norm(dim=0)
        # clamp() is not in-place: assign the result to cap per-point errors at 100
        error = error.clamp(max=100.)
        total_loss += error.mean()
    # end = time.time()
    # print("3D Distance Time: ", end - start)

    return total_loss / target_transl.shape[0]
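# A minimal, self-contained sketch of the idea behind the loss above: the
# ground-truth and the predicted decalibration are both composed as 4x4
# homogeneous matrices, the residual transform RT_target^-1 @ RT_predicted is
# applied to the cloud, and the per-point displacement is averaged. The helpers
# below (make_rt, mean_point_distance, _demo_points_distance) are hypothetical
# stand-ins for quat2mat / tvector2mat / rotate_forward, not part of the
# repository API.
import torch


def make_rt(rotation_3x3, translation_3):
    """Build a 4x4 homogeneous transform from a 3x3 rotation and a translation."""
    rt = torch.eye(4)
    rt[:3, :3] = rotation_3x3
    rt[:3, 3] = translation_3
    return rt


def mean_point_distance(points_h, rt_target, rt_predicted):
    """Mean 3D distance of a 4xN homogeneous cloud after the residual transform."""
    rt_residual = torch.mm(rt_target.inverse(), rt_predicted)
    moved = torch.mm(rt_residual, points_h)
    return (moved[:3] - points_h[:3]).norm(dim=0).clamp(max=100.).mean()


def _demo_points_distance():
    """Toy check: a pure 0.2 m translation offset moves every point by 0.2 m."""
    pts = torch.rand(4, 1000)
    pts[3] = 1.                                   # homogeneous coordinate
    rt_gt = make_rt(torch.eye(3), torch.tensor([1.0, 0.0, 0.0]))
    rt_pred = make_rt(torch.eye(3), torch.tensor([1.2, 0.0, 0.0]))
    print(mean_point_distance(pts, rt_gt, rt_pred))   # ~0.2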
def main(_config, seed):
    global EPOCH, weights
    if _config['weight'] is not None:
        weights = _config['weight']

    dataset_class = DatasetVisibilityKittiSingle
    img_shape = (384, 1280)

    split = 'test'
    if _config['random_initial_pose']:
        split = 'test_random'

    maps_folder = 'local_maps'
    if _config['maps_folder'] is not None:
        maps_folder = _config['maps_folder']
    if _config['test_sequence'] is None:
        raise TypeError('test_sequences cannot be None')
    else:
        if isinstance(_config['test_sequence'], int):
            _config['test_sequence'] = f"{_config['test_sequence']:02d}"
    dataset_val = dataset_class(_config['data_folder'], max_r=_config['max_r'],
                                max_t=_config['max_t'], split=split,
                                use_reflectance=_config['use_reflectance'],
                                maps_folder=maps_folder,
                                test_sequence=_config['test_sequence'])

    np.random.seed(seed)
    torch.random.manual_seed(seed)

    def init_fn(x):
        return _init_fn(x, seed)

    num_worker = 6
    batch_size = 1

    TestImgLoader = torch.utils.data.DataLoader(dataset=dataset_val,
                                                shuffle=False,
                                                batch_size=batch_size,
                                                num_workers=num_worker,
                                                worker_init_fn=init_fn,
                                                collate_fn=merge_inputs,
                                                drop_last=False,
                                                pin_memory=False)

    print(len(TestImgLoader))

    models = []
    for i in range(len(weights)):
        if _config['network'].startswith('PWC'):
            feat = 1
            md = 4
            split = _config['network'].split('_')
            for item in split[1:]:
                if item.startswith('f'):
                    feat = int(item[-1])
                elif item.startswith('md'):
                    md = int(item[2:])
            assert 0 < feat < 7, "Feature Number from PWC has to be between 1 and 6"
            assert 0 < md, "md must be positive"
            model = CMRNet(img_shape, use_feat_from=feat, md=md,
                           use_reflectance=_config['use_reflectance'])
        else:
            raise TypeError("Network unknown")

        checkpoint = torch.load(weights[i], map_location='cpu')
        saved_state_dict = checkpoint['state_dict']
        model.load_state_dict(saved_state_dict)
        model = model.to(device)
        model.eval()
        models.append(model)
        if i == 0:
            _config['occlusion_threshold'] = checkpoint['config']['occlusion_threshold']
            _config['occlusion_kernel'] = checkpoint['config']['occlusion_kernel']
        else:
            assert _config['occlusion_threshold'] == checkpoint['config']['occlusion_threshold']
            assert _config['occlusion_kernel'] == checkpoint['config']['occlusion_kernel']

    if _config['save_log']:
        log_file = f'./results_for_paper/log_seq{_config["test_sequence"]}.csv'
        log_file = open(log_file, 'w')
        log_file = csv.writer(log_file)
        header = ['frame']
        for i in range(len(weights) + 1):
            header += [f'iter{i}_error_t', f'iter{i}_error_r',
                       f'iter{i}_error_x', f'iter{i}_error_y', f'iter{i}_error_z',
                       f'iter{i}_error_roll', f'iter{i}_error_pitch', f'iter{i}_error_yaw']
        log_file.writerow(header)

    show = _config['show']
    show = True  # hard-coded override of _config['show']
    errors_r = []
    errors_t = []
    errors_t2 = []
    errors_rpy = []
    all_RTs = []

    prev_tr_error = None
    prev_rot_error = None

    for i in range(len(weights) + 1):
        errors_r.append([])
        errors_t.append([])
        errors_t2.append([])
        errors_rpy.append([])

    for batch_idx, sample in enumerate(TestImgLoader):
        log_string = [str(batch_idx)]

        lidar_input = []
        rgb_input = []

        shape_pad = [0, 0, 0, 0]

        if batch_idx == 0 or not _config['use_prev_output']:
            # Use the initial pose of the current frame with respect to the GT as input
            sample['tr_error'] = sample['tr_error'].cuda()
            sample['rot_error'] = sample['rot_error'].cuda()
        else:
            sample['tr_error'] = prev_tr_error
            sample['rot_error'] = prev_rot_error

        for idx in range(len(sample['rgb'])):
            real_shape = [sample['rgb'][idx].shape[1],
                          sample['rgb'][idx].shape[2],
                          sample['rgb'][idx].shape[0]]

            # Project the point cloud in the RT pose
            sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda()
            pc_rotated = sample['point_cloud'][idx].clone()
            reflectance = None
            if _config['use_reflectance']:
                reflectance = sample['reflectance'][idx].cuda()

            R = mathutils.Quaternion(sample['rot_error'][idx])
            T = mathutils.Vector(sample['tr_error'][idx])

            pc_rotated = rotate_back(pc_rotated, R, T)

            cam_params = sample['calib'][idx].cuda()
            cam_model = CameraModel()
            cam_model.focal_length = cam_params[:2]
            cam_model.principal_point = cam_params[2:]
            uv, depth, points, refl = cam_model.project_pytorch(pc_rotated, real_shape, reflectance)
            uv = uv.t().int()
            depth_img = torch.zeros(real_shape[:2], device='cuda', dtype=torch.float)
            depth_img += 1000.
            depth_img = visibility.depth_image(uv, depth, depth_img, uv.shape[0],
                                               real_shape[1], real_shape[0])
            depth_img[depth_img == 1000.] = 0.

            projected_points = torch.zeros_like(depth_img, device='cuda')
            projected_points = visibility.visibility2(depth_img, cam_params, projected_points,
                                                      depth_img.shape[1], depth_img.shape[0],
                                                      _config['occlusion_threshold'],
                                                      _config['occlusion_kernel'])

            if _config['use_reflectance']:
                uv = uv.long()
                indexes = projected_points[uv[:, 1], uv[:, 0]] == depth
                refl_img = torch.zeros(real_shape[:2], device='cuda', dtype=torch.float)
                refl_img[uv[indexes, 1], uv[indexes, 0]] = refl[0, indexes]

            projected_points /= 100.
            if not _config['use_reflectance']:
                projected_points = projected_points.unsqueeze(0)
            else:
                projected_points = torch.stack((projected_points, refl_img))

            rgb = sample['rgb'][idx].cuda()
            shape_pad[3] = (img_shape[0] - rgb.shape[1])
            shape_pad[1] = (img_shape[1] - rgb.shape[2])

            rgb = F.pad(rgb, shape_pad)
            projected_points = F.pad(projected_points, shape_pad)

            rgb_input.append(rgb)
            lidar_input.append(projected_points)

        lidar_input = torch.stack(lidar_input)
        rgb_input = torch.stack(rgb_input)
        if show:
            out0 = overlay_imgs(rgb, lidar_input)
            cv2.imshow("INPUT", out0[:, :, [2, 1, 0]])
            cv2.waitKey(1)

            pc_GT = sample['point_cloud'][idx].clone()

            uv, depth, _, refl = cam_model.project_pytorch(pc_GT, real_shape)
            uv = uv.t().int()
            depth_img = torch.zeros(real_shape[:2], device='cuda', dtype=torch.float)
            depth_img += 1000.
            depth_img = visibility.depth_image(uv, depth, depth_img, uv.shape[0],
                                               real_shape[1], real_shape[0])
            depth_img[depth_img == 1000.] = 0.

            projected_points = torch.zeros_like(depth_img, device='cuda')
            projected_points = visibility.visibility2(depth_img, cam_params, projected_points,
                                                      depth_img.shape[1], depth_img.shape[0],
                                                      _config['occlusion_threshold'],
                                                      _config['occlusion_kernel'])
            projected_points /= 100.
            projected_points = F.pad(projected_points, shape_pad)
            lidar_GT = projected_points.unsqueeze(0).unsqueeze(0)
            out1 = overlay_imgs(rgb_input[0], lidar_GT)
            cv2.imshow("GT", out1[:, :, [2, 1, 0]])
            # plt.figure()
            # plt.imshow(out1)
            # if batch_idx == 0:
            #     # import ipdb; ipdb.set_trace()
            #     out2 = overlay_imgs(sample['rgb'][0], lidar_input[:, :, :, 1241])
            #     plt.figure()
            #     plt.imshow(out2)
            # io.imshow(lidar_input[0][0].cpu().numpy(), cmap='jet')
            # io.show()

        rgb = rgb_input.to(device)
        lidar = lidar_input.to(device)
        target_transl = sample['tr_error'].to(device)
        target_rot = sample['rot_error'].to(device)

        point_cloud = sample['point_cloud'][0].to(device)
        reflectance = None
        if _config['use_reflectance']:
            reflectance = sample['reflectance'][0].to(device)
        camera_model = cam_model

        R = quat2mat(target_rot[0])
        T = tvector2mat(target_transl[0])
        RT1_inv = torch.mm(T, R)
        RT1 = RT1_inv.clone().inverse()

        rotated_point_cloud = rotate_forward(point_cloud, RT1)
        RTs = [RT1]

        T_composed = RT1[:3, 3]
        R_composed = quaternion_from_matrix(RT1)
        errors_t[0].append(T_composed.norm().item())
        errors_t2[0].append(T_composed)
        errors_r[0].append(quaternion_distance(
            R_composed.unsqueeze(0),
            torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0),
            R_composed.device))
        # rpy_error = quaternion_to_tait_bryan(R_composed)
        rpy_error = mat2xyzrpy(RT1)[3:]

        rpy_error *= (180.0 / 3.141592)
        errors_rpy[0].append(rpy_error)
        log_string += [str(errors_t[0][-1]), str(errors_r[0][-1]),
                       str(errors_t2[0][-1][0].item()),
                       str(errors_t2[0][-1][1].item()),
                       str(errors_t2[0][-1][2].item()),
                       str(errors_rpy[0][-1][0].item()),
                       str(errors_rpy[0][-1][1].item()),
                       str(errors_rpy[0][-1][2].item())]

        if batch_idx == 0:
            print(f'Initial T_error: {errors_t[0]}')
            print(f'Initial R_error: {errors_r[0]}')
        start = 0

        # Run model
        with torch.no_grad():
            for iteration in range(start, len(weights)):
                # Run the i-th network
                T_predicted, R_predicted = models[iteration](rgb, lidar)
                if _config['rot_transl_separated'] and iteration == 0:
                    T_predicted = torch.tensor([[0., 0., 0.]], device='cuda')
                if _config['rot_transl_separated'] and iteration == 1:
                    R_predicted = torch.tensor([[1., 0., 0., 0.]], device='cuda')

                # Project the points in the new pose predicted by the i-th network
                R_predicted = quat2mat(R_predicted[0])
                T_predicted = tvector2mat(T_predicted[0])
                RT_predicted = torch.mm(T_predicted, R_predicted)
                RTs.append(torch.mm(RTs[iteration], RT_predicted))

                rotated_point_cloud = rotate_forward(rotated_point_cloud, RT_predicted)

                uv2, depth2, _, refl = camera_model.project_pytorch(rotated_point_cloud,
                                                                    real_shape, reflectance)
                uv2 = uv2.t().int()
                depth_img2 = torch.zeros(real_shape[:2], device=device)
                depth_img2 += 1000.
                depth_img2 = visibility.depth_image(uv2, depth2, depth_img2, uv2.shape[0],
                                                    real_shape[1], real_shape[0])
                depth_img2[depth_img2 == 1000.] = 0.
                out_cuda2 = torch.zeros_like(depth_img2, device=device)
                out_cuda2 = visibility.visibility2(depth_img2, cam_params, out_cuda2,
                                                   depth_img2.shape[1], depth_img2.shape[0],
                                                   _config['occlusion_threshold'],
                                                   _config['occlusion_kernel'])

                if _config['use_reflectance']:
                    uv = uv.long()
                    indexes = projected_points[uv[:, 1], uv[:, 0]] == depth
                    refl_img = torch.zeros(real_shape[:2], device='cuda', dtype=torch.float)
                    refl_img[uv[indexes, 1], uv[indexes, 0]] = refl[0, indexes]
                    refl_img = F.pad(refl_img, shape_pad)

                out_cuda2 = F.pad(out_cuda2, shape_pad)
                lidar = out_cuda2.clone()
                lidar /= 100.
                if not _config['use_reflectance']:
                    lidar = lidar.unsqueeze(0)
                else:
                    lidar = torch.stack((lidar, refl_img))
                lidar = lidar.unsqueeze(0)

                if show:
                    out3 = overlay_imgs(rgb[0], lidar, idx=batch_idx)
                    cv2.imshow(f'Iter_{iteration}', out3[:, :, [2, 1, 0]])
                    cv2.waitKey(1)
                    # if iter == 1:
                    #     plt.figure()
                    #     plt.imshow(out3)
                    #     io.imshow(lidar.cpu().numpy()[0, 0], cmap='jet')
                    #     io.show()

                T_composed = RTs[iteration + 1][:3, 3]
                R_composed = quaternion_from_matrix(RTs[iteration + 1])
                errors_t[iteration + 1].append(T_composed.norm().item())
                errors_t2[iteration + 1].append(T_composed)
                errors_r[iteration + 1].append(quaternion_distance(
                    R_composed.unsqueeze(0),
                    torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0),
                    R_composed.device))

                # rpy_error = quaternion_to_tait_bryan(R_composed)
                rpy_error = mat2xyzrpy(RTs[iteration + 1])[3:]
                rpy_error *= (180.0 / 3.141592)
                errors_rpy[iteration + 1].append(rpy_error)
                log_string += [str(errors_t[iteration + 1][-1]),
                               str(errors_r[iteration + 1][-1]),
                               str(errors_t2[iteration + 1][-1][0].item()),
                               str(errors_t2[iteration + 1][-1][1].item()),
                               str(errors_t2[iteration + 1][-1][2].item()),
                               str(errors_rpy[iteration + 1][-1][0].item()),
                               str(errors_rpy[iteration + 1][-1][1].item()),
                               str(errors_rpy[iteration + 1][-1][2].item())]

        all_RTs.append(RTs[-1])
        prev_RT = RTs[-1].inverse()
        prev_tr_error = prev_RT[:3, 3].unsqueeze(0)
        prev_rot_error = quaternion_from_matrix(prev_RT).unsqueeze(0)
        # prev_RT is how much the network output still deviates from the GT
        if _config['save_log']:
            log_file.writerow(log_string)

    if _config['save_log']:
        log_file.close()
    print("Iterative refinement: ")
    for i in range(len(weights) + 1):
        errors_r[i] = torch.tensor(errors_r[i]) * (180.0 / 3.141592)
        errors_t[i] = torch.tensor(errors_t[i]) * 100

        print(f"Iteration {i}: \tMean Translation Error: {errors_t[i].mean():.4f} cm "
              f" Mean Rotation Error: {errors_r[i].mean():.4f} °")
        print(f"Iteration {i}: \tMedian Translation Error: {errors_t[i].median():.4f} cm "
              f" Median Rotation Error: {errors_r[i].median():.4f} °\n")
    print("-------------------------------------------------------")
    print("Timings:")
    for i in range(len(errors_t2)):
        errors_t2[i] = torch.stack(errors_t2[i])
        errors_rpy[i] = torch.stack(errors_rpy[i])

    plt.plot(errors_t2[-1][:, 0].cpu().numpy())
    plt.show()
    plt.plot(errors_t2[-1][:, 1].cpu().numpy())
    plt.show()
    plt.plot(errors_t2[-1][:, 2].cpu().numpy())
    plt.show()

    if _config["save_name"] is not None:
        torch.save(torch.stack(errors_t).cpu().numpy(),
                   f'./results_for_paper/{_config["save_name"]}_errors_t')
        torch.save(torch.stack(errors_r).cpu().numpy(),
                   f'./results_for_paper/{_config["save_name"]}_errors_r')
        torch.save(torch.stack(errors_t2).cpu().numpy(),
                   f'./results_for_paper/{_config["save_name"]}_errors_t2')
        torch.save(torch.stack(errors_rpy).cpu().numpy(),
                   f'./results_for_paper/{_config["save_name"]}_errors_rpy')

    print("End!")
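# A minimal sketch of the iterative-refinement bookkeeping used in main()
# above: each network predicts a correction that is right-multiplied onto the
# current estimate (RTs.append(torch.mm(RTs[iteration], RT_predicted))), and
# the translation error is the norm of the translation column of the composed
# matrix. The "prediction" here is a hypothetical stand-in that recovers half
# of the remaining offset, not a CMRNet forward pass.
import torch


def _demo_iterative_refinement():
    """Toy check: the composed translation error halves at every iteration."""
    rt = torch.eye(4)
    rt[:3, 3] = torch.tensor([1.0, 0.2, 0.0])   # initial decalibration (metres)
    rts = [rt]
    for iteration in range(3):
        correction = torch.eye(4)
        correction[:3, 3] = -0.5 * rts[iteration][:3, 3]   # pretend prediction
        rts.append(torch.mm(rts[iteration], correction))
        print(iteration, rts[-1][:3, 3].norm().item() * 100., 'cm')   # ~51, ~25, ~13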
def __getitem__(self, idx):
    item = self.all_files[idx]
    run = str(item.split('/')[0])
    timestamp = str(item.split('/')[1])
    img_path = os.path.join(self.root_dir, run, 'image_2', timestamp + '.png')
    pc_path = os.path.join(self.root_dir, run, self.maps_folder, timestamp + '.h5')

    try:
        with h5py.File(pc_path, 'r') as hf:
            pc = hf['PC'][:]
            if self.use_reflectance:
                reflectance = hf['intensity'][:]
                reflectance = torch.from_numpy(reflectance).float()
    except Exception as e:
        print(f'File Broken: {pc_path}')
        raise e

    pc_in = torch.from_numpy(pc.astype(np.float32))  # .float()
    if pc_in.shape[1] == 4 or pc_in.shape[1] == 3:
        pc_in = pc_in.t()
    if pc_in.shape[0] == 3:
        homogeneous = torch.ones(pc_in.shape[1]).unsqueeze(0)
        pc_in = torch.cat((pc_in, homogeneous), 0)
    elif pc_in.shape[0] == 4:
        if not torch.all(pc_in[3, :] == 1.):
            pc_in[3, :] = 1.
    else:
        raise TypeError("Wrong PointCloud shape")

    h_mirror = False
    if np.random.rand() > 0.5 and self.split == 'train':
        h_mirror = True
        pc_in[1, :] *= -1

    img = Image.open(img_path)
    img_rotation = 0.
    if self.split == 'train':
        img_rotation = np.random.uniform(-5, 5)
    try:
        img = self.custom_transform(img, img_rotation, h_mirror)
    except OSError:
        new_idx = np.random.randint(0, self.__len__())
        return self.__getitem__(new_idx)

    # Rotate PointCloud for img_rotation
    if self.split == 'train':
        R = mathutils.Euler((radians(img_rotation), 0, 0), 'XYZ')
        T = mathutils.Vector((0., 0., 0.))
        pc_in = rotate_forward(pc_in, R, T)

    if self.split != 'test':
        max_angle = self.max_r
        rotz = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0)
        roty = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0)
        rotx = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0)
        transl_x = np.random.uniform(-self.max_t, self.max_t)
        transl_y = np.random.uniform(-self.max_t, self.max_t)
        transl_z = np.random.uniform(-self.max_t, min(self.max_t, 1.))
    else:
        initial_RT = self.test_RT[idx]
        rotz = initial_RT[6]
        roty = initial_RT[5]
        rotx = initial_RT[4]
        transl_x = initial_RT[1]
        transl_y = initial_RT[2]
        transl_z = initial_RT[3]

    R = mathutils.Euler((rotx, roty, rotz), 'XYZ')
    T = mathutils.Vector((transl_x, transl_y, transl_z))

    R, T = invert_pose(R, T)
    R, T = torch.tensor(R), torch.tensor(T)

    # io.imshow(depth_img.numpy(), cmap='jet')
    # io.show()
    calib = get_calib_kitti(int(run))
    if h_mirror:
        calib[2] = (img.shape[2] / 2) * 2 - calib[2]

    if not self.use_reflectance:
        sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib,
                  'tr_error': T, 'rot_error': R,
                  'idx': int(run), 'rgb_name': timestamp}
    else:
        sample = {'rgb': img, 'point_cloud': pc_in, 'reflectance': reflectance,
                  'calib': calib, 'tr_error': T, 'rot_error': R,
                  'idx': int(run), 'rgb_name': timestamp}

    return sample
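# __getitem__ returns per-frame tensors of different sizes (the point cloud in
# particular), so the DataLoader in main() relies on the custom collate
# function merge_inputs. The sketch below is a plausible minimal version,
# assuming merge_inputs keeps variable-size fields as Python lists and collates
# the rest normally; it is an assumption, not the repository implementation.
from torch.utils.data.dataloader import default_collate


def merge_inputs_sketch(queries):
    """Collate a batch, keeping variable-size items as plain lists."""
    point_clouds = [q['point_cloud'] for q in queries]
    imgs = [q['rgb'] for q in queries]
    reflectances = [q['reflectance'] for q in queries if 'reflectance' in q]
    # Fixed-size fields (calib, tr_error, rot_error, idx, rgb_name) are handled
    # by the default collate.
    batch = default_collate([{k: v for k, v in q.items()
                              if k not in ('point_cloud', 'rgb', 'reflectance')}
                             for q in queries])
    batch['point_cloud'] = point_clouds
    batch['rgb'] = imgs
    if reflectances:
        batch['reflectance'] = reflectances
    return batch


# Example usage (hypothetical):
#   torch.utils.data.DataLoader(dataset_val, batch_size=1,
#                               collate_fn=merge_inputs_sketch)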