from __future__ import absolute_import, division, print_function

import os

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader

# NOTE: the imports below follow the monodepth2/DNet project layout this file
# appears to assume (readlines, ScaleRecovery, datasets, networks); adjust the
# module paths if this repo differs. The evaluate() variants below also assume
# two different ScaleRecovery signatures (with and without K in the
# constructor), so keep whichever matches the repo's layers module.
from utils import readlines
from layers import ScaleRecovery
import datasets
import networks

# Models trained with stereo supervision use a nominal baseline of 0.1 units,
# while the real KITTI rig has a 54cm baseline, hence this factor
# (monodepth2 convention).
STEREO_SCALE_FACTOR = 5.4

splits_dir = os.path.join(os.path.dirname(__file__), "splits")

# NOTE: this module contains three evaluate() definitions, apparently from
# separate scripts (depth evaluation, depth evaluation with RANSAC
# ground-plane experiments, and odometry evaluation); in a single file each
# definition shadows the previous one.


def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    # Normalized KITTI intrinsics, scaled to the image resolution before use
    K = np.array([[0.58, 0, 0.5, 0],
                  [0, 1.92, 0.5, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=np.float32)

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:
        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        img_ext = '.png' if opt.png else '.jpg'
        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'],
                                           [0], 4, is_train=False, img_ext=img_ext)
        dataloader = DataLoader(dataset, 16, shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True, drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))
            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.eval_object:
        object_masks = []
        for line in filenames:
            line = line.split()
            folder, frame_index = line[0], int(line[1])

            object_mask_filename = os.path.join(
                os.path.dirname(__file__), "object_masks", folder,
                "{:010d}.npy".format(int(frame_index)))
            object_mask = np.load(object_mask_filename)
            object_masks.append(object_mask)

    if opt.save_pred_disps:
        output_path = os.path.join(opt.load_weights_folder,
                                   "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, "
              "so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True,
                        encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print(" Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.scaling = "disable"
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print(" Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        # Apply the fixed scale factor set for stereo evaluation (1.0 for mono);
        # the option was set above but never applied. getattr guards against
        # options objects that do not define it.
        pred_depth *= getattr(opt, "pred_depth_scale_factor", 1.0)

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

            if opt.eval_object:
                object_mask = object_masks[i].astype(bool)  # np.bool is deprecated

        else:
            mask = gt_depth > 0

        if opt.scaling == "gt":
            ratio = np.median(gt_depth[mask]) / np.median(pred_depth[mask])
            if opt.eval_object:
                mask = np.logical_and(mask, object_mask)
        elif opt.scaling == "dgc":
            # Scale the normalized intrinsics to the ground-truth resolution
            tensor_K = K.copy()
            tensor_K[0, :] *= gt_width
            tensor_K[1, :] *= gt_height
            tensor_K = torch.from_numpy(tensor_K).unsqueeze(0).cuda()
            cam_height = torch.tensor([opt.cam_height]).cuda()
            scale_recovery = ScaleRecovery(1, gt_height, gt_width).cuda()
            pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
            ratio = scale_recovery(pred_depth, tensor_K, cam_height).cpu().item()
            pred_depth = pred_depth[0].cpu().numpy()
        else:
            ratio = 1

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= ratio
        ratios.append(ratio)

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        if len(gt_depth) != 0:
            errors.append(compute_errors(gt_depth, pred_depth))

    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
        med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                          "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
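# ---------------------------------------------------------------------------
# For reference: minimal sketches of the helpers the evaluation above relies
# on, written to the standard monodepth2 conventions. Their exact
# implementations in this repo may differ; keep the repo's own versions if so.
# ---------------------------------------------------------------------------


def disp_to_depth(disp, min_depth, max_depth):
    """Convert the network's sigmoid output into a disparity in
    [1/max_depth, 1/min_depth] and the corresponding depth."""
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth


def batch_post_process_disparity(l_disp, r_disp):
    """Fuse disparities from an image and its horizontal flip, blending
    near the left/right borders (Monodepth v1 post-processing)."""
    _, h, w = l_disp.shape
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
    r_mask = l_mask[:, :, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp


def compute_errors(gt, pred):
    """Standard KITTI depth metrics between ground-truth and predicted depth."""
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rmse = np.sqrt(((gt - pred) ** 2).mean())
    rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3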
# ---------------------------------------------------------------------------
# Variant of the depth evaluation above, apparently from a separate script:
# under --scaling dgc it fits the ground plane with RANSAC and recovers
# absolute scale from an assumed camera height, and it carries (disabled)
# per-image scale diagnostics. It expects extra project helpers whose import
# paths are unknown here and are left as assumptions:
#   pil_loader, get_image_path, blending_imgs, fit_plane_LSE_RANSAC
# (a sketch of fit_plane_LSE_RANSAC is given after this function).
# ---------------------------------------------------------------------------


def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array([[0.58, 0, 0.5, 0],
                  [0, 1.92, 0.5, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=np.float32)

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:
        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'],
                                           [0], 4, is_train=False)
        dataloader = DataLoader(dataset, 16, shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True, drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))
            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.eval_object:
        object_masks = []
        for line in filenames:
            line = line.split()
            folder, frame_index = line[0], int(line[1])

            object_mask_filename = os.path.join(
                os.path.dirname(__file__), "object_masks", folder,
                "{:010d}.npy".format(int(frame_index)))
            object_mask = np.load(object_mask_filename)
            object_masks.append(object_mask)

    if opt.save_pred_disps:
        output_path = os.path.join(opt.load_weights_folder,
                                   "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, "
              "so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True,
                        encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print(" Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.scaling = "disable"
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print(" Mono evaluation - using median scaling")

    errors = []
    ratios = []
    ex_logs = []
    mean_scale = []
    side_map = {"2": 2, "3": 3, "l": 2, "r": 3}
    # resize_ori = transforms.Resize((pred_disps.shape[1], pred_disps.shape[2]),
    #                                interpolation=Image.ANTIALIAS)

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        line = filenames[i].split()
        folder = line[0]
        frame_index = line[1]
        side = side_map[line[2]]
        color = pil_loader(get_image_path(folder, int(frame_index), side))
        # color = pil_loader('/mnt/sdb/xuefeng_data/dkit_dataset/20200629_mechanical_fast/images/{:006d}.png'.format(i))
        # color = color.crop((0, 191, 640, 383))

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        # Apply the fixed scale factor set for stereo evaluation (1.0 for mono);
        # the option was set above but never applied.
        pred_depth *= getattr(opt, "pred_depth_scale_factor", 1.0)

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

            if opt.eval_object:
                object_mask = object_masks[i].astype(bool)  # np.bool is deprecated
        else:
            mask = gt_depth > 0

        if opt.scaling == "gt":
            ratio = np.median(gt_depth[mask]) / np.median(pred_depth[mask])
            if opt.eval_object:
                mask = np.logical_and(mask, object_mask)
        elif opt.scaling == "dgc":
            scale_recovery = ScaleRecovery(1, gt_height, gt_width, K).cuda()
            # scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()
            pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
            ratio1, surface_normal1, ground_mask1, cam_points1 = scale_recovery(pred_depth)
            # ratio = ratio1.cpu().item()
            surface_normal = surface_normal1.cpu()[0, 0, :, :].numpy()
            ground_mask = ground_mask1.cpu()[0, 0, :, :].numpy()
            pred_depth = pred_depth[0].cpu().numpy()

            # RANSAC ground-plane fit: sample up to 2000 camera-space points on
            # the predicted ground mask, fit a plane, and recover absolute
            # scale from the assumed KITTI camera height of 1.65 m.
            cam_points = cam_points1.cpu().numpy()
            cam_points2 = cam_points.transpose(1, 2, 0)
            cam_points_masked = cam_points2[np.where(ground_mask == 1)]
            np.random.shuffle(cam_points_masked)
            cam_points4 = np.array(cam_points_masked)
            print(cam_points4.shape)
            cam_points4 = cam_points4[:2000, :]
            cam_points3 = np.concatenate(
                (cam_points4, np.ones((cam_points4.shape[0], 1))), axis=1)
            print(cam_points3.shape)
            plane, inliers = fit_plane_LSE_RANSAC(cam_points3)
            # print(plane)
            # Originally named ratio_rans; renamed to ratio so the common
            # scaling code below works for every branch (ratio_rans was
            # undefined under --scaling gt or disable).
            ratio = abs(1.65 / plane[-1])
        else:
            ratio = 1

        # print(ratio)
        # print(max(pred_depth))
        # print(min(pred_depth))
        pred_depth_ori = pred_depth * mask
        gt_depth_ori = gt_depth * mask
        pred_depth_ori = np.where(mask == 1, pred_depth_ori, 1)

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]
        # mean_scale.append(np.mean(gt_depth / pred_depth))

        # Disabled diagnostics (would additionally need `from math import sqrt`,
        # `import matplotlib.pyplot as plt` and `from scipy.stats import norm`):
        # brute-force search for the best global scale, per-pixel scale
        # statistics with histogram plots, and surface-normal / ground-mask
        # overlays.
        '''
        error_try = 100
        scale_abs = 0
        for ratio_try in np.arange(0.1, 50, step=0.1):
            pred_depth1 = pred_depth * ratio_try
            error_tmp = compute_errors(gt_depth, pred_depth1)[0]
            # print(error_tmp)
            if error_tmp < error_try:
                error_try = error_tmp
                scale_abs = ratio_try

        div_scale = gt_depth_ori / pred_depth_ori
        # print(div_scale.shape)
        div_values1 = div_scale[mask]
        div_scale = (div_scale - scale_abs) / scale_abs
        div_values = div_scale[mask]
        div_rmse = sqrt(sum((div_values1 - scale_abs) * (div_values1 - scale_abs)) / len(div_values1))
        print(min(div_values), max(div_values))
        ex_logs.append([i, min(div_values), max(div_values), div_rmse, scale_abs])
        # div_scale = div_scale / np.max(div_scale)

        mu = np.mean(div_values1)
        sigma = np.std(div_values1)
        print(min(div_values1), max(div_values1))
        fig, ax = plt.subplots()
        n, bins, patches = ax.hist(div_values1, 150, range=(3, 130), density=True)
        y = norm.pdf(bins, mu, 0.8 * sigma)
        ax.plot(bins, y, 'r')
        plt.xlabel('Scale')
        plt.ylabel('Density')
        plt.savefig(os.path.join(os.path.dirname(__file__),
                                 "hist_imgs2", "{:010d}.jpg".format(i)))
        plt.close()

        # blend_img = blending_imgs(div_scale, color, i)
        # blend_img.save(os.path.join(os.path.dirname(__file__),
        #                             "blend_imgs", "{:010d}.jpg".format(i)))
        blending_imgs(surface_normal, color, i, 'surface_normals')
        blending_imgs(ground_mask, color, i, 'ground_masks')
        '''

        if opt.scaling == "dgc":
            # Only the dgc branch produces ground_mask; the fourth argument is
            # the output folder name (it was mistakenly the mask array itself).
            blending_imgs(ground_mask, color, i, 'ground_masks')

        pred_depth *= ratio
        ratios.append(ratio)

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
        # blending_imgs(div_scale, color, i, mask)

        if len(gt_depth) != 0:
            errors.append(compute_errors(gt_depth, pred_depth))

    '''
    fl = open('ex.txt', 'w')
    fl.writelines(str(ex_logs))
    fl.close()
    '''
    # np.save('mean_scale.npy', mean_scale)

    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
        med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                          "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
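# ---------------------------------------------------------------------------
# A minimal sketch of the RANSAC ground-plane fit assumed above; the repo's
# own fit_plane_LSE_RANSAC may differ. It fits plane coefficients
# p = (a, b, c, d) with unit normal to homogeneous points of shape (N, 4), so
# |p[-1]| = |d| is the camera-to-ground distance used in abs(1.65 / plane[-1]).
# num_iters and inlier_thresh are illustrative choices, not values from the
# original code.
# ---------------------------------------------------------------------------


def fit_plane_LSE(points):
    """Least-squares plane through homogeneous points: minimize ||points @ p||
    over ||p|| = 1 (smallest right singular vector), then rescale p so its
    normal (a, b, c) has unit length."""
    _, _, vh = np.linalg.svd(points)
    p = vh[-1]
    return p / np.linalg.norm(p[:3])


def fit_plane_LSE_RANSAC(points, num_iters=100, inlier_thresh=0.05):
    """RANSAC wrapper around fit_plane_LSE; returns (plane, inlier_points)."""
    best_inliers = np.zeros(points.shape[0], dtype=bool)
    for _ in range(num_iters):
        sample = points[np.random.choice(points.shape[0], 3, replace=False)]
        plane = fit_plane_LSE(sample)
        # Point-to-plane distances; valid because the normal has unit length
        dists = np.abs(points @ plane)
        inliers = dists < inlier_thresh
        if inliers.sum() > best_inliers.sum():
            best_inliers = inliers
    plane = fit_plane_LSE(points[best_inliers])
    return plane, points[best_inliers]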
# ---------------------------------------------------------------------------
# Odometry evaluation, apparently from a separate script (this definition
# shadows the evaluate() variants above if kept in one module). The imports
# below follow the monodepth2 layout and are assumptions for this repo.
# ---------------------------------------------------------------------------
from datasets import KITTIOdomDataset
from layers import transformation_from_parameters


def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array([[0.58, 0, 0.5, 0],
                  [0, 1.92, 0.5, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=np.float32)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split in ("odom_0", "odom_9", "odom_10"), \
        "eval_split should be odom_0, odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
    depth_encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    depth_encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_encoder_dict = torch.load(depth_encoder_path)
    model_dict = depth_encoder.state_dict()
    depth_encoder.load_state_dict(
        {k: v for k, v in depth_encoder_dict.items() if k in model_dict})

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()
    depth_encoder.cuda()
    depth_encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_poses = []
    pred_disps = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            input_color = inputs[("color", 0, 0)].cuda()

            depth_output = depth_decoder(depth_encoder(input_color))
            pred_disp, _ = disp_to_depth(depth_output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()
            pred_disps.append(pred_disp)

            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)
    pred_disps = np.concatenate(pred_disps)

    pred_poses_scaled = []
    ratios_d = []
    gt_norms_div = []
    gt_norms = []
    pred_norms = []
    td_divs_dgc = []
    poses_pred = []

    # The input resolution is fixed here, so build the scale-recovery module
    # once instead of once per frame (behavior unchanged).
    scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()

    for i in range(pred_poses.shape[0]):
        pred_pose = pred_poses[i]

        # Scale the pose between frames i and i+1 using the depth of frame
        # i+1; the index is clamped because pred_poses and pred_disps have the
        # same length, so i + 1 would overrun on the final frame.
        pred_disp = pred_disps[min(i + 1, pred_disps.shape[0] - 1)]
        pred_depth = 1 / pred_disp

        pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
        ratio = scale_recovery(pred_depth).cpu().item()

        # Keep the full 4x4 transform and scale only its translation so that
        # dump_xyz below can chain the poses (storing the bare translation
        # vector, as originally written, cannot be composed).
        pred_pose_scaled = pred_pose.copy()
        pred_pose_scaled[:3, 3] *= ratio

        poses_pred.append(pred_pose[:3, 3])
        pred_poses_scaled.append(pred_pose_scaled)
        ratios_d.append(ratio)

    gt_poses_path = os.path.join(opt.data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                                 gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses_scaled[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))
        gt_norm_div = np.linalg.norm(gt_local_xyzs) / np.linalg.norm(local_xyzs)

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
        gt_norms_div.append(gt_norm_div)
        gt_norms.append(np.linalg.norm(gt_local_xyzs))

    print("\n Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    # The original saved pred_poses under both the "scaled" and "gt" names and
    # gt_xyzs under "pred"; each array below is matched to its file name.
    # pred_norms is kept for compatibility although nothing fills it.
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_scaled{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_poses_scaled)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_gt{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_xyzs)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_pred{:02d}.npy".format(sequence_id))
    np.save(save_path, poses_pred)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms_div{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms_div)
    save_path = os.path.join(os.path.dirname(__file__),
                             "ratios_d{:02d}.npy".format(sequence_id))
    np.save(save_path, ratios_d)
    save_path = os.path.join(os.path.dirname(__file__),
                             "pred_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_norms)

    print("-> Predictions saved to", save_path)
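# ---------------------------------------------------------------------------
# For reference: sketches of the pose helpers assumed above, written to the
# monodepth2 conventions, plus the usual script entry point. These are
# assumptions about code defined elsewhere in the repo; keep the repo's own
# versions if they differ.
# ---------------------------------------------------------------------------


def dump_xyz(source_to_target_transformations):
    """Chain relative 4x4 poses and return the camera positions along the track."""
    xyzs = []
    cam_to_world = np.eye(4)
    xyzs.append(cam_to_world[:3, 3])
    for source_to_target_transformation in source_to_target_transformations:
        cam_to_world = np.dot(cam_to_world, source_to_target_transformation)
        xyzs.append(cam_to_world[:3, 3])
    return xyzs


def compute_ate(gtruth_xyz, pred_xyz_o):
    """Absolute trajectory error after aligning the start point and optimizing
    a global scale."""
    offset = gtruth_xyz[0] - pred_xyz_o[0]
    pred_xyz = pred_xyz_o + offset[None, :]

    scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
    alignment_error = pred_xyz * scale - gtruth_xyz
    rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]
    return rmse


if __name__ == "__main__":
    # Assumes a monodepth2-style options module; substitute this repo's own
    # argument parser if it differs. Only the last-defined evaluate() runs.
    from options import MonodepthOptions

    options = MonodepthOptions()
    evaluate(options.parse())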