def export_gt_depths_mc(opt):
    split_folder = Path(opt.split)
    lines = readlines(split_folder / opt.base)

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    data_path = Path(opt.data_path)  # raw kitti path
    for line in tqdm(lines):
        traj_name, shader, frame = relpath_split(line)
        if opt.split in ("mc", "mc_lite"):  # added later; ground truth from 'depth_annotated_path' makes the results skew high
            gt_depth_path = data_path / traj_name / "depth" / "{:04d}.png".format(int(frame))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32)
            gt_depth = gt_depth.sum(axis=2)
            gt_depth = np_normalize_image(gt_depth)
            gt_depths.append(gt_depth.astype(np.float32))
        else:
            print('no data set selected')
            return

    output_path = split_folder / "gt_depths.npz"
    print("Saving to {}".format(output_path))
    np.savez_compressed(output_path, data=np.array(gt_depths))
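# np_normalize_image is a repo helper whose definition is not shown here; a
# minimal sketch of the assumed min-max normalization to [0, 1] (the epsilon
# guarding against constant images is an assumption):
def np_normalize_image_sketch(img):
    return (img - img.min()) / (img.max() - img.min() + 1e-8)  # avoid divide-by-zero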
def main():
    data_base = Path('/home/roit/datasets/kitti')
    in_file = Path('/home/roit/datasets/splits/kitti/eigen_std/test.txt')
    out_file = Path('/home/roit/datasets/splits/kitti/eigen_std/wo_test.txt')
    lines = readlines(in_file)
    discard_list = []
    for item in lines:
        date, scene, camera, frame = relpath_split(item)
        frame_prev = str(int(frame) - 1)
        frame_next = str(int(frame) + 1)

        path = os.path.join(date, scene, camera, 'data', "{:010d}".format(int(frame_prev)))
        image_path1 = data_base / (path + '.png')
        path = os.path.join(date, scene, camera, 'data', "{:010d}".format(int(frame_next)))
        image_path2 = data_base / (path + '.png')

        if not image_path1.exists() or not image_path2.exists():
            discard_list.append(item)
            print(item)

    for item in discard_list:
        lines.remove(item)

    print("in file: {}".format(in_file))
    print("out file: {}".format(out_file))
    print("remaining lines: {}".format(len(lines)))
    print('total discarded lines: {}'.format(len(discard_list)))
    writelines(out_file, lines)
def kittiv2_test():
    train_filenames = readlines(
        '/home/roit/datasets/splits/eigen_zhou_std/train_files.txt')
    train_dataset = KITTIRAWDataset(
        data_path='/home/roit/datasets/kitti',
        filenames=train_filenames,
        height=192,
        width=640,
        frame_sides=[-1, 0, 1],  # kitti [0, -1, 1], mc [-1, 0, 1]
        num_scales=4,
        is_train=True,
        img_ext='.png')
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=16,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=True)
    for data in train_loader:
        print('ok')
    print('okk')
def export_gt_depths_kitti():
    parser = argparse.ArgumentParser(description='export_gt_depth')
    parser.add_argument('--split_dir',
                        type=str,
                        help='which split to export gt from',
                        default='/home/roit/datasets/splits/eigen',
                        choices=["eigen", "eigen_zhou", "eigen_benchmark", "custom", "custom_lite"])
    parser.add_argument('--data_path',  # the original 2012 release
                        type=str,
                        help='path to the root of the KITTI data',
                        default='/home/roit/datasets/kitti')
    parser.add_argument('--depth_annotated_path',
                        type=str,
                        default='/home/roit/bluep2/datasets/kitti_data_depth_annotated',
                        help='annotated depth maps added in 2015')
    opt = parser.parse_args()

    split_dir = Path(opt.split_dir)
    lines = readlines(split_dir / "test_files.txt")

    print("Exporting ground truth depths for {}".format(opt.split_dir))

    gt_depths = []
    data_path = Path(opt.data_path)  # raw kitti path
    depth_annotated_path = Path(opt.depth_annotated_path)
    for line in tqdm(lines):
        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if split_dir.stem == "eigen":  # depth gt lives in the scene folders as velodyne point clouds; full eigen split
            calib_dir = data_path / folder.split("/")[0]
            velo_filename = data_path / folder / "velodyne_points/data" / "{:010d}.bin".format(frame_id)
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)
        elif split_dir.stem == "eigen_benchmark":  # added later; gt comes from 'depth_annotated_path'; results skew high
            gt_depth_path = depth_annotated_path / folder / "proj_depth" / "groundtruth" / "image_02" / "{:010d}.png".format(frame_id)
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256
        else:  # opt.split == 'custom': read the gt data from PNGs
            gt_depth_path = depth_annotated_path / folder / "proj_depth" / "groundtruth" / "image_02" / "{:010d}.png".format(frame_id)
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = split_dir / "gt_depths2.npz"
    print("Saving to {}".format(output_path))
    np.savez_compressed(output_path, data=np.array(gt_depths))
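# Usage sketch for consuming the exported archive (the path below is the
# script's default split dir, filename as written above): the .npz holds a
# ragged object array under the key 'data', one gt depth map per test frame.
gt = np.load('/home/roit/datasets/splits/eigen/gt_depths2.npz', allow_pickle=True)
gt_depths = gt["data"]
print(len(gt_depths), gt_depths[0].shape)  # e.g. number of frames, then (H, W)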
def extract_vsd_img2():
    dataset = Path("/home/roit/datasets/VSD")
    wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
    root = wk_root / 'splits/visdrone/test_files.txt'
    img_dump = wk_root / 'visdrone_test_img'
    img_dump.mkdir_p()
    rel_paths = readlines(root)
    rel_paths.sort()
    for item in tqdm(rel_paths):
        seq, frame = item.split('/')
        img_p = dataset / seq / (frame + '.jpg')
        out_name = item.replace('/', '_') + '.jpg'
        cmd = 'cp ' + img_p + ' ' + img_dump / out_name
        os.system(cmd)
def kitti():
    dataset = Path("/media/roit/970evo/home/roit/datasets/kitti")
    wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
    root = wk_root / 'splits/eigen/test_files.txt'
    out_path = wk_root / 'eigen_imgs'
    out_path.mkdir_p()
    files = readlines(root)
    for item in tqdm(files):
        dir_name, pre, num, lr = re.split(' |/', item)  # avoid shadowing builtin `dir`
        out_name = pre + '_' + num + '_' + lr + '.png'
        cmd = ('cp ' + dataset / dir_name / pre / 'image_02/data' /
               "{:010d}.png".format(int(num)) + ' ' + out_path / out_name)
        os.system(cmd)
    print('ok')
def extract_MC():
    cp_img = False
    cp_gt = True
    dataset = Path("/home/roit/datasets/mcv5")
    wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
    lines = '/home/roit/datasets/splits/mc/mcv5-sildurs-e-10k-12345-s/test.txt'
    dump_base = Path('/home/roit/bluep2/test_out/mc/apr/mcv5-sildurs-e-10k-12345-s')
    shader = 'sildurs-e'
    save_cmap = 'magma'
    dump_base.mkdir_p()

    img_dump = wk_root / 'color'
    img_dump.mkdir_p()
    gt_dump = wk_root / 'test_files'
    gt_dump.mkdir_p()

    (dump_base / 'img').mkdir_p()
    (dump_base / 'depth').mkdir_p()

    files = readlines(lines)
    for item in tqdm(files):
        if cp_img:
            img_p = dataset / item
            out_name = item.replace('/', '_')
            cmd = 'cp ' + img_p + ' ' + dump_base / 'img' / out_name
            os.system(cmd)
        if cp_gt:
            gt_p = dataset / item
            gt_p = gt_p.replace(shader, 'depth')
            out_name = item.replace('/', '_')
            cmd = 'cp ' + gt_p + ' ' + dump_base / 'depth' / out_name
            os.system(cmd)
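# Design note: the extraction scripts above shell out to `cp`, which silently
# fails on paths containing spaces or shell metacharacters. A drop-in sketch of
# the same copy using the standard library (no shell involved, raises on error):
import shutil

def safe_copy(src, dst):
    """Copy src to dst without invoking a shell."""
    shutil.copy(str(src), str(dst))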
def dataset_init(opts):
    # datasets setting
    # global options
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    dataset_opt = opts['dataset']
    frame_sides = opts['frame_sides']
    scales = opts['scales']
    device = opts['device']

    # local
    datasets_dict = {"kitti": KITTIRAWDataset,
                     # "kitti_odom": KITTIOdomDataset,
                     "mc": MCDataset,
                     "custom_mono": CustomMonoDataset}

    if dataset_opt['type'] in datasets_dict.keys():
        dataset = datasets_dict[dataset_opt['type']]  # pick which dataset class to build (kitti here); this is a constructor handle
    else:
        dataset = CustomMonoDataset

    split_path = Path(dataset_opt['split']['path'])
    train_path = split_path / dataset_opt['split']['train_file']
    val_path = split_path / dataset_opt['split']['val_file']
    data_path = Path(dataset_opt['path'])
    batch_size = dataset_opt['batch_size']
    num_workers = dataset_opt['num_workers']

    train_filenames = readlines(train_path)
    val_filenames = readlines(val_path)
    img_ext = '.png'

    # train loader
    train_dataset = dataset(
        data_path=data_path,
        filenames=train_filenames,
        height=feed_height,
        width=feed_width,
        frame_sides=frame_sides,  # kitti [0, -1, 1], mc [-1, 0, 1]
        num_scales=len(scales),
        mode="train"
        # img_ext='.png'
    )
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=batch_size * len(device),
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True
    )

    # val loader
    val_dataset = dataset(
        data_path=data_path,
        filenames=val_filenames,
        height=feed_height,
        width=feed_width,
        frame_sides=frame_sides,
        num_scales=len(scales),
        mode="val",
        img_ext=img_ext)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=batch_size * len(device),
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True)

    print("Using split: {}, {}, {}".format(split_path,
                                           dataset_opt['split']['train_file'],
                                           dataset_opt['split']['val_file']))
    print("There are {:d} training items and {:d} validation items".format(
        len(train_dataset), len(val_dataset)))

    return train_loader, val_loader
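# Usage sketch for dataset_init. The dict keys are exactly the ones read above;
# the concrete paths and split filenames are illustrative assumptions:
opts_example = {
    'feed_height': 192, 'feed_width': 640,
    'frame_sides': [-1, 0, 1], 'scales': [0, 1, 2, 3],
    'device': [0],  # len(device) multiplies the effective batch size above
    'dataset': {
        'type': 'kitti',
        'path': '/home/roit/datasets/kitti',
        'split': {'path': '/home/roit/datasets/splits/eigen_zhou_std',
                  'train_file': 'train_files.txt',
                  'val_file': 'val_files.txt'},
        'batch_size': 16, 'num_workers': 8,
    },
}
# train_loader, val_loader = dataset_init(opts_example)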
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']
    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    # Metric evaluation forcibly squeezes the gt values into the scanner's range,
    # pushing predictions as close as possible to metric values.
    # But for ... (comment truncated in source)
    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_depths = []
    gt_depths = []
    disps = []
    for data in tqdm(dataloader):
        # debug override, left commented (it fed a fixed NYU image to every batch):
        # image = cv2.imread('/home/roit/datasets/nyudepthv2/img/0001.jpg')
        # image = cv2.resize(image, (384, 288))
        # image = np.transpose(image, [2, 0, 1])
        # image = torch.tensor(image).cuda() / 255.
        # image = image.unsqueeze(0)
        input_color = reframe(encoder_mode, data, frame_sides=frame_sides, key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)

        depth_gt = data['depth_gt']
        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        # pred_depth = disp2depth(disp)
        pred_depth = pred_depth.cpu()[:, 0].numpy()
        depth_gt = depth_gt.cpu()[:, 0].numpy()

        pred_depths.append(pred_depth)
        gt_depths.append(depth_gt)

    gt_depths = np.concatenate(gt_depths, axis=0)
    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []
    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))

        # crop
        # if test_dir.stem == "eigen" or test_dir.stem == 'custom':
        if opts['dataset']['type'] == "kitti":  # ???, probably a legacy (Eigen) crop
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)

        pred = pred[mask]  # also flattens to 1-D
        gt = gt[mask]

        ratio = np.median(gt) / np.median(pred)  # median scaling: linearly rescale pred at eval time to best match gt
        ratios.append(ratio)
        pred *= ratio

        pred[pred < MIN_DEPTH] = MIN_DEPTH  # clamp predictions into the valid depth range
        pred[pred > MAX_DEPTH] = MAX_DEPTH

        metric = compute_errors(gt, pred, mode=metric_mode)
        metrics.append(metric)

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)

    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                           "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
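# Median scaling in isolation (a worked sketch; numbers are illustrative, and
# numpy is assumed imported as np, as throughout this repo). Monocular networks
# predict depth only up to an unknown global scale, so evaluation rescales each
# prediction by median(gt) / median(pred) before computing errors:
gt_example = np.array([2.0, 4.0, 8.0])
pred_example = np.array([1.0, 2.0, 4.0])          # correct structure, wrong scale
ratio_example = np.median(gt_example) / np.median(pred_example)  # = 2.0
assert np.allclose(pred_example * ratio_example, gt_example)     # scale-aligned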
def main(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    # assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
    #     "eval_split should be either odom_9 or odom_10"
    # sequence_id = int(opt.eval_split.split("_")[1])
    # filenames = readlines(
    #     os.path.join(os.path.dirname(__file__), "splits", "odom",
    #                  "test_files_{:02d}.txt".format(sequence_id)))
    # dataset = KITTIOdomDataset(opt.eval_pose_data_path, filenames, opt.height, opt.width,
    #                            [0, 1], 4, is_train=False)

    filenames = readlines(Path('./splits') / opt.split / 'test_files.txt')
    dataset = CustomMonoDataset(opt.dataset_path, filenames, opt.height,
                                opt.width, [0, 1], 1, is_train=False)
    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    # model
    pose_encoder_path = Path(opt.load_weights_folder) / "pose_encoder.pth"
    pose_decoder_path = Path(opt.load_weights_folder) / "pose.pth"

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")
    opt.frame_ids = [0, 1]  # pose network only takes two frames as input
    print("-> eval " + opt.split)
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()
        all_color_aug = torch.cat(
            [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

        features = [pose_encoder(all_color_aug)]
        axisangle, translation = pose_decoder(features)
        pred_pose = transformation_from_parameters(axisangle[:, 0],
                                                   translation[:, 0])
        pred_pose = pred_pose.cpu().numpy()
        pred_poses.append(pred_pose)

    pred_poses = np.concatenate(pred_poses)
    length = pred_poses.shape[0]
    pred_poses.resize([length, 16])
    pred_poses = pred_poses[:, :12]
    filename = opt.dump_name
    np.savetxt(filename, pred_poses, delimiter=' ', fmt='%1.8e')
    print("-> Predictions saved to", filename)
def main(opt):
    """Evaluate odometry on the KITTI dataset
    """
    if opt.infer_file is None:
        filenames = readlines(Path('./splits') / opt.split / 'test_files.txt')
    else:
        filenames = readlines(Path('./splits') / opt.split / opt.infer_file)

    if opt.split == "custom_mono":
        dataset = CustomMonoDataset(opt.dataset_path, filenames, opt.height,
                                    opt.width, [0, 1], 1, is_train=False)
    elif opt.split == "mc":
        dataset = MCDataset(opt.dataset_path, filenames, opt.height,
                            opt.width, [0, 1], 1, is_train=False)

    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    # model
    weights_pose = torch.load(opt.posenet_path)
    pose_net = networks.PoseNet().to(device)
    pose_net.load_state_dict(weights_pose['state_dict'], strict=False)
    pose_net.eval()

    pred_poses = []

    print("-> Computing pose predictions")
    opt.frame_ids = [0, 1]  # pose network only takes two frames as input
    print("-> eval " + opt.split)

    global_pose = np.identity(4)
    poses = [global_pose[0:3, :].reshape(1, 12)]
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()

        pose = pose_net(inputs[("color_aug", 0, 0)],
                        inputs[("color_aug", 1, 0)])  # [B, 6]
        pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()  # 3x4
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])  # 4x4
        global_pose = global_pose @ np.linalg.inv(pose_mat)
        poses.append(global_pose[0:3, :].reshape(1, 12))

    poses = np.concatenate(poses, axis=0)
    if opt.scale_factor:
        poses[:, 3] *= opt.scale_factor   # x-axis translation
        poses[:, 11] *= opt.scale_factor  # z-axis translation

    if opt.infer_file:
        dump_name = Path(opt.infer_file).stem + '.txt'
    else:
        dump_name = opt.dump_name
    np.savetxt(dump_name, poses, delimiter=' ', fmt='%1.8e')
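# Sketch of what pose_vec2mat is assumed to do above (SfMLearner-style; the
# repo's actual helper may use a different rotation parameterization): a 6-DoF
# vector [tx, ty, tz, rx, ry, rz] becomes a [3, 4] transform [R | t].
import torch

def pose_vec2mat_sketch(vec):  # vec: [B, 6]
    t = vec[:, :3].unsqueeze(-1)                  # [B, 3, 1]
    rx, ry, rz = vec[:, 3], vec[:, 4], vec[:, 5]  # Euler angles
    zeros, ones = torch.zeros_like(rx), torch.ones_like(rx)
    cx, sx = torch.cos(rx), torch.sin(rx)
    cy, sy = torch.cos(ry), torch.sin(ry)
    cz, sz = torch.cos(rz), torch.sin(rz)
    Rx = torch.stack([ones, zeros, zeros,
                      zeros, cx, -sx,
                      zeros, sx, cx], dim=1).reshape(-1, 3, 3)
    Ry = torch.stack([cy, zeros, sy,
                      zeros, ones, zeros,
                      -sy, zeros, cy], dim=1).reshape(-1, 3, 3)
    Rz = torch.stack([cz, -sz, zeros,
                      sz, cz, zeros,
                      zeros, zeros, ones], dim=1).reshape(-1, 3, 3)
    R = Rz @ Ry @ Rx                              # [B, 3, 3]
    return torch.cat([R, t], dim=2)               # [B, 3, 4]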
def main(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # download_model_if_doesnt_exist(args.model_path, args.model_name)
    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # 1 LOADING PRETRAINED MODEL
    # 1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    # 1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    # 2. FINDING INPUT IMAGES
    dataset_path = Path(args.dataset_path)

    # files
    root = Path(os.path.dirname(__file__))
    txt = root / 'splits' / args.split / args.txt_files
    print('-> inference file: ', txt)
    rel_paths = readlines(txt)

    # out
    if args.out_path is not None:
        out_path = Path(args.out_path)
    else:
        out_path = Path('./' + dataset_path.stem + '_out')
    out_path.mkdir_p()

    files = []
    # rel_paths to paths
    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:  # kitti
        for item in rel_paths:
            item = item.split(' ')
            if item[2] == 'l':
                camera = 'image_02'
            elif item[2] == 'r':
                camera = 'image_01'
            files.append(dataset_path / item[0] / camera / 'data' /
                         "{:010d}.png".format(int(item[1])))
    elif args.split == 'mc':
        for item in rel_paths:
            # item = item.split('/')
            files.append(item)
    elif args.split in ('visdrone', 'visdrone_lite'):
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / (item[1] + '.jpg'))
    else:
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / (item[1] + '.jpg'))

    cnt = 0

    # 3. PREDICTING ON EACH IMAGE IN TURN
    print('\n-> inference ' + args.dataset_path)
    files.sort()
    for image_path in tqdm(files):
        # Load image and preprocess
        if args.split == 'mc':
            input_image = pil.open(dataset_path / (image_path + '.png')).convert('RGB')
        else:
            input_image = pil.open(image_path).convert('RGB')
        original_width, original_height = input_image.size
        input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

        # PREDICTION
        input_image = input_image.to(device)  # torch.Size([1, 3, 192, 640])
        features = encoder(input_image)  # a list from 0 to 4
        outputs = depth_decoder(features)  # dict, 4 disp tensors
        cnt += 1
        disp = outputs[("disp", 0)]  # has the same size as the input
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear",
            align_corners=False)

        # Saving numpy file
        # if args.out_name == 'num':
        if args.split == 'eigen' or args.split == 'custom':
            output_name = str(image_path).split('/')[-4] + '_{}'.format(image_path.stem)
        elif args.split == 'mc':
            block, p, color, frame = image_path.split('/')
            output_name = str(image_path).replace('/', '_') + '.png'
        elif args.split in ('visdrone', 'visdrone_lite'):
            output_name = image_path.relpath(dataset_path).replace('.jpg', '').replace('/', '_')
        elif args.split == 'custom_mono':
            output_name = image_path.relpath(dataset_path).replace('.jpg', '').replace('/', '_')
        else:
            output_name = image_path.relpath(dataset_path).replace('.jpg', '').replace('/', '_')

        if args.npy_out:
            name_dest_npy = os.path.join(out_path, "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

        # Saving colormapped depth image
        disp_resized_np = disp_resized.squeeze().cpu().numpy()
        vmax = np.percentile(disp_resized_np, 95)
        name_dest_im = Path(out_path) / "{}.png".format(output_name)
        plt.imsave(name_dest_im, disp_resized_np, cmap='magma', vmax=vmax)

    print(cnt)
    print('\n-> Done, saved at ' + args.out_path)
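# For reference, disp_to_depth as called above follows the monodepth2
# convention (this sketch assumes the repo keeps monodepth2's implementation):
# the network's sigmoid disparity in [0, 1] is affinely mapped into
# [1/max_depth, 1/min_depth] and inverted to obtain depth.
def disp_to_depth_sketch(disp, min_depth, max_depth):
    min_disp = 1 / max_depth   # e.g. 1/100 = 0.01
    max_disp = 1 / min_depth   # e.g. 1/0.1 = 10
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth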
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    # assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
    #     "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.eval_pose_data_path, filenames, opt.height,
                               opt.width, [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset,
                            opt.eval_odom_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")
    opt.frame_ids = [0, 1]  # pose network only takes two frames as input
    print("-> eval " + opt.eval_split)
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()
        all_color_aug = torch.cat(
            [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

        features = [pose_encoder(all_color_aug)]
        axisangle, translation = pose_decoder(features)
        # encoder_out = torch.onnx.export(model=pose_decoder, args=features,
        #                                 f="monopose_poseencoder18.onnx",
        #                                 verbose=True, export_params=True)
        pred_poses.append(
            transformation_from_parameters(axisangle[:, 0],
                                           translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.eval_pose_data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    print('-> step2')
    gt_local_poses = []
    for i in tqdm(range(1, len(gt_global_poses))):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    print('-> step3')
    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in tqdm(range(0, num_frames - 1)):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))
        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n-> Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    opt.saved_npy = opt.eval_split + ".npy"
    save_path = os.path.join(opt.eval_pose_save_path, opt.saved_npy)
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
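# The dump_xyz / compute_ate helpers assumed above follow monodepth2's
# evaluate_pose.py; sketches of both (the repo's versions may differ slightly):
def dump_xyz_sketch(source_to_target_transformations):
    # chain relative 4x4 transforms and collect the camera positions
    xyzs = []
    cam_to_world = np.eye(4)
    xyzs.append(cam_to_world[:3, 3])
    for source_to_target in source_to_target_transformations:
        cam_to_world = np.dot(cam_to_world, source_to_target)
        xyzs.append(cam_to_world[:3, 3])
    return xyzs

def compute_ate_sketch(gtruth_xyz, pred_xyz_o):
    # align the first positions, solve a least-squares scale, then average error
    offset = gtruth_xyz[0] - pred_xyz_o[0]
    pred_xyz = pred_xyz_o + offset[None, :]
    scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
    alignment_error = pred_xyz * scale - gtruth_xyz
    return np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]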
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    # Metric evaluation forcibly squeezes the gt values into the scanner's range,
    # pushing predictions as close as possible to metric values.
    # But for ... (comment truncated in source)
    if not (opt.eval_mono or opt.eval_stereo):
        print("Please choose mono or stereo evaluation by setting either "
              "--eval_mono or --eval_stereo")

    test_dir = Path(opt.test_dir)

    # 1. load gt
    print('\n-> load gt:{}\n'.format(opt.test_dir))
    gt_path = test_dir / "gt_depths.npz"
    gt_depths = np.load(gt_path, allow_pickle=True)
    gt_depths = gt_depths["data"]

    # 2. load img data and predict; the output is pred_disps (shape [nums, 1, w, h])
    depth_eval_path = Path(opt.depth_eval_path)
    if not depth_eval_path.exists():
        print("Cannot find a folder at {}".format(depth_eval_path))
    print("-> Loading weights from {}".format(depth_eval_path))

    # model loading
    filenames = readlines(test_dir / opt.test_files)
    encoder_path = depth_eval_path / "encoder.pth"
    decoder_path = depth_eval_path / "depth.pth"
    encoder_dict = torch.load(encoder_path)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)
    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # dataloader
    dataset = datasets.KITTIRAWDatasetv1(opt.data_path,
                                         filenames,
                                         encoder_dict['height'],
                                         encoder_dict['width'], [0],
                                         4,
                                         is_train=False)
    dataloader = DataLoader(dataset,
                            batch_size=opt.eval_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_disps = []
    print("\n-> Computing predictions with size {}x{}\n".format(
        encoder_dict['width'], encoder_dict['height']))

    # prediction
    for data in tqdm(dataloader):
        input_color = data[("color", 0, 0)].cuda()
        # if opt.post_process:
        #     # Post-processed results require each image to have two forward passes
        #     input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

        # eval 0
        output = depth_decoder(encoder(input_color))

        # eval 1
        pred_disp, pred_depth_tmp = disp_to_depth(output[("disp", 0)],
                                                  opt.min_depth, opt.max_depth)
        pred_disp = pred_disp.cpu()[:, 0].numpy()
        # pred_depth = pred_depth.cpu()[:, 0].numpy()

        # if opt.post_process:
        #     N = pred_disp.shape[0] // 2
        #     pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

        pred_disps.append(pred_disp)

    pred_disps = np.concatenate(pred_disps)

    # if opt.save_pred_disps:
    #     output_path = depth_eval_path / "disps_{}_split.npy".format(opt.test_dir)
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)

    # if opt.no_eval:
    #     print("-> Evaluation disabled. Done.")
    #     quit()
    # elif test_dir.stem == 'benchmark':
    #     save_dir = depth_eval_path / "benchmark_predictions"
    #     print("-> Saving out benchmark predictions to {}".format(save_dir))
    #     if not os.path.exists(save_dir):
    #         os.makedirs(save_dir)
    #     for idx in tqdm(range(len(pred_disps))):
    #         disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
    #         depth = STEREO_SCALE_FACTOR / disp_resized
    #         depth = np.clip(depth, 0, 80)
    #         depth = np.uint16(depth * 256)
    #         save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
    #         cv2.imwrite(save_path, depth)
    #     print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
    #     quit()

    # 3. evaluation
    print("-> Evaluating")
    # if opt.eval_stereo:
    #     print("   Stereo evaluation - "
    #           "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
    #     opt.median_scaling = False
    #     opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    # else:
    #     print("   Mono evaluation - using median scaling")

    metrics = []
    ratios = []
    nums_evaluate = pred_disps.shape[0]
    for i in tqdm(range(nums_evaluate)):
        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]

        # eval 2
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))  # 1271x341 to 128x640
        pred_depth = 1 / pred_disp  # could also be taken directly from disp_to_depth above

        # crop
        if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably a legacy (Eigen) crop
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = gt_depth > 0

        # eval 3
        pred_depth = pred_depth[mask]  # also flattens to 1-D
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor

        # median scaling
        if opt.median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)  # linearly rescale pred at eval time to best match gt
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH  # clamp predictions into the valid depth range
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        metric = compute_errors(gt_depth, pred_depth)
        metrics.append(metric)

    metrics = np.array(metrics)

    # 4. process results, latex style output
    if opt.median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
            med, np.std(ratios / med)))

    mean_metrics = metrics.mean(0)
    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                           "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")
    print("\n-> Done!")
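# The seven numbers printed above are the standard monocular-depth benchmark
# metrics; a reference implementation of compute_errors, assuming the repo
# follows monodepth2's (the call above passes no extra mode argument):
def compute_errors_sketch(gt, pred):
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()        # accuracy under threshold 1.25
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    rmse = np.sqrt(np.mean((gt - pred) ** 2))
    rmse_log = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3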
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']
    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    # metric_mode = opts['metric_mode']
    # Metric evaluation forcibly squeezes the gt values into the scanner's range,
    # pushing predictions as close as possible to metric values.
    # But for ... (comment truncated in source)
    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")
    elif opts['dataset']['type'] == 'custom_mono':
        dataset = datasets.CustomMonoDataset(data_path=data_path,
                                             filenames=file_names,
                                             height=feed_height,
                                             width=feed_width,
                                             frame_sides=frame_sides,
                                             num_scales=1,
                                             mode='test')

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_depths = []
    gt_depths = []
    disps = []
    idx = 0
    for data in tqdm(dataloader):
        input_color = reframe(encoder_mode, data, frame_sides=frame_sides, key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)

        # depth_gt = data['depth_gt']
        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        # pred_depth = disp2depth(disp)

        if "depth" in sub_dirs:
            pred_depth = pred_depth.cpu()[:, 0].numpy()[0]
            depth = cv2.resize(pred_depth, (full_width, full_height))
            depth = np_normalize_image(depth)
            cv2.imwrite(out_dir / "depth" / file_names[idx].replace('/', '_'),
                        depth * 255)
        if "disp" in sub_dirs:
            pred_disp = pred_disp.cpu()[:, 0].numpy()[0]
            disp = cv2.resize(pred_disp, (full_width, full_height))
            disp = np_normalize_image(disp)
            cv2.imwrite(out_dir / "disp" / file_names[idx].replace('/', '_'),
                        disp * 255)
        idx += 1
def prediction(opts):
    """Runs a pretrained model over a specified test set and saves the predictions
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']
    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']
    framework_mode = opts['model']['mode']

    # Metric evaluation forcibly squeezes the gt values into the scanner's range,
    # pushing predictions as close as possible to metric values.
    # But for ... (comment truncated in source)
    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    components = opts['model']['mode']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=components)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    file_names.sort()

    # prediction loader
    # test_files = []
    # for base in file_names:
    #     test_files.append(data_path / base)
    # test_files.sort()
    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="prediction")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    out_shows = []
    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()

    for data in tqdm(dataloader):
        input_color = input_frames(data, mode=framework_mode, frame_sides=frame_sides)
        features = encoder(input_color)
        disp = decoder(*features)

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        out_show = pred_disp.cpu()[:, 0].numpy()
        out_shows.append(out_show)

    for idx, item in enumerate(out_shows):
        depth_name = file_names[idx].replace('/', '_').replace('.png', 'depth')
        plt.imsave(out_dir / (depth_name + '.png'), item[0], cmap='magma')
def main_with_masks(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    out_path = Path(args.out_path)
    out_path.mkdir_p()
    dirs = {}
    for mask in args.results:
        dirs[mask] = out_path / mask
        (out_path / mask).mkdir_p()

    print('-> split:{}'.format(args.split))
    print('-> save to {}'.format(args.out_path))

    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:
        feed_height = 192
        feed_width = 640
        min_depth = 0.1
        max_depth = 80
        full_height = 375
        full_width = 1242
        dataset = KITTIRAWDataset
    elif args.split in ["visdrone", "visdrone_lite"]:
        feed_width = 352
        feed_height = 192
        min_depth = 0.1
        max_depth = 255
        dataset = VSDataset
    elif args.split in ['mc', 'mc_lite']:
        feed_height = 288
        feed_width = 384
        min_depth = 0.1
        max_depth = 255
        dataset = MCDataset

    # note: these assignments override the per-split sizes above for every split
    feed_height = 192
    feed_width = 640

    backproject_depth = BackprojectDepth(1, feed_height, feed_width).to(device)
    project_3d = Project3D(1, feed_height, feed_width)
    photometric_error = PhotometricError()

    txt_files = args.txt_files

    # data
    test_path = Path(args.wk_root) / "splits" / args.split / txt_files
    test_filenames = readlines(test_path)
    if args.as_name_sort:  # sort by sequence name order
        test_filenames.sort()

    # check filenames: drop frames whose neighbors fall outside the sequence
    for i, item in enumerate(test_filenames):
        if args.split in ['eigen', 'custom', 'custom_lite', 'eigen_zhou']:
            dirname, frame, lr = test_filenames[i].split()
            files = (Path(args.dataset_path) / dirname / 'image_02/data').files()
            files.sort()
            frame_min = int(files[0].stem)
            frame_max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= frame_min or \
                    int(frame) + args.frame_ids[-1] >= frame_max:
                test_filenames[i] = ''
        if args.split in ['mc', 'mc_lite']:  # already handled when the split was built, but double-check
            block, trajactory, color, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / block / trajactory / color).files()
            files.sort()
            frame_min = int(files[0].stem)
            frame_max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= frame_min or \
                    int(frame) + args.frame_ids[-1] >= frame_max:
                test_filenames[i] = ''
        if args.split in ['visdrone', 'visdrone_lite']:  # already handled when the split was built, but double-check
            dirname, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / dirname).files()
            files.sort()
            frame_min = int(files[0].stem)
            frame_max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= frame_min or \
                    int(frame) + args.frame_ids[-1] >= frame_max:
                test_filenames[i] = ''

    while '' in test_filenames:
        test_filenames.remove('')

    test_dataset = dataset(
        args.dataset_path,
        test_filenames,
        feed_height,
        feed_width,
        args.frame_ids,
        1,
        is_train=False,
        img_ext=args.ext)
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        drop_last=False)
    print('-> items num: {}'.format(len(test_loader)))

    # layers
    # download_model_if_doesnt_exist(args.model_path, args.model_name)
    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # 1 LOADING PRETRAINED MODEL
    # 1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    # 1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    # paths
    pose_encoder_path = Path(model_path) / "pose_encoder.pth"
    pose_decoder_path = Path(model_path) / 'pose.pth'

    # 2.1 pose encoder
    print("   Loading pretrained pose encoder")
    pose_encoder = networks.ResnetEncoder(18, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))
    pose_encoder.to(device)
    pose_encoder.eval()

    # 2.2 pose decoder
    print("   Loading pretrained pose decoder")
    pose_decoder = networks.PoseDecoder(num_ch_enc=pose_encoder.num_ch_enc,
                                        num_input_features=1,
                                        num_frames_to_predict_for=2)
    pose_loaded_dict = torch.load(pose_decoder_path, map_location=device)
    pose_decoder.load_state_dict(pose_loaded_dict)
    pose_decoder.to(device)
    pose_decoder.eval()

    source_scale = 0
    scale = 0
    for batch_idx, inputs in tqdm(enumerate(test_loader)):
        for key, ipt in inputs.items():
            inputs[key] = ipt.to(device)

        features = encoder(inputs[("color", 0, 0)])  # a list from 0 to 4
        outputs = depth_decoder(features)  # dict, 4 disp tensors
        disp = outputs[("disp", 0)]  # has the same size as the input
        # disp_resized = torch.nn.functional.interpolate(disp, (full_height, full_width), mode="bilinear", align_corners=False)
        _, depth = disp_to_depth(disp, min_depth, max_depth)

        for f_i in [args.frame_ids[0], args.frame_ids[-1]]:
            if f_i < 0:
                pose_inputs = [inputs[("color", f_i, 0)], inputs[("color", 0, 0)]]
            else:
                pose_inputs = [inputs[("color", 0, 0)], inputs[("color", f_i, 0)]]
            pose_inputs = torch.cat(pose_inputs, 1)
            features = pose_encoder(pose_inputs)
            axisangle, translation = pose_decoder([features])

            outputs[("cam_T_cam", 0, f_i)] = transformation_from_parameters(
                axisangle[:, 0], translation[:, 0], invert=(f_i < 0))  # b44

            T = outputs[("cam_T_cam", 0, f_i)]
            cam_points = backproject_depth(depth, inputs[("inv_K", 0)])  # D @ K_inv
            pix_coords = project_3d(cam_points, inputs[("K", 0)], T)  # K @ D @ K_inv
            outputs[("sample", f_i, 0)] = pix_coords  # rigid flow
            outputs[("color", f_i, 0)] = F.grid_sample(inputs[("color", f_i, 0)],
                                                       outputs[("sample", f_i, 0)],
                                                       padding_mode="border")
            # outputs["color"] is the warped frame i
            # add a depth warp
            outputs[("color_identity", f_i, 0)] = inputs[("color", f_i, 0)]

        target = inputs[("color", 0, 0)]
        reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = outputs[("color", frame_id, 0)]
            reprojection_losses.append(photometric_error.run(pred, target))
        reprojection_losses = torch.cat(reprojection_losses, 1)

        identity_reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = inputs[("color", frame_id, source_scale)]
            identity_reprojection_losses.append(photometric_error.run(pred, target))
        identity_reprojection_losses = torch.cat(identity_reprojection_losses, 1)

        erro_maps = torch.cat((identity_reprojection_losses, reprojection_losses), dim=1)  # b4hw
        identical_mask = IdenticalMask(erro_maps)
        identical_mask = identical_mask[0].detach().cpu().numpy()

        save_name = test_filenames[batch_idx].replace('/', '_')
        save_name = save_name.replace(' l', '').replace(' r', '')  # strip the camera token only, not every l/r character
        save_name = save_name.replace(' ', '')

        if "identical_mask" in args.results:
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask)

        if "depth" in args.results:
            # Saving colormapped depth image
            disp_np = disp[0, 0].detach().cpu().numpy()
            vmax = np.percentile(disp_np, 95)
            plt.imsave(dirs['depth'] / "{}.png".format(save_name),
                       disp_np, cmap='magma', vmax=vmax)

        if "mean_mask" in args.results:
            mean_mask = MeanMask(erro_maps)
            mean_mask = mean_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['mean_mask'] / "{}.png".format(save_name),
                       mean_mask, cmap='bone')

        if "identical_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            identical_mask = identical_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask, cmap='bone')

        if "var_mask" in args.results:
            var_mask = VarMask(erro_maps)
            var_mask = var_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["var_mask"] / "{}.png".format(save_name),
                       var_mask, cmap='bone')

        if "final_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            mean_mask = MeanMask(erro_maps)
            var_mask = VarMask(erro_maps)
            final_mask = float8or(mean_mask * identical_mask, var_mask)
            final_mask = final_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["final_mask"] / "{}.png".format(save_name),
                       final_mask, cmap='bone')
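# float8or is a repo helper whose definition is not shown here. The final-mask
# composition above reads as (mean AND identical) OR var over {0,1}-valued
# float tensors; a plausible sketch consistent with that use (an assumption,
# not the repo's actual implementation):
def float8or_sketch(a, b):
    return ((a + b) > 0).float()  # elementwise OR for {0,1}-valued float tensors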