def pipeline(self, params):
    """Run every processing stage in order and report where results live.

    Stages: frame extraction, three downscaling passes, initial depth,
    scale calibration, flow computation / masking / visualization,
    fine-tuning, final depth and, when requested, visualization videos.

    Args:
        params: Options object. This method reads ``size``,
            ``frame_range``, ``model_type``, ``flow_ops``,
            ``flow_checkpoint``, ``overlap_ratio`` and ``make_video``
            from it, and forwards the whole object to the helpers it
            calls.

    Returns:
        A tuple ``(initial_depth_dir, fine_tuner_out_dir, frames)``,
        where ``frames`` is ``frame_range.frames()`` for the requested
        frame range.
    """
    # Split the video into individual frame images.
    self.extract_frames(params)

    # Downscaled copy stored in the "raw" format.
    print_banner("Downscaling frames (raw)")
    self.video.downscale_frames("color_down", params.size, "raw")

    # Same downscaled size, stored as png.
    print_banner("Downscaling frames (png)")
    self.video.downscale_frames("color_down_png", params.size, "png")

    # Copy sized for the flow stage.
    print_banner("Downscaling frames (for flow)")
    self.video.downscale_frames(
        "color_flow", Flow.max_size(), "png", align=64)

    # Build the range of frames to process.
    frame_range = FrameRange(
        frame_range=params.frame_range.set,
        num_frames=self.video.frame_count,
    )
    frame_ids = frame_range.frames()

    print_banner("Compute initial depth")
    # The fine-tuner is also used to write the initial depth maps.
    fine_tuner = DepthFineTuner(self.out_dir, frame_ids, params)
    initial_depth_dir = pjoin(self.path, f"depth_{params.model_type}")
    initial_depth_ready = self.video.check_frames(
        pjoin(initial_depth_dir, "depth"), "raw")
    if not initial_depth_ready:
        fine_tuner.save_depth(initial_depth_dir)

    # Frames that calibration reports as valid.
    calibrated = calibrate_scale(
        self.video, self.out_dir, frame_range, params)

    # Frame range for fine-tuning: requested frames that are also valid.
    tuning_range = frame_range.intersection(OptionalSet(set(calibrated)))
    requested = set(frame_range.frames())
    kept = set(tuning_range.frames())
    print("Filtered out frames", sorted(requested - kept))

    print_banner("Compute flow")
    # Choose the frame pairs to compute flow for. The original note says
    # the pairs are sampled randomly; that is decided inside sample_pairs.
    pairs = sample_pairs(tuning_range, params.flow_ops)
    self.flow.compute_flow(pairs, params.flow_checkpoint)

    print_banner("Compute flow masks")
    self.flow.mask_valid_correspondences()
    flow_list_src = self.flow.check_good_flow_pairs(
        pairs, params.overlap_ratio)
    shutil.copyfile(flow_list_src, pjoin(self.path, "flow_list.json"))

    print_banner("Visualize flow")
    self.flow.visualize_flow(warp=True)

    print_banner("Fine-tuning")
    fine_tuner.fine_tune(writer=self.writer)

    print_banner("Compute final depth")
    final_depth_missing = not self.video.check_frames(
        pjoin(fine_tuner.out_dir, "depth"), "raw", frame_ids)
    if final_depth_missing:
        fine_tuner.save_depth(fine_tuner.out_dir, frame_ids)

    if params.make_video:
        print_banner("Export visualization videos")
        self.make_videos(params, fine_tuner.out_dir)

    return initial_depth_dir, fine_tuner.out_dir, frame_range.frames()
def extract_frames(self, params):
    """Extract the PTS data and then the frames of ``self.video``.

    Args:
        params: Not read by this method; kept as part of the signature.
    """
    video = self.video

    # Read the timing (PTS) information first.
    print_banner("Extracting PTS")
    video.extract_pts()

    # Then expand the video into individual frames.
    print_banner("Extracting frames")
    video.extract_frames()
def calibrate_scale(video, out_dir, frame_range, args):
    """Calibrate the source depth maps against COLMAP dense depth.

    The function works in four cached stages; each stage is skipped when
    its outputs are already on disk:

    1. COLMAP reconstruction, producing ``metadata.npz`` (intrinsics and
       extrinsics) under ``<video.path>/colmap_dense``.
    2. Conversion of COLMAP dense depth maps to ``.raw`` files holding
       ``1 / depth``, with infinite or negative values replaced by NaN.
    3. A per-frame scale, computed as the median of
       ``inv_src_depth / inv_cmp_depth`` over the finite COLMAP pixels.
       Frames whose fraction of finite pixels is below
       ``args.dense_pixel_ratio`` are skipped. The results are written
       to ``<out_dir>/scales.csv`` as ``frame_index,scale`` rows.
    4. Division of the last column of the extrinsics (the translations,
       per the original comment) by the mean scale, saved to
       ``<out_dir>/metadata_scaled.npz``, followed by a calibration
       visualization.

    Args:
        video: Video object; this function reads ``video.path`` and passes
            the object to ``prepare_colmap_color``.
        out_dir: Directory receiving the scaled depth maps, ``scales.csv``,
            ``metadata_scaled.npz`` and the visualization.
        frame_range: Object whose ``frames()`` lists the frame indices to
            process.
        args: Options object. Reads ``colmap_bin_path``,
            ``dense_frame_ratio``, ``size``, ``model_type`` and
            ``dense_pixel_ratio``; also used as the namespace for the
            COLMAP argument parser.

    Returns:
        Set of ``int`` frame indices that have a scale entry.

    Raises:
        AssertionError: If an existing ``scales.csv`` has too few rows or
            does not have exactly two columns.
    """
    # COLMAP reconstruction.
    print_banner("COLMAP reconstruction")

    colmap_dir = pjoin(video.path, 'colmap_dense')
    src_meta_file = pjoin(colmap_dir, "metadata.npz")

    colmap = COLMAPProcessor(args.colmap_bin_path)
    dense_dir = colmap.dense_dir(colmap_dir, 0)

    # The metadata file marks a finished reconstruction.
    if os.path.isfile(src_meta_file):
        print("Checked metadata file exists.")
    else:
        color_dir = prepare_colmap_color(video)

        # Run COLMAP only if the dense reconstruction is not already there.
        if not colmap.check_dense(
                dense_dir, color_dir, valid_ratio=args.dense_frame_ratio):
            path_args = [color_dir, colmap_dir]
            mask_path = pjoin(video.path, 'colmap_mask')
            if os.path.isdir(mask_path):
                path_args.extend(['--mask_path', mask_path])
            colmap_args = COLMAPParams().parse_args(
                args=path_args + ['--dense_max_size', str(args.size)],
                namespace=args)

            colmap.process(colmap_args)

        intrinsics, extrinsics = make_camera_params_from_colmap(
            video.path, colmap.sparse_dir(colmap_dir, 0))
        np.savez(src_meta_file, intrinsics=intrinsics, extrinsics=extrinsics)

    # Convert COLMAP dense depth maps to .raw file format.
    print_banner("Convert COLMAP depth maps")

    converted_depth_fmt = pjoin(
        video.path, "depth_colmap_dense", "depth", "frame_{:06d}.raw")

    # convert colmap dense depths to .raw
    converted_depth_dir = os.path.dirname(converted_depth_fmt)
    dense_depth_dir = pjoin(dense_dir, "stereo", "depth_maps")
    frames = frame_range.frames()
    if not check_frames(
            dense_depth_dir, colmap.dense_depth_suffix(),
            converted_depth_dir, "",
            frame_names={f"frame_{i:06d}.png" for i in frames},
    ):
        os.makedirs(converted_depth_dir, exist_ok=True)
        colmap_depth_fmt = pjoin(
            dense_depth_dir,
            "frame_{:06d}.png" + colmap.dense_depth_suffix())
        for i in frames:
            colmap_depth_fn = colmap_depth_fmt.format(i)
            if not os.path.isfile(colmap_depth_fn):
                logging.warning(
                    "[SCALE CALIBRATION] %s does not exist.",
                    colmap_depth_fn)
                continue
            cmp_depth = load_colmap.read_array(colmap_depth_fn)
            # Store the reciprocal; infinite or negative values become NaN
            # so later stages can filter them with np.isfinite.
            inv_cmp_depth = 1.0 / cmp_depth
            ix = np.isinf(inv_cmp_depth) | (inv_cmp_depth < 0)
            inv_cmp_depth[ix] = float("nan")
            image_io.save_raw_float32_image(
                converted_depth_fmt.format(i), inv_cmp_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                converted_depth_dir, converted_depth_dir,
                force=True, min_percentile=0, max_percentile=99,
            )

    # Compute scaled depth maps
    print_banner("Compute per-frame scales")

    scaled_depth_dir = pjoin(out_dir, "depth_scaled_by_colmap_dense", "depth")
    scaled_depth_fmt = pjoin(scaled_depth_dir, "frame_{:06d}.raw")
    scales_file = pjoin(out_dir, "scales.csv")
    src_depth_fmt = pjoin(
        video.path, f"depth_{args.model_type}", "depth", "frame_{:06d}.raw")
    frames = frame_range.frames()

    if (check_frames(converted_depth_dir, ".png",
                     os.path.dirname(scaled_depth_fmt), ".raw")
            and os.path.isfile(scales_file)):
        # ndmin=2 keeps a one-row file as shape (1, 2). Without it loadtxt
        # squeezes the result to 1-D and the column indexing below
        # (.shape[1], [:, 0], [:, 1]) raises IndexError.
        src_to_colmap_scales = np.loadtxt(
            scales_file, delimiter=',', ndmin=2)
        # NOTE: this check is an assert, so it is skipped under `python -O`.
        assert src_to_colmap_scales.shape[0] >= len(frames) * args.dense_frame_ratio \
            and src_to_colmap_scales.shape[1] == 2, \
            (f"scales shape is {src_to_colmap_scales.shape} does not match "
             + f"({len(frames)}, 2) with threshold {args.dense_frame_ratio}")
        print("Existing scales file loaded.")
    else:
        # Scale depth maps
        os.makedirs(scaled_depth_dir, exist_ok=True)
        src_to_colmap_scales_map = {}

        for i in frames:
            converted_depth_fn = converted_depth_fmt.format(i)
            if not os.path.isfile(converted_depth_fn):
                logging.warning(
                    "[SCALE CALIBRATION] %s does not exist",
                    converted_depth_fn)
                continue
            # Load the converted COLMAP depth written by the stage above.
            inv_cmp_depth = image_io.load_raw_float32_image(converted_depth_fn)
            # compute scale for init depths
            inv_src_depth = image_io.load_raw_float32_image(
                src_depth_fmt.format(i))
            # src_depth * scale = (1/inv_src_depth) * scale == cmp_depth
            inv_cmp_depth = cv2.resize(
                inv_cmp_depth, inv_src_depth.shape[:2][::-1],
                interpolation=cv2.INTER_NEAREST)
            ix = np.isfinite(inv_cmp_depth)

            if np.sum(ix) / ix.size < args.dense_pixel_ratio:
                # not enough pixels are valid and hence the frame is invalid.
                continue

            scales = (inv_src_depth / inv_cmp_depth)[ix]
            scale = np.median(scales)
            print(f"Scale[{i}]: median={scale}, std={np.std(scales)}")
            # scale = np.median(inv_depth) * np.median(cmp_depth)
            src_to_colmap_scales_map[i] = float(scale)
            scaled_inv_src_depth = inv_src_depth / scale
            image_io.save_raw_float32_image(
                scaled_depth_fmt.format(i), scaled_inv_src_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                scaled_depth_dir, scaled_depth_dir, force=True)

        # Write scales.csv: one (frame index, scale) row per valid frame.
        xs = sorted(src_to_colmap_scales_map.keys())
        ys = [src_to_colmap_scales_map[x] for x in xs]
        src_to_colmap_scales = np.stack((np.array(xs), np.array(ys)), axis=-1)
        np.savetxt(scales_file, src_to_colmap_scales, delimiter=",")

    # The first column holds the frame indices that received a scale.
    valid_frames = {int(s) for s in src_to_colmap_scales[:, 0]}

    # Scale the extrinsics' translations
    scaled_meta_file = pjoin(out_dir, "metadata_scaled.npz")
    if os.path.isfile(scaled_meta_file):
        print("Scaled metadata file exists.")
    else:
        scales = src_to_colmap_scales[:, 1]
        mean_scale = scales.mean()
        print(f"[scales] mean={mean_scale}, std={np.std(scales)}")

        with np.load(src_meta_file) as meta_colmap:
            intrinsics = meta_colmap["intrinsics"]
            extrinsics = meta_colmap["extrinsics"]

        extrinsics[..., -1] /= mean_scale
        np.savez(
            scaled_meta_file,
            intrinsics=intrinsics,
            extrinsics=extrinsics,
            scales=src_to_colmap_scales,
        )

        color_fmt = pjoin(video.path, "color_down", "frame_{:06d}.raw")
        vis_dir = pjoin(out_dir, "vis_calibration_dense")
        visualize_all_calibration(
            extrinsics, intrinsics, scaled_depth_fmt,
            color_fmt, frame_range, vis_dir,
        )

    return valid_frames
def extract_frames(self, params):
    """Run PTS extraction followed by frame extraction on ``self.video``.

    Args:
        params: Not read by this method; kept as part of the signature.
    """
    source_video = self.video

    print_banner("Extracting PTS")
    source_video.extract_pts()

    print_banner("Extracting frames")
    source_video.extract_frames()
def extract_frames(self, params):
    """Extract PTS data, then frames, from ``self.video``.

    Args:
        params: Not read by this method; kept as part of the signature.
    """
    clip = self.video

    # Print a banner announcing the step.
    print_banner("Extracting PTS")
    # Per the original note: reads the PTS from the video file and saves
    # it to frames.txt (done inside extract_pts).
    clip.extract_pts()

    print_banner("Extracting frames")
    # Extract the frames from the video file.
    clip.extract_frames()