예제 #1
0
    def pipeline(self, params):
        """Run every processing stage for this video, in order.

        The stages are: frame extraction, three downscaling passes, initial
        depth export, scale calibration, flow computation / masking /
        visualization, fine-tuning, final depth export and, when requested,
        visualization videos.

        Args:
            params: Run configuration. The attributes read here are ``size``,
                ``frame_range``, ``model_type``, ``flow_ops``,
                ``flow_checkpoint``, ``overlap_ratio`` and ``make_video``;
                the object is also forwarded to the helpers called below.

        Returns:
            A 3-tuple ``(initial_depth_dir, fine_tuner.out_dir, frames)``
            where ``frames`` is ``frame_range.frames()`` for the requested
            (unfiltered) frame range.
        """
        # Split the video into single-frame images.
        self.extract_frames(params)

        # Downscaled copy in "raw" format.
        print_banner("Downscaling frames (raw)")
        self.video.downscale_frames("color_down", params.size, "raw")

        # Same size, stored as png.
        print_banner("Downscaling frames (png)")
        self.video.downscale_frames("color_down_png", params.size, "png")

        # Copy sized for the flow stage, aligned to 64.
        print_banner("Downscaling frames (for flow)")
        self.video.downscale_frames(
            "color_flow", Flow.max_size(), "png", align=64)

        # Resolve the requested frame range against the video length.
        selected_range = FrameRange(
            frame_range=params.frame_range.set,
            num_frames=self.video.frame_count,
        )
        frame_ids = selected_range.frames()

        print_banner("Compute initial depth")

        fine_tuner = DepthFineTuner(self.out_dir, frame_ids, params)
        init_depth_dir = pjoin(self.path, f"depth_{params.model_type}")
        # Only export the initial depth when the raw frames are not on disk.
        init_depth_ready = self.video.check_frames(
            pjoin(init_depth_dir, "depth"), "raw")
        if not init_depth_ready:
            fine_tuner.save_depth(init_depth_dir)

        # Frames for which scale calibration produced a result.
        calibrated = calibrate_scale(
            self.video, self.out_dir, selected_range, params)

        # Frame range for fine-tuning: requested frames that were calibrated.
        tuning_range = selected_range.intersection(
            OptionalSet(set(calibrated)))
        dropped = set(selected_range.frames()) - set(tuning_range.frames())
        print("Filtered out frames", sorted(dropped))

        print_banner("Compute flow")

        # Sample the frame pairs that flow is computed for.
        pairs = sample_pairs(tuning_range, params.flow_ops)
        self.flow.compute_flow(pairs, params.flow_checkpoint)

        print_banner("Compute flow masks")

        self.flow.mask_valid_correspondences()

        good_pairs_file = self.flow.check_good_flow_pairs(
            pairs, params.overlap_ratio)
        flow_list_copy = pjoin(self.path, "flow_list.json")
        shutil.copyfile(good_pairs_file, flow_list_copy)

        print_banner("Visualize flow")

        self.flow.visualize_flow(warp=True)

        print_banner("Fine-tuning")

        fine_tuner.fine_tune(writer=self.writer)

        print_banner("Compute final depth")

        # Export the fine-tuned depth unless it is already present.
        final_depth_ready = self.video.check_frames(
            pjoin(fine_tuner.out_dir, "depth"), "raw", frame_ids)
        if not final_depth_ready:
            fine_tuner.save_depth(fine_tuner.out_dir, frame_ids)

        if params.make_video:
            print_banner("Export visualization videos")
            self.make_videos(params, fine_tuner.out_dir)

        return init_depth_dir, fine_tuner.out_dir, selected_range.frames()
예제 #2
0
    def extract_frames(self, params):
        """Run PTS extraction and then frame extraction on ``self.video``.

        Args:
            params: Not read by this method.
        """
        # Original note: reads frame-rate information. PTS presumably stands
        # for presentation timestamps — TODO confirm.
        print_banner("Extracting PTS")
        self.video.extract_pts()

        print_banner("Extracting frames")  # original note: expand the video
        self.video.extract_frames()  # original note: split the video into frames
def calibrate_scale(video, out_dir, frame_range, args):
    """Calibrate the initial depth maps against COLMAP dense depth.

    The function works in four cached stages; each stage is skipped when its
    outputs are already on disk:

    1. COLMAP reconstruction, storing camera parameters in
       ``<video.path>/colmap_dense/metadata.npz``.
    2. Conversion of the COLMAP dense depth maps into inverse-depth ``.raw``
       files under ``<video.path>/depth_colmap_dense/depth``.
    3. Per-frame scale estimation (median ratio of the source inverse depth
       to the COLMAP inverse depth), writing scaled depth maps to
       ``<out_dir>/depth_scaled_by_colmap_dense/depth`` and the scales to
       ``<out_dir>/scales.csv``.
    4. Division of the last extrinsics column by the mean scale, saved to
       ``<out_dir>/metadata_scaled.npz``, followed by a calibration
       visualization.

    Args:
        video: Object exposing ``path``; also passed to
            ``prepare_colmap_color``.
        out_dir: Directory receiving the scaled depth, ``scales.csv`` and
            ``metadata_scaled.npz``.
        frame_range: Object whose ``frames()`` yields the frame indices to
            process.
        args: Parameter namespace. Reads ``colmap_bin_path``,
            ``dense_frame_ratio``, ``size``, ``model_type`` and
            ``dense_pixel_ratio``. It is also handed to
            ``COLMAPParams().parse_args`` as ``namespace``.

    Returns:
        Set of ``int`` frame indices that have an entry in the scales table.

    Raises:
        AssertionError: If a cached ``scales.csv`` has too few rows or does
            not have exactly two columns.
    """
    # COLMAP reconstruction.
    print_banner("COLMAP reconstruction")

    colmap_dir = pjoin(video.path, 'colmap_dense')
    src_meta_file = pjoin(colmap_dir, "metadata.npz")

    colmap = COLMAPProcessor(args.colmap_bin_path)
    dense_dir = colmap.dense_dir(colmap_dir, 0)

    # The metadata file marks a finished reconstruction.
    if os.path.isfile(src_meta_file):
        print("Checked metadata file exists.")
    else:
        color_dir = prepare_colmap_color(video)

        if not colmap.check_dense(
                dense_dir, color_dir, valid_ratio=args.dense_frame_ratio):
            path_args = [color_dir, colmap_dir]
            mask_path = pjoin(video.path, 'colmap_mask')
            # The mask directory is optional; pass it on only when present.
            if os.path.isdir(mask_path):
                path_args.extend(['--mask_path', mask_path])
            colmap_args = COLMAPParams().parse_args(
                args=path_args +
                ['--dense_max_size', str(args.size)],
                namespace=args)

            colmap.process(colmap_args)

        intrinsics, extrinsics = make_camera_params_from_colmap(
            video.path, colmap.sparse_dir(colmap_dir, 0))
        np.savez(src_meta_file, intrinsics=intrinsics, extrinsics=extrinsics)

    # Convert COLMAP dense depth maps to the .raw file format.
    print_banner("Convert COLMAP depth maps")

    converted_depth_fmt = pjoin(video.path, "depth_colmap_dense", "depth",
                                "frame_{:06d}.raw")

    converted_depth_dir = os.path.dirname(converted_depth_fmt)
    dense_depth_dir = pjoin(dense_dir, "stereo", "depth_maps")
    frames = frame_range.frames()
    if not check_frames(
            dense_depth_dir,
            colmap.dense_depth_suffix(),
            converted_depth_dir,
            "",
            frame_names={f"frame_{i:06d}.png"
                         for i in frames},
    ):
        os.makedirs(converted_depth_dir, exist_ok=True)
        colmap_depth_fmt = pjoin(
            dense_depth_dir, "frame_{:06d}.png" + colmap.dense_depth_suffix())
        for i in frames:
            colmap_depth_fn = colmap_depth_fmt.format(i)
            if not os.path.isfile(colmap_depth_fn):
                logging.warning("[SCALE CALIBRATION] %s does not exist.",
                                colmap_depth_fn)
                continue
            cmp_depth = load_colmap.read_array(colmap_depth_fn)
            # Store inverse depth; infinite (depth == 0) and negative values
            # are marked invalid with NaN.
            inv_cmp_depth = 1.0 / cmp_depth
            ix = np.isinf(inv_cmp_depth) | (inv_cmp_depth < 0)
            inv_cmp_depth[ix] = float("nan")
            image_io.save_raw_float32_image(converted_depth_fmt.format(i),
                                            inv_cmp_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                converted_depth_dir,
                converted_depth_dir,
                force=True,
                min_percentile=0,
                max_percentile=99,
            )

    # Compute scaled depth maps.
    print_banner("Compute per-frame scales")

    scaled_depth_dir = pjoin(out_dir, "depth_scaled_by_colmap_dense", "depth")
    scaled_depth_fmt = pjoin(scaled_depth_dir, "frame_{:06d}.raw")
    scales_file = pjoin(out_dir, "scales.csv")
    src_depth_fmt = pjoin(video.path, f"depth_{args.model_type}", "depth",
                          "frame_{:06d}.raw")

    if (check_frames(converted_depth_dir, ".png",
                     os.path.dirname(scaled_depth_fmt), ".raw")
            and os.path.isfile(scales_file)):
        # ndmin=2 keeps the (rows, 2) layout even when the file holds a
        # single row; without it loadtxt squeezes to 1-D and the column
        # indexing below raises IndexError.
        src_to_colmap_scales = np.loadtxt(scales_file, delimiter=',', ndmin=2)
        assert src_to_colmap_scales.shape[0] >= len(frames) * args.dense_frame_ratio \
            and src_to_colmap_scales.shape[1] == 2, \
            (f"scales shape is {src_to_colmap_scales.shape} does not match "
             + f"({len(frames)}, 2) with threshold {args.dense_frame_ratio}")
        print("Existing scales file loaded.")
    else:
        # Scale depth maps.
        os.makedirs(scaled_depth_dir, exist_ok=True)
        src_to_colmap_scales_map = {}

        for i in frames:
            converted_depth_fn = converted_depth_fmt.format(i)
            if not os.path.isfile(converted_depth_fn):
                logging.warning("[SCALE CALIBRATION] %s does not exist",
                                converted_depth_fn)
                continue
            # Load the converted COLMAP inverse depth.
            inv_cmp_depth = image_io.load_raw_float32_image(converted_depth_fn)
            # Load the initial (source) inverse depth to compute its scale.
            inv_src_depth = image_io.load_raw_float32_image(
                src_depth_fmt.format(i))
            # src_depth * scale = (1/inv_src_depth) * scale == cmp_depth
            # Resize the COLMAP map to the source resolution; nearest
            # neighbour avoids blending NaN with valid pixels.
            inv_cmp_depth = cv2.resize(inv_cmp_depth,
                                       inv_src_depth.shape[:2][::-1],
                                       interpolation=cv2.INTER_NEAREST)
            ix = np.isfinite(inv_cmp_depth)

            if np.sum(ix) / ix.size < args.dense_pixel_ratio:
                # not enough pixels are valid and hence the frame is invalid.
                continue

            scales = (inv_src_depth / inv_cmp_depth)[ix]
            scale = np.median(scales)
            print(f"Scale[{i}]: median={scale}, std={np.std(scales)}")
            src_to_colmap_scales_map[i] = float(scale)
            scaled_inv_src_depth = inv_src_depth / scale
            image_io.save_raw_float32_image(scaled_depth_fmt.format(i),
                                            scaled_inv_src_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(scaled_depth_dir,
                                              scaled_depth_dir,
                                              force=True)

        # Write scales.csv as rows of (frame index, scale).
        xs = sorted(src_to_colmap_scales_map.keys())
        ys = [src_to_colmap_scales_map[x] for x in xs]
        src_to_colmap_scales = np.stack((np.array(xs), np.array(ys)), axis=-1)
        np.savetxt(scales_file, src_to_colmap_scales, delimiter=",")

    valid_frames = {int(s) for s in src_to_colmap_scales[:, 0]}

    # Scale the extrinsics' translations
    scaled_meta_file = pjoin(out_dir, "metadata_scaled.npz")
    if os.path.isfile(scaled_meta_file):
        print("Scaled metadata file exists.")
    else:
        scales = src_to_colmap_scales[:, 1]
        mean_scale = scales.mean()
        print(f"[scales] mean={mean_scale}, std={np.std(scales)}")

        with np.load(src_meta_file) as meta_colmap:
            intrinsics = meta_colmap["intrinsics"]
            extrinsics = meta_colmap["extrinsics"]

        # Only the last column of each extrinsic matrix is rescaled.
        extrinsics[..., -1] /= mean_scale
        np.savez(
            scaled_meta_file,
            intrinsics=intrinsics,
            extrinsics=extrinsics,
            scales=src_to_colmap_scales,
        )

        color_fmt = pjoin(video.path, "color_down", "frame_{:06d}.raw")
        vis_dir = pjoin(out_dir, "vis_calibration_dense")
        visualize_all_calibration(
            extrinsics,
            intrinsics,
            scaled_depth_fmt,
            color_fmt,
            frame_range,
            vis_dir,
        )

    return valid_frames
예제 #4
0
    def extract_frames(self, params):
        """Run PTS extraction and then frame extraction on ``self.video``.

        Args:
            params: Not read by this method.
        """
        # PTS presumably stands for presentation timestamps — TODO confirm.
        print_banner("Extracting PTS")
        self.video.extract_pts()

        print_banner("Extracting frames")
        self.video.extract_frames()
예제 #5
0
    def extract_frames(self, params):
        """Run PTS extraction and then frame extraction on ``self.video``.

        Args:
            params: Not read by this method.
        """
        print_banner("Extracting PTS")  # print a beautiful banner
        # Original note: obtains the PTS from the video file and saves it to
        # frames.txt — not verifiable from this method, TODO confirm.
        self.video.extract_pts()

        print_banner("Extracting frames")
        self.video.extract_frames()  # original note: extract frames from the video file