def process(args):
    """Compute FlowNet2 optical flow for each image pair and save it to disk.

    ``args.im1``, ``args.im2`` and ``args.out`` are parallel sequences of the
    same length. A pair is skipped when its output file already exists.
    Otherwise the flow is inferred, resized when ``args.size`` is truthy,
    written with ``save_raw_float32_image`` and, when ``args.visualize`` is
    truthy, also rendered to a ``.png`` file next to the output.

    Args:
        args: namespace providing ``im1``, ``im2``, ``out``,
            ``pretrained_model_flownet2``, ``size`` and ``visualize``. It is
            also forwarded to ``FlowNet2`` and ``infer``.

    Raises:
        AssertionError: if the three path sequences differ in length.
    """
    N = len(args.im1)
    assert N == len(args.im2) and N == len(args.out)

    device = torch.device("cuda:0")
    Flownet = FlowNet2(args)
    print(f"Loading pretrained model from '{args.pretrained_model_flownet2}'.")
    # Deserialize onto the CPU so the checkpoint loads regardless of which
    # device it was saved from; the model is moved to `device` right after.
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2,
                               map_location="cpu")
    Flownet.load_state_dict(flownet2_ckpt["state_dict"])
    Flownet.to(device)
    Flownet.eval()

    for im1, im2, out in zip(args.im1, args.im2, args.out):
        if os.path.isfile(out):
            # Output already present: nothing to do for this pair.
            continue

        flow = infer(args, Flownet, device, im1, im2)
        if args.size:
            flow = resize_flow(flow, args.size)
        print(flow.shape)

        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        out_dir = os.path.dirname(out)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        save_raw_float32_image(out, flow)

        if args.visualize:
            vis = flow_to_image(flow)
            cv2.imwrite(os.path.splitext(out)[0] + ".png", vis)
예제 #2
0
    def downscale_frames(self,
                         subdir,
                         max_size,
                         ext,
                         align=16,
                         full_subdir="color_full"):
        """Write resized copies of the full-resolution frames.

        Frames are read from ``<self.path>/<full_subdir>/frame_NNNNNN.png``
        and written to ``<self.path>/<subdir>/frame_NNNNNN.<ext>``. Nothing
        is written when ``self.check_frames`` already accepts the target
        directory.

        Args:
            subdir: name of the output directory under ``self.path``.
            max_size: forwarded to ``image_io.load_image``.
            ext: output extension without the dot; ``"raw"`` selects
                ``image_io.save_raw_float32_image``, anything else goes
                through ``cv2.imwrite``.
            align: forwarded to ``image_io.load_image``.
            full_subdir: name of the input directory under ``self.path``.
        """
        source_dir = pjoin(self.path, full_subdir)
        target_dir = pjoin(self.path, subdir)
        mkdir_ifnotexists(target_dir)

        # Early exit: the target frames are already present and pass the check.
        if self.check_frames(target_dir, ext):
            return

        write_raw = ext == "raw"
        for idx in range(self.frame_count):
            src_path = f"{source_dir}/frame_{idx:06d}.png"
            dst_path = f"{target_dir}/frame_{idx:06d}.{ext}"

            # Loader messages are only let through for the very first frame.
            frame = image_io.load_image(
                src_path,
                max_size=max_size,
                align=align,
                suppress_messages=idx > 0,
            )
            # Reverse the last axis (channel swizzle).
            frame = frame[..., ::-1]

            if write_raw:
                image_io.save_raw_float32_image(dst_path, frame)
            else:
                cv2.imwrite(dst_path, frame * 255)

        self.check_frames(target_dir, ext)
예제 #3
0
    def save_depth(self, dir: str = None, frames=None):
        """Run the model on a set of frames and save the inverse depth maps.

        Color frames are read through ``VideoFrameDataset`` from
        ``<self.base_dir>/color_down/frame_NNNNNN.raw``. For each frame the
        value ``1.0 / depth`` is written to ``<dir>/depth/frame_NNNNNN.raw``,
        and afterwards ``visualization.visualize_depth_dir`` is run on that
        directory.

        Side effects: enables cuDNN and cuDNN benchmarking globally and puts
        ``self.model`` into eval mode.

        Args:
            dir: output root; defaults to ``self.out_dir`` when ``None``.
            frames: frames to process; defaults to ``self.frames`` when
                ``None``.
        """
        if dir is None:
            dir = self.out_dir
        if frames is None:
            frames = self.frames

        color_fmt = pjoin(self.base_dir, "color_down", "frame_{:06d}.raw")
        depth_dir = pjoin(dir, "depth")
        depth_fmt = pjoin(depth_dir, "frame_{:06d}")

        dataset = VideoFrameDataset(color_fmt, frames)
        data_loader = DataLoader(dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=4)

        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

        self.model.eval()

        os.makedirs(depth_dir, exist_ok=True)
        for data in data_loader:
            data = to_device(data)
            stacked_images, metadata = data
            # batch_size is 1, so the first entry is this batch's frame id.
            frame_id = metadata["frame_id"][0]

            # Pure inference: skip autograd bookkeeping so no graph is built
            # for a result that is detached immediately afterwards.
            with torch.no_grad():
                depth = self.model.forward(stacked_images, metadata)

            depth = depth.detach().cpu().numpy().squeeze()
            inv_depth = 1.0 / depth

            image_io.save_raw_float32_image(
                depth_fmt.format(frame_id) + ".raw", inv_depth)

        with SuppressedStdout():
            visualization.visualize_depth_dir(depth_dir, depth_dir, force=True)
예제 #4
0
    def eval_and_save(self, criterion, data_loader, suf) -> Dict[str, torch.Tensor]:
        """Evaluate the model, save inverse depth maps and report the losses.

        Note this function assumes the structure of the data produced by
        data_loader: every batch unpacks to ``(stacked_img, metadata)`` and
        ``metadata["geometry_consistency"]["indices"]`` holds, per sample,
        the frame indices belonging to that sample.

        For each frame index seen for the first time, the inverse depth is
        written to ``<self.out_dir>/eval/depth_NNNNNN<suf>.raw`` together
        with a ``.png`` visualization. All per-sample losses plus their means
        are dumped to ``<self.out_dir>/eval/loss<suf>.json`` and a summary
        table is printed to stdout.

        Args:
            criterion: called as ``criterion(depth, metadata, parameters=...)``;
                the second element of its result maps loss names to
                per-sample losses.
            data_loader: loader whose ``dataset`` supports ``len``.
            suf: suffix appended to the output file names.

        Returns:
            Mapping from loss name to a tensor of the per-sample loss values.
        """
        N = len(data_loader.dataset)
        loss_dict = {}
        saved_frames = set()
        max_frame_index = 0
        all_pairs = []

        # zip with range(N) caps the number of consumed batches at N.
        for _, data in zip(range(N), data_loader):
            data = to_device(data)
            stacked_img, metadata = data

            with torch.no_grad():
                depth = self.model(stacked_img, metadata)

            batch_indices = (
                metadata["geometry_consistency"]["indices"].cpu().numpy().tolist()
            )

            # Update the maximum frame index and pairs list.
            max_frame_index = max(max_frame_index, max(itertools.chain(*batch_indices)))
            all_pairs += batch_indices

            # Compute and store losses.
            _, loss_meta = criterion(
                depth, metadata, parameters=self.model.parameters(),
            )

            for loss_name, losses in loss_meta.items():
                per_sample = loss_dict.setdefault(loss_name, {})
                for indices, loss in zip(batch_indices, losses):
                    per_sample[str(indices)] = loss.item()

            # Save depth maps.
            inv_depths_batch = 1.0 / depth.cpu().detach().numpy()
            if self.vis_depth_scale is None:
                # Single scale for the whole dataset.
                self.vis_depth_scale = inv_depths_batch.max()

            for inv_depths, indices in zip(inv_depths_batch, batch_indices):
                for inv_depth, index in zip(inv_depths, indices):
                    # Only save frames not saved before.
                    if index in saved_frames:
                        continue
                    saved_frames.add(index)

                    fn_pre = pjoin(
                        self.out_dir, "eval", "depth_{:06d}{}".format(index, suf)
                    )
                    image_io.save_raw_float32_image(fn_pre + ".raw", inv_depth)

                    inv_depth_vis = visualization.visualize_depth(
                        inv_depth, depth_min=0, depth_max=self.vis_depth_scale
                    )
                    cv2.imwrite(fn_pre + ".png", inv_depth_vis)

        loss_meta = {
            loss_name: torch.tensor(tuple(loss.values()))
            for loss_name, loss in loss_dict.items()
        }
        # Capture the real loss names before the "mean" entry is added.
        loss_names = list(loss_dict.keys())
        mean_losses = {k: v.mean().item() for k, v in loss_meta.items()}
        loss_dict["mean"] = mean_losses

        with open(pjoin(self.out_dir, "eval", "loss{}.json".format(suf)), "w") as f:
            json.dump(loss_dict, f)

        # Print verbose summary to stdout.
        # Counting digits avoids log10(0) and is exact at powers of ten.
        index_width = len(str(max_frame_index))
        loss_format = {}
        for name in loss_names:
            max_value = max(loss_dict[name].values(), default=0.0)
            # Number of integer digits of the largest loss. Values that are
            # below 1, non-positive or non-finite need a single leading
            # digit (log10 would raise on them).
            if math.isfinite(max_value) and max_value >= 1:
                int_digits = len(str(int(max_value)))
            else:
                int_digits = 1
            # +7 leaves room for the decimal point and six decimals.
            loss_format[name] = f"{int_digits + 7}.6f"

        for pair in sorted(all_pairs):
            line = f"({pair[0]:{index_width}d}, {pair[1]:{index_width}d}): "
            line += ", ".join(
                [f"{name}: {loss_dict[name][str(pair)]:{loss_format[name]}}"
                 for name in loss_names]
            )
            print(line)

        # Report the actual per-loss means, not the losses of the last pair.
        print("Mean: " + " " * (2 * index_width) + ", ".join(
            [f"{name}: {mean_losses[name]:{loss_format[name]}}"
             for name in loss_names]
        ))

        return loss_meta
def calibrate_scale(video, out_dir, frame_range, args):
    """Calibrate the scale of the source depth maps against COLMAP depth.

    Steps, each skipped when its outputs already exist:

    1. Run the COLMAP reconstruction and store the camera parameters in
       ``<video.path>/colmap_dense/metadata.npz``.
    2. Convert the COLMAP dense depth maps to inverse-depth ``.raw`` files
       under ``<video.path>/depth_colmap_dense/depth``; infinite or negative
       inverse depths are stored as NaN.
    3. Per frame, take the median of ``inv_src_depth / inv_cmp_depth`` over
       the finite pixels as the scale, save the rescaled source inverse
       depth under ``<out_dir>/depth_scaled_by_colmap_dense/depth`` and
       write all ``(frame, scale)`` rows to ``<out_dir>/scales.csv``.
    4. Divide the last column of the extrinsics by the mean scale, save the
       result to ``<out_dir>/metadata_scaled.npz`` and render the
       calibration visualization.

    Args:
        video: object exposing ``path``; also passed to
            ``prepare_colmap_color``.
        out_dir: directory receiving the scaled depth maps, ``scales.csv``
            and ``metadata_scaled.npz``.
        frame_range: object whose ``frames()`` yields the frame indices.
        args: namespace providing ``colmap_bin_path``, ``dense_frame_ratio``,
            ``dense_pixel_ratio``, ``size`` and ``model_type``.

    Returns:
        Set of frame indices (ints) for which a scale is available.
    """
    # COLMAP reconstruction.
    print_banner("COLMAP reconstruction")

    colmap_dir = pjoin(video.path, 'colmap_dense')
    src_meta_file = pjoin(colmap_dir, "metadata.npz")
    # Set up the COLMAP processor.
    colmap = COLMAPProcessor(args.colmap_bin_path)
    dense_dir = colmap.dense_dir(colmap_dir, 0)
    # Check whether the metadata file already exists.
    if os.path.isfile(src_meta_file):
        print("Checked metadata file exists.")
    else:
        # Directory passed to COLMAP as its first path argument.
        color_dir = prepare_colmap_color(video)

        if not colmap.check_dense(
                dense_dir, color_dir, valid_ratio=args.dense_frame_ratio):
            path_args = [color_dir, colmap_dir]
            mask_path = pjoin(video.path, 'colmap_mask')
            if os.path.isdir(mask_path):
                path_args.extend(['--mask_path', mask_path])
            colmap_args = COLMAPParams().parse_args(
                args=path_args +
                ['--dense_max_size', str(args.size)],
                namespace=args)

            colmap.process(colmap_args)

        intrinsics, extrinsics = make_camera_params_from_colmap(
            video.path, colmap.sparse_dir(colmap_dir, 0))
        np.savez(src_meta_file, intrinsics=intrinsics, extrinsics=extrinsics)

    # Convert COLMAP dense depth maps to .raw file format.
    print_banner("Convert COLMAP depth maps")

    converted_depth_fmt = pjoin(video.path, "depth_colmap_dense", "depth",
                                "frame_{:06d}.raw")

    # Convert COLMAP dense depths to .raw.
    converted_depth_dir = os.path.dirname(converted_depth_fmt)
    dense_depth_dir = pjoin(dense_dir, "stereo", "depth_maps")
    frames = frame_range.frames()
    if not check_frames(
            dense_depth_dir,
            colmap.dense_depth_suffix(),
            converted_depth_dir,
            "",
            frame_names={f"frame_{i:06d}.png"
                         for i in frames},
    ):
        os.makedirs(converted_depth_dir, exist_ok=True)
        colmap_depth_fmt = pjoin(
            dense_depth_dir, "frame_{:06d}.png" + colmap.dense_depth_suffix())
        for i in frames:
            colmap_depth_fn = colmap_depth_fmt.format(i)
            if not os.path.isfile(colmap_depth_fn):
                logging.warning("[SCALE CALIBRATION] %s does not exist.",
                                colmap_depth_fn)
                continue
            cmp_depth = load_colmap.read_array(colmap_depth_fn)
            inv_cmp_depth = 1.0 / cmp_depth
            # Mark infinite and negative inverse depths as invalid (NaN).
            ix = np.isinf(inv_cmp_depth) | (inv_cmp_depth < 0)
            inv_cmp_depth[ix] = float("nan")
            image_io.save_raw_float32_image(converted_depth_fmt.format(i),
                                            inv_cmp_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                converted_depth_dir,
                converted_depth_dir,
                force=True,
                min_percentile=0,
                max_percentile=99,
            )

    # Compute scaled depth maps
    print_banner("Compute per-frame scales")

    scaled_depth_dir = pjoin(out_dir, "depth_scaled_by_colmap_dense", "depth")
    scaled_depth_fmt = pjoin(scaled_depth_dir, "frame_{:06d}.raw")
    scales_file = pjoin(out_dir, "scales.csv")
    src_depth_fmt = pjoin(video.path, f"depth_{args.model_type}", "depth",
                          "frame_{:06d}.raw")
    frames = frame_range.frames()

    if (check_frames(converted_depth_dir, ".png",
                     os.path.dirname(scaled_depth_fmt), ".raw")
            and os.path.isfile(scales_file)):
        # ndmin=2 keeps a single-row file two-dimensional; without it
        # loadtxt squeezes the array to 1-D and the column indexing below
        # raises IndexError.
        src_to_colmap_scales = np.loadtxt(scales_file, delimiter=',', ndmin=2)
        assert src_to_colmap_scales.shape[0] >= len(frames) * args.dense_frame_ratio \
            and src_to_colmap_scales.shape[1] == 2, \
            (f"scales shape is {src_to_colmap_scales.shape} does not match "
             + f"({len(frames)}, 2) with threshold {args.dense_frame_ratio}")
        print("Existing scales file loaded.")
    else:
        # Scale depth maps
        os.makedirs(scaled_depth_dir, exist_ok=True)
        src_to_colmap_scales_map = {}

        for i in frames:
            converted_depth_fn = converted_depth_fmt.format(i)
            if not os.path.isfile(converted_depth_fn):
                logging.warning("[SCALE CALIBRATION] %s does not exist",
                                converted_depth_fn)
                continue
            # Load the converted COLMAP inverse depth.
            inv_cmp_depth = image_io.load_raw_float32_image(converted_depth_fn)
            # compute scale for init depths
            inv_src_depth = image_io.load_raw_float32_image(
                src_depth_fmt.format(i))
            # src_depth * scale = (1/inv_src_depth) * scale == cmp_depth
            # cv2.resize takes (width, height), hence the reversed shape.
            inv_cmp_depth = cv2.resize(inv_cmp_depth,
                                       inv_src_depth.shape[:2][::-1],
                                       interpolation=cv2.INTER_NEAREST)
            ix = np.isfinite(inv_cmp_depth)

            if np.sum(ix) / ix.size < args.dense_pixel_ratio:
                # not enough pixels are valid and hence the frame is invalid.
                continue

            scales = (inv_src_depth / inv_cmp_depth)[ix]
            scale = np.median(scales)
            print(f"Scale[{i}]: median={scale}, std={np.std(scales)}")
            src_to_colmap_scales_map[i] = float(scale)
            scaled_inv_src_depth = inv_src_depth / scale
            image_io.save_raw_float32_image(scaled_depth_fmt.format(i),
                                            scaled_inv_src_depth)
        with SuppressedStdout():
            visualization.visualize_depth_dir(scaled_depth_dir,
                                              scaled_depth_dir,
                                              force=True)

        # Write scales.csv: one (frame index, scale) row per valid frame.
        xs = sorted(src_to_colmap_scales_map.keys())
        ys = [src_to_colmap_scales_map[x] for x in xs]
        src_to_colmap_scales = np.stack((np.array(xs), np.array(ys)), axis=-1)
        np.savetxt(scales_file, src_to_colmap_scales, delimiter=",")

    valid_frames = {int(s) for s in src_to_colmap_scales[:, 0]}

    # Scale the extrinsics' translations
    scaled_meta_file = pjoin(out_dir, "metadata_scaled.npz")
    if os.path.isfile(scaled_meta_file):
        print("Scaled metadata file exists.")
    else:
        scales = src_to_colmap_scales[:, 1]
        mean_scale = scales.mean()
        print(f"[scales] mean={mean_scale}, std={np.std(scales)}")

        with np.load(src_meta_file) as meta_colmap:
            intrinsics = meta_colmap["intrinsics"]
            extrinsics = meta_colmap["extrinsics"]

        extrinsics[..., -1] /= mean_scale
        np.savez(
            scaled_meta_file,
            intrinsics=intrinsics,
            extrinsics=extrinsics,
            scales=src_to_colmap_scales,
        )

        color_fmt = pjoin(video.path, "color_down", "frame_{:06d}.raw")
        vis_dir = pjoin(out_dir, "vis_calibration_dense")
        visualize_all_calibration(
            extrinsics,
            intrinsics,
            scaled_depth_fmt,
            color_fmt,
            frame_range,
            vis_dir,
        )

    return valid_frames