示例#1
0
def update_scannet_info_json(path, path_meta, test_only=False, verbose=2):
    """Rewrite the file paths stored in every scene's info.json.

    Scenes are discovered by listing ``path/scans`` (skipped when
    ``test_only`` is set) and ``path/scans_test``. For each scene the file
    ``path_meta/<folder>/<scene>/info.json`` is loaded, its mesh,
    segmentation, per-frame image/depth/instance and TSDF volume paths are
    regenerated from ``path`` / ``path_meta``, and the file is written back
    in place.

    Args:
        path: dataset root containing the ``scans`` and ``scans_test`` folders
        path_meta: root of the parallel tree holding the info.json files
            (the TSDF volume paths are also pointed into this tree)
        test_only: if True, only the scenes under ``scans_test`` are updated
        verbose: a progress line is printed per scene when > 0
    """
    folders = ['scans_test'] if test_only else ['scans', 'scans_test']

    # Keep (folder, scene) as a pair instead of joining and re-splitting on
    # '/', which would fail on platforms whose path separator is not '/'.
    scenes = [(folder, scene)
              for folder in folders
              for scene in sorted(os.listdir(os.path.join(path, folder)))]

    for folder, scene in scenes:
        if verbose > 0:
            print('update info json for %s' % os.path.join(folder, scene))

        scene_dir = os.path.join(path, folder, scene)
        meta_dir = os.path.join(path_meta, folder, scene)
        info_file = os.path.join(meta_dir, 'info.json')
        data = load_info_json(info_file)

        data['path'] = path
        data['file_name_mesh_gt'] = os.path.join(
            scene_dir, scene + '_vh_clean_2.ply')
        data['file_name_seg_indices'] = os.path.join(
            scene_dir, scene + '_vh_clean_2.0.010000.segs.json')
        data['file_name_seg_groups'] = os.path.join(
            scene_dir, scene + '.aggregation.json')

        # Frames are addressed by their position in the list.
        for frame_id, frame in enumerate(data['frames']):
            frame['file_name_image'] = os.path.join(
                scene_dir, 'color', '%d.jpg' % frame_id)
            frame['file_name_depth'] = os.path.join(
                scene_dir, 'depth', '%d.png' % frame_id)
            # An empty string marks a frame without an instance image;
            # leave it empty in that case.
            if frame['file_name_instance'] != '':
                frame['file_name_instance'] = os.path.join(
                    scene_dir, 'instance-filt', '%d.png' % frame_id)

        for voxel_size in [4, 8, 16]:
            data['file_name_vol_%02d' % voxel_size] = os.path.join(
                meta_dir, 'tsdf_%02d.npz' % voxel_size)

        # Context manager guarantees the JSON is flushed and the handle closed.
        with open(info_file, 'w') as fp:
            json.dump(data, fp)
示例#2
0
def fuse_scene(path_meta,
               scene,
               voxel_size,
               trunc_ratio=3,
               max_depth=3,
               vol_prcnt=.995,
               vol_margin=1.5,
               fuse_semseg=False,
               device=0,
               verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF; divided by 100 before being handed
            to TSDFFusion (presumably centimeters -> meters, confirm)
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        vol_prcnt: for computing the bounding volume of the TSDF... ignore
            outliers
        vol_margin: padding for computing bounding volume of the TSDF
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (preferred method is to not accumulate and instead transfer
            labels from ground truth labeled mesh)
        device: index of the CUDA device to run on
        verbose: how much logging to print

    Returns:
        None. Writes a TSDF (.npz) and a mesh (.ply) into path_meta/scene and
        records the TSDF path in the scene's info.json.

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
    ground truth. However, this means some distant data is missing which can
    create artifacts. Nevertheless, we found we achieved the best 2d metrics
    with the less noisy ground truth.
    """

    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # gpu for this process
    device = torch.device('cuda', device)

    # get the dataset
    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.InstanceToSemseg('nyu40'),
        transforms.IntrinsicsPoseToProjection(),
    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)

    # find volume bounds and origin by backprojecting depth maps to point
    # clouds; use at most 200 evenly spaced frames to save time
    if len(dataset) <= 200:
        bounds_dataset = dataset
    else:
        # builtin int replaces the np.int alias, which newer NumPy removed
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        bounds_dataset = torch.utils.data.Subset(dataset, inds)
    bounds_loader = torch.utils.data.DataLoader(bounds_dataset,
                                                batch_size=None,
                                                batch_sampler=None,
                                                num_workers=4)

    pts = []
    for frame in bounds_loader:
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    # drop rows whose first coordinate is inf/nan
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(
        np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(
        np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()
    voxel_dim = float(voxel_size) / 100
    vol_dim = ((vol_max - origin) / voxel_dim).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim,
                             voxel_dim,
                             origin,
                             trunc_ratio,
                             device,
                             label=fuse_semseg)

    # integrate every frame of the scene
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=None,
                                             batch_sampler=None,
                                             num_workers=4)
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 25 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        # NOTE(review): 'image' is read although frame_types only requests
        # depth/semseg -- presumably SceneDataset always provides it; confirm.
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene,
                                 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene,
                                  'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    mesh = tsdf.get_mesh()
    mesh.export(file_name_mesh)
    if fuse_semseg:
        mesh = tsdf.get_mesh('instance')
        mesh.export(file_name_mesh.replace('.ply', '_semseg.ply'))

    # update info json; the context manager closes (and flushes) the file
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    with open(info_file, 'w') as fp:
        json.dump(data, fp)
示例#3
0
def label_scene(path_meta, scene, voxel_size, dist_thresh=.05, verbose=2):
    """ Transfer instance labels from ground truth mesh to TSDF

    For each voxel find the nearest vertex and transfer the label if
    it is close enough to the voxel.

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF to process
        dist_thresh: beyond this distance to the nearest gt mesh vertex,
            labels are not transferred (see the review note at the
            comparison below regarding units)
        verbose: how much logging to print

    Returns:
        None. Updates the TSDF (.npz) file with the instance volume and
        exports a '_semseg.ply' mesh next to it.
    """

    if verbose > 0:
        print('labeling', scene)

    info_file = os.path.join(path_meta, scene, 'info.json')
    data = load_info_json(info_file)
    vol_key = 'file_name_vol_%02d' % voxel_size

    # each vertex in gt mesh indexes a seg group
    with open(data['file_name_seg_indices'], 'r') as fp:
        segIndices = json.load(fp)['segIndices']

    # maps seg groups to instances
    with open(data['file_name_seg_groups'], 'r') as fp:
        segGroups = json.load(fp)['segGroups']
    mapping = {
        ind: group['id'] + 1
        for group in segGroups for ind in group['segments']
    }

    # get per vertex instance ids (0 is unknown, [1,...] are objects)
    instance_verts = torch.tensor([mapping.get(seg, 0) for seg in segIndices],
                                  dtype=torch.long)

    # load vertex locations
    mesh = trimesh.load(data['file_name_mesh_gt'], process=False)
    verts = mesh.vertices

    # construct kdtree of vertices for fast nn lookup
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(verts)
    kdtree = o3d.geometry.KDTreeFlann(pcd)

    # load tsdf volume and place its voxel grid coordinates in the mesh frame
    # by scaling with the voxel size and offsetting by the volume origin
    tsdf = TSDF.load(data[vol_key])
    coords = coordinates(tsdf.tsdf_vol.size(), device=torch.device('cpu'))
    coords = coords.type(torch.float) * tsdf.voxel_size + tsdf.origin.T
    mask = tsdf.tsdf_vol.abs().view(-1) < 1

    # transfer vertex instance ids to voxels near surface
    instance_vol = torch.zeros(len(mask), dtype=torch.long)
    for i in mask.nonzero():
        _, inds, dist = kdtree.search_knn_vector_3d(coords[:, i], 1)
        # NOTE(review): Open3D's KDTreeFlann is understood to report
        # *squared* distances; if so the effective cut-off here is
        # sqrt(dist_thresh). Behaviour is left unchanged -- confirm before
        # altering, since it changes the generated labels.
        if dist[0] < dist_thresh:
            instance_vol[i] = instance_verts[inds[0]]

    tsdf.attribute_vols['instance'] = instance_vol.view(
        list(tsdf.tsdf_vol.size()))
    tsdf.save(data[vol_key])

    # convert the instance volume to semseg labels and export it as a mesh
    key = 'vol_%02d' % voxel_size
    temp_data = {
        key: tsdf,
        'instances': data['instances'],
        'dataset': data['dataset']
    }
    tsdf = transforms.InstanceToSemseg('nyu40')(temp_data)[key]
    mesh = tsdf.get_mesh('semseg')
    fname = data[vol_key]
    mesh.export(fname.replace('tsdf', 'mesh').replace('.npz', '_semseg.ply'))
示例#4
0
def fuse_scene(path_meta, scene, info_file, dataset, voxel_size, device, origin, vol_max,
               trunc_ratio=3, max_depth=3, fuse_semseg=False, verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Unlike a self-contained variant, the dataset and the bounding volume
    (origin, vol_max) are supplied by the caller.

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        info_file: path of the scene's info.json; it is reloaded and
            rewritten with the path of the generated TSDF
        dataset: dataset of frames to integrate; each frame must provide
            'projection', 'image' and 'depth' (plus 'semseg' when
            fuse_semseg is set)
        voxel_size: voxel size of TSDF; divided by 100 before being handed
            to TSDFFusion (presumably centimeters -> meters, confirm)
        device: device the frame tensors are moved to and that is passed
            to TSDFFusion
        origin: lower corner of the TSDF bounding volume (presumably a
            torch tensor -- it is subtracted from vol_max and the result
            converted with .int().tolist())
        vol_max: upper corner of the TSDF bounding volume
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (preferred method is to not accumulate and instead transfer
            labels from ground truth labeled mesh)
        verbose: how much logging to print

    Returns:
        None. Writes a TSDF (.npz) and a mesh (.ply) into path_meta/scene and
        records the TSDF path in info_file.

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
    ground truth. However, this means some distant data is missing which can
    create artifacts. Nevertheless, we found we achieved the best 2d metrics
    with the less noisy ground truth.
    """

    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    voxel_dim = float(voxel_size) / 100
    vol_dim = ((vol_max - origin) / voxel_dim).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim, voxel_dim, origin,
                             trunc_ratio, device, label=fuse_semseg)

    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=4)

    # integrate frames
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 50 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene, 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene, 'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    mesh = tsdf.get_mesh()
    mesh.export(file_name_mesh)
    if fuse_semseg:
        mesh = tsdf.get_mesh('instance')
        mesh.export(file_name_mesh.replace('.ply', '_semseg.ply'))

    # update info json; the context manager closes (and flushes) the file
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    with open(info_file, 'w') as fp:
        json.dump(data, fp)
示例#5
0
def process(info_file, pathout, stride, scale):
    """ Run Colmap dense reconstruction with ground truth pose.

    Copies and creates the necessary file structure required by Colmap
    (downscaled images, cameras.txt, an empty points3D.txt and images.txt
    with the known poses) under ``pathout/<dataset>/<scene>``.
    Then runs Colmap.

    Args:
        info_file: path to info_json file for the scene
        pathout: path to store intermediate and final results
        stride: use every ``stride``-th frame (reduces runtime)
        scale: how much to downsample images (reduces runtime and often
            improves stereo matching results)

    Raises:
        ValueError: if the scene has no frames to process
        subprocess.CalledProcessError: if a Colmap command exits non-zero;
            every later step depends on the output of the previous one, so
            continuing would only fail less clearly further on
    """
    # Local import keeps the block self-contained.
    import subprocess

    info = load_info_json(info_file)
    dataset = info['dataset']
    scene = info['scene']
    frames = info['frames'][::stride]
    if not frames:
        raise ValueError('no frames to process in %s' % info_file)

    workspace = os.path.join(pathout, dataset, scene)
    image_dir = os.path.join(workspace, 'images')
    database = os.path.join(workspace, 'database.db')

    def run_colmap(*args):
        # Argument list (no shell): paths containing spaces or shell
        # metacharacters are passed through verbatim.
        subprocess.run(['colmap'] + list(args), check=True)

    os.makedirs(image_dir, exist_ok=True)

    # write downscaled copies of the selected frames
    for i, frame in enumerate(frames):
        if i % 25 == 0:
            print(i, len(frames))

        fname_out = os.path.join(
            image_dir, os.path.basename(frame['file_name_image']))
        with Image.open(frame['file_name_image']) as img:
            w = img.width // scale
            h = img.height // scale
            img.resize((w, h), Image.BILINEAR).save(fname_out)

    # a single PINHOLE camera shared by all images, taken from the first
    # frame's intrinsics and the size of the last image written
    intrinsics = frames[0]['intrinsics']
    with open(os.path.join(workspace, 'cameras.txt'), 'w') as fp:
        fp.write('1 PINHOLE {w} {h} {fx} {fy} {cx} {cy}'.format(
            w=w,
            h=h,
            fx=intrinsics[0][0] / scale,
            fy=intrinsics[1][1] / scale,
            cx=intrinsics[0][2] / scale,
            cy=intrinsics[1][2] / scale,
        ))

    # the point file must exist but stays empty
    with open(os.path.join(workspace, 'points3D.txt'), 'w') as fp:
        pass

    run_colmap('feature_extractor',
               '--database_path', database,
               '--image_path', image_dir)
    run_colmap('exhaustive_matcher', '--database_path', database)

    # image ids are assigned by Colmap, so read them back from its database
    conn = sqlite3.connect(database)
    try:
        c = conn.cursor()
        c.execute('SELECT image_id, name FROM images')
        db_list = sorted(c.fetchall(), key=lambda x: x[1])
    finally:
        conn.close()

    pose_dict = {
        os.path.basename(frame['file_name_image']): np.array(frame['pose'])
        for frame in frames
    }
    with open(os.path.join(workspace, 'images.txt'), 'w') as fp:
        for ind, name in db_list:
            # the stored pose is inverted before being written
            pose = np.linalg.inv(pose_dict[name])
            # as_quat() yields (x, y, z, w); the line is written w-first
            q = R.from_matrix(pose[:3, :3]).as_quat()
            t = pose[:3, 3]

            # Space-separated fields as in Colmap's documented text format;
            # the blank second line is the (empty) 2D point list.
            fp.write(
                '{i} {qw} {qx} {qy} {qz} {tx} {ty} {tz} 1 {name}\n\n'.format(
                    i=ind,
                    qw=q[3],
                    qx=q[0],
                    qy=q[1],
                    qz=q[2],
                    tx=t[0],
                    ty=t[1],
                    tz=t[2],
                    name=name))

    run_colmap('point_triangulator',
               '--database_path', database,
               '--image_path', image_dir,
               '--input_path', workspace,
               '--output_path', workspace)

    run_colmap('image_undistorter',
               '--image_path', image_dir,
               '--input_path', workspace,
               '--output_path', workspace)

    run_colmap('patch_match_stereo', '--workspace_path', workspace)

    run_colmap('stereo_fusion',
               '--workspace_path', workspace,
               '--output_path', os.path.join(workspace, 'fused.ply'))

    # the final mesh is written next to (not inside) the scene workspace
    run_colmap('delaunay_mesher',
               '--input_path', workspace,
               '--output_path', os.path.join(pathout, dataset, scene + '.ply'),
               '--DelaunayMeshing.quality_regularization', '5.',
               '--DelaunayMeshing.max_proj_dist', '10')
示例#6
0
def eval_scene(info_file, pathout):
    """ Evaluates COLMAP inference compared to ground truth

    Computes 2d depth metrics averaged over every frame that has a
    '<image name>.geometric.bin' depth map, 3d metrics between the fused
    point cloud and the ground truth mesh, and stores both in
    ``pathout/<dataset>/<scene>/metrics.json``.

    Args:
        info_file: path to info_json file for the scene
        pathout: path where intermediate and final results are stored

    Returns:
        dict merging the averaged depth metrics with the mesh metrics

    Raises:
        ValueError: if no frame of the scene has a geometric depth map
    """

    info = load_info_json(info_file)
    dataset = info['dataset']
    scene = info['scene']

    workspace = os.path.join(pathout, dataset, scene)
    depth_dir = os.path.join(workspace, 'stereo', 'depth_maps')

    # set membership keeps the per-frame lookup O(1)
    available = set(os.listdir(depth_dir))
    pairs = []
    for frame in info['frames']:
        name = os.path.basename(frame['file_name_image']) + '.geometric.bin'
        if name in available:
            pairs.append((frame, name))
    if not pairs:
        raise ValueError('no geometric depth maps found in %s' % depth_dir)

    # 2d depth metrics, summed over frames and averaged afterwards
    totals = None
    for i, (frame, name) in enumerate(pairs):
        if i % 25 == 0:
            print(scene, i, len(pairs))

        # target depth is divided by 1000 (presumably millimeters -> meters)
        depth_trgt = imageio.imread(
            frame['file_name_depth']).astype('float32') / 1000
        depth_pred = read_array(os.path.join(depth_dir, name))
        # ignore depth beyond 5 meters as it is probably wrong
        depth_pred[depth_pred > 5] = 0
        depth_pred = resize(depth_pred, depth_trgt.shape)

        temp = eval_depth(depth_pred, depth_trgt)
        if totals is None:
            totals = temp
        else:
            totals = {key: value + temp[key] for key, value in totals.items()}
    metrics_depth = {key: value / len(pairs) for key, value in totals.items()}

    # 3d point metrics
    fname_pred = os.path.join(workspace, 'fused.ply')
    fname_trgt = info['file_name_mesh_gt']
    metrics_mesh = eval_mesh(fname_pred, fname_trgt)

    metrics = {**metrics_depth, **metrics_mesh}
    print(metrics)

    # the context manager closes (and flushes) the results file
    rslt_file = os.path.join(workspace, 'metrics.json')
    with open(rslt_file, 'w') as fp:
        json.dump(metrics, fp)

    return metrics