def test_sparse_boxes_WITH_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)

    # Since the mask offset is 20, this spans 3 bricks (physical: 20-70, logical: 0-90)
    block_mask[0, 0, 0:5] = True

    # Spans a single brick (physical: 30-60, logical: 30-60)
    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT OFFSET
    mask_box_start = np.array([0, 10, 20])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)
    block_mask_box = (mask_box_start, mask_box_stop)

    brick_grid = Grid((10, 10, 30), (0, 0, 0))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box, block_mask_resolution)

    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == [[[0, 10,  0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]],
                              [[0, 10, 60], [10, 20, 90]],
                              [[0, 20, 30], [10, 30, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == [[[0, 10, 20], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 60]],
                               [[0, 10, 60], [10, 20, 70]],
                               [[0, 20, 30], [10, 30, 60]]]).all()
def test_sparse_boxes_NO_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)
    block_mask[0, 0, 0:5] = True
    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT ORIGIN (NO OFFSET)
    mask_box_start = np.array([0, 0, 0])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)
    block_mask_box = (mask_box_start, mask_box_stop)

    brick_grid = Grid((10, 10, 30))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box, block_mask_resolution)

    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == [[[0,  0,  0], [10, 10, 30]],
                              [[0,  0, 30], [10, 10, 60]],
                              [[0, 10,  0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == [[[0,  0,  0], [10, 10, 30]],
                               [[0,  0, 30], [10, 10, 50]],
                               [[0, 10, 10], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 40]]]).all()
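# The two tests above exercise the relationship between logical and physical boxes:
# a logical box is a full brick from the grid, while the corresponding physical box is
# that brick shrunk to the masked voxels it actually contains. The helper below is NOT
# part of SparseBlockMask's API -- it's a minimal sketch of that relationship, assuming
# a mask that starts at the origin (as in the NO_OFFSET test) and a uniform resolution.
def _clip_to_masked_extent(logical_box, mask, mask_resolution):
    logical_box = np.asarray(logical_box)

    # Coarse-grid voxels covered by this brick
    coarse_box = logical_box // mask_resolution
    sub = mask[tuple(slice(a, b) for a, b in coarse_box.T)]

    # Bounding box of the nonzero coarse voxels, in full-res coordinates
    nz = np.argwhere(sub)
    lo = (coarse_box[0] + nz.min(axis=0)) * mask_resolution
    hi = (coarse_box[0] + nz.max(axis=0) + 1) * mask_resolution

    # Clip to the logical box (handles bricks that only partially overlap the mask)
    return np.array([np.maximum(lo, logical_box[0]),
                     np.minimum(hi, logical_box[1])])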
def init_brickwall(self, volume_service, subset_labels, roi):
    sbm = None
    if roi["name"]:
        base_service = volume_service.base_service

        if not roi["server"] or not roi["uuid"]:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        if roi["scale"] is not None:
            scale = roi["scale"]
        elif isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"
        else:
            scale = 0

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, 2**(5-scale))
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // 2**(5-scale)

        with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
            roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                       format='mask', mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")
        except NotImplementedError:
            sbm = None

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes

        # Apply halo WHILE downloading the data.
        # TODO: Allow the user to configure whether or not the halo should
        #       be fetched from the outset, or added after the blocks are loaded.
        halo = self.config["connectedcomponents"]["halo"]
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client,
                                                  target_partition_size_voxels, halo, sbm,
                                                  compression='lz4_2x')

    return brickwall
def init_boxes(self, volume_service, roi):
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service

    if not roi["server"] or not roi["uuid"]:
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = (roi["server"] or volume_service.server)
    roi["uuid"] = (roi["uuid"] or volume_service.uuid)

    if roi["scale"] is not None:
        scale = roi["scale"]
    elif isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"
    else:
        scale = 0

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**(5-scale))
    seg_box_s0 = seg_box * 2**scale
    seg_box_s5 = seg_box // 2**(5-scale)

    with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def _get_sparse_block_mask(self, volume_service):
    """
    If the user's config specified a sparse subset of bodies to process,
    return a SparseBlockMask object indicating where those bodies reside.

    If the user did not specify a 'subset-bodies' list, returns None,
    indicating that all segmentation blocks in the volume should be read.

    Also, if the input volume is not from a DvidVolumeService, returns None.
    (In that case, the 'subset-bodies' feature can be used, but it isn't as efficient.)
    """
    import pandas as pd
    config = self.config_data

    sparse_body_ids = config["mesh-config"]["storage"]["subset-bodies"]
    if not sparse_body_ids:
        return None

    if not isinstance(volume_service.base_service, DvidVolumeService):
        # We only know how to retrieve sparse blocks for DVID volumes.
        # For other volume sources, we'll just have to fetch everything and filter
        # out the unwanted bodies at the mask aggregation step.
        return None

    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    assert grouping_scheme in ('no-groups', 'singletons', 'labelmap'), \
        f"Not allowed to use 'subset-bodies' setting for grouping scheme: {grouping_scheme}"

    if grouping_scheme in ('no-groups', 'singletons'):
        # The 'body ids' are identical to segment ids
        sparse_segment_ids = sparse_body_ids
    elif grouping_scheme == 'labelmap':
        # We need to convert the body ids into sparse segment ids
        mapping_pairs = self.load_labelmap()
        segments, bodies = mapping_pairs.transpose()

        # pandas.Series permits duplicate index values,
        # which is convenient for this reverse lookup
        reverse_lookup = pd.Series(index=bodies, data=segments)
        sparse_segment_ids = reverse_lookup.loc[sparse_body_ids].values

    # Fetch the sparse mask of blocks that the sparse segments belong to
    dvid_service = volume_service.base_service
    block_mask, lowres_box, block_shape = \
        sparkdvid.get_union_block_mask_for_bodies(dvid_service.server,
                                                  dvid_service.uuid,
                                                  dvid_service.instance_name,
                                                  sparse_segment_ids)

    fullres_box = lowres_box * block_shape
    return SparseBlockMask(block_mask, fullres_box, block_shape)
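# The 'labelmap' branch above leans on a pandas detail worth calling out: a Series may
# carry duplicate index values, so indexing it with .loc[] on a list of body ids returns
# every segment mapped to each requested body. A self-contained sketch of that reverse
# lookup (the mapping values below are invented purely for illustration):
def _demo_labelmap_reverse_lookup():
    import numpy as np
    import pandas as pd

    # Hypothetical (segment, body) pairs, in the layout load_labelmap() is assumed to return
    mapping_pairs = np.array([[1, 100],
                              [2, 100],
                              [3, 200],
                              [4, 300]], dtype=np.uint64)
    segments, bodies = mapping_pairs.transpose()

    # Duplicate index values are permitted, so one body can map back to many segments
    reverse_lookup = pd.Series(index=bodies, data=segments)

    sparse_body_ids = np.array([100, 300], dtype=np.uint64)
    sparse_segment_ids = reverse_lookup.loc[sparse_body_ids].values
    assert (sparse_segment_ids == [1, 2, 4]).all()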
def init_boxes(self, volume_service, roi):
    if not roi:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service
    assert isinstance(base_service, DvidVolumeService), \
        "Can't specify an ROI unless you're using a dvid input"

    assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
        "The 'roi' option doesn't support adapters other than 'rescale-level'"

    scale = 0
    if isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"

    server, uuid, _seg_instance = base_service.instance_triple

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**(5-scale))
    seg_box_s0 = seg_box * 2**scale
    seg_box_s5 = seg_box // 2**(5-scale)

    with Timer(f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi, format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def test_get_fullres_mask():
    coarse_mask = np.random.randint(2, size=(10, 10), dtype=bool)
    full_mask = upsample(coarse_mask, 10)

    sbm = SparseBlockMask(coarse_mask, [(0, 0), (100, 100)], (10, 10))

    # Try the exact bounding box
    extracted = sbm.get_fullres_mask([(0, 0), (100, 100)])
    assert (extracted == full_mask).all()

    # Try a bounding box that exceeds the original mask
    # (excess region should be all zeros)
    extracted = sbm.get_fullres_mask([(10, 20), (150, 150)])
    assert extracted.shape == (140, 130)

    expected = np.zeros((140, 130), dtype=bool)
    expected[:90, :80] = full_mask[10:, 20:]
    assert (extracted == expected).all()
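# The test above relies on upsample() to turn the coarse mask into a full-res mask.
# Assuming it performs blockwise (nearest-neighbor) upsampling -- each coarse voxel
# repeated 'factor' times along every axis -- a minimal numpy equivalent is sketched here:
def _upsample_sketch(coarse, factor):
    up = coarse
    for axis in range(coarse.ndim):
        up = np.repeat(up, factor, axis=axis)
    return up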
def init_boxes(self, volume_service, roi, chunk_shape_s0):
    """
    Return a set of bounding boxes to tile the given ROI.
    Scale 0 of the volume service should correspond to full-res data,
    which is 32x higher-res than ROI resolution.
    """
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                chunk_shape_s0,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service

    if not roi["server"] or not roi["uuid"]:
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = (roi["server"] or volume_service.server)
    roi["uuid"] = (roi["uuid"] or volume_service.uuid)

    assert not (chunk_shape_s0 % 2**5).any(), \
        "If using an ROI, select a chunk shape that is divisible by 32"

    seg_box_s0 = volume_service.bounding_box_zyx
    seg_box_s0 = round_box(seg_box_s0, 2**5)
    seg_box_s5 = seg_box_s0 // 2**5

    with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box_s0, 2**5)
    boxes = sbm.sparse_boxes(chunk_shape_s0)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def init_boxes(self, volume_service, roi):
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    server, uuid, roi_name = roi["server"], roi["uuid"], roi["name"]
    roi_scale = roi["relative-scale"]

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**roi_scale).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**roi_scale)
    seg_box_s5 = seg_box // 2**roi_scale

    with Timer(f"Fetching mask for ROI '{roi_name}' ({seg_box[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi_name, format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**roi_scale)
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
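# The box arithmetic shared by these init_boxes() variants is easy to misread, so here is
# a worked numeric example. It assumes round_box() expands the box outward to the given
# multiple; the specific values below are invented for illustration only.
def _demo_roi_box_arithmetic():
    def round_box_out(box, grid):
        box = np.asarray(box)
        start = (box[0] // grid) * grid     # round start down
        stop = -(-box[1] // grid) * grid    # round stop up (ceiling division)
        return np.array([start, stop])

    # Hypothetical ZYX bounding box at the service's scale, with an ROI 2**4 = 16x coarser
    roi_scale = 4
    seg_box = np.array([[0, 10, 25], [100, 250, 310]])

    seg_box = round_box_out(seg_box, 2**roi_scale)
    assert (seg_box == [[0, 0, 16], [112, 256, 320]]).all()

    seg_box_s5 = seg_box // 2**roi_scale
    assert (seg_box_s5 == [[0, 0, 1], [7, 16, 20]]).all()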
def test_copysegmentation_from_hdf5_to_dvid_custom_sbm(setup_hdf5_segmentation_input,
                                                       disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, output_segmentation_name = setup_hdf5_segmentation_input

    # Our bricks are long in Z, so use a mask that's aligned that way, too.
    mask = np.zeros(volume.shape, bool)
    mask[:, :, 64:128] = True
    mask[:, :, 192:256] = True

    sbm = SparseBlockMask(mask[::64, ::64, ::64], [(0, 0, 0), volume.shape], (64, 64, 64))
    with open(f"{template_dir}/sbm.pkl", 'wb') as f:
        pickle.dump(sbm, f)

    config["copysegmentation"]["sparse-block-mask"] = f"{template_dir}/sbm.pkl"

    setup = (template_dir, config, volume, dvid_address, repo_uuid, output_segmentation_name)
    box_zyx, expected_vol, output_vol = _run_to_dvid(setup, check_scale_0=False)

    expected_vol = expected_vol.copy()
    mask = mask[box_to_slicing(*box_zyx)]
    expected_vol[~mask] = 0

    assert (output_vol == expected_vol).all()
def _init_masks(self):
    options = self.config["copysegmentation"]
    self.sbm = None

    if options["sparse-block-mask"]:
        # In theory, we could just take the intersection of the masks involved.
        # But I'm too lazy to think about that right now.
        assert not options["input-mask-labels"] and not options["output-mask-labels"], \
            "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

        with open(options["sparse-block-mask"], 'rb') as f:
            self.sbm = pickle.load(f)

    is_supervoxels = False
    if isinstance(self.input_service.base_service, DvidVolumeService):
        is_supervoxels = self.input_service.base_service.supervoxels

    output_mask_labels = load_body_list(options["output-mask-labels"], is_supervoxels)
    self.output_mask_labels = set(output_mask_labels)

    output_sbm = None
    if len(output_mask_labels) > 0:
        if (self.output_service.preferred_message_shape != self.input_service.preferred_message_shape).any():
            logger.warning("Not using output mask to reduce data fetching: "
                           "Your input service and output service don't have the same brick shape")
        elif (self.output_service.bounding_box_zyx != self.input_service.bounding_box_zyx).any():
            logger.warning("Not using output mask to reduce data fetching: "
                           "Your input service and output service don't have the same bounding box")
        else:
            try:
                output_sbm = self.output_service.sparse_block_mask_for_labels(output_mask_labels)
            except NotImplementedError:
                output_sbm = None

    input_mask_labels = load_body_list(options["input-mask-labels"], is_supervoxels)

    input_sbm = None
    if len(input_mask_labels) > 0:
        try:
            input_sbm = self.input_service.sparse_block_mask_for_labels(input_mask_labels)
        except NotImplementedError:
            input_sbm = None

    if self.sbm is not None:
        pass
    elif input_sbm is None:
        self.sbm = output_sbm
    elif output_sbm is None:
        self.sbm = input_sbm
    else:
        assert (input_sbm.resolution == output_sbm.resolution).all(), \
            "FIXME: At the moment, you can't supply both an input mask and an output "\
            "mask unless the input and output sources use the same brick shape (message-block-shape)"

        final_box = box_intersection(input_sbm.box, output_sbm.box)

        input_box = (input_sbm.box - final_box) // input_sbm.resolution
        input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

        output_box = (output_sbm.box - final_box) // output_sbm.resolution
        output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

        assert input_mask.shape == output_mask.shape
        assert input_mask.dtype == output_mask.dtype == bool
        final_mask = (input_mask & output_mask)

        self.sbm = SparseBlockMask(final_mask, final_box, input_sbm.resolution)

    id_offset = options["add-offset-to-ids"]
    if id_offset != 0:
        input_mask_labels = np.asarray(input_mask_labels, np.uint64)
        input_mask_labels += id_offset

    self.input_mask_labels = set(input_mask_labels)
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    assert not args.only_within_roi or not args.not_within_roi, \
        "Can't supply both --only-within-roi and --not-within-roi. Pick one or the other (or neither)."

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server, args.uuid, roi, format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5), 2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes.
    # Instead, we'll use the repo-wide maxlabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(args.dvid_server, args.uuid, args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        maxlabel += 1
        name = os.path.split(path)[1]
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel, path, roi_sbm,
                                                     invert_roi, args.no_downres, prefix_logger)

        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {'new-label': maxlabel,
                       'overwritten_labels': sorted(overwritten_labels)}
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info("Done.")
def execute(self):
    self._sanitize_config()

    input_config = self.config["input"]
    options = self.config["samplepoints"]
    resource_config = self.config["resource-manager"]

    resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])
    volume_service = VolumeService.create_from_config(input_config, resource_mgr_client)

    input_csv = options["input-table"]
    with Timer(f"Reading {input_csv}", logger):
        coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
        points = coordinate_table_df[['z', 'y', 'x']].values

    rescale = options["rescale-points-to-level"]
    if rescale != 0:
        points //= (2**rescale)

    # All points must lie within the input volume
    points_box = [points.min(axis=0), 1 + points.max(axis=0)]
    if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
        raise RuntimeError("The point list includes points outside of the volume bounding box.")

    with Timer("Sorting points by Brick ID", logger):
        # 'Brick ID' is defined as the divided corner coordinate
        brick_shape = volume_service.preferred_message_shape
        brick_ids_and_points = np.concatenate((points // brick_shape, points), axis=1)
        brick_ids_and_points = lexsort_columns(brick_ids_and_points)

        brick_ids = brick_ids_and_points[:, :3]
        points = brick_ids_and_points[:, 3:]

        # Extract the first row of each group to get the set of unique brick IDs
        point_group_spans = groupby_spans_presorted(brick_ids)
        point_group_starts = (start for start, stop in point_group_spans)
        unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

    with Timer("Constructing sparse mask", logger):
        # BrickWall.from_volume_service() supports the ability to initialize a sparse RDD,
        # with only a subset of Bricks (rather than a dense RDD containing every brick
        # within the volume bounding box).
        # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
        brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1 + unique_brick_ids.max(axis=0)])

        brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])
        brick_mask = np.zeros(brick_mask_shape, bool)
        brick_mask_coords = unique_brick_ids - brick_mask_box[0]
        brick_mask[tuple(brick_mask_coords.transpose())] = True

        sbm = SparseBlockMask(brick_mask, brick_mask_box * brick_shape, brick_shape)

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client,
                                                  target_partition_size_voxels, 0, sbm, lazy=True)

    with Timer(f"Grouping {len(points)} points", logger):
        # This is faster than pandas.DataFrame.groupby() for large data
        point_groups = groupby_presorted(points, brick_ids)
        id_and_ptgroups = list(zip(unique_brick_ids, point_groups))
        num_groups = len(id_and_ptgroups)

    with Timer(f"Join {num_groups} point groups with bricks", logger):
        id_and_ptgroups = dask.bag.from_sequence(id_and_ptgroups,
                                                 npartitions=brickwall.bricks.npartitions)

        id_and_ptgroups = id_and_ptgroups.map(lambda i_p: (*i_p[0], i_p[1]))
        id_and_ptgroups_df = id_and_ptgroups.to_dataframe(columns=['z', 'y', 'x', 'pointgroup'])

        ids_and_bricks = brickwall.bricks.map(lambda brick: (*(brick.logical_box[0] // brick_shape), brick))
        ids_and_bricks_df = ids_and_bricks.to_dataframe(columns=['z', 'y', 'x', 'brick'])

        def set_brick_id_index(df):
            def set_brick_id(df):
                df['brick_id'] = encode_coords_to_uint64(df[['z', 'y', 'x']].values.astype(np.int32))
                return df

            df['brick_id'] = np.uint64(0)
            df = df.map_partitions(set_brick_id, meta=df)

            # Note: bricks and pointgroups are already sorted by
            #       brick scan-order, so brick_id is already sorted.
            #       Specifying sorted=True is critical to performance here.
            df = df.set_index('brick_id', sorted=True)
            return df

        # Give them matching indexes
        ids_and_bricks_df = set_brick_id_index(ids_and_bricks_df)
        id_and_ptgroups_df = set_brick_id_index(id_and_ptgroups_df)

        # Join (index-on-index, so it should be fast)
        ptgroup_and_brick_df = id_and_ptgroups_df.merge(ids_and_bricks_df, how='left',
                                                        left_index=True, right_index=True)
        ptgroup_and_brick_df = ptgroup_and_brick_df[['pointgroup', 'brick']]
        ptgroup_and_brick = ptgroup_and_brick_df.to_bag()

    # Persist and force computation before proceeding.
    #ptgroup_and_brick = persist_and_execute(ptgroup_and_brick, "Persisting joined point groups", logger, False)
    #assert ptgroup_and_brick.count().compute() == num_groups == brickwall.num_bricks

    def sample_points(points_and_brick):
        """
        Given a Brick and array of points (N,3) that lie within it,
        sample labels from the points within the brick and return a
        record array containing the points and the sampled labels.
        """
        points, brick = points_and_brick

        result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
        result = np.zeros((len(points),), result_dtype)

        result['z'] = points[:, 0]
        result['y'] = points[:, 1]
        result['x'] = points[:, 2]

        # Make relative to brick offset
        points -= brick.physical_box[0]

        result['label'] = brick.volume[tuple(points.transpose())]
        return result

    with Timer("Sampling bricks", logger):
        brick_samples = ptgroup_and_brick.map(sample_points).compute()

    with Timer("Concatenating samples", logger):
        sample_table = np.concatenate(brick_samples)

    with Timer("Sorting samples", logger):
        # This will sort in terms of the SCALED z,y,x coordinates
        sample_table.sort()

    with Timer("Sorting table", logger):
        if rescale == 0:
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
        else:
            # sample_table is sorted by RESCALED coordinate,
            # so sort our table the same way
            coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
            coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
            coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)

            coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)

            del coordinate_table_df['rz']
            del coordinate_table_df['ry']
            del coordinate_table_df['rx']

    # Now that samples and input rows are sorted identically,
    # append the results
    output_col = options["output-column"]
    coordinate_table_df[output_col] = sample_table['label'].copy()

    if rescale != 0:
        with Timer("Re-sorting table at scale 0", logger):
            # For simplicity (API and testing), we guarantee that coordinates are sorted in the output.
            # In the case of rescaled points, they need to be sorted once more (at scale 0 this time).
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)

    with Timer("Exporting samples", logger):
        coordinate_table_df.to_csv(options["output-table"], header=True, index=False)

    logger.info("DONE.")