def test_sparse_boxes_WITH_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)

    # Since the mask offset is 20, this spans 3 bricks (physical: 20-70, logical: 0-90)
    block_mask[0, 0, 0:5] = True

    # Spans a single brick (physical: 30-60, logical: 30-60)
    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT OFFSET
    mask_box_start = np.array([0, 10, 20])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)
    block_mask_box = (mask_box_start, mask_box_stop)

    brick_grid = Grid((10, 10, 30), (0, 0, 0))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box, block_mask_resolution)

    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == [[[0, 10,  0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]],
                              [[0, 10, 60], [10, 20, 90]],
                              [[0, 20, 30], [10, 30, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == [[[0, 10, 20], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 60]],
                               [[0, 10, 60], [10, 20, 70]],
                               [[0, 20, 30], [10, 30, 60]]]).all()
def test_sparse_boxes_NO_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)
    block_mask[0, 0, 0:5] = True
    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT ORIGIN (NO OFFSET)
    mask_box_start = np.array([0, 0, 0])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)
    block_mask_box = (mask_box_start, mask_box_stop)

    brick_grid = Grid((10, 10, 30))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box, block_mask_resolution)

    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == [[[0,  0,  0], [10, 10, 30]],
                              [[0,  0, 30], [10, 10, 60]],
                              [[0, 10,  0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == [[[0,  0,  0], [10, 10, 30]],
                               [[0,  0, 30], [10, 10, 50]],
                               [[0, 10, 10], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 40]]]).all()
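# The two tests above exercise the relationship between logical and physical boxes:
# a logical box is a full brick from the grid, while the corresponding physical box is
# that brick shrunk to the masked voxels it actually contains. The helper below is NOT
# part of SparseBlockMask's API -- it's a minimal sketch of that relationship, assuming
# a mask that starts at the origin (as in the NO_OFFSET test) and a uniform resolution.
def _clip_to_masked_extent(logical_box, mask, mask_resolution):
    logical_box = np.asarray(logical_box)

    # Coarse-grid voxels covered by this brick
    coarse_box = logical_box // mask_resolution
    sub = mask[tuple(slice(a, b) for a, b in coarse_box.T)]

    # Bounding box of the nonzero coarse voxels, in full-res coordinates
    nz = np.argwhere(sub)
    lo = (coarse_box[0] + nz.min(axis=0)) * mask_resolution
    hi = (coarse_box[0] + nz.max(axis=0) + 1) * mask_resolution

    # Clip to the logical box (handles bricks that only partially overlap the mask)
    return np.array([np.maximum(lo, logical_box[0]),
                     np.minimum(hi, logical_box[1])])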
def init_brickwall(self, volume_service, subset_labels, roi):
    sbm = None
    if roi["name"]:
        base_service = volume_service.base_service

        if not roi["server"] or not roi["uuid"]:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        if roi["scale"] is not None:
            scale = roi["scale"]
        elif isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"
        else:
            scale = 0

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, 2**(5-scale))
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // 2**(5-scale)

        with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
            roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                       format='mask', mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")
        except NotImplementedError:
            sbm = None

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes

        # Apply halo WHILE downloading the data.
        # TODO: Allow the user to configure whether or not the halo should
        #       be fetched from the outset, or added after the blocks are loaded.
        halo = self.config["connectedcomponents"]["halo"]
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client,
                                                  target_partition_size_voxels, halo, sbm,
                                                  compression='lz4_2x')

    return brickwall
def init_boxes(self, volume_service, roi):
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service

    if not roi["server"] or not roi["uuid"]:
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = (roi["server"] or volume_service.server)
    roi["uuid"] = (roi["uuid"] or volume_service.uuid)

    if roi["scale"] is not None:
        scale = roi["scale"]
    elif isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"
    else:
        scale = 0

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**(5-scale))
    seg_box_s0 = seg_box * 2**scale
    seg_box_s5 = seg_box // 2**(5-scale)

    with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def _get_sparse_block_mask(self, volume_service):
    """
    If the user's config specified a sparse subset of bodies to process,
    return a SparseBlockMask object indicating where those bodies reside.

    If the user did not specify a 'subset-bodies' list, returns None,
    indicating that all segmentation blocks in the volume should be read.

    Also, if the input volume is not from a DvidVolumeService, returns None.
    (In that case, the 'subset-bodies' feature can be used, but it isn't as efficient.)
    """
    import pandas as pd
    config = self.config_data

    sparse_body_ids = config["mesh-config"]["storage"]["subset-bodies"]
    if not sparse_body_ids:
        return None

    if not isinstance(volume_service.base_service, DvidVolumeService):
        # We only know how to retrieve sparse blocks for DVID volumes.
        # For other volume sources, we'll just have to fetch everything and filter
        # out the unwanted bodies at the mask aggregation step.
        return None

    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    assert grouping_scheme in ('no-groups', 'singletons', 'labelmap'), \
        f"Not allowed to use 'subset-bodies' setting for grouping scheme: {grouping_scheme}"

    if grouping_scheme in ('no-groups', 'singletons'):
        # The 'body ids' are identical to segment ids
        sparse_segment_ids = sparse_body_ids
    elif grouping_scheme == 'labelmap':
        # We need to convert the body ids into sparse segment ids
        mapping_pairs = self.load_labelmap()
        segments, bodies = mapping_pairs.transpose()

        # pandas.Series permits duplicate index values,
        # which is convenient for this reverse lookup
        reverse_lookup = pd.Series(index=bodies, data=segments)
        sparse_segment_ids = reverse_lookup.loc[sparse_body_ids].values

    # Fetch the sparse mask of blocks that the sparse segments belong to
    dvid_service = volume_service.base_service
    block_mask, lowres_box, block_shape = \
        sparkdvid.get_union_block_mask_for_bodies(dvid_service.server,
                                                  dvid_service.uuid,
                                                  dvid_service.instance_name,
                                                  sparse_segment_ids)

    fullres_box = lowres_box * block_shape
    return SparseBlockMask(block_mask, fullres_box, block_shape)
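# The 'labelmap' branch above leans on a pandas detail worth calling out: a Series may
# carry duplicate index values, so indexing it with .loc[] on a list of body ids returns
# every segment mapped to each requested body. A self-contained sketch of that reverse
# lookup (the mapping values below are invented purely for illustration):
def _demo_labelmap_reverse_lookup():
    import numpy as np
    import pandas as pd

    # Hypothetical (segment, body) pairs, in the layout load_labelmap() is assumed to return
    mapping_pairs = np.array([[1, 100],
                              [2, 100],
                              [3, 200],
                              [4, 300]], dtype=np.uint64)
    segments, bodies = mapping_pairs.transpose()

    # Duplicate index values are permitted, so one body can map back to many segments
    reverse_lookup = pd.Series(index=bodies, data=segments)

    sparse_body_ids = np.array([100, 300], dtype=np.uint64)
    sparse_segment_ids = reverse_lookup.loc[sparse_body_ids].values
    assert (sparse_segment_ids == [1, 2, 4]).all()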
def init_boxes(self, volume_service, roi):
    if not roi:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service
    assert isinstance(base_service, DvidVolumeService), \
        "Can't specify an ROI unless you're using a dvid input"

    assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
        "The 'roi' option doesn't support adapters other than 'rescale-level'"

    scale = 0
    if isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"

    server, uuid, _seg_instance = base_service.instance_triple

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**(5-scale))
    seg_box_s0 = seg_box * 2**scale
    seg_box_s5 = seg_box // 2**(5-scale)

    with Timer(f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi, format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def test_get_fullres_mask():
    coarse_mask = np.random.randint(2, size=(10, 10), dtype=bool)
    full_mask = upsample(coarse_mask, 10)

    sbm = SparseBlockMask(coarse_mask, [(0, 0), (100, 100)], (10, 10))

    # Try the exact bounding box
    extracted = sbm.get_fullres_mask([(0, 0), (100, 100)])
    assert (extracted == full_mask).all()

    # Try a bounding box that exceeds the original mask
    # (excess region should be all zeros)
    extracted = sbm.get_fullres_mask([(10, 20), (150, 150)])
    assert extracted.shape == (140, 130)

    expected = np.zeros((140, 130), dtype=bool)
    expected[:90, :80] = full_mask[10:, 20:]
    assert (extracted == expected).all()
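# The test above relies on upsample() to turn the coarse mask into a full-res mask.
# Assuming it performs blockwise (nearest-neighbor) upsampling -- each coarse voxel
# repeated 'factor' times along every axis -- a minimal numpy equivalent is sketched here:
def _upsample_sketch(coarse, factor):
    up = coarse
    for axis in range(coarse.ndim):
        up = np.repeat(up, factor, axis=axis)
    return up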
def init_boxes(self, volume_service, roi, chunk_shape_s0):
    """
    Return a set of bounding boxes to tile the given ROI.
    Scale 0 of the volume service should correspond to full-res data,
    which is 32x higher-res than ROI resolution.
    """
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                chunk_shape_s0,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service

    if not roi["server"] or not roi["uuid"]:
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = (roi["server"] or volume_service.server)
    roi["uuid"] = (roi["uuid"] or volume_service.uuid)

    assert not (chunk_shape_s0 % 2**5).any(), \
        "If using an ROI, select a chunk shape that is divisible by 32"

    seg_box_s0 = volume_service.bounding_box_zyx
    seg_box_s0 = round_box(seg_box_s0, 2**5)
    seg_box_s5 = seg_box_s0 // 2**5

    with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box_s0, 2**5)
    boxes = sbm.sparse_boxes(chunk_shape_s0)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def init_boxes(self, volume_service, roi):
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    server, uuid, roi_name = roi["server"], roi["uuid"], roi["name"]
    roi_scale = roi["relative-scale"]

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**roi_scale).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**roi_scale)
    seg_box_s5 = seg_box // 2**roi_scale

    with Timer(f"Fetching mask for ROI '{roi_name}' ({seg_box[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi_name, format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**roi_scale)
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
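# The box arithmetic shared by these init_boxes() variants is easy to misread, so here is
# a worked numeric example. It assumes round_box() expands the box outward to the given
# multiple; the specific values below are invented for illustration only.
def _demo_roi_box_arithmetic():
    def round_box_out(box, grid):
        box = np.asarray(box)
        start = (box[0] // grid) * grid     # round start down
        stop = -(-box[1] // grid) * grid    # round stop up (ceiling division)
        return np.array([start, stop])

    # Hypothetical ZYX bounding box at the service's scale, with an ROI 2**4 = 16x coarser
    roi_scale = 4
    seg_box = np.array([[0, 10, 25], [100, 250, 310]])

    seg_box = round_box_out(seg_box, 2**roi_scale)
    assert (seg_box == [[0, 0, 16], [112, 256, 320]]).all()

    seg_box_s5 = seg_box // 2**roi_scale
    assert (seg_box_s5 == [[0, 0, 1], [7, 16, 20]]).all()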
def test_copysegmentation_from_hdf5_to_dvid_custom_sbm(setup_hdf5_segmentation_input,
                                                       disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, output_segmentation_name = setup_hdf5_segmentation_input

    # Our bricks are long in Z, so use a mask that's aligned that way, too.
    mask = np.zeros(volume.shape, bool)
    mask[:, :, 64:128] = True
    mask[:, :, 192:256] = True

    sbm = SparseBlockMask(mask[::64, ::64, ::64], [(0, 0, 0), volume.shape], (64, 64, 64))
    with open(f"{template_dir}/sbm.pkl", 'wb') as f:
        pickle.dump(sbm, f)

    config["copysegmentation"]["sparse-block-mask"] = f"{template_dir}/sbm.pkl"

    setup = (template_dir, config, volume, dvid_address, repo_uuid, output_segmentation_name)
    box_zyx, expected_vol, output_vol = _run_to_dvid(setup, check_scale_0=False)

    expected_vol = expected_vol.copy()
    mask = mask[box_to_slicing(*box_zyx)]
    expected_vol[~mask] = 0

    assert (output_vol == expected_vol).all()
def _init_masks(self):
    options = self.config["copysegmentation"]
    self.sbm = None

    if options["sparse-block-mask"]:
        # In theory, we could just take the intersection of the masks involved.
        # But I'm too lazy to think about that right now.
        assert not options["input-mask-labels"] and not options["output-mask-labels"], \
            "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

        with open(options["sparse-block-mask"], 'rb') as f:
            self.sbm = pickle.load(f)

    is_supervoxels = False
    if isinstance(self.input_service.base_service, DvidVolumeService):
        is_supervoxels = self.input_service.base_service.supervoxels

    output_mask_labels = load_body_list(options["output-mask-labels"], is_supervoxels)
    self.output_mask_labels = set(output_mask_labels)

    output_sbm = None
    if len(output_mask_labels) > 0:
        if (self.output_service.preferred_message_shape != self.input_service.preferred_message_shape).any():
            logger.warning("Not using output mask to reduce data fetching: "
                           "Your input service and output service don't have the same brick shape")
        elif (self.output_service.bounding_box_zyx != self.input_service.bounding_box_zyx).any():
            logger.warning("Not using output mask to reduce data fetching: "
                           "Your input service and output service don't have the same bounding box")
        else:
            try:
                output_sbm = self.output_service.sparse_block_mask_for_labels(output_mask_labels)
            except NotImplementedError:
                output_sbm = None

    input_mask_labels = load_body_list(options["input-mask-labels"], is_supervoxels)

    input_sbm = None
    if len(input_mask_labels) > 0:
        try:
            input_sbm = self.input_service.sparse_block_mask_for_labels(input_mask_labels)
        except NotImplementedError:
            input_sbm = None

    if self.sbm is not None:
        pass
    elif input_sbm is None:
        self.sbm = output_sbm
    elif output_sbm is None:
        self.sbm = input_sbm
    else:
        assert (input_sbm.resolution == output_sbm.resolution).all(), \
            "FIXME: At the moment, you can't supply both an input mask and an output "\
            "mask unless the input and output sources use the same brick shape (message-block-shape)"

        final_box = box_intersection(input_sbm.box, output_sbm.box)

        input_box = (input_sbm.box - final_box) // input_sbm.resolution
        input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

        output_box = (output_sbm.box - final_box) // output_sbm.resolution
        output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

        assert input_mask.shape == output_mask.shape
        assert input_mask.dtype == output_mask.dtype == bool
        final_mask = (input_mask & output_mask)

        self.sbm = SparseBlockMask(final_mask, final_box, input_sbm.resolution)

    id_offset = options["add-offset-to-ids"]
    if id_offset != 0:
        input_mask_labels = np.asarray(input_mask_labels, np.uint64)
        input_mask_labels += id_offset

    self.input_mask_labels = set(input_mask_labels)
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    assert not args.only_within_roi or not args.not_within_roi, \
        "Can't supply both --only-within-roi and --not-within-roi. Pick one or the other (or neither)."

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server, args.uuid, roi, format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5), 2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes.
    # Instead, we'll use the repo-wide maxlabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(args.dvid_server, args.uuid, args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        maxlabel += 1
        name = os.path.split(path)[1]
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel, path, roi_sbm,
                                                     invert_roi, args.no_downres, prefix_logger)

        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {'new-label': maxlabel,
                       'overwritten_labels': sorted(overwritten_labels)}
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info("Done.")
def execute(self):
    self._sanitize_config()

    input_config = self.config["input"]
    options = self.config["samplepoints"]
    resource_config = self.config["resource-manager"]

    resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])
    volume_service = VolumeService.create_from_config(input_config, resource_mgr_client)

    input_csv = options["input-table"]
    with Timer(f"Reading {input_csv}", logger):
        coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
        points = coordinate_table_df[['z', 'y', 'x']].values

    rescale = options["rescale-points-to-level"]
    if rescale != 0:
        points //= (2**rescale)

    # All points must lie within the input volume
    points_box = [points.min(axis=0), 1 + points.max(axis=0)]
    if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
        raise RuntimeError("The point list includes points outside of the volume bounding box.")

    with Timer("Sorting points by Brick ID", logger):
        # 'Brick ID' is defined as the divided corner coordinate
        brick_shape = volume_service.preferred_message_shape
        brick_ids_and_points = np.concatenate((points // brick_shape, points), axis=1)
        brick_ids_and_points = lexsort_columns(brick_ids_and_points)

        brick_ids = brick_ids_and_points[:, :3]
        points = brick_ids_and_points[:, 3:]

        # Extract the first row of each group to get the set of unique brick IDs
        point_group_spans = groupby_spans_presorted(brick_ids)
        point_group_starts = (start for start, stop in point_group_spans)
        unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

    with Timer("Constructing sparse mask", logger):
        # BrickWall.from_volume_service() supports the ability to initialize a sparse RDD,
        # with only a subset of Bricks (rather than a dense RDD containing every brick
        # within the volume bounding box).
        # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
        brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1 + unique_brick_ids.max(axis=0)])

        brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])
        brick_mask = np.zeros(brick_mask_shape, bool)
        brick_mask_coords = unique_brick_ids - brick_mask_box[0]
        brick_mask[tuple(brick_mask_coords.transpose())] = True

        sbm = SparseBlockMask(brick_mask, brick_mask_box * brick_shape, brick_shape)

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client,
                                                  target_partition_size_voxels, 0, sbm, lazy=True)

    with Timer(f"Grouping {len(points)} points", logger):
        # This is faster than pandas.DataFrame.groupby() for large data
        point_groups = groupby_presorted(points, brick_ids)
        id_and_ptgroups = list(zip(unique_brick_ids, point_groups))
        num_groups = len(id_and_ptgroups)

    with Timer(f"Join {num_groups} point groups with bricks", logger):
        id_and_ptgroups = dask.bag.from_sequence(id_and_ptgroups,
                                                 npartitions=brickwall.bricks.npartitions)

        id_and_ptgroups = id_and_ptgroups.map(lambda i_p: (*i_p[0], i_p[1]))
        id_and_ptgroups_df = id_and_ptgroups.to_dataframe(columns=['z', 'y', 'x', 'pointgroup'])

        ids_and_bricks = brickwall.bricks.map(lambda brick: (*(brick.logical_box[0] // brick_shape), brick))
        ids_and_bricks_df = ids_and_bricks.to_dataframe(columns=['z', 'y', 'x', 'brick'])

        def set_brick_id_index(df):
            def set_brick_id(df):
                df['brick_id'] = encode_coords_to_uint64(df[['z', 'y', 'x']].values.astype(np.int32))
                return df

            df['brick_id'] = np.uint64(0)
            df = df.map_partitions(set_brick_id, meta=df)

            # Note: bricks and pointgroups are already sorted by
            #       brick scan-order, so brick_id is already sorted.
            #       Specifying sorted=True is critical to performance here.
            df = df.set_index('brick_id', sorted=True)
            return df

        # Give them matching indexes
        ids_and_bricks_df = set_brick_id_index(ids_and_bricks_df)
        id_and_ptgroups_df = set_brick_id_index(id_and_ptgroups_df)

        # Join (index-on-index, so it should be fast)
        ptgroup_and_brick_df = id_and_ptgroups_df.merge(ids_and_bricks_df, how='left',
                                                        left_index=True, right_index=True)
        ptgroup_and_brick_df = ptgroup_and_brick_df[['pointgroup', 'brick']]
        ptgroup_and_brick = ptgroup_and_brick_df.to_bag()

    # Persist and force computation before proceeding.
    #ptgroup_and_brick = persist_and_execute(ptgroup_and_brick, "Persisting joined point groups", logger, False)
    #assert ptgroup_and_brick.count().compute() == num_groups == brickwall.num_bricks

    def sample_points(points_and_brick):
        """
        Given a Brick and array of points (N,3) that lie within it,
        sample labels from the points within the brick and return a
        record array containing the points and the sampled labels.
        """
        points, brick = points_and_brick

        result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
        result = np.zeros((len(points),), result_dtype)

        result['z'] = points[:, 0]
        result['y'] = points[:, 1]
        result['x'] = points[:, 2]

        # Make relative to brick offset
        points -= brick.physical_box[0]

        result['label'] = brick.volume[tuple(points.transpose())]
        return result

    with Timer("Sampling bricks", logger):
        brick_samples = ptgroup_and_brick.map(sample_points).compute()

    with Timer("Concatenating samples", logger):
        sample_table = np.concatenate(brick_samples)

    with Timer("Sorting samples", logger):
        # This will sort in terms of the SCALED z,y,x coordinates
        sample_table.sort()

    with Timer("Sorting table", logger):
        if rescale == 0:
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
        else:
            # sample_table is sorted by RESCALED coordinate,
            # so sort our table the same way
            coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
            coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
            coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)

            coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)

            del coordinate_table_df['rz']
            del coordinate_table_df['ry']
            del coordinate_table_df['rx']

    # Now that samples and input rows are sorted identically,
    # append the results
    output_col = options["output-column"]
    coordinate_table_df[output_col] = sample_table['label'].copy()

    if rescale != 0:
        with Timer("Re-sorting table at scale 0", logger):
            # For simplicity (API and testing), we guarantee that coordinates are sorted in the output.
            # In the case of rescaled points, they need to be sorted once more (at scale 0 this time).
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)

    with Timer("Exporting samples", logger):
        coordinate_table_df.to_csv(options["output-table"], header=True, index=False)

    logger.info("DONE.")