def test_assemble_brick_fragments():
    """
    Assemble three adjacent brick fragments and verify that the resulting
    brick keeps the shared logical_box while its physical_box covers only
    the extent of the fragments that were actually provided.
    """
    volume = np.random.randint(0, 10, (100, 300))
    logical_box = np.array([(10, 20), (20, 120)])

    # The logical box could hold five 20-px-wide fragments along axis 1
    # (starting at 20, 40, 60, 80 and 100).  The first and last are omitted
    # on purpose, to prove that the final physical box ends up smaller
    # than the logical box.
    fragment_boxes = [np.array([(10, start), (20, start + 20)])
                      for start in (40, 60, 80)]
    fragments = [Brick(logical_box, frag_box, extract_subvol(volume, frag_box))
                 for frag_box in fragment_boxes]

    assembled_brick = assemble_brick_fragments(fragments)

    first_box, last_box = fragment_boxes[0], fragment_boxes[-1]
    assert (assembled_brick.logical_box == logical_box).all()
    assert (assembled_brick.physical_box == [first_box[0], last_box[1]]).all()

    physical_shape = assembled_brick.physical_box[1] - assembled_brick.physical_box[0]
    assert (assembled_brick.volume.shape == physical_shape).all()

    expected_data = extract_subvol(volume, assembled_brick.physical_box)
    assert (assembled_brick.volume == expected_data).all()
def test_sparseblocksstats(setup_sparseblockstats, disable_auto_retry):
    """
    Run the workflow and validate the block statistics file it exports.

    Verifies that:
      - the 'stats' dataset has the dtype described by BLOCK_STATS_DTYPES,
      - every (block, segment) voxel count matches the input volume,
      - no statistics were produced for the block at the origin,
      - exactly the three expected blocks were covered.
    """
    template_dir, _config, input_volume, _mask_volume, _dvid_address, _repo_uuid = setup_sparseblockstats

    execution_dir, workflow = launch_flow(template_dir, 1)
    _final_config = workflow.config

    with h5py.File(f'{execution_dir}/block-statistics.h5', 'r') as f:
        assert f['stats'].dtype == np.dtype(list(BLOCK_STATS_DTYPES.items()))
        # Read the whole dataset into memory so the file can be closed.
        stats_df = pd.DataFrame(f['stats'][:])

    # Each row lists a block corner (z, y, x), a segment ID and a voxel count.
    # Recount the segment's voxels in the corresponding 64-px block of the input.
    for row in stats_df.itertuples():
        corner = np.array((row.z, row.y, row.x))
        block_box = np.array([corner, corner + 64])
        block = extract_subvol(input_volume, block_box)
        assert (block == row.segment_id).sum() == row.count

    assert len(stats_df.query('z == 0 and y == 0 and x == 0')) == 0, \
        "Was not supposed to compute stats for the first block!"

    block_coords = (stats_df[['z', 'y', 'x']]
                    .sort_values(['z', 'y', 'x'])
                    .drop_duplicates(['z', 'y', 'x'])
                    .values)
    assert (block_coords == 64*np.array([[0,0,1], [0,1,0], [0,1,1]])).all(), \
        "Did not cover blocks for the selected labels!"
def test_masksegmentation_resume(setup_dvid_segmentation_input, disable_auto_retry):
    """
    Configure the workflow to resume past roughly half of its batches and
    verify that the first part of the output is untouched while the
    remainder was modified somewhere.
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     _input_segmentation_name, output_segmentation_name) = setup_dvid_segmentation_input

    mask_options = config["masksegmentation"]
    brick_shape = config["input"]["geometry"]["message-block-shape"]
    batch_size = mask_options["batch-size"]

    # This is the total bricks in the volume, not necessarily
    # the total *processed* bricks, but it's close enough.
    bricks_per_axis = np.array(volume.shape) / brick_shape
    total_bricks = np.ceil(np.prod(bricks_per_axis)).astype(int)
    total_batches = int(np.ceil(total_bricks / batch_size))

    # Skip over half of the original bricks.
    mask_options["resume-at"] = {
        "scale": 0,
        "batch-index": 1 + (total_batches // 2)
    }

    # Re-dump the config so the workflow picks up the resume settings.
    dumper = YAML()
    dumper.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        dumper.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    # Bounding boxes are stored in XYZ order; reverse the columns to get ZYX.
    input_box_zyx = np.array(final_config['input']['geometry']['bounding-box'])[:, ::-1]

    roi_mask = extract_subvol(upsample(roi_mask_s5, 2**5), input_box_zyx)
    masked_vol = extract_subvol(volume.copy(), input_box_zyx)
    masked_vol[roi_mask] = 0

    output_box_zyx = np.array(final_config['output']['geometry']['bounding-box'])[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address, repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    untouched = (output_vol[:128] == volume[:128])
    changed = (output_vol[128:] != volume[128:])

    # First part was untouched
    assert untouched.all()

    # Last part was touched somewhere
    assert changed.any()
def test_pad_brick_data_from_volume_source():
    """
    Pad a brick whose physical_box is not aligned to the padding grid and
    verify that the padded brick's physical_box is expanded to the expected
    box and filled with the matching data from the source volume.
    """
    source_volume = np.random.randint(0, 10, (100, 300))
    read_source = partial(extract_subvol, source_volume)

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(3, 8), (7, 13)]
    brick = Brick(logical_box, physical_box, extract_subvol(source_volume, physical_box))

    padding_grid = Grid((5, 5), offset=(1, 0))
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    # The logical box is unchanged; only the physical box grows.
    assert (padded_brick.logical_box == brick.logical_box).all()
    assert (padded_brick.physical_box == [(1,5), (11, 15)]).all()

    expected_data = extract_subvol(source_volume, padded_brick.physical_box)
    assert (padded_brick.volume == expected_data).all()
def _extract_subbrick(brick, box):
    """
    Build a new Brick that shares the logical_box of ``brick`` but whose
    physical_box is ``box``.

    The data is taken from the part of ``box`` that overlaps the brick's
    physical_box.  If necessary, the returned subbrick is zero-padded
    (via ``zero_fill``) to fill the entirety of the given box.

    Returns:
        The new Brick (with the same compression setting as ``brick``),
        or None if ``box`` does not overlap the brick's physical_box.
    """
    overlap = box_intersection(box, brick.physical_box)
    overlap_shape = overlap[1] - overlap[0]
    if (overlap_shape <= 0).any():
        # No common voxels at all.
        return None

    # Translate the overlap into the brick's own (volume-relative) coordinates.
    overlap_within_brick = overlap - brick.physical_box[0]
    overlap_data = extract_subvol(brick.volume, overlap_within_brick)
    padded_data = zero_fill(overlap_data, overlap, box)

    # FIXME: Should we bother with location_id?
    #        (If we don't, realign operations won't work,
    #        but it's not clear what that would mean for halos anyway)
    return Brick(brick.logical_box, box, padded_data, compression=brick.compression)
def test_realign_bricks_to_new_grid_WITH_HALO():
    """
    Generate bricks on one grid, realign them to a new grid that has a halo,
    and verify the geometry and contents of every resulting brick.
    """
    source_grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    original_bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, source_grid, partial(extract_subvol, volume), DebugClient())

    halo = 1
    halo_shape = np.array([1, 1])
    new_grid = Grid((20, 10), (0, 0), halo)

    new_bricks = realign_bricks_to_new_grid(new_grid, original_bricks).compute()
    new_logical_boxes = [b.logical_box for b in new_bricks]

    assert len(new_bricks) == 5 * 26, f"{len(new_bricks)}"  # from (0,30) -> (100,290)

    for logical_box, brick in zip(new_logical_boxes, new_bricks):
        assert isinstance(brick, Brick), f"Got {type(brick)}"
        assert (brick.logical_box == logical_box).all()

        # logical_box must be exactly one block
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == new_grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - new_grid.offset) % new_grid.block_shape == 0).all()

        # Should match logical_box+halo, except for edges
        box_with_halo = brick.logical_box + (-halo_shape, halo_shape)
        assert (brick.physical_box == box_intersection(box_with_halo, bounding_box)).all()

        # Volume shape must match
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
def test_split_brick_WITH_HALO():
    """
    Verify that split_brick() refuses (via AssertionError) to split a brick
    whose physical_box extends beyond its logical_box.
    """
    halo = 1
    grid = Grid((10, 20), (12, 3), halo)
    volume = np.random.randint(0, 10, (100, 300))

    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop + halo

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    assert (logical_box == [(10,0), (20,20)]).all()
    assert (physical_box == [(12,3), (21,21)]).all()

    original_brick = Brick(logical_box, physical_box, extract_subvol(volume, physical_box))

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))

    # Expected failure: Forbidden to split bricks that have a halo
    rejected = False
    try:
        _fragments = split_brick(new_grid, original_brick)
    except AssertionError:
        rejected = True

    assert rejected, "Did not encounter the expected assertion. split_brick() should fail for bricks that have a halo."
def test_generate_bricks():
    """
    Generate bricks (no halo) for a bounding box that is not aligned to the
    grid, then verify the brick count and each brick's geometry, data and
    reported size.
    """
    grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    read_volume = partial(extract_subvol, volume)
    bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, read_volume, DebugClient())

    bricks = bricks.compute()
    assert len(bricks) == 9 * 14 == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        physical_shape = brick.physical_box[1] - brick.physical_box[0]

        # logical_box must be exactly one block
        assert (logical_shape == grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()

        # Must not exceed bounding box
        assert (brick.physical_box == box_intersection(brick.logical_box, bounding_box)).all()

        # Volume shape must match
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

        # __sizeof__ must include the volume
        assert sys.getsizeof(brick) > sys.getsizeof(brick.volume)
def test_generate_bricks_WITH_HALO():
    """
    Generate bricks on a grid with a 1-px halo and verify that each brick's
    physical_box equals its logical_box plus the halo, clipped to the
    bounding box, with matching volume shape and data.
    """
    halo = 1
    halo_shape = np.array([1, 1])
    grid = Grid((10, 20), (12, 3), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    read_volume = partial(extract_subvol, volume)
    bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, read_volume, DebugClient())

    bricks = bricks.compute()
    assert len(bricks) == 9 * 14 == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        # logical_box must be exactly one block
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()

        # Physical == logical+halo, except for bounding-box edges
        box_with_halo = brick.logical_box + (-halo_shape, halo_shape)
        assert (brick.physical_box == box_intersection(box_with_halo, bounding_box)).all()

        # Volume shape must match
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
def get_voxels(cls, server, uuid, instance_name, scale, instance_type, is_labels,
               volume_shape, offset, resource_server="", resource_port=0,
               throttle="auto", supervoxels=False, node_service=None):
    """
    Fetch a subvolume of voxels from the given data instance.

    Args:
        server, uuid:
            Used (with resource_server and resource_port) to obtain a node
            service via retrieve_node_service() when ``node_service`` is None.
        instance_name:
            Name of the data instance to read from.
        scale:
            Scale to read.  Only the 'labelarray'/'labelmap' path forwards
            it; the other paths assert that it is 0.
        instance_type:
            If 'labelarray' or 'labelmap', the request is expanded to
            64-px block boundaries, fetched, and cropped back afterwards.
        is_labels:
            For other instance types, selects get_labels3D() (True)
            or get_gray3D() (False).
        volume_shape, offset:
            Shape and start coordinate of the requested subvolume.
        throttle:
            If "auto", throttling is enabled exactly when no
            resource_server is given (i.e. it is the empty string).
        supervoxels:
            Forwarded to the label-fetching calls.
        node_service:
            Optional pre-built node service to use.

    Returns:
        The requested voxels, as returned by the node service
        (cropped to the requested box for block-aligned fetches).
    """
    if node_service is None:
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

    if throttle == "auto":
        throttle = (resource_server == "")

    if instance_type not in ('labelarray', 'labelmap'):
        if is_labels:
            assert scale == 0, "FIXME: get_labels3D() doesn't support scale yet!"
            # labelblk (or non-aligned labelarray) must be fetched the old-fashioned way
            return node_service.get_labels3D(instance_name, volume_shape, offset, throttle,
                                             compress=True, supervoxels=supervoxels)

        assert scale == 0, "FIXME: get_gray3D() doesn't support scale yet!"
        return node_service.get_gray3D(instance_name, volume_shape, offset, throttle,
                                       compress=False)

    # Labelarray data can be fetched very efficiently if the request is block-aligned
    # So, block-align the request no matter what.
    start = np.array(offset)
    aligned_start = start // 64 * 64
    aligned_stop = (start + volume_shape + 64-1) // 64 * 64
    aligned_shape = aligned_stop - aligned_start

    aligned_volume = node_service.get_labelarray_blocks3D(
        instance_name, aligned_shape, aligned_start, throttle, scale, supervoxels)

    # Crop the block-aligned result back down to what the caller asked for.
    start_within_aligned = offset - aligned_start
    requested_box_within_aligned = (start_within_aligned,
                                    start_within_aligned + volume_shape)
    return extract_subvol(aligned_volume, requested_box_within_aligned)
def test_copysegmentation_from_hdf5_to_dvid_input_mask(setup_hdf5_segmentation_input, disable_auto_retry):
    """
    Copy an HDF5 segmentation to DVID while restricting the copy to the
    even-numbered labels and adding an ID offset, then compare the written
    volume with the expected one (via _run_to_dvid).
    """
    (template_dir, config, volume, dvid_address,
     repo_uuid, _output_segmentation_name) = setup_hdf5_segmentation_input

    # make sure we get a fresh output
    output_segmentation_name = 'copyseg-with-input-mask'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    copy_options = config["copysegmentation"]

    # Select only even IDs
    all_labels = pd.unique(volume.reshape(-1))
    is_even = (all_labels % 2 == 0)
    copy_options["input-mask-labels"] = all_labels[is_even].tolist()

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    copy_options["add-offset-to-ids"] = offset

    # The config stores the bounding box in XYZ order; reverse it to ZYX.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]

    # Expected result: even voxels shifted by the offset, everything else 0,
    # and nothing at all outside of the input box.
    volume = np.where((volume % 2) == 0, volume + offset, 0)
    expected_vol = np.zeros_like(volume)
    overwrite_subvol(expected_vol, input_box, extract_subvol(volume, input_box))

    setup = (template_dir, config, expected_vol, dvid_address, repo_uuid, output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
def test_copysegmentation_from_dvid_to_dvid_input_mask(setup_dvid_segmentation_input, disable_auto_retry):
    """
    Copy a DVID segmentation to DVID while restricting the copy to a handful
    of selected labels and adding an ID offset, then compare the written
    volume with the expected one (via _run_to_dvid).
    """
    (template_dir, config, volume, dvid_address,
     repo_uuid, _output_segmentation_name) = setup_dvid_segmentation_input

    # make sure we get a fresh output
    output_segmentation_name = 'copyseg-with-input-mask-from-dvid'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    copy_options = config["copysegmentation"]

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    copy_options["add-offset-to-ids"] = offset

    # Select some labels that don't extend throughout the whole volume
    selected_labels = pd.unique(volume[150, 64:128, 64:128].reshape(-1))
    assert 0 not in selected_labels

    # Bounding box of all voxels that carry one of the selected labels.
    selected_mask = mask_for_labels(volume, selected_labels)
    selected_coords = np.array(selected_mask.nonzero()).transpose()
    selected_box = np.array([selected_coords.min(axis=0),
                             1 + selected_coords.max(axis=0)])

    # The config stores the bounding box in XYZ order; reverse it to ZYX.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]
    subvol_box = box_intersection(input_box, selected_box)

    selected_subvol = extract_subvol(volume, subvol_box).copy()
    selected_subvol = apply_mask_for_labels(selected_subvol, selected_labels)
    copy_options["input-mask-labels"] = selected_labels.tolist()

    # Only nonzero (selected) voxels receive the offset.
    selected_subvol = np.where(selected_subvol, selected_subvol + offset, 0)
    expected_vol = np.zeros(volume.shape, np.uint64)
    overwrite_subvol(expected_vol, subvol_box, selected_subvol)

    setup = (template_dir, config, expected_vol, dvid_address, repo_uuid, output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
def _run_to_dvid(setup, check_scale_0=True):
    """
    Write the config to the template directory, run the workflow, and fetch
    the output segmentation from DVID.

    Args:
        setup:
            Tuple of (template_dir, config, volume, dvid_address, repo_uuid,
            output_segmentation_name).
        check_scale_0:
            If True, assert that the fetched output equals the expected volume.

    Returns:
        (input_box_zyx, expected_vol, output_vol)

    Note:
        The output and expected volumes are always saved under /tmp.
    """
    template_dir, config, volume, dvid_address, repo_uuid, output_segmentation_name = setup

    # re-dump config in case it's been changed by a specific test
    dumper = YAML()
    dumper.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        dumper.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    def box_zyx(section):
        # Bounding boxes are stored in XYZ order; reverse the columns to get ZYX.
        box_xyz = np.array(final_config[section]['geometry']['bounding-box'])
        return box_xyz[:, ::-1]

    input_box_zyx = box_zyx('input')
    expected_vol = extract_subvol(volume, input_box_zyx)

    output_box_zyx = box_zyx('output')
    output_vol = fetch_raw(dvid_address, repo_uuid, output_segmentation_name,
                           output_box_zyx, dtype=np.uint64)

    np.save('/tmp/output_vol.npy', output_vol)
    np.save('/tmp/expected_vol.npy', expected_vol)

    if check_scale_0:
        volumes_match = (output_vol == expected_vol).all()
        assert volumes_match, \
            "Written vol does not match expected"

    return input_box_zyx, expected_vol, output_vol
def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    """
    Create a grayscale volume service from a config, verify that the expected
    instances were created (one per scale), then write a volume at every
    scale and read a sub-box of it back.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}

          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}

        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
        """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    # Creating the service is expected to create the instance(s).
    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        # Scale 0 lives in the base instance; other scales get a '_N' suffix.
        if scale == 0:
            assert instance_name in repo_instances
            assert repo_instances[instance_name] == 'uint8blk'
        else:
            assert f"{instance_name}_{scale}" in repo_instances
            assert repo_instances[f"{instance_name}_{scale}"] == 'uint8blk'

        vol = downsample(volume, 2**scale, 'label')  # label downsampling is easier to test with

        # Embed the downsampled data in a zero buffer whose shape is a multiple of 64.
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)

        # Copy the actual data in.  (Copying aligned_vol onto itself would
        # leave the buffer all-zero and make the read-back check vacuous.)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()
def test_labelindex(labelmap_setup):
    """
    Post random single-supervoxel data, then check that the label index DVID
    generates matches one computed locally, both in pandas and in protobuf
    form, and that posting an index and fetching it back round-trips.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindex', 'test_labelindex')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Write some random data
    sv = 99
    vol = sv * np.random.randint(2, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    # Compute labelindex table from scratch:
    # one row per 64-px block, with the number of voxels equal to sv.
    rows = []
    for corner in ndrange(offset, offset + vol.shape, (64, 64, 64)):
        corner = np.array(corner)
        block_box = np.array((corner, corner + 64))
        block_voxels = extract_subvol(vol, block_box - offset)
        rows.append([*corner, sv, (block_voxels == sv).sum()])

    index_df = pd.DataFrame(rows, columns=['z', 'y', 'x', 'sv', 'count'])

    # Check DVID's generated labelindex table against expected
    labelindex_tuple = fetch_labelindex(*instance_info, sv, format='pandas')
    assert labelindex_tuple.label == sv

    labelindex_tuple.blocks.sort_values(['z', 'y', 'x', 'sv'], inplace=True)
    labelindex_tuple.blocks.reset_index(drop=True, inplace=True)
    assert (labelindex_tuple.blocks == index_df).all().all()

    # Check our protobuf against DVID's
    timestamp = datetime.datetime.now().isoformat()
    index_tuple = PandasLabelIndex(index_df, sv, 1, timestamp, 'someuser')
    labelindex = create_labelindex(index_tuple)

    # Since labelindex block entries are not required to be sorted,
    # dvid might return them in a different order.
    # Hence this comparison function which sorts them first.
    def compare_proto_blocks(left, right):
        return sorted(left.blocks.items()) == sorted(right.blocks.items())

    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)

    # Check post/get roundtrip
    post_labelindex(*instance_info, sv, labelindex)
    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)
def test_pad_brick_data_from_volume_source_NO_PADDING_NEEDED():
    """
    When the brick's physical_box needs no padding for the given grid,
    pad_brick_data_from_volume_source() must return the very same
    brick object.
    """
    source_volume = np.random.randint(0, 10, (100, 300))
    read_source = partial(extract_subvol, source_volume)

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(6, 10), (11, 15)]
    brick = Brick(logical_box, physical_box, extract_subvol(source_volume, physical_box))

    padding_grid = Grid((5, 5), offset=(1, 0))
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    assert padded_brick is brick, "Expected to get the same brick back."
def split_brick(new_grid, original_brick):
    """
    Split a single brick into fragments according to a new grid.

    Each fragment holds the part of the original brick's data that falls
    within one box of ``new_grid``.  Fragments are returned as (compressed)
    Bricks themselves, but with relatively small volume and physical_box
    members.

    Note:
        It is probably a mistake to call this function for Bricks which have
        a larger physical_box than logical_box, so that is currently
        forbidden (enforced with an assertion).  It would work here, but it
        implies that you will end up with some voxels represented multiple
        times in a given collection of Bricks, with undefined results as to
        which ones are kept after you consolidate them into a new alignment.

        However, the reverse is permitted, i.e. it is permitted for the
        DESTINATION grid to use a halo, in which case some pixels in the
        original brick will be duplicated to multiple destinations.

    Returns:
        [(box, Brick), (box, Brick), ...], where each Brick is a fragment
        (to be assembled later into the new grid's bricks), and 'box' is a
        hashable key made from the logical_box of the Brick into which this
        fragment should be assembled.
    """
    source_physical = original_brick.physical_box
    source_logical = original_brick.logical_box

    # Forbid out-of-bounds physical_boxes. (See note above.)
    starts_inside = (source_physical[0] >= source_logical[0]).all()
    assert (starts_inside and (source_physical[1] <= source_logical[1]).all())

    keyed_fragments = []

    # Iterate over the new boxes that intersect with the original brick
    for destination_box in boxes_from_grid(original_brick.physical_box, new_grid, include_halos=True):
        # Physical intersection of original with new
        fragment_box = box_intersection(destination_box, original_brick.physical_box)

        # Extract portion of original volume data that belongs to this new box
        # (expressed relative to the original brick's own volume)
        fragment_box_internal = fragment_box - original_brick.physical_box[0]
        fragment_data = extract_subvol(original_brick.volume, fragment_box_internal)

        # Subtract out halo to get logical_box
        destination_logical_box = destination_box - (-new_grid.halo_shape, new_grid.halo_shape)

        fragment = Brick(destination_logical_box, fragment_box, fragment_data)
        fragment.compress()

        # The key is the destination logical box, but with a special type and hash,
        # to avoid bad collisions with the default spark hash function.
        # It is paired with the new fragment, to be assembled into the final brick
        # in a later stage.
        key = rt.tuple_with_hash(box_as_tuple(destination_logical_box))
        grid_index = tuple(destination_logical_box[0] / new_grid.block_shape)
        key.set_hash(hash(grid_index))

        keyed_fragments.append((key, fragment))

    return keyed_fragments
def test_split_brick():
    """
    Split the first brick of a grid onto a finer grid and verify the logical
    boxes and the data of the resulting fragments.
    """
    grid = Grid((10, 20), (12, 3))
    volume = np.random.randint(0, 10, (100, 300))

    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    assert (logical_box == [(10,0), (20,20)]).all()
    assert (physical_box == [(12,3), (20,20)]).all()

    original_brick = Brick(logical_box, physical_box, extract_subvol(volume, physical_box))

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))
    fragments = split_brick(new_grid, original_brick)

    # The new boxes ((10, 0), (14, 10)) and ((10, 10), (14, 20)) are NOT expected:
    # they intersect with the original logical_box,
    # but there is no physical data for them in the original brick.
    expected_boxes = [
        ((12, 0), (14, 10)),
        ((12, 10), (14, 20)),
        ((14, 0), (16, 10)),
        ((14, 10), (16, 20)),
        ((16, 0), (18, 10)),
        ((16, 10), (18, 20)),
        ((18, 0), (20, 10)),
        ((18, 10), (20, 20)),
    ]
    boxes = [box_as_tuple(frag.logical_box) for frag in fragments]
    assert boxes == expected_boxes

    for frag in fragments:
        assert (frag.volume == extract_subvol(volume, frag.physical_box)).all()
def _execute_scale(self, scale, starting_batch, mask_s5, mask_box_s5):
    """
    Process all bricks of the given scale that overlap the mask, in batches.

    Args:
        scale:
            The scale to process.
        starting_batch:
            Index of the first batch to process.  Batches before it are
            skipped (used when resuming).
        mask_s5:
            Mask volume.  It is indexed with brick boxes divided by
            2**(5 - scale), i.e. it is treated as scale-5 data.
        mask_box_s5:
            Box of ``mask_s5``; its start coordinate is subtracted to
            convert scaled brick boxes into indexes within ``mask_s5``.
    """
    options = self.config["masksegmentation"]
    block_width = self.output_service.block_width

    def scale_box(box, scale):
        # Scale down, then round up to the nearest multiple of the block width
        box = np.ceil(box / 2**scale).astype(np.int32)
        return round_box(box, block_width)

    # bounding box of the segmentation at the current scale.
    bounding_box = scale_box(self.input_service.bounding_box_zyx, scale)

    # Don't make bricks that are wider than the bounding box at this scale
    brick_shape = np.minimum(self.input_service.preferred_message_shape, bounding_box[1])
    assert not (brick_shape % block_width).any()

    brick_boxes = boxes_from_grid(bounding_box, brick_shape, clipped=True)

    with Timer(f"Scale {scale}: Preparing bricks", logger):
        boxes_and_masks = []
        for box in brick_boxes:
            mask_block_box = ((box // 2**(5 - scale)) - mask_box_s5[0])
            mask_block_box = mask_block_box.astype(np.int32)  # necessary when scale is > 5
            mask_block_s5 = extract_subvol(mask_s5, mask_block_box)

            # Bricks that don't touch the mask at all are skipped entirely.
            if mask_block_s5.any():
                boxes_and_masks.append((box, mask_block_s5))

    batches = [*iter_batches(boxes_and_masks, options["batch-size"])]

    if starting_batch == 0:
        logger.info(f"Scale {scale}: Processing {len(batches)} batches")
    else:
        logger.info(
            f"Scale {scale}: Processing {len(batches) - starting_batch} "
            f"remaining batches from {len(batches)} original batches")

        assert starting_batch < len(batches), \
            f"Can't start at batch {starting_batch}; there are only {len(batches)} in total."
        batches = batches[starting_batch:]

    # Number the batches by their original index, so that
    # resumed runs report the same batch indexes as full runs.
    for batch_index, batch_boxes_and_masks in enumerate(batches, start=starting_batch):
        with Timer(f"Scale {scale}: Batch {batch_index:02d}", logger):
            self._execute_batch(scale, batch_index, batch_boxes_and_masks)
def downsample_brick(brick):
    """
    Return a downsampled copy of the given brick.

    ``factor`` and ``method`` are not parameters; they are taken from the
    enclosing scope.  The brick's logical_box must be divisible by
    ``factor``.  The new brick keeps the original's compression setting.
    """
    assert (brick.logical_box % factor == 0).all()

    # If the factor doesn't perfectly divide into
    # the brick's physical dimensions,
    # then chop off the edges until it does.
    needs_clipping = (brick.physical_box % factor != 0).any()
    if needs_clipping:
        clipped_box = round_box(brick.physical_box, factor, 'in')
        source_volume = extract_subvol(brick.volume, clipped_box - brick.physical_box[0])
    else:
        clipped_box = brick.physical_box
        source_volume = brick.volume

    small_volume = downsample(source_volume, factor, method)
    small_logical_box = brick.logical_box // factor
    small_physical_box = clipped_box // factor

    return Brick(small_logical_box, small_physical_box, small_volume,
                 compression=brick.compression)
def test_compression():
    """
    For every available compression method, build a BrickWall, compress all
    bricks, make sure they can be pickled, and verify that the brick data
    still matches the source volume afterwards.
    """
    vol_box = [(0,0,0), (100,100,120)]
    volume = np.random.randint(10, size=vol_box[1], dtype=np.uint64)

    def read_box(box):
        return extract_subvol(volume, box)

    def check_pickle(brick):
        pickle.dumps(brick)

    def check_brick(brick):
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

    for method in COMPRESSION_METHODS:
        wall = BrickWall.from_accessor_func(vol_box, Grid((64,64,128)), read_box, compression=method)

        # Compress them all
        wall.bricks.map(Brick.compress).compute()

        # Pickle them all
        wall.bricks.map(check_pickle).compute()

        # Check them all (implicit decompression)
        wall.bricks.map(check_brick).compute()
def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz, neighborhood_id, mito_res_scale_diff):
    """
    Determine how many non-trivial mito objects overlap with the given
    "neighborhood object", and return a table of their IDs and sizes.

    1. Download the neighborhood mask for the given neighborhood_id.
    2. Erode the neighborhood mask by 1 px.
    3. Fetch the mito segmentation for the voxels within the neighborhood.
    4. Fetch (from dvid) the sizes of each mito object.
    5. Filter out the mitos that are smaller than the minimum size
       (MIN_MITO_SIZE) that is actually used in our published mito analyses.
    6. Just for additional info, determine how many connected components
       are formed by the mito objects.
    7. Return the mito IDs, sizes, and CC info as a DataFrame.

    Args:
        mito_roi_source:
            Location of the neighborhood segmentation, parsed as
            '<protocol>://<server>/<uuid>/<instance>'.
        neighborhood_origin_xyz:
            Center of the neighborhood, in XYZ order.
        neighborhood_id:
            Label of the neighborhood object within that segmentation.
        mito_res_scale_diff:
            Number of scales by which the mito segmentation differs from the
            neighborhood segmentation; also used to rescale the fetched sizes.

    Returns:
        DataFrame indexed by 'mito', holding the mito sizes
        along with 'cc' and 'cc_size' columns.
    """
    # The neighborhood segmentation source
    protocol, url = mito_roi_source.split('://')[-2:]
    server, uuid, instance = url.split('/')
    server = f'{protocol}://{server}'

    center_zyx = np.array(neighborhood_origin_xyz[::-1])
    box = [center_zyx - RADIUS, 1 + center_zyx + RADIUS]

    # Align box to the analysis scale before scaling it.
    scale_factor = 2**ANALYSIS_SCALE
    box = round_box(box, scale_factor)

    # Scale box
    box //= scale_factor

    neighborhood_seg = fetch_labelmap_voxels(server, uuid, instance, box, scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # Removing the inner edge voxels is equivalent to a 1-px erosion.
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    mito_scale = ANALYSIS_SCALE - mito_res_scale_diff
    mito_seg = fetch_labelmap_voxels(*MITO_SEG, box, supervoxels=True, scale=mito_scale)
    assert neighborhood_mask.shape == mito_seg.shape
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation includes little scraps and slivers
    # that were filtered out of the "real" mito set.
    # Filter those scraps out of our results here.
    mito_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*mito_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    mito_sizes *= (2**mito_res_scale_diff)**3

    # This is our main result: mito IDs (and their sizes)
    big_enough = (mito_sizes >= MIN_MITO_SIZE)
    mito_sizes = mito_sizes.loc[big_enough]

    # Just for extra info, group the mitos we found into connected components.
    # (Crop everything to the nonzero region first.)
    mito_mask = mask_for_labels(mito_seg, mito_sizes.index)
    mito_box = compute_nonzero_box(mito_mask)
    mito_mask = extract_subvol(mito_mask, mito_box)
    mito_seg = extract_subvol(mito_seg, mito_box)
    mito_cc = label(mito_mask, connectivity=1)

    cc_table = contingency_table(mito_seg, mito_cc).reset_index()
    cc_table = cc_table.rename(columns={
        'left': 'mito',
        'right': 'cc',
        'voxel_count': 'cc_size'
    })
    cc_table = cc_table.set_index('mito')

    return pd.DataFrame(mito_sizes).merge(cc_table, 'left', left_index=True, right_index=True)
def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation, disable_auto_retry):
    """
    Create a labelmap volume service from a config, verify that the instance
    was created, write a volume at every scale and read a sub-box of it back,
    then check sparse_brick_coords_for_labels() and sample_labels().
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true

          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}

        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
        """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    # Creating the service is expected to create the instance.
    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)
    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')

        # Embed the downsampled data in a zero buffer whose shape is a multiple of 64.
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        # All index tuples of an array of the given shape, one per row.
        return np.indices(shape).reshape(len(shape), -1).transpose()

    # Build the expected table: every (64-px brick corner, label)
    # combination that occurs in the volume, except for label 0.
    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])
    expected_df['label'] = volume.reshape(-1)
    expected_df['z'] //= 64
    expected_df['y'] //= 64
    expected_df['x'] //= 64
    expected_df = expected_df.drop_duplicates()
    expected_df['z'] *= 64
    expected_df['y'] *= 64
    expected_df['x'] *= 64

    expected_df = expected_df.query('label != 0')

    expected_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)
    brick_coords_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)

    expected_df.reset_index(drop=True, inplace=True)
    brick_coords_df.reset_index(drop=True, inplace=True)

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    # Sample anywhere within the scale-0 volume.  (Not `vol`, which is just
    # whatever sub-box the read-back loop above fetched last.)
    points = [np.random.randint(d, size=(10, )) for d in volume.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(), )]).all()
def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask, roi_dilation, disable_auto_retry):
    """
    Run the masksegmentation workflow and verify its results:

    - the written scale-0 segmentation equals the input with the masked voxels zeroed
    - the written downscale pyramid matches a locally computed one
    - the exported 'erased' block statistics match the erased voxels
    - erase_from_labelindexes() leaves consistent label indexes and mappings

    When roi_dilation > 0, the test stops right after launching the workflow
    and fetching its output, because voxel accuracy of dilation isn't verified yet.
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     input_segmentation_name, output_segmentation_name) = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                       output_box_zyx, scale=0, supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    # NOTE(review): this indexes the full volume with a mask cropped to the
    # input box, so it presumably relies on the input box covering the
    # whole volume -- confirm against the fixture.
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, input_segmentation_name,
                                             output_box_zyx, scale=0, supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, input_segmentation_name,
                                                   output_box_zyx, scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                                 output_box_zyx, scale=0)
        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5', '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5', '/tmp/segmentation-mask.h5')
            shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2, 'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                                  output_box_zyx // 2**scale, scale=scale, supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols

    # drop=True: the pre-sort row order is irrelevant and must not take part
    # in the comparison below (reset_index() alone would add it as a column).
    stats_df = stats_df.sort_values(stats_cols).reset_index(drop=True)

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol, input_box_zyx)
    expected_stats_df = expected_stats_df.sort_values(stats_cols).reset_index(drop=True)

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info, dest_info, block_stats_path, batch_size=10, threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(expected_vol.reshape(-1), name='sv')
                            .value_counts()
                            .drop(0)
                            .sort_index()
                            .rename('count'))

    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, remaining_svs)):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[['sv', 'count']]
                   .groupby('sv')['count']
                   .sum()
                   .sort_index())

    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts,
                      'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs]) == fetch_mapping(*src_info, [*remaining_svs])).all()
def init_boxes(self, volume_service, subset_labels, roi):
    """
    Determine the set of brick boxes to process for the given volume service.

    Args:
        volume_service:
            The input service.  Its ``bounding_box_zyx`` and
            ``preferred_message_shape`` define the brick grid.
        subset_labels:
            Optional collection of labels.  If non-empty (and no ``roi`` is
            given), bricks are restricted to those reported by
            ``volume_service.sparse_block_mask_for_labels()``, when the
            service implements it.
        roi:
            Optional name of a DVID ROI.  If given, bricks are restricted
            to those that intersect the ROI mask.  Takes precedence over
            ``subset_labels``.

    Returns:
        Array of boxes, shape (N, 2, D), clipped to the service bounding box.

    Raises:
        RuntimeError:
            If the sparse mask found for ``subset_labels`` is empty.
    """
    # Needed by the ROI path AND by the sparse-mask path at the bottom,
    # so it must be defined regardless of which branch is taken.
    brick_shape = volume_service.preferred_message_shape

    sbm = None
    if roi:
        base_service = volume_service.base_service
        assert isinstance(base_service, DvidVolumeService), \
            "Can't specify an ROI unless you're using a dvid input"

        assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
            "The 'roi' option doesn't support adapters other than 'rescale-level'"

        scale = 0
        if isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"

        server, uuid, _seg_instance = base_service.instance_triple

        # Factor that converts service-scale voxels to ROI (scale-5) voxels.
        s5_factor = 2**(5 - scale)

        assert not (brick_shape % s5_factor).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, brick_shape)
        seg_box_s5 = seg_box // s5_factor

        with Timer(f"Fetching mask for ROI '{roi}'", logger):
            roi_mask_s5, roi_box_s5 = fetch_roi(server, uuid, roi, format='mask')

        # Restrict to input bounding box
        clipped_roi_box_s5 = box_intersection(seg_box_s5, roi_box_s5)
        clipped_roi_mask_s5 = extract_subvol(roi_mask_s5, clipped_roi_box_s5 - roi_box_s5[0])

        # Align to brick grid.
        # The brick shape is expressed in service-scale voxels, so it is
        # converted to scale-5 units with the same factor used everywhere
        # else in this function (identical to 2**5 when scale == 0).
        aligned_roi_box_s5 = round_box(clipped_roi_box_s5, brick_shape // s5_factor, 'out')
        padding = (aligned_roi_box_s5 - clipped_roi_box_s5)
        padding[0] *= -1
        aligned_roi_mask_s5 = np.pad(clipped_roi_mask_s5, padding.transpose())

        # At the service native scale
        aligned_roi_box = (s5_factor * aligned_roi_box_s5)
        logger.info(f"Brick-aligned ROI '{roi}' has bounding-box {aligned_roi_box[:, ::-1].tolist()}")

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask.create_from_highres_mask(aligned_roi_mask_s5, s5_factor, aligned_roi_box, brick_shape)

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
        except NotImplementedError:
            # This service can't provide sparse masks; fall back to the dense grid.
            sbm = None
        else:
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")

    if sbm is None:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                brick_shape,
                                clipped=True)
        return np.array([*boxes])

    boxes = np.array(sbm.sparse_boxes(brick_shape))

    # Clip
    boxes[:, 0, :] = np.maximum(volume_service.bounding_box_zyx[0], boxes[:, 0, :])
    boxes[:, 1, :] = np.minimum(volume_service.bounding_box_zyx[1], boxes[:, 1, :])

    assert (boxes[:, 0, :] < boxes[:, 1, :]).all(), \
        "After cropping to input volume, some bricks disappeared."

    return boxes
def _init_masks(self):
    """
    Initialize the label masks and the sparse block mask for this workflow,
    based on the "copysegmentation" config section.

    Sets the following members:
        self.sbm:
            A sparse block mask, or None.  Either loaded from the
            'sparse-block-mask' pickle file, or derived from the
            input/output mask labels (their intersection if both exist).
        self.output_mask_labels:
            Set of labels from 'output-mask-labels'.
        self.input_mask_labels:
            Set of labels from 'input-mask-labels', shifted by
            'add-offset-to-ids' if that offset is non-zero.
    """
    options = self.config["copysegmentation"]
    self.sbm = None

    if options["sparse-block-mask"]:
        # In theory, we could just take the intersection of the masks involved.
        # But I'm too lazy to think about that right now.
        assert not options["input-mask-labels"] and not options["output-mask-labels"], \
            "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

        # NOTE(security): unpickling executes arbitrary code.
        # Only point 'sparse-block-mask' at files from a trusted source.
        with open(options["sparse-block-mask"], 'rb') as f:
            self.sbm = pickle.load(f)

    is_supervoxels = False
    if isinstance(self.input_service.base_service, DvidVolumeService):
        is_supervoxels = self.input_service.base_service.supervoxels

    output_mask_labels = load_body_list(options["output-mask-labels"], is_supervoxels)
    self.output_mask_labels = set(output_mask_labels)

    output_sbm = None
    if len(output_mask_labels) > 0:
        if (self.output_service.preferred_message_shape != self.input_service.preferred_message_shape).any():
            logger.warning("Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape")
        elif (self.output_service.bounding_box_zyx != self.input_service.bounding_box_zyx).any():
            logger.warning("Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box")
        else:
            try:
                output_sbm = self.output_service.sparse_block_mask_for_labels(output_mask_labels)
            except NotImplementedError:
                output_sbm = None

    input_mask_labels = load_body_list(options["input-mask-labels"], is_supervoxels)

    input_sbm = None
    if len(input_mask_labels) > 0:
        try:
            input_sbm = self.input_service.sparse_block_mask_for_labels(input_mask_labels)
        except NotImplementedError:
            input_sbm = None

    if self.sbm is not None:
        # Explicitly provided mask file takes precedence.
        pass
    elif input_sbm is None:
        self.sbm = output_sbm
    elif output_sbm is None:
        self.sbm = input_sbm
    else:
        assert (input_sbm.resolution == output_sbm.resolution).all(), \
            "FIXME: At the moment, you can't supply both an input mask and an output "\
            "mask unless the input and output sources use the same brick shape (message-block-shape)"

        final_box = box_intersection(input_sbm.box, output_sbm.box)

        # Express final_box relative to each mask's own origin,
        # in units of that mask's low-res voxels.
        input_box = (final_box - input_sbm.box[0]) // input_sbm.resolution
        input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

        output_box = (final_box - output_sbm.box[0]) // output_sbm.resolution
        output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

        assert input_mask.shape == output_mask.shape
        # Builtin bool: the np.bool alias no longer exists in current numpy.
        assert input_mask.dtype == output_mask.dtype == bool
        final_mask = (input_mask & output_mask)

        self.sbm = SparseBlockMask(final_mask, final_box, input_sbm.resolution)

    id_offset = options["add-offset-to-ids"]
    if id_offset != 0:
        input_mask_labels = np.asarray(input_mask_labels, np.uint64)
        input_mask_labels += id_offset
    self.input_mask_labels = set(input_mask_labels)
def overwrite_box(box, lowres_mask):
    """
    Zero out the masked voxels of one brick and post only the blocks
    that actually changed.

    Relies on names that are not defined in this function and are expected
    to come from the enclosing scope: ``block_width``, ``scale``,
    ``input_service`` and ``output_service``.

    Args:
        box:
            The brick's box at the current ``scale``.
            Its start must be aligned to ``block_width``.
        lowres_mask:
            Boolean scale-5 mask for the brick.  Must contain at least one
            True voxel.

    Returns:
        None if no voxels changed, or if ``scale != 0``.
        Otherwise, the result of block_stats_for_volume() for the erased voxels.
    """
    # Builtin bool: the np.bool alias no longer exists in current numpy.
    assert lowres_mask.dtype == bool
    assert not (box[0] % block_width).any()
    assert lowres_mask.any(), \
        "This function is supposed to be called on bricks that actually need masking"

    # Crop box and mask to only include the extent of the masked voxels
    nonzero_mask_box = compute_nonzero_box(lowres_mask)
    nonzero_mask_box = round_box(nonzero_mask_box, (block_width * 2**scale) // 2**5)
    lowres_mask = extract_subvol(lowres_mask, nonzero_mask_box)

    # For scale > 5 the factor below is fractional, hence the cast back to int.
    box = box[0] + (nonzero_mask_box * 2**(5 - scale))
    box = box.astype(np.int32)

    if scale <= 5:
        mask = upsample(lowres_mask, 2**(5 - scale))
    else:
        # Downsample, but favor UNmasked voxels
        mask = ~view_as_blocks(~lowres_mask, 3 * (2**(scale - 5),)).any(axis=(3, 4, 5))

    old_seg = input_service.get_subvolume(box, scale)

    assert mask.dtype == bool
    new_seg = old_seg.copy()
    new_seg[mask] = 0

    if (new_seg == old_seg).all():
        # It's possible that there are no changed voxels, but only
        # at high scales where the masked voxels were downsampled away.
        #
        # So if the original downscale pyramids are perfect,
        # then the following assumption ought to hold.
        #
        # But I'm commenting it out in case the DVID pyramid at scale 5
        # isn't pixel-perfect in some places.
        #
        # assert scale > 5
        return None

    def post_changed_blocks(old_seg, new_seg):
        # If we post the whole volume, we'll be overwriting blocks that haven't changed,
        # wasting space in DVID (for duplicate blocks stored in the child uuid).
        # Instead, we need to only post the blocks that have changed.

        # So, can't just do this:
        # output_service.write_subvolume(new_seg, box[0], scale)

        seg_diff = (old_seg != new_seg)
        block_diff = view_as_blocks(seg_diff, 3 * (block_width,))

        changed_block_map = block_diff.any(axis=(3, 4, 5)).nonzero()
        changed_block_corners = box[0] + np.transpose(changed_block_map) * block_width

        changed_blocks = view_as_blocks(new_seg, 3 * (block_width,))[changed_block_map]
        encoded_blocks = encode_labelarray_blocks(changed_block_corners, changed_blocks)

        mgr = output_service.resource_manager_client
        with mgr.access_context(output_service.server, True, 1, changed_blocks.nbytes):
            post_labelmap_blocks(*output_service.instance_triple, None, encoded_blocks, scale,
                                 downres=False, noindexing=True, throttle=False, is_raw=True)

    assert not (box % block_width).any(), \
        "Should not write partial blocks"

    post_changed_blocks(old_seg, new_seg)
    del new_seg

    if scale != 0:
        # Don't collect statistics for higher scales
        return None

    # Keep only the voxels that were erased, for the statistics.
    erased_seg = old_seg.copy()
    erased_seg[~mask] = 0

    block_shape = 3 * (input_service.block_width,)
    erased_stats_df = block_stats_for_volume(block_shape, erased_seg, box)
    return erased_stats_df
def execute(self):
    """
    Run the workflow described by the "roistats" config section:
    for each body in 'subset-bodies', count its voxels within each of
    the configured ROIs.

    Writes the following files to the current working directory:
        brick-coords.npy: the brick coordinates that contain the bodies
        roi-stats.npy, roi-stats.csv: the per-(body, roi) voxel counts

    If the input scale is greater than 0, the 'voxels' column is
    renamed to 'voxels_s{scale}'.
    """
    scale = self._init_service()

    options = self.config["roistats"]
    server = self.input_service.base_service.server
    uuid = self.input_service.base_service.uuid
    rois = options["rois"]

    bodies = load_body_list(options["subset-bodies"], self.input_service.base_service.supervoxels)
    assert len(bodies) > 0, "Please provide a list of subset-bodies to process"

    bounding_box = self.input_service.bounding_box_zyx
    assert not (bounding_box % 2**(5-scale)).any(), \
        "Make sure your configured bounding box is divisible by 32px at scale 0"

    brick_shape = self.input_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "Make sure your preferred message shape divides into 32px blocks at scale 0"

    with Timer("Fetching ROI volume", logger):
        roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
            server, uuid, rois, False, bounding_box // 2**(5 - scale))

    if len(overlaps) > 0:
        logger.warning(f"Some of your ROIs overlap! Here's an incomplete list:\n{overlaps}")

    with Timer("Determining brick set", logger):
        brick_coords_df = self.input_service.sparse_brick_coords_for_labels(bodies)
        np.save('brick-coords.npy', brick_coords_df.to_records(index=False))

    with Timer("Preparing bricks", logger):
        boxes_and_roi_bricks = []

        # One entry per brick: its (z, y, x) corner and the labels it contains.
        # (Series.items() replaces iteritems(), which pandas 2.0 removed.)
        labels_per_brick = brick_coords_df.groupby([*'zyx'])['label'].agg(tuple)
        for coord, labels in labels_per_brick.items():
            box = np.array((coord, coord))
            box[1] += brick_shape
            box = box_intersection(box, bounding_box)

            # Same brick in scale-5 coordinates, relative to the ROI volume's origin.
            roi_brick_box = ((box // 2**(5 - scale)) - roi_box_s5[0])
            roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
            boxes_and_roi_bricks.append((box, roi_brick_s5, labels))

    logger.info(f"Prepared {len(boxes_and_roi_bricks)} bricks of shape {(*brick_shape[::-1],)}")

    all_stats = []
    batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
    logger.info(f"Processing {len(batches)} batches")
    for i, batch_boxes_and_bricks in enumerate(batches):
        with Timer(f"Batch {i:02d}", logger):
            batch_stats = self._execute_batch(scale, batch_boxes_and_bricks)
            all_stats.append(batch_stats)

    # Combine the per-batch counts into one total per (body, roi_id).
    all_stats = pd.concat(all_stats, ignore_index=True)
    all_stats = all_stats.groupby(['body', 'roi_id'], as_index=False)['voxels'].sum()

    # roi_id 0 means "not in any ROI"; the configured ROIs are numbered from 1.
    roi_names = pd.Series(["<none>", *rois], name='roi')
    roi_names.index.name = 'roi_id'
    all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
    all_stats = all_stats.sort_values(['body', 'roi_id'])

    if scale > 0:
        all_stats.rename(columns={'voxels': f'voxels_s{scale}'}, inplace=True)

    with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
        np.save('roi-stats.npy', all_stats.to_records(index=False))
        all_stats.to_csv('roi-stats.csv', index=False, header=True)
def split_brick(new_grid, original_brick):
    """
    Redistribute the data of a single brick onto a new grid.

    The brick is cut into pieces, one per intersecting box of ``new_grid``.
    Each piece is returned as a Brick of its own, whose logical_box and
    location_id refer to the NEW grid, and whose volume and physical_box
    cover only the intersecting portion of the original data.

    Note:
        It is probably a mistake to call this function for Bricks which
        have a larger physical_box than logical_box, so that is currently
        forbidden.  (It would work here, but it implies that you will end
        up with some voxels represented multiple times in a given RDD of
        Bricks, with undefined results as to which ones are kept after you
        consolidate them into a new alignment.)

        The reverse is permitted: the DESTINATION grid may use a halo,
        in which case some pixels of the original brick are duplicated
        to multiple destinations.

    Returns:
        [Brick, Brick, ....], where each Brick is a fragment
        (to be assembled later into the new grid's bricks).
    """
    # Forbid out-of-bounds physical_boxes. (See note in the docstring.)
    starts_inside = (original_brick.physical_box[0] >= original_brick.logical_box[0]).all()
    stops_inside = (original_brick.physical_box[1] <= original_brick.logical_box[1]).all()
    assert (starts_inside and stops_inside), \
        f"{original_brick.physical_box[:,::-1].tolist()} extends outside of {original_brick.logical_box[:,::-1].tolist()}"

    ## FIXME:
    ## If the brick lies completely within a single grid square for the destination block,
    ## Then boxes_from_grid() will only return a single box and the brick's volume will remain unchanged.
    ## In that case, it's probably best not to uncompress/recompress the brick.
    ## Just create a new brick with the same compressed data and a different logical_box.

    pieces = []

    # Visit every box of the new grid (halo included) that touches the original brick
    dest_boxes = boxes_from_grid(original_brick.physical_box, new_grid, include_halos=True)
    for dest_box in dest_boxes:
        # The region shared by the original brick and this destination box
        overlap_box = box_intersection(dest_box, original_brick.physical_box)

        # Same region, in the coordinate frame of the original brick's volume
        overlap_in_brick = overlap_box - original_brick.physical_box[0]
        piece_vol = extract_subvol(original_brick.volume, overlap_in_brick)

        # Strip the halo from the destination box to obtain its logical box
        halo_margin = (-new_grid.halo_shape, new_grid.halo_shape)
        dest_logical_box = dest_box - halo_margin
        dest_location_id = tuple(dest_logical_box[0] // new_grid.block_shape)

        piece = Brick(
            dest_logical_box,
            overlap_box,
            piece_vol,
            location_id=dest_location_id,
            compression=original_brick.compression,
        )
        piece.compress()
        pieces.append(piece)

    original_brick.compress()
    return pieces
def check_brick(brick):
    """
    Assert that the brick's volume has the shape of its physical_box,
    and that it holds the same data as that box of the reference ``volume``
    (a name taken from the surrounding scope).
    """
    phys_box = brick.physical_box
    expected_shape = phys_box[1] - phys_box[0]
    assert (brick.volume.shape == expected_shape).all()

    expected_data = extract_subvol(volume, brick.physical_box)
    assert (brick.volume == expected_data).all()
def test_extract_halos():
    """
    Verify extract_halos() for both 'outer' and 'inner' halos of a bricked
    2D volume whose bounding box is not aligned to the grid:

    - each halo differs from its brick's logical box along one axis only
    - the valid part of each halo matches the source volume; the rest is zero
    - every outer halo coincides with exactly one inner halo, with equal data
    """
    halo = 1
    grid = Grid((10, 20), (0, 0), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())
    outer_halos = extract_halos(bricks, grid, 'outer').compute()
    inner_halos = extract_halos(bricks, grid, 'inner').compute()

    halos_by_type = (('outer', outer_halos), ('inner', inner_halos))

    for halo_type, halo_bricks in halos_by_type:
        for hb in halo_bricks:
            # Even bricks on the edge of the volume
            # (which have smaller physical boxes than logical boxes)
            # return halos which correspond to the original
            # logical box (except for the halo axis).
            # (Each halo's "logical box" still corresponds to
            # the brick it was extracted from.)
            if halo_type != 'outer':
                assert (hb.physical_box != hb.logical_box).sum() == 1
            else:
                assert (hb.physical_box[0] != hb.logical_box[0]).sum() == 1
                assert (hb.physical_box[1] != hb.logical_box[1]).sum() == 1

            # The bounding box above is not grid aligned,
            # so blocks on the volume edge will only have partial data
            # (i.e. a smaller physical_box than logical_box)
            # However, halos are always produced to correspond to the logical_box size,
            # and zero-padded if necessary to achieve that size.
            # Therefore, only compare the actually valid portion of the halo here with the expected volume.
            # The other voxels should be zeros.
            valid_box = box_intersection(bounding_box, hb.physical_box)
            halo_vol = extract_subvol(hb.volume, valid_box - hb.physical_box[0])
            expected_vol = extract_subvol(volume, valid_box)
            assert (halo_vol == expected_vol).all()

            # Other voxels should be zero
            remainder = hb.volume.copy()
            overwrite_subvol(remainder, valid_box - hb.physical_box[0], 0)
            assert (remainder == 0).all()

    # One row per halo: its physical box, the halo brick itself, and its type.
    box_cols = ['y0', 'x0', 'y1', 'x1']
    rows = [
        [*hb.physical_box.flat, hb, halo_type]
        for halo_type, halo_bricks in halos_by_type
        for hb in halo_bricks
    ]
    halo_df = pd.DataFrame(rows, columns=[*box_cols, 'brick', 'halo_type'])
    halo_counts = halo_df.groupby(box_cols).size()

    # Since the bricks' physical boxes are all clipped to the overall bounding-box,
    # every outer halo should have a matching inner halo from a neighboring brick.
    # (This would not necessarily be true for Bricks that are initialized from a sparse mask.)
    assert halo_counts.min() == 2
    assert halo_counts.max() == 2

    for _box, halos_df in halo_df.groupby(box_cols):
        assert set(halos_df['halo_type']) == {'outer', 'inner'}

        first_brick = halos_df.iloc[0]['brick']
        second_brick = halos_df.iloc[1]['brick']
        assert (first_brick.volume == second_brick.volume).all()