def test_dvid_volume_service_branch(setup_dvid_repo, disable_auto_retry):
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-branch'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    # Note: the config specifies a branch name ("master") rather than
    # a specific uuid; the service should resolve it to the branch head.
    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: master
          grayscale-name: {instance_name}
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)
    service = VolumeService.create_from_config(volume_config)
    assert service.uuid == uuid

def test_no_adapter(setup_hdf5_service):
    _raw_volume, volume_config, _full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    assert volume_config["adapters"]["rescale-level"] is None
    reader = VolumeService.create_from_config(volume_config)
    assert isinstance(reader, Hdf5VolumeService), \
        "Should not create a ScaledVolumeService adapter at all if rescale-level is null"

def parse_workflow_config(template_dir):
    """
    Load workflow.yaml to determine the input volume info
    and subset-bodies CSV path.
    """
    # Late imports so --help works quickly
    from flyemflows.volumes import VolumeService
    from flyemflows.workflow import Workflow, CreateMeshes

    workflow_cls, workflow_config = Workflow.load_workflow_config(template_dir)

    assert workflow_cls == CreateMeshes
    assert 'dvid' in workflow_config["input"], \
        "This daemon only works with DVID labelmap sources"

    dvid_service = VolumeService.create_from_config(workflow_config["input"]).original_volume_service
    server, _uuid, instance = dvid_service.instance_triple

    # If the config mentions a branch instead of a
    # specific uuid, keep that, not the pre-resolved uuid
    uuid = workflow_config["input"]["dvid"]["uuid"]

    body_csv = workflow_config["createmeshes"]["subset-bodies"]
    assert body_csv == "bodies-to-update.csv", \
        "Your config must have a 'subset-bodies' setting, and it must point to "\
        "bodies-to-update.csv (which will be overwritten by this daemon)"

    seg_instance = (server, uuid, instance)
    return seg_instance, body_csv

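# A hypothetical usage sketch for parse_workflow_config(), assuming a template
# directory containing a CreateMeshes workflow.yaml as described in the
# docstring above. The directory name 'mesh-daemon-template' is invented
# for illustration.
(server, uuid, instance), body_csv = parse_workflow_config('mesh-daemon-template')
print(f"Watching {server} / {uuid} / {instance}")
print(f"Body list will be written to {body_csv}")
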
def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)
    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        if scale == 0:
            assert instance_name in repo_instances
            assert repo_instances[instance_name] == 'uint8blk'
        else:
            assert f"{instance_name}_{scale}" in repo_instances
            assert repo_instances[f"{instance_name}_{scale}"] == 'uint8blk'

        vol = downsample(volume, 2**scale, 'label')  # label downsampling is easier to test with
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

def test_available_scales(setup_hdf5_service):
    _raw_volume, volume_config, _full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    volume_config["adapters"]["rescale-level"] = {
        "level": 1,
        "available-scales": [0, 1, 2]
    }
    reader = VolumeService.create_from_config(volume_config)
    assert reader.available_scales == [0, 1, 2]

def test_sample_labels(setup_hdf5_service):
    _raw_volume, volume_config, full_from_h5, h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    downsampled = downsample(full_from_h5, 2, 'block-mean')
    points = [np.random.randint(d, size=(10,)) for d in downsampled.shape]
    points = np.transpose(points)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    labels = scaled_reader.sample_labels(points)
    assert (labels == downsampled[(*points.transpose(),)]).all()

def test_full_volume_downsample_1(setup_hdf5_service):
    _raw_volume, volume_config, full_from_h5, h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    assert (scaled_reader.bounding_box_zyx == h5_reader.bounding_box_zyx // 2).all()
    assert (scaled_reader.preferred_message_shape == h5_reader.preferred_message_shape // 2).all()
    assert scaled_reader.block_width == h5_reader.block_width // 2
    assert scaled_reader.dtype == h5_reader.dtype

    full_scaled = scaled_reader.get_subvolume(scaled_reader.bounding_box_zyx)
    assert (full_scaled == downsample(full_from_h5, 2, 'block-mean')).all()
    assert full_scaled.flags.c_contiguous

"zarr": { "path": "/Users/bergs/data/hemibrain-v1.2.zarr", "dataset": "s2", "store-type": "NestedDirectoryStore", "out-of-bounds-access": "permit-empty" }, "adapters": { "rescale-level": -2 } } mito_cfg = { "zarr": { "path": "/Users/bergs/data/hemibrain-v1.2-filtered-mito-cc.zarr", "dataset": "s3", "store-type": "NestedDirectoryStore", "out-of-bounds-access": "permit-empty" }, "adapters": { "rescale-level": -3 } } seg_svc = VolumeService.create_from_config(seg_cfg) mito_svc = VolumeService.create_from_config(mito_cfg) valid_mitos = fetch_supervoxels('emdata4:8900', '3159', 'mito-objects', body) processed_tbars = measure_tbar_mito_distances(seg_svc, mito_svc, body, tbars=tbars, valid_mitos=valid_mitos) cols = ['bodyId', 'type', *'xyz', 'mito-distance', 'crossed-gap', 'mito-id', 'mito-x', 'mito-y', 'mito-z', 'search-radius', 'download-scale', 'analysis-scale', 'focal-x'] print(processed_tbars[cols]) processed_tbars.to_csv('/tmp/tbar-test-results.csv')
def correct_centroids(config, stats_df, check_scale=0, verify=False, threads=0, processes=8):
    import numpy as np
    import pandas as pd

    from neuclease.util import compute_parallel, Timer
    from flyemflows.volumes import VolumeService

    with Timer("Pre-sorting points by block", logger):
        stats_df['bz'] = stats_df['by'] = stats_df['bx'] = np.int32(0)
        stats_df[['bz', 'by', 'bx']] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(['bz', 'by', 'bx'], inplace=True)
        stats_df.drop(columns=['bz', 'by', 'bx'], inplace=True)

    if config['mito-sparsevol-source'] is not None:
        sparsevol_source = VolumeService.create_from_config(config['mito-sparsevol-source'])
        point_source = sparsevol_source
    else:
        sparsevol_source = None
        point_source = None

    if config['mito-point-source']:
        point_source = VolumeService.create_from_config(config['mito-point-source'])

    assert point_source or sparsevol_source, \
        "You must provide either a point-source or sparsevol-source."

    stats_df['centroid_label'] = sample_labels(point_source, stats_df, check_scale, threads, processes)

    mismatched_mitos = stats_df.query('centroid_label != mito_id')
    logger.info(f"Correcting {len(mismatched_mitos)} mismatched mito centroids")

    if sparsevol_source:
        _find_mito = partial(find_mito_from_sparsevol, *sparsevol_source.instance_triple)
        mitos_and_coords = compute_parallel(_find_mito, mismatched_mitos.index, ordered=False,
                                            threads=threads, processes=processes)
    else:
        _find_mito = partial(find_mito_from_seg, point_source, check_scale)
        mismatched_rows = mismatched_mitos.reset_index()[['mito_id', *'zyx']].astype(np.int64).values
        mitos_and_coords = compute_parallel(_find_mito, mismatched_rows, starmap=True, ordered=False,
                                            threads=threads, processes=processes)

    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx']).set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]

    stats_df['centroid_type'] = 'exact'
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        new_labels = sample_labels(point_source, stats_df.loc[mismatched_mitos.index],
                                   check_scale, threads, processes)
        if (new_labels != mismatched_mitos.index).any():
            logger.error("Some mitos remained mismatched!")

    return stats_df

def correct_centroids(config, stats_df, check_scale=0, verify=False, threads=0, processes=8):
    import numpy as np
    import pandas as pd

    from neuclease.util import tqdm_proxy, compute_parallel, Timer
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import VolumeService, DvidVolumeService

    with Timer("Pre-sorting points by block", logger):
        stats_df['bz'] = stats_df['by'] = stats_df['bx'] = np.int32(0)
        stats_df[['bz', 'by', 'bx']] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(['bz', 'by', 'bx'], inplace=True)
        stats_df.drop(columns=['bz', 'by', 'bx'], inplace=True)

    sparsevol_source = VolumeService.create_from_config(config['mito-sparsevol-source'])
    if config['mito-point-source'] is None:
        point_source = sparsevol_source
    else:
        point_source = VolumeService.create_from_config(config['mito-point-source'])

    if isinstance(point_source, DvidVolumeService):
        stats_df['centroid_label'] = fetch_labels_batched(*point_source.instance_triple,
                                                          stats_df[[*'zyx']] // (2**check_scale),
                                                          supervoxels=point_source.supervoxels,
                                                          scale=check_scale,
                                                          batch_size=1000,
                                                          threads=threads,
                                                          processes=processes)
    else:
        import multiprocessing as mp
        import dask
        from dask.diagnostics import ProgressBar

        if threads:
            pool = mp.pool.ThreadPool(threads)
        else:
            pool = mp.pool.Pool(processes)

        dask.config.set(scheduler='processes')
        with pool, dask.config.set(pool=pool), ProgressBar():
            centroids = stats_df[[*'zyx']] // (2**check_scale)
            stats_df['centroid_label'] = point_source.sample_labels(centroids, scale=check_scale)

    mismatched_mitos = stats_df.query('centroid_label != mito_id').index
    logger.info(f"Correcting {len(mismatched_mitos)} mismatched mito centroids")

    _find_mito = partial(find_mito, *sparsevol_source.instance_triple)
    mitos_and_coords = compute_parallel(_find_mito, mismatched_mitos, ordered=False,
                                        threads=threads, processes=processes)

    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx']).set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        new_centroids = stats_df.loc[mismatched_mitos, [*'zyx']].values
        new_labels = fetch_labels_batched(*sparsevol_source.instance_triple,
                                          new_centroids,
                                          supervoxels=True,
                                          threads=threads,
                                          processes=processes)
        if (new_labels != mismatched_mitos).any():
            logger.error("Some mitos remained mismatched!")

    return stats_df

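# For reference, a hypothetical shape for the `config` argument expected by
# correct_centroids() above. Both sources are ordinary VolumeService configs;
# the server, uuid, and instance names below are placeholders, not real values.
example_config = {
    'mito-sparsevol-source': {
        'dvid': {
            'server': 'http://example-dvid:8000',
            'uuid': 'abc123',
            'segmentation-name': 'mito-objects',
            'supervoxels': True
        }
    },
    # When this is None, the sparsevol source is also used for point lookups.
    'mito-point-source': None
}
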
def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation, disable_auto_retry):
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)
    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        return np.indices(shape).reshape(len(shape), -1).transpose()

    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])
    expected_df['label'] = volume.reshape(-1)

    # Keep one row per (brick, label) pair: round each coordinate down to its
    # brick index, drop duplicates, then scale back up to the brick corner.
    expected_df['z'] //= 64
    expected_df['y'] //= 64
    expected_df['x'] //= 64
    expected_df = expected_df.drop_duplicates()
    expected_df['z'] *= 64
    expected_df['y'] *= 64
    expected_df['x'] *= 64

    expected_df = expected_df.query('label != 0')

    expected_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)
    brick_coords_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)

    expected_df.reset_index(drop=True, inplace=True)
    brick_coords_df.reset_index(drop=True, inplace=True)

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    points = [np.random.randint(d, size=(10,)) for d in volume.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(),)]).all()

def convert_grayscale(config_path, client=None):
    """
    Simple example showing how to:
     - create an input service (agnostic to data format)
     - read it into a distributed array (BrickWall)
     - realign it to an output array
     - write the realigned data (agnostic to format)

    The input will be accessed according to its preferred access pattern,
    and the output will be written according to its preferred access pattern
    (e.g. entire slices if reading from a PNG stack, or blocks if using N5.)

    Caveats:
     - This does not implement a Workflow subclass
       (though there isn't much more to it).
     - For simplicity, this code assumes that the entire volume can be
       loaded into your cluster's RAM. For large volumes, that won't work.
       A more robust solution would split the input volume into large
       "slabs" and process them each in turn.

    Example:

        # Set up some input data
        from flyemflows.util.n5 import export_to_multiscale_n5
        volume = np.random.randint(255, size=(500,500,500), dtype=np.uint8)
        export_to_multiscale_n5(volume, '/tmp/test-vol.n5')

        # Write the config file:
        cat > /tmp/test-config.yaml << EOF
        input:
          n5:
            path: /tmp/test-vol.n5
            dataset: 's0'

        output:
          slice-files:
            slice-path-format: '/tmp/test-slices/z{:04}.png'
        EOF

        # Run this script:
        python convert_grayscale.py /tmp/test-config.yaml
    """
    # Define the config file schema
    schema = {
        "properties": {
            "input": GrayscaleVolumeSchema,
            "output": GrayscaleVolumeSchema
        }
    }

    # Load config (injects defaults for missing values)
    config = load_config(config_path, schema)

    # Create input service and input 'bricks'
    input_svc = VolumeService.create_from_config(config["input"])
    input_wall = BrickWall.from_volume_service(input_svc, client=client)

    # Copy bounding box from input to output
    config["output"]["geometry"]["bounding-box"] = config["input"]["geometry"]["bounding-box"]

    # Create output service and redistribute
    # data using the output's preferred grid
    output_svc = VolumeService.create_from_config(config["output"])
    output_grid = Grid(output_svc.preferred_message_shape)
    output_wall = input_wall.realign_to_new_grid(output_grid)

    # Write the data: one task per output brick
    # (e.g. output slices, if exporting to PNGs)
    def write_brick(brick):
        output_svc.write_subvolume(brick.volume, brick.physical_box[0])

    output_wall.bricks.map(write_brick).compute()
    print("DONE exporting")

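# A minimal sketch of invoking convert_grayscale() as a script, assuming it
# lives in a file run as shown in the docstring above. Passing client=None
# uses the default dask scheduler; a dask.distributed.Client could be passed
# instead to run on a cluster.
if __name__ == "__main__":
    import sys
    convert_grayscale(sys.argv[1])
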
def test_copygrayscale_from_hdf5_to_n5(disable_auto_retry):
    template_dir = tempfile.mkdtemp(suffix="copygrayscale-hdf5-to-n5-template")

    SHAPE = (250, 240, 230)

    # Create volume, write to HDF5
    volume = np.random.randint(10, size=SHAPE, dtype=np.uint8)
    volume_path = f"{template_dir}/volume.h5"
    with h5py.File(volume_path, 'w') as f:
        f['volume'] = volume

    config_text = textwrap.dedent(f"""\
        workflow-name: copygrayscale
        cluster-type: {CLUSTER_TYPE}

        input:
          hdf5:
            path: {volume_path}
            dataset: volume

          geometry:
            message-block-shape: [64,64,256]
            bounding-box: [[0,0,0], {[*SHAPE[::-1]]}]

          adapters:
            # Enable multi-scale, since otherwise
            # Hdf5VolumeService doesn't support it out-of-the-box
            rescale-level: 0

        output:
          n5:
            path: output.n5
            dataset: s0
            create-if-necessary: true
            creation-settings:
              dtype: uint8
              # max-scale: 2 # Should be set automatically from max-pyramid-scale

          geometry:
            message-block-shape: [256,128,128]
            available-scales: [0,1,2]

        copygrayscale:
          max-pyramid-scale: 2
          slab-depth: 128
          fill-blocks: false  # N5 is block-based, but does not require (or allow) us to pad the boundary blocks.
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    # Run
    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_service = VolumeService.create_from_config(final_config['input'])
    output_service = VolumeService.create_from_config(final_config['output'])

    # Check results -- must use half-brick-aligned checks to ensure that the downsampling is the same.
    # And don't check scale 2 -- there is too much difference between
    # downsampled-by-4 and downsampled-by-2-twice.
    for scale in range(2):
        scaled_box = output_service.bounding_box_zyx // 2**scale
        brick_shape = output_service.preferred_message_shape // 2
        for brick_box in boxes_from_grid(scaled_box, brick_shape, clipped=True):
            expected_vol = input_service.get_subvolume(brick_box, scale)
            output_vol = output_service.get_subvolume(brick_box, scale)
            assert np.allclose(output_vol, expected_vol, rtol=2, atol=5), \
                f"Written vol does not match expected at scale {scale}"