def downsample_mags(
    path: Path,
    layer_name: Optional[str] = None,
    from_mag: Optional[Mag] = None,
    max_mag: Optional[Mag] = None,
    interpolation_mode: str = "default",
    buffer_shape: Optional[Vec3Int] = None,
    compress: bool = True,
    args: Optional[Namespace] = None,
    sampling_mode: Union[str, SamplingModes] = SamplingModes.ANISOTROPIC,
    force_sampling_scheme: bool = False,
) -> None:
    """
    Argument `path` expects the directory containing the dataset.
    Argument `layer_name` expects the name of the layer (color or segmentation).
    Argument `from_mag` expects the resolution to base downsampling on.

    For the other parameters see the CLI help or `Layer.downsample` and `Layer.downsampling_mag`.
    """
    assert (layer_name and from_mag) or (not layer_name and not from_mag), (
        "You provided only one of the parameters layer_name and from_mag, but either "
        "both need to be set or none. If you do not provide them, the path argument "
        "must contain the layer and the mag to downsample (e.g. dataset/color/1)."
    )
    if not layer_name or not from_mag:
        layer_name = path.parent.name
        from_mag = Mag(path.name)
        path = path.parent.parent

    assert layer_name is not None  # for mypy
    assert from_mag is not None  # for mypy

    Dataset.open(path).get_layer(layer_name).downsample(
        from_mag=from_mag,
        coarsest_mag=max_mag,
        interpolation_mode=interpolation_mode,
        compress=compress,
        sampling_mode=sampling_mode,
        buffer_shape=buffer_shape,
        force_sampling_scheme=force_sampling_scheme,
        args=args,
    )
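# Hypothetical usage sketch (not part of the module; paths and mags are
# illustrative only), showing both calling conventions from the docstring:
#
#   # Explicit layer and source mag:
#   downsample_mags(Path("dataset"), layer_name="color", from_mag=Mag(1))
#
#   # Layer and mag encoded in the path instead (e.g. dataset/color/1):
#   downsample_mags(Path("dataset") / "color" / "1")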
def test_url_open_remote(
    url: str, sample_dataset: wk.Dataset, sample_bbox: wk.BoundingBox
) -> None:
    ds = wk.Dataset.open_remote(url)

    assert set(ds.layers.keys()) == {"color", "segmentation"}
    data = (
        ds.get_color_layers()[0]
        .get_finest_mag()
        .read(absolute_bounding_box=sample_bbox)
    )
    assert data.sum() == 122507
    assert np.array_equal(
        data,
        sample_dataset.get_color_layers()[0].get_finest_mag().read(),
    )
def test_url_download(
    url: str, tmp_path: Path, sample_dataset: wk.Dataset, sample_bbox: wk.BoundingBox
) -> None:
    ds = wk.Dataset.download(
        url, path=tmp_path / "ds", mags=[wk.Mag(1)], bbox=sample_bbox
    )

    assert set(ds.layers.keys()) == {"color", "segmentation"}
    data = ds.get_color_layers()[0].get_finest_mag().read()
    assert data.sum() == 122507
    assert np.array_equal(
        data,
        sample_dataset.get_color_layers()[0].get_finest_mag().read(),
    )
def test_downsample_multi_channel(tmp_path: Path) -> None:
    num_channels = 3
    size = (32, 32, 10)
    source_data = (
        128 * np.random.randn(num_channels, size[0], size[1], size[2])
    ).astype("uint8")

    ds = Dataset(tmp_path / "multi-channel-test", (1, 1, 1))
    layer = ds.add_layer(
        "color",
        COLOR_CATEGORY,
        dtype_per_channel="uint8",
        num_channels=num_channels,
    )
    mag1 = layer.add_mag("1", chunks_per_shard=32)

    print("writing source_data shape", source_data.shape)
    mag1.write(source_data)
    assert np.any(source_data != 0)

    mag2 = layer._initialize_mag_from_other_mag("2", mag1, False)

    downsample_cube_job(
        (layer.get_mag("1").get_view(), layer.get_mag("2").get_view(), 0),
        Vec3Int(2, 2, 2),
        InterpolationModes.MAX,
        BUFFER_SHAPE,
    )

    channels = []
    for channel_index in range(num_channels):
        channels.append(
            downsample_cube(
                source_data[channel_index], [2, 2, 2], InterpolationModes.MAX
            )
        )
    joined_buffer = np.stack(channels)

    target_buffer = mag2.read()
    assert np.any(target_buffer != 0)
    assert np.all(target_buffer == joined_buffer)
def test_upsampling(
    sample_wkw_path: Path, tmp_path: Path, tiff_mag_2_reference_path: Path
) -> None:
    copytree(sample_wkw_path, tmp_path)

    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("1")
    color_layer.bounding_box = color_layer.bounding_box.align_with_mag(
        Mag("2"), ceil=True
    )
    check_call(
        "python",
        "-m",
        "wkcuber.upsampling",
        "--jobs",
        2,
        "--from_mag",
        "2-2-2",
        "--target_mag",
        1,
        "--buffer_cube_size",
        1024,
        "--layer_name",
        "color",
        tmp_path,
    )

    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("2")
    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from_mag",
        1,
        "--max",
        2,
        "--sampling_mode",
        "isotropic",
        "--buffer_cube_size",
        256,
        "--layer_name",
        "color",
        "--interpolation_mode",
        "nearest",
        tmp_path,
    )

    assert (
        Dataset.open(tmp_path).get_layer("color").get_mag("2").bounding_box
        == Dataset.open(tiff_mag_2_reference_path)
        .get_layer("color")
        .get_mag("2")
        .bounding_box
    )
    assert (
        Dataset.open(tmp_path)
        .get_layer("color")
        .get_mag("2")
        .content_is_equal(
            Dataset.open(tiff_mag_2_reference_path).get_layer("color").get_mag("2")
        )
    )
def upsample_test_helper(tmp_path: Path, use_compress: bool) -> None:
    ds = Dataset(tmp_path, voxel_size=(10.5, 10.5, 5))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag2 = layer.add_mag([2, 2, 2])

    offset = Vec3Int(WKW_CUBE_SIZE, 2 * WKW_CUBE_SIZE, 0)

    mag2.write(
        absolute_offset=offset,
        data=(np.random.rand(*BUFFER_SHAPE) * 255).astype(np.uint8),
    )
    mag1 = layer._initialize_mag_from_other_mag("1-1-2", mag2, use_compress)

    source_buffer = mag2.read(
        absolute_offset=offset,
        size=BUFFER_SHAPE,
    )[0]
    assert np.any(source_buffer != 0)

    upsample_cube_job(
        (
            mag2.get_view(absolute_offset=offset, size=BUFFER_SHAPE),
            mag1.get_view(
                absolute_offset=offset,
                size=BUFFER_SHAPE,
            ),
            0,
        ),
        [0.5, 0.5, 1.0],
        BUFFER_SHAPE,
    )

    assert np.any(source_buffer != 0)

    target_buffer = mag1.read(absolute_offset=offset, size=BUFFER_SHAPE)[0]
    assert np.any(target_buffer != 0)
    assert np.all(target_buffer == upsample_cube(source_buffer, [2, 2, 1]))
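# Note on the factors used above (a sketch of the reasoning, derived from this
# test): upsampling from Mag 2-2-2 to Mag 1-1-2 changes the per-axis mag by
# (1/2, 1/2, 2/2), which is the [0.5, 0.5, 1.0] passed to upsample_cube_job;
# the reference upsample_cube call uses the reciprocal integer factors [2, 2, 1].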
def sample_wkw_path() -> Path:
    ds_path = TESTDATA_DIR / "tiff_wkw"
    if ds_path.exists():
        rmtree(ds_path)
    check_call(
        [
            "python",
            "-m",
            "wkcuber.cubing",
            "--jobs",
            "2",
            "--voxel_size",
            "1,1,1",
            str(TESTDATA_DIR / "tiff"),
            str(ds_path),
        ]
    )
    copyfile(
        TESTDATA_DIR / "tiff" / "datasource-properties.wkw-fixture.json",
        ds_path / PROPERTIES_FILE_NAME,
    )
    Dataset.open(ds_path).get_layer("color").downsample_mag(Mag(1), Mag(2))
    return ds_path
def test_downsample_compressed(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_compressed", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1, chunk_size=8, chunks_per_shard=8)
    mag.write(data=(np.random.rand(80, 240, 15) * 255).astype(np.uint8))

    assert not mag._is_compressed()
    mag.compress()
    assert mag._is_compressed()

    layer.downsample(
        from_mag=Mag(1),
        # Setting max_mag to "4" covers an edge case because the z-dimension (15) has to be rounded
        coarsest_mag=Mag(4),
    )

    # Note: this test does not check if the data is correct. This is already covered by other test cases.
    assert len(layer.mags) == 3
    assert Mag("1") in layer.mags.keys()
    assert Mag("2-2-1") in layer.mags.keys()
    assert Mag("4-4-2") in layer.mags.keys()
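# Sketch of why those mags are expected (reasoning based on this test's
# asserts): with voxel_size=(1, 1, 2) the default anisotropic sampling keeps
# the physical voxel size roughly cubic, so Mag 1 (voxel size 1, 1, 2) is
# followed by Mag 2-2-1 (2, 2, 2) and Mag 4-4-2 (4, 4, 4) rather than the
# isotropic mags 2 and 4.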
def downsample_test_helper(
    WT1_path: Path, tmp_path: Path, use_compress: bool, chunk_size: Vec3Int
) -> None:
    source_path = WT1_path
    target_path = tmp_path / "WT1_wkw"

    source_ds = Dataset.open(source_path)
    target_ds = source_ds.copy_dataset(
        target_path, chunk_size=chunk_size, chunks_per_shard=16
    )

    target_layer = target_ds.get_layer("color")
    mag1 = target_layer.get_mag("1")
    target_layer.delete_mag("2-2-1")  # This is not needed for this test

    # The bounding box has to be set here explicitly because the downsampled
    # data is written to a different dataset.
    target_layer.bounding_box = source_ds.get_layer("color").bounding_box

    mag2 = target_layer._initialize_mag_from_other_mag("2", mag1, use_compress)

    # The actual size of mag1 is (4600, 4600, 512).
    # To keep this test case fast, we are only downsampling a small part.
    offset = (4096, 4096, 0)
    size = (504, 504, 512)
    source_buffer = mag1.read(
        absolute_offset=offset,
        size=size,
    )[0]
    assert np.any(source_buffer != 0)

    downsample_cube_job(
        (
            mag1.get_view(absolute_offset=offset, size=size),
            mag2.get_view(
                absolute_offset=offset,
                size=size,
            ),
            0,
        ),
        Vec3Int(2, 2, 2),
        InterpolationModes.MAX,
        Vec3Int.full(128),
    )

    assert np.any(source_buffer != 0)

    target_buffer = mag2.read(absolute_offset=offset, size=size)[0]
    assert np.any(target_buffer != 0)
    assert np.all(
        target_buffer
        == downsample_cube(source_buffer, [2, 2, 2], InterpolationModes.MAX)
    )
def test_downsample_mag_list_with_only_setup_mags(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_mag_list", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1)
    mag.write(data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8))

    target_mags = [Mag([4, 4, 8]), Mag(2), Mag([32, 32, 8]), Mag(32)]  # unsorted list

    layer.downsample_mag_list(
        from_mag=Mag(1), target_mags=target_mags, only_setup_mags=True
    )

    for m in target_mags:
        assert np.all(layer.get_mag(m).read() == 0), "The mags should be empty."

    layer.downsample_mag_list(
        from_mag=Mag(1), target_mags=target_mags, allow_overwrite=True
    )

    for m in target_mags:
        assert m in layer.mags
def test_main(tmp_path: Path, order: str, flip_axes: Optional[Tuple[int, int]]) -> None:
    raw_file = tmp_path / "input.raw"

    input_dtype = "float32"
    shape = 64, 128, 256
    data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape, order=order)
    with raw_file.open("wb") as f:
        f.write(data.tobytes(order=order))

    output_path = tmp_path / "output"
    output_path.mkdir()

    args_list = [
        str(raw_file),
        str(output_path),
        "--input_dtype",
        input_dtype,
        "--shape",
        ",".join(str(i) for i in shape),
        "--order",
        order,
        "--jobs",
        "1",
    ]
    if flip_axes is not None:
        args_list.extend(["--flip_axes", ",".join(str(a + 1) for a in flip_axes)])

    args = create_parser().parse_args(args_list)
    main(args)

    dataset = Dataset.open(output_path)
    layer = dataset.get_color_layers()[0]
    mag_view = layer.get_mag(1)
    view = mag_view.get_view()
    read_data = view.read()

    assert view.size == shape
    assert view.get_dtype() == data.dtype
    assert np.array_equal(
        read_data[0],
        data if flip_axes is None else np.flip(data, flip_axes),
    )
def test_main(tmp_path: Path, category: str) -> None:
    input_folder = tmp_path / "raw_dataset" / category
    input_folder.mkdir(parents=True, exist_ok=True)

    raw_file = input_folder / "input.tif"

    input_dtype = "uint32"
    shape = 64, 128, 256
    data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape)
    with TiffWriter(raw_file) as tif:
        tif.write(data.transpose([2, 1, 0]))

    output_path = tmp_path / "output_2"
    output_path.mkdir()

    args_list = [
        str(tmp_path / "raw_dataset"),
        str(output_path),
        "--jobs",
        "1",
        "--voxel_size",
        "11,11,11",
        "--max_mag",
        "4",
    ]

    args = create_parser().parse_args(args_list)
    cube_with_args(args)

    dataset = Dataset.open(output_path)
    if category == "color":
        layer = dataset.get_color_layers()[0]
    else:
        layer = dataset.get_segmentation_layers()[0]
    mag_view = layer.get_mag(1)
    view = mag_view.get_view()
    read_data = view.read()

    assert view.size == shape
    assert view.get_dtype() == data.dtype
    assert np.array_equal(
        read_data[0],
        data,
    )
def test_export_nifti_file(tmp_path: Path) -> None:
    destination_path = tmp_path / f"{DS_NAME}_nifti"
    destination_path.mkdir()

    bbox = BoundingBox((100, 100, 10), (100, 500, 50))
    bbox_dict = bbox.to_config_dict()
    args_list = [
        "--source_path",
        str(SOURCE_PATH),
        "--destination_path",
        str(destination_path),
        "--name",
        "test_export",
        "--source_bbox",
        bbox.to_csv(),
        "--mag",
        "1",
    ]

    export_wkw_as_nifti_from_arg_list(args_list)

    wk_ds = Dataset.open(SOURCE_PATH)
    for layer_name, layer in wk_ds.layers.items():
        correct_image = layer.get_mag(Mag(1)).read(
            bbox_dict["topleft"], bbox_dict["size"]
        )
        # nifti is transposed
        correct_image = correct_image.transpose(1, 2, 3, 0)
        correct_image = np.squeeze(correct_image)

        nifti_path = destination_path.joinpath(f"test_export_{layer_name}.nii")

        assert nifti_path.is_file(), f"Expected a nifti to be written at: {nifti_path}."

        nifti = nib.load(str(nifti_path))
        test_image = np.array(nifti.get_fdata())

        assert np.array_equal(correct_image, test_image), (
            f"The nifti file {nifti_path} that was written is not "
            f"equal to the original wkw_file."
        )
def export_wkw_as_tiff(args: Namespace) -> None:
    setup_logging(args)

    mag_view = (
        Dataset.open(args.source_path).get_layer(args.layer_name).get_mag(args.mag)
    )

    bbox = mag_view.bounding_box if args.bbox is None else args.bbox

    logging.info(f"Starting tiff export for bounding box: {bbox}")

    if args.tiles_per_dimension is not None:
        args.tile_size = [int(s.strip()) for s in args.tiles_per_dimension.split(",")]
        assert len(args.tile_size) == 2
        logging.info(
            f"Using tiling with {args.tile_size[0]},{args.tile_size[1]} tiles per dimension."
        )
        args.tile_size[0] = ceil(bbox.in_mag(mag_view.mag).size.x / args.tile_size[0])
        args.tile_size[1] = ceil(bbox.in_mag(mag_view.mag).size.y / args.tile_size[1])

    elif args.tile_size is not None:
        args.tile_size = [int(s.strip()) for s in args.tile_size.split(",")]
        assert len(args.tile_size) == 2
        logging.info(
            f"Using tiling with a tile size of {args.tile_size[0]},{args.tile_size[1]}."
        )

    args.batch_size = int(args.batch_size)

    export_tiff_stack(
        mag_view=mag_view,
        bbox=bbox,
        destination_path=args.destination_path,
        name=args.name,
        tiling_slice_size=args.tile_size,
        batch_size=args.batch_size,
        downsample=args.downsample,
        args=args,
    )
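# Worked example for the tiles_per_dimension arithmetic above (illustrative
# numbers only): for a bounding box of 2048 x 1536 px in the requested mag and
# --tiles_per_dimension 4,3, the tile size becomes ceil(2048 / 4) = 512 and
# ceil(1536 / 3) = 512, i.e. each section is exported as 4 x 3 tiles of
# 512 x 512 px.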
def export_nifti(
    source_path: Path,
    source_bbox: Optional[BoundingBox],
    mag: Mag,
    destination_path: Path,
    name: str,
    padding: Optional[Tuple[int, ...]] = None,
) -> None:
    dataset = Dataset.open(source_path)

    for layer_name, layer in dataset.layers.items():
        logging.info(f"Starting nifti export for bounding box: {source_bbox}")

        export_layer_to_nifti(
            source_path,
            layer.bounding_box if source_bbox is None else source_bbox,
            mag,
            layer_name,
            destination_path,
            name + "_" + layer_name,
            padding,
        )
def export_layer_to_nifti(
    source_path: Path,
    source_bbox: BoundingBox,
    mag: Mag,
    layer_name: str,
    destination_path: Path,
    name: str,
    padding: Optional[Tuple[int, ...]] = None,
) -> None:
    dataset = Dataset.open(source_path)
    layer = dataset.get_layer(layer_name)
    mag_layer = layer.get_mag(mag)

    is_segmentation_layer = layer.category == SEGMENTATION_CATEGORY

    data = mag_layer.read(source_bbox.topleft, source_bbox.size)
    data = data.transpose(1, 2, 3, 0)
    logging.info(f"Shape with layer {data.shape}")

    data = np.array(data)
    if is_segmentation_layer and data.max() > 0:
        factor = np.iinfo("uint8").max / data.max()
        data = data * factor
        data = data.astype(np.dtype("uint8"))

    if padding:
        assert len(padding) == 6, "padding needs 6 values"

        padding_per_axis = list(zip(padding[:3], padding[3:]))
        padding_per_axis.append((0, 0))
        data = np.pad(data, padding_per_axis, mode="constant", constant_values=0)

    img = nib.Nifti1Image(data, np.eye(4))

    destination_file = str(destination_path.joinpath(name + ".nii"))

    logging.info(f"Writing to {destination_file} with shape {data.shape}")
    nib.save(img, destination_file)
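# Worked example for the uint8 rescaling above (illustrative values only): for
# a segmentation layer whose largest id is 1000, factor = 255 / 1000 = 0.255,
# so ids in 0..1000 are mapped into the uint8 range 0..255 before the cast.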
def tile_cubing(
    target_path: Path,
    layer_name: str,
    batch_size: int,
    input_path_pattern: str,
    voxel_size: Tuple[int, int, int],
    args: Optional[Namespace] = None,
) -> None:
    decimal_lengths = get_digit_counts_for_dimensions(input_path_pattern)
    (
        min_dimensions,
        max_dimensions,
        arbitrary_file,
        file_count,
    ) = detect_interval_for_dimensions(input_path_pattern, decimal_lengths)

    if not arbitrary_file:
        logging.error(
            f"No source files found. Maybe the input_path_pattern was wrong. You provided: {input_path_pattern}"
        )
        return

    # Determine tile size from first matching file
    tile_width, tile_height = image_reader.read_dimensions(arbitrary_file)
    num_z = max_dimensions["z"] - min_dimensions["z"] + 1
    num_x = (max_dimensions["x"] - min_dimensions["x"] + 1) * tile_width
    num_y = (max_dimensions["y"] - min_dimensions["y"] + 1) * tile_height
    x_offset = min_dimensions["x"] * tile_width
    y_offset = min_dimensions["y"] * tile_height
    num_channels = image_reader.read_channel_count(arbitrary_file)
    logging.info(
        "Found source files: count={} with tile_size={}x{}".format(
            file_count, tile_width, tile_height
        )
    )
    if args is None or not hasattr(args, "dtype") or args.dtype is None:
        dtype = image_reader.read_dtype(arbitrary_file)
    else:
        dtype = args.dtype

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"
    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
        )

    bbox = BoundingBox(
        Vec3Int(x_offset, y_offset, min_dimensions["z"]),
        Vec3Int(num_x, num_y, num_z),
    )
    if target_layer.bounding_box.volume() == 0:
        # If the layer is empty, we want to set the bbox directly because extending it
        # would mean that the bbox would always start at (0, 0, 0)
        target_layer.bounding_box = bbox
    else:
        target_layer.bounding_box = target_layer.bounding_box.extended_by(bbox)

    target_mag_view = target_layer.get_or_add_mag(Mag(1), block_len=DEFAULT_CHUNK_SIZE.z)

    with get_executor_for_args(args) as executor:
        job_args = []
        # Iterate over all z batches
        for z_batch in get_regular_chunks(
            min_dimensions["z"], max_dimensions["z"], DEFAULT_CHUNK_SIZE.z
        ):
            # The z_batch always starts and ends at a multiple of DEFAULT_CHUNK_SIZE.z.
            # However, we only want the part that is inside the bounding box
            z_batch = range(
                max(list(z_batch)[0], target_layer.bounding_box.topleft.z),
                min(list(z_batch)[-1] + 1, target_layer.bounding_box.bottomright.z),
            )
            z_values = list(z_batch)
            job_args.append(
                (
                    target_mag_view.get_view(
                        (x_offset, y_offset, z_values[0]),
                        (num_x, num_y, len(z_values)),
                    ),
                    z_values,
                    input_path_pattern,
                    batch_size,
                    (tile_width, tile_height, num_channels),
                    min_dimensions,
                    max_dimensions,
                    decimal_lengths,
                    dtype,
                    num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(tile_cubing_job, job_args),
            f"Tile cubing layer {layer_name}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(SegmentationLayer, target_layer).largest_segment_id = largest_segment_id
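# Worked example for the grid arithmetic above (illustrative numbers only):
# tiles of 512 x 512 px with x indices 2..5 and y indices 0..3 give
# num_x = (5 - 2 + 1) * 512 = 2048, x_offset = 2 * 512 = 1024,
# num_y = (3 - 0 + 1) * 512 = 2048, y_offset = 0, so the layer bounding box
# starts at (1024, 0, z_min) and has size (2048, 2048, num_z).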
def cubing(
    source_path: Path,
    target_path: Path,
    layer_name: str,
    batch_size: Optional[int],
    channel_index: Optional[int],
    sample_index: Optional[int],
    dtype: Optional[str],
    target_mag_str: str,
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    interpolation_mode_str: str,
    start_z: int,
    skip_first_z_slices: int,
    pad: bool,
    voxel_size: Tuple[float, float, float],
    executor_args: Namespace,
) -> Layer:
    source_files = find_source_filenames(source_path)

    all_num_x, all_num_y = zip(
        *[
            image_reader.read_dimensions(source_files[i])
            for i in range(len(source_files))
        ]
    )
    num_x = max(all_num_x)
    num_y = max(all_num_y)
    # All images are assumed to have equal channels and samples
    num_channels = image_reader.read_channel_count(source_files[0])
    num_samples = image_reader.read_sample_count(source_files[0])
    num_output_channels = num_channels * num_samples
    if channel_index is not None:
        # if there is no c axis, but someone meant to only use one channel/sample, set the sample index instead
        if sample_index is None and num_channels == 1 and channel_index > 0:
            sample_index = channel_index
            channel_index = 0

        assert (
            0 <= channel_index < num_channels
        ), "Selected channel is invalid. Please check the number of channels in the source file."
        num_output_channels = num_samples
    if sample_index is not None:
        # if no channel axis exists, it is valid to only set the sample index. Set channel index to 0 to avoid confusion
        if channel_index is None and num_channels == 1:
            channel_index = 0
        assert (
            channel_index is not None
        ), "Sample index is only valid if a channel index is also set."
        assert (
            0 <= sample_index < num_samples
        ), "Selected sample is invalid. Please check the number of samples in the source file."
        num_output_channels = 1

    num_z_slices_per_file = image_reader.read_z_slices_per_file(source_files[0])
    assert (
        num_z_slices_per_file == 1 or len(source_files) == 1
    ), "Multi page TIFF support only for single files"
    if num_z_slices_per_file > 1:
        num_z = num_z_slices_per_file
    else:
        num_z = len(source_files)

    if dtype is None:
        dtype = image_reader.read_dtype(source_files[0])

    if batch_size is None:
        batch_size = DEFAULT_CHUNK_SIZE.z

    target_mag = Mag(target_mag_str)

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"

    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            data_format=data_format,
        )

    target_layer.bounding_box = target_layer.bounding_box.extended_by(
        BoundingBox(
            Vec3Int(0, 0, start_z + skip_first_z_slices) * target_mag,
            Vec3Int(num_x, num_y, num_z - skip_first_z_slices) * target_mag,
        )
    )

    target_mag_view = target_layer.get_or_add_mag(
        target_mag,
        chunks_per_shard=chunks_per_shard,
        chunk_size=chunk_size,
    )

    interpolation_mode = parse_interpolation_mode(
        interpolation_mode_str, target_layer.category
    )
    if target_mag != Mag(1):
        logging.info(
            f"Downsampling the cubed image to {target_mag} in memory with interpolation mode {interpolation_mode}."
        )

    logging.info("Found source files: count={} size={}x{}".format(num_z, num_x, num_y))

    with get_executor_for_args(executor_args) as executor:
        job_args = []
        # We iterate over all z sections
        for z in range(skip_first_z_slices, num_z, DEFAULT_CHUNK_SIZE.z):
            # The z is used to access the source files. However, when writing the
            # data, the `start_z` has to be considered.
            max_z = min(num_z, z + DEFAULT_CHUNK_SIZE.z)
            # Prepare source files array
            if len(source_files) > 1:
                source_files_array = source_files[z:max_z]
            else:
                source_files_array = source_files * (max_z - z)

            # Prepare job
            job_args.append(
                (
                    target_mag_view.get_view(
                        (0, 0, z + start_z),
                        (num_x, num_y, max_z - z),
                    ),
                    target_mag,
                    interpolation_mode,
                    source_files_array,
                    batch_size,
                    pad,
                    channel_index,
                    sample_index,
                    dtype,
                    target_layer.num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(cubing_job, job_args),
            progress_desc=f"Cubing from {skip_first_z_slices} to {num_z}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(SegmentationLayer, target_layer).largest_segment_id = largest_segment_id

    # Return layer
    return target_layer
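# Worked example for the channel/sample handling above (illustrative values
# only): a file with num_channels=3 and num_samples=1 converted with
# channel_index=1 yields num_output_channels = num_samples = 1. With
# num_channels=1 and num_samples=3, passing channel_index=2 is reinterpreted
# as sample_index=2 (first branch), which again yields a single output channel.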
        f"Number of simultaneous upload processes. Defaults to {DEFAULT_SIMULTANEOUS_UPLOADS}.",
    )

    parser.add_argument(
        "--name",
        help="Specify a new name for the dataset. Defaults to the name specified in `datasource-properties.json`.",
        default=None,
    )

    add_verbose_flag(parser)

    return parser


if __name__ == "__main__":
    setup_warnings()
    args = create_parser().parse_args()
    setup_logging(args)

    url = (
        args.url if args.url is not None else environ.get("WK_URL", DEFAULT_WEBKNOSSOS_URL)
    )
    token = args.token if args.token is not None else environ.get("WK_TOKEN", None)
    assert token is not None, (
        f"An auth token needs to be supplied either through the --token command line arg "
        f"or the WK_TOKEN environment variable. Retrieve your auth token on {url}/auth/token."
    )

    with webknossos_context(url=url, token=token):
        Dataset.open(args.source_path).upload(new_dataset_name=args.name, jobs=args.jobs)
def convert_nifti(
    source_nifti_path: Path,
    target_path: Path,
    layer_name: str,
    dtype: str,
    voxel_size: Tuple[float, ...],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    is_segmentation_layer: bool = False,
    bbox_to_enforce: Optional[BoundingBox] = None,
    use_orientation_header: bool = False,
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
) -> None:
    shard_size = chunk_size * chunks_per_shard
    time_start(f"Converting of {source_nifti_path}")

    source_nifti = nib.load(str(source_nifti_path.resolve()))

    if use_orientation_header:
        # Get canonical representation of data to incorporate
        # encoded transformations. Needs to be flipped later
        # to match the coordinate system of WKW.
        source_nifti = nib.funcs.as_closest_canonical(source_nifti, enforce_diag=False)

    cube_data = np.array(source_nifti.get_fdata())

    category_type: LayerCategoryType = (
        "segmentation" if is_segmentation_layer else "color"
    )
    logging.debug(f"Assuming {category_type} as layer type for {layer_name}")

    if len(source_nifti.shape) == 3:
        cube_data = cube_data.reshape((1,) + source_nifti.shape)

    elif len(source_nifti.shape) == 4:
        cube_data = np.transpose(cube_data, (3, 0, 1, 2))

    else:
        logging.warning(
            "Converting of {} failed! Too many or too few dimensions".format(
                source_nifti_path
            )
        )
        return

    if use_orientation_header:
        # Flip y and z to transform data into wkw's coordinate system.
        cube_data = np.flip(cube_data, (2, 3))

    if flip_axes:
        cube_data = np.flip(cube_data, flip_axes)

    if voxel_size is None:
        voxel_size = tuple(map(float, source_nifti.header["pixdim"][:3]))

    logging.info(f"Using voxel_size: {voxel_size}")
    cube_data = to_target_datatype(cube_data, dtype, is_segmentation_layer)

    # everything needs to be padded to the enforced bounding box
    if bbox_to_enforce is not None:
        target_topleft = np.array((0,) + tuple(bbox_to_enforce.topleft))
        target_size = np.array((1,) + tuple(bbox_to_enforce.size))

        cube_data = pad_or_crop_to_size_and_topleft(
            cube_data, target_size, target_topleft
        )

    # Writing wkw compressed requires files of shape (shard_size, shard_size, shard_size)
    # Pad data accordingly
    padding_offset = shard_size - np.array(cube_data.shape[1:4]) % shard_size
    cube_data = np.pad(
        cube_data,
        (
            (0, 0),
            (0, int(padding_offset[0])),
            (0, int(padding_offset[1])),
            (0, int(padding_offset[2])),
        ),
    )

    wk_ds = Dataset(
        target_path,
        voxel_size=cast(Tuple[float, float, float], voxel_size or (1, 1, 1)),
        exist_ok=True,
    )
    wk_layer = (
        wk_ds.get_or_add_layer(
            layer_name,
            category_type,
            dtype_per_layer=np.dtype(dtype),
            data_format=data_format,
            largest_segment_id=int(np.max(cube_data) + 1),
        )
        if is_segmentation_layer
        else wk_ds.get_or_add_layer(
            layer_name,
            category_type,
            data_format=data_format,
            dtype_per_layer=np.dtype(dtype),
        )
    )
    wk_mag = wk_layer.get_or_add_mag(
        "1", chunk_size=chunk_size, chunks_per_shard=chunks_per_shard
    )
    wk_mag.write(cube_data)

    time_stop(f"Converting of {source_nifti_path}")
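# Worked example for the shard padding above (illustrative numbers only): with
# chunk_size = 32 and chunks_per_shard = 32, shard_size is 1024; a volume of
# shape (1, 700, 1024, 300) is padded by 1024 - 700 % 1024 = 324,
# 1024 - 1024 % 1024 = 1024 and 1024 - 300 % 1024 = 724 along x, y and z
# (an axis that is already a multiple of the shard size still receives a full
# extra shard of padding with this formula).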
def test_anisotropic_downsampling(sample_wkw_path: Path, tmp_path: Path) -> None:
    copytree(sample_wkw_path, tmp_path)

    # We need to delete mag two as it already exists. Then it is replaced by an anisotropic mag.
    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("2")

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        1,
        "--max",
        2,
        "--sampling_mode",
        "constant_z",
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        "2-2-1",
        "--max",
        4,
        "--sampling_mode",
        "constant_z",
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    assert (tmp_path / "color" / "2-2-1").exists()
    assert (tmp_path / "color" / "4-4-1").exists()
    assert count_wkw_files(tmp_path / "color" / "2-2-1") == 1
    assert count_wkw_files(tmp_path / "color" / "4-4-1") == 1

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        "4-4-1",
        "--max",
        16,
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    assert (tmp_path / "color" / "8-8-4").exists()
    assert (tmp_path / "color" / "16-16-8").exists()
    assert count_wkw_files(tmp_path / "color" / "8-8-4") == 1
    assert count_wkw_files(tmp_path / "color" / "16-16-8") == 1
        type=parse_path,
    )

    parser.add_argument(
        "--no_compression",
        help="Do not compress the copied data (compression is enabled by default).",
        type=bool,
        default=False,
    )

    add_verbose_flag(parser)
    add_distribution_flags(parser)
    add_data_format_flags(parser)

    return parser


if __name__ == "__main__":
    setup_warnings()
    args = create_parser().parse_args()
    setup_logging(args)

    Dataset.open(args.source_path).copy_dataset(
        args.target_path,
        data_format=args.data_format,
        chunk_size=args.chunk_size,
        chunks_per_shard=args.chunks_per_shard,
        compress=not args.no_compression,
        args=args,
    )
def compress_mag_inplace(
    target_path: Path, layer_name: str, mag: Mag, args: Optional[Namespace] = None
) -> None:
    Dataset.open(target_path).get_layer(layer_name).get_mag(mag).compress(args=args)
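# Hypothetical usage sketch (path, layer name and mag are illustrative only):
#   compress_mag_inplace(Path("dataset"), layer_name="color", mag=Mag(1))
# compresses the existing Mag 1 data of the "color" layer in place.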