def compress_mags(
    source_path: Path,
    layer_name: str,
    target_path: Optional[Path] = None,
    mags: Optional[List[Mag]] = None,
    args: Optional[Namespace] = None,
) -> None:
    if target_path is None:
        target = source_path.with_suffix(".tmp")
    else:
        target = target_path

    layer = Dataset.open(source_path).get_layer(layer_name)
    if mags is None:
        mags = list(layer.mags.keys())

    for mag, mag_view in layer.mags.items():
        if mag in mags:
            mag_view.compress(target_path=target, args=args)

    if target_path is None:
        backup_dir = source_path.with_suffix(BACKUP_EXT)
        (backup_dir / layer_name).mkdir(parents=True, exist_ok=True)
        for mag in mags:
            (source_path / layer_name / str(mag)).rename(
                backup_dir / layer_name / str(mag)
            )
            (target / layer_name / str(mag)).rename(
                source_path / layer_name / str(mag)
            )
        rmtree(target)
        logging.info(
            "Old files are still present in '{0}.bak'. Please remove them when not required anymore.".format(
                source_path
            )
        )
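
# Hedged usage sketch (not part of the original module): how compress_mags might be
# called for an in-place compression. The dataset path and mag list are hypothetical.
def _example_compress_mags_in_place() -> None:
    # With target_path=None the compressed data replaces the originals and the old
    # files are moved to a ".bak" backup directory, as implemented above.
    compress_mags(
        source_path=Path("testdata/tiff_wkw"),
        layer_name="color",
        mags=[Mag(1), Mag(2)],
    )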
def test_downsampling(
    sample_wkw_path: Path, tmp_path: Path, tiff_mag_2_reference_path: Path
) -> None:
    copytree(sample_wkw_path, tmp_path)
    Dataset.open(tmp_path).get_layer("color").delete_mag("2")

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--max",
        8,
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        "--sampling_mode",
        "isotropic",
        tmp_path,
    )

    assert (tmp_path / "color" / "2").exists()
    assert (tmp_path / "color" / "4").exists()
    assert (tmp_path / "color" / "8").exists()
    assert not (tmp_path / "color" / "16").exists()

    assert count_wkw_files(tmp_path / "color" / "2") == 1
    assert count_wkw_files(tmp_path / "color" / "4") == 1
    assert count_wkw_files(tmp_path / "color" / "8") == 1

    assert (
        Dataset.open(tmp_path)
        .get_layer("color")
        .get_mag("2")
        .content_is_equal(
            Dataset.open(tiff_mag_2_reference_path).get_layer("color").get_mag("2")
        )
    )
def upsample_mags(
    path: Path,
    layer_name: Optional[str] = None,
    from_mag: Optional[Mag] = None,
    target_mag: Mag = Mag(1),
    buffer_shape: Optional[Vec3Int] = None,
    compress: bool = True,
    args: Optional[Namespace] = None,
    sampling_mode: Union[str, SamplingModes] = SamplingModes.ANISOTROPIC,
) -> None:
    assert (layer_name and from_mag) or (not layer_name and not from_mag), (
        "You provided only one of the parameters layer_name and from_mag, "
        "but either both or none need to be set. If you omit them, the path "
        "argument needs to contain the layer and mag to upsample "
        "(e.g. dataset/color/1)."
    )

    if not layer_name or not from_mag:
        layer_name = path.parent.name
        from_mag = Mag(path.name)
        path = path.parent.parent

    Dataset.open(path).get_layer(layer_name).upsample(
        from_mag=from_mag,
        finest_mag=target_mag,
        compress=compress,
        sampling_mode=sampling_mode,
        buffer_shape=buffer_shape,
        args=args,
    )
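
# Hedged usage sketch (not part of the original module): upsampling the "color" layer
# from mag 2 back to mag 1. The dataset path is hypothetical; alternatively, layer_name
# and from_mag can be omitted and derived from a path like "dataset/color/2", as
# implemented above. The "isotropic" string mirrors the CLI tests in this repository.
def _example_upsample_color_to_mag1() -> None:
    upsample_mags(
        path=Path("testdata/tiff_wkw"),
        layer_name="color",
        from_mag=Mag(2),
        target_mag=Mag(1),
        sampling_mode="isotropic",
    )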
def check_equality(
    source_path: Path, target_path: Path, args: Optional[Namespace] = None
) -> None:
    logging.info(f"Comparing {source_path} with {target_path}")

    source_dataset = Dataset.open(source_path)
    target_dataset = Dataset.open(target_path)

    source_layer_names = set(source_dataset.layers.keys())
    target_layer_names = set(target_dataset.layers.keys())

    layer_names = list(source_layer_names)
    if args is not None and args.layer_name is not None:
        assert (
            args.layer_name in source_layer_names
        ), f"Provided layer {args.layer_name} does not exist in source dataset."
        assert (
            args.layer_name in target_layer_names
        ), f"Provided layer {args.layer_name} does not exist in target dataset."
        layer_names = [args.layer_name]
    else:
        assert (
            source_layer_names == target_layer_names
        ), f"The provided input datasets have different layers: {source_layer_names} != {target_layer_names}"

    for layer_name in layer_names:
        logging.info(f"Checking layer_name: {layer_name}")

        source_layer = source_dataset.layers[layer_name]
        target_layer = target_dataset.layers[layer_name]

        assert (
            source_layer.bounding_box == target_layer.bounding_box
        ), f"The bounding boxes of {source_path}/{layer_name} and {target_path}/{layer_name} are not equal: {source_layer.bounding_box} != {target_layer.bounding_box}"

        source_mags = set(source_layer.mags.keys())
        target_mags = set(target_layer.mags.keys())

        assert (
            source_mags == target_mags
        ), f"The mags of {source_path}/{layer_name} and {target_path}/{layer_name} are not equal: {source_mags} != {target_mags}"

        for mag in source_mags:
            source_mag = source_layer.mags[mag]
            target_mag = target_layer.mags[mag]

            logging.info(f"Start verification of {layer_name} in mag {mag}")
            assert source_mag.content_is_equal(target_mag, args)

    logging.info(
        f"The following datasets seem to be equal (with regard to the layers: {layer_names}):"
    )
    logging.info(source_path)
    logging.info(target_path)
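
# Hedged usage sketch (not part of the original module): verifying that a copied
# dataset matches its source. Both paths are hypothetical; with args=None all layers
# and all mags are compared, as implemented above.
def _example_check_copied_dataset() -> None:
    check_equality(
        source_path=Path("testdata/tiff_wkw"),
        target_path=Path("testdata/tiff_wkw_copy"),
    )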
def compress_mag(
    source_path: Path,
    layer_name: str,
    target_path: Path,
    mag: Mag,
    args: Optional[Namespace] = None,
) -> None:
    Dataset.open(source_path).get_layer(layer_name).get_mag(mag).compress(
        target_path=target_path, args=args
    )
def test_in_place_compression(sample_wkw_path: Path, tmp_path: Path) -> None:
    copytree(sample_wkw_path, tmp_path)

    check_call(
        "python",
        "-m",
        "wkcuber.compress",
        "--jobs",
        2,
        "--layer_name",
        "color",
        tmp_path,
    )

    assert (
        Dataset.open(tmp_path).get_layer("color").get_mag("1").info.compression_mode
    )
    assert (
        Dataset.open(tmp_path).get_layer("color").get_mag("2").info.compression_mode
    )
def downsample_mags(
    path: Path,
    layer_name: Optional[str] = None,
    from_mag: Optional[Mag] = None,
    max_mag: Optional[Mag] = None,
    interpolation_mode: str = "default",
    buffer_shape: Optional[Vec3Int] = None,
    compress: bool = True,
    args: Optional[Namespace] = None,
    sampling_mode: Union[str, SamplingModes] = SamplingModes.ANISOTROPIC,
    force_sampling_scheme: bool = False,
) -> None:
    """
    Argument `path` expects the directory containing the dataset.
    Argument `layer_name` expects the name of the layer (color or segmentation).
    Argument `from_mag` expects the resolution to base downsampling on.

    For the other parameters see the CLI help or `Layer.downsample` and `Layer.downsampling_mag`.
    """
    assert (layer_name and from_mag) or (not layer_name and not from_mag), (
        "You provided only one of the parameters layer_name and from_mag, "
        "but either both or none need to be set. If you omit them, the path "
        "argument needs to contain the layer and mag to downsample "
        "(e.g. dataset/color/1)."
    )

    if not layer_name or not from_mag:
        layer_name = path.parent.name
        from_mag = Mag(path.name)
        path = path.parent.parent

    assert layer_name is not None  # for mypy
    assert from_mag is not None  # for mypy

    Dataset.open(path).get_layer(layer_name).downsample(
        from_mag=from_mag,
        coarsest_mag=max_mag,
        interpolation_mode=interpolation_mode,
        compress=compress,
        sampling_mode=sampling_mode,
        buffer_shape=buffer_shape,
        force_sampling_scheme=force_sampling_scheme,
        args=args,
    )
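
# Hedged usage sketch (not part of the original module): downsampling the "color"
# layer from mag 1 down to mag 8. The dataset path and the target mag are hypothetical;
# SamplingModes.ANISOTROPIC is the default shown in the signature above.
def _example_downsample_color_layer() -> None:
    downsample_mags(
        path=Path("testdata/tiff_wkw"),
        layer_name="color",
        from_mag=Mag(1),
        max_mag=Mag(8),
        sampling_mode=SamplingModes.ANISOTROPIC,
    )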
def test_upsampling(
    sample_wkw_path: Path, tmp_path: Path, tiff_mag_2_reference_path: Path
) -> None:
    copytree(sample_wkw_path, tmp_path)

    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("1")
    color_layer.bounding_box = color_layer.bounding_box.align_with_mag(
        Mag("2"), ceil=True
    )

    check_call(
        "python",
        "-m",
        "wkcuber.upsampling",
        "--jobs",
        2,
        "--from_mag",
        "2-2-2",
        "--target_mag",
        1,
        "--buffer_cube_size",
        1024,
        "--layer_name",
        "color",
        tmp_path,
    )

    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("2")

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from_mag",
        1,
        "--max",
        2,
        "--sampling_mode",
        "isotropic",
        "--buffer_cube_size",
        256,
        "--layer_name",
        "color",
        "--interpolation_mode",
        "nearest",
        tmp_path,
    )

    assert (
        Dataset.open(tmp_path).get_layer("color").get_mag("2").bounding_box
    ) == (
        Dataset.open(tiff_mag_2_reference_path)
        .get_layer("color")
        .get_mag("2")
        .bounding_box
    )

    assert (
        Dataset.open(tmp_path)
        .get_layer("color")
        .get_mag("2")
        .content_is_equal(
            Dataset.open(tiff_mag_2_reference_path).get_layer("color").get_mag("2")
        )
    )
def sample_wkw_path() -> Path:
    ds_path = TESTDATA_DIR / "tiff_wkw"
    if ds_path.exists():
        rmtree(ds_path)

    check_call(
        [
            "python",
            "-m",
            "wkcuber.cubing",
            "--jobs",
            "2",
            "--voxel_size",
            "1,1,1",
            str(TESTDATA_DIR / "tiff"),
            str(ds_path),
        ]
    )
    copytree(
        TESTDATA_DIR / "tiff" / "datasource-properties.wkw-fixture.json",
        ds_path / PROPERTIES_FILE_NAME,
    )
    Dataset.open(ds_path).get_layer("color").downsample_mag(Mag(1), Mag(2))
    return ds_path
def downsample_test_helper(
    WT1_path: Path, tmp_path: Path, use_compress: bool, chunk_size: Vec3Int
) -> None:
    source_path = WT1_path
    target_path = tmp_path / "WT1_wkw"

    source_ds = Dataset.open(source_path)
    target_ds = source_ds.copy_dataset(
        target_path, chunk_size=chunk_size, chunks_per_shard=16
    )

    target_layer = target_ds.get_layer("color")
    mag1 = target_layer.get_mag("1")
    target_layer.delete_mag("2-2-1")  # This is not needed for this test

    # The bounding box has to be set here explicitly because the downsampled
    # data is written to a different dataset.
    target_layer.bounding_box = source_ds.get_layer("color").bounding_box

    mag2 = target_layer._initialize_mag_from_other_mag("2", mag1, use_compress)

    # The actual size of mag1 is (4600, 4600, 512).
    # To keep this test case fast, we are only downsampling a small part.
    offset = (4096, 4096, 0)
    size = (504, 504, 512)
    source_buffer = mag1.read(
        absolute_offset=offset,
        size=size,
    )[0]
    assert np.any(source_buffer != 0)

    downsample_cube_job(
        (
            mag1.get_view(absolute_offset=offset, size=size),
            mag2.get_view(
                absolute_offset=offset,
                size=size,
            ),
            0,
        ),
        Vec3Int(2, 2, 2),
        InterpolationModes.MAX,
        Vec3Int.full(128),
    )

    assert np.any(source_buffer != 0)

    target_buffer = mag2.read(absolute_offset=offset, size=size)[0]
    assert np.any(target_buffer != 0)
    assert np.all(
        target_buffer
        == downsample_cube(source_buffer, [2, 2, 2], InterpolationModes.MAX)
    )
def test_main(tmp_path: Path, order: str, flip_axes: Optional[Tuple[int, int]]) -> None:
    raw_file = tmp_path / "input.raw"

    input_dtype = "float32"
    shape = 64, 128, 256
    data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape, order=order)
    with raw_file.open("wb") as f:
        f.write(data.tobytes(order=order))

    output_path = tmp_path / "output"
    output_path.mkdir()

    args_list = [
        str(raw_file),
        str(output_path),
        "--input_dtype",
        input_dtype,
        "--shape",
        ",".join(str(i) for i in shape),
        "--order",
        order,
        "--jobs",
        "1",
    ]
    if flip_axes is not None:
        args_list.extend(["--flip_axes", ",".join(str(a + 1) for a in flip_axes)])

    args = create_parser().parse_args(args_list)
    main(args)

    dataset = Dataset.open(output_path)
    layer = dataset.get_color_layers()[0]
    mag_view = layer.get_mag(1)
    view = mag_view.get_view()
    read_data = view.read()

    assert view.size == shape
    assert view.get_dtype() == data.dtype
    assert np.array_equal(
        read_data[0],
        data if flip_axes is None else np.flip(data, flip_axes),
    )
def test_main(tmp_path: Path, category: str) -> None:
    input_folder = tmp_path / "raw_dataset" / category
    input_folder.mkdir(parents=True, exist_ok=True)

    raw_file = input_folder / "input.tif"

    input_dtype = "uint32"
    shape = 64, 128, 256
    data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape)
    with TiffWriter(raw_file) as tif:
        tif.write(data.transpose([2, 1, 0]))

    output_path = tmp_path / "output_2"
    output_path.mkdir()

    args_list = [
        str(tmp_path / "raw_dataset"),
        str(output_path),
        "--jobs",
        "1",
        "--voxel_size",
        "11,11,11",
        "--max_mag",
        "4",
    ]

    args = create_parser().parse_args(args_list)
    cube_with_args(args)

    dataset = Dataset.open(output_path)
    if category == "color":
        layer = dataset.get_color_layers()[0]
    else:
        layer = dataset.get_segmentation_layers()[0]
    mag_view = layer.get_mag(1)
    view = mag_view.get_view()
    read_data = view.read()

    assert view.size == shape
    assert view.get_dtype() == data.dtype
    assert np.array_equal(
        read_data[0],
        data,
    )
def export_wkw_as_tiff(args: Namespace) -> None:
    setup_logging(args)

    mag_view = (
        Dataset.open(args.source_path).get_layer(args.layer_name).get_mag(args.mag)
    )

    bbox = mag_view.bounding_box if args.bbox is None else args.bbox

    logging.info(f"Starting tiff export for bounding box: {bbox}")

    if args.tiles_per_dimension is not None:
        args.tile_size = [int(s.strip()) for s in args.tiles_per_dimension.split(",")]
        assert len(args.tile_size) == 2
        logging.info(
            f"Using tiling with {args.tile_size[0]},{args.tile_size[1]} tiles in the dimensions."
        )
        args.tile_size[0] = ceil(bbox.in_mag(mag_view.mag).size.x / args.tile_size[0])
        args.tile_size[1] = ceil(bbox.in_mag(mag_view.mag).size.y / args.tile_size[1])
    elif args.tile_size is not None:
        args.tile_size = [int(s.strip()) for s in args.tile_size.split(",")]
        assert len(args.tile_size) == 2
        logging.info(
            f"Using tiling with the size of {args.tile_size[0]},{args.tile_size[1]}."
        )
    args.batch_size = int(args.batch_size)

    export_tiff_stack(
        mag_view=mag_view,
        bbox=bbox,
        destination_path=args.destination_path,
        name=args.name,
        tiling_slice_size=args.tile_size,
        batch_size=args.batch_size,
        downsample=args.downsample,
        args=args,
    )
def test_export_nifti_file(tmp_path: Path) -> None:
    destination_path = tmp_path / f"{DS_NAME}_nifti"
    destination_path.mkdir()

    bbox = BoundingBox((100, 100, 10), (100, 500, 50))
    bbox_dict = bbox.to_config_dict()
    args_list = [
        "--source_path",
        str(SOURCE_PATH),
        "--destination_path",
        str(destination_path),
        "--name",
        "test_export",
        "--source_bbox",
        bbox.to_csv(),
        "--mag",
        "1",
    ]

    export_wkw_as_nifti_from_arg_list(args_list)

    wk_ds = Dataset.open(SOURCE_PATH)

    for layer_name, layer in wk_ds.layers.items():
        correct_image = layer.get_mag(Mag(1)).read(
            bbox_dict["topleft"], bbox_dict["size"]
        )
        # nifti is transposed
        correct_image = correct_image.transpose(1, 2, 3, 0)
        correct_image = np.squeeze(correct_image)

        nifti_path = destination_path.joinpath(f"test_export_{layer_name}.nii")

        assert nifti_path.is_file(), f"Expected a nifti to be written at: {nifti_path}."

        nifti = nib.load(str(nifti_path))
        test_image = np.array(nifti.get_fdata())

        assert np.array_equal(correct_image, test_image), (
            f"The nifti file {nifti_path} that was written is not "
            f"equal to the original wkw_file."
        )
def export_nifti(
    source_path: Path,
    source_bbox: Optional[BoundingBox],
    mag: Mag,
    destination_path: Path,
    name: str,
    padding: Optional[Tuple[int, ...]] = None,
) -> None:
    dataset = Dataset.open(source_path)

    for layer_name, layer in dataset.layers.items():
        logging.info(f"Starting nifti export for bounding box: {source_bbox}")

        export_layer_to_nifti(
            source_path,
            layer.bounding_box if source_bbox is None else source_bbox,
            mag,
            layer_name,
            destination_path,
            name + "_" + layer_name,
            padding,
        )
def export_layer_to_nifti(
    source_path: Path,
    source_bbox: BoundingBox,
    mag: Mag,
    layer_name: str,
    destination_path: Path,
    name: str,
    padding: Optional[Tuple[int, ...]] = None,
) -> None:
    dataset = Dataset.open(source_path)
    layer = dataset.get_layer(layer_name)
    mag_layer = layer.get_mag(mag)

    is_segmentation_layer = layer.category == SEGMENTATION_CATEGORY

    data = mag_layer.read(source_bbox.topleft, source_bbox.size)
    data = data.transpose(1, 2, 3, 0)
    logging.info(f"Shape with layer {data.shape}")

    data = np.array(data)
    if is_segmentation_layer and data.max() > 0:
        factor = np.iinfo("uint8").max / data.max()
        data = data * factor
        data = data.astype(np.dtype("uint8"))

    if padding:
        assert len(padding) == 6, "padding needs 6 values"

        padding_per_axis = list(zip(padding[:3], padding[3:]))
        padding_per_axis.append((0, 0))
        data = np.pad(data, padding_per_axis, mode="constant", constant_values=0)

    img = nib.Nifti1Image(data, np.eye(4))

    destination_file = str(destination_path.joinpath(name + ".nii"))

    logging.info(f"Writing to {destination_file} with shape {data.shape}")
    nib.save(img, destination_file)
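
# Hedged usage sketch (not part of the original module): exporting a cropped region of
# the "color" layer to nifti. Paths and the bounding box are hypothetical; padding takes
# six values (before/after for x, y, z), as asserted in export_layer_to_nifti above.
def _example_export_color_layer_to_nifti() -> None:
    export_layer_to_nifti(
        source_path=Path("testdata/tiff_wkw"),
        source_bbox=BoundingBox((100, 100, 10), (100, 500, 50)),
        mag=Mag(1),
        layer_name="color",
        destination_path=Path("testdata/nifti_out"),
        name="test_export_color",
        padding=(10, 10, 0, 10, 10, 0),
    )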
f"Number of simultaneous upload processes. Defaults to {DEFAULT_SIMULTANEOUS_UPLOADS}.", ) parser.add_argument( "--name", help= "Specify a new name for the dataset. Defaults to the name specified in `datasource-properties.json`.", default=None, ) add_verbose_flag(parser) return parser if __name__ == "__main__": setup_warnings() args = create_parser().parse_args() setup_logging(args) url = (args.url if args.url is not None else environ.get( "WK_URL", DEFAULT_WEBKNOSSOS_URL)) token = args.token if args.token is not None else environ.get( "WK_TOKEN", None) assert ( token is not None ), f"An auth token needs to be supplied either through the --token command line arg or the WK_TOKEN environment variable. Retrieve your auth token on {url}/auth/token." with webknossos_context(url=url, token=token): Dataset.open(args.source_path).upload(new_dataset_name=args.name, jobs=args.jobs)
def test_anisotropic_downsampling(sample_wkw_path: Path, tmp_path: Path) -> None:
    copytree(sample_wkw_path, tmp_path)

    # We need to delete mag two as it already exists. Then it is replaced by an anisotropic mag.
    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("2")

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        1,
        "--max",
        2,
        "--sampling_mode",
        "constant_z",
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        "2-2-1",
        "--max",
        4,
        "--sampling_mode",
        "constant_z",
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    assert (tmp_path / "color" / "2-2-1").exists()
    assert (tmp_path / "color" / "4-4-1").exists()
    assert count_wkw_files(tmp_path / "color" / "2-2-1") == 1
    assert count_wkw_files(tmp_path / "color" / "4-4-1") == 1

    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from",
        "4-4-1",
        "--max",
        16,
        "--buffer_cube_size",
        128,
        "--layer_name",
        "color",
        tmp_path,
    )

    assert (tmp_path / "color" / "8-8-4").exists()
    assert (tmp_path / "color" / "16-16-8").exists()
    assert count_wkw_files(tmp_path / "color" / "8-8-4") == 1
    assert count_wkw_files(tmp_path / "color" / "16-16-8") == 1
        type=parse_path,
    )

    parser.add_argument(
        "--no_compression",
        help="Disable compression; by default the copied data is compressed.",
        type=bool,
        default=False,
    )

    add_verbose_flag(parser)
    add_distribution_flags(parser)
    add_data_format_flags(parser)

    return parser


if __name__ == "__main__":
    setup_warnings()
    args = create_parser().parse_args()
    setup_logging(args)

    Dataset.open(args.source_path).copy_dataset(
        args.target_path,
        data_format=args.data_format,
        chunk_size=args.chunk_size,
        chunks_per_shard=args.chunks_per_shard,
        compress=not args.no_compression,
        args=args,
    )
def compress_mag_inplace(
    target_path: Path, layer_name: str, mag: Mag, args: Optional[Namespace] = None
) -> None:
    Dataset.open(target_path).get_layer(layer_name).get_mag(mag).compress(args=args)