def convert_cube_job(
    source_knossos_info: KnossosDatasetInfo, args: Tuple[View, int]
) -> None:
    target_view, _ = args

    time_start(f"Conversion of {target_view.bounding_box}")
    cube_size = cast(Tuple[int, int, int], (CUBE_EDGE_LEN,) * 3)

    offset = target_view.bounding_box.in_mag(target_view.mag).topleft
    size = target_view.bounding_box.in_mag(target_view.mag).size
    buffer = np.zeros(size.to_tuple(), dtype=target_view.get_dtype())

    # Copy the source data into the buffer, one KNOSSOS cube at a time.
    with open_knossos(source_knossos_info) as source_knossos:
        for x in range(0, size.x, CUBE_EDGE_LEN):
            for y in range(0, size.y, CUBE_EDGE_LEN):
                for z in range(0, size.z, CUBE_EDGE_LEN):
                    cube_data = source_knossos.read(
                        (offset + Vec3Int(x, y, z)).to_tuple(), cube_size
                    )
                    buffer[
                        x : (x + CUBE_EDGE_LEN),
                        y : (y + CUBE_EDGE_LEN),
                        z : (z + CUBE_EDGE_LEN),
                    ] = cube_data

    target_view.write(buffer)
    time_stop(f"Conversion of {target_view.bounding_box}")
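# `open_knossos` is assumed to be a small module-level helper around the
# KNOSSOS dataset class. A minimal sketch of the expected interface, assuming
# KnossosDatasetInfo carries a `dataset_path` and a `dtype` (the actual helper
# in the surrounding module may differ):
def open_knossos(info: KnossosDatasetInfo) -> KnossosDataset:
    # KnossosDataset.open returns a context manager, matching the
    # `with open_knossos(...)` usage above.
    return KnossosDataset.open(info.dataset_path, np.dtype(info.dtype))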
def convert_raw(
    source_raw_path: Path,
    target_path: Path,
    layer_name: str,
    input_dtype: str,
    shape: Tuple[int, int, int],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    order: str = "F",
    voxel_size: Optional[Tuple[float, float, float]] = (1.0, 1.0, 1.0),
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
    compress: bool = True,
    executor_args: Optional[argparse.Namespace] = None,
) -> MagView:
    assert order in ("C", "F")
    time_start(f"Conversion of {source_raw_path}")

    if voxel_size is None:
        voxel_size = 1.0, 1.0, 1.0
    wk_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    wk_layer = wk_ds.get_or_add_layer(
        layer_name,
        "color",
        dtype_per_layer=np.dtype(input_dtype),
        num_channels=1,
        data_format=data_format,
    )
    wk_layer.bounding_box = BoundingBox((0, 0, 0), shape)
    wk_mag = wk_layer.get_or_add_mag(
        "1",
        chunk_size=chunk_size,
        chunks_per_shard=chunks_per_shard,
        compress=compress,
    )

    # Convert the raw file in parallel, one shard-aligned chunk per task.
    with get_executor_for_args(executor_args) as executor:
        wait_and_ensure_success(
            executor.map_to_futures(
                partial(
                    _raw_chunk_converter,
                    source_raw_path=source_raw_path,
                    target_mag_view=wk_mag,
                    input_dtype=input_dtype,
                    shape=shape,
                    order=order,
                    flip_axes=flip_axes,
                ),
                wk_layer.bounding_box.chunk(chunk_size=chunk_size * chunks_per_shard),
            )
        )

    time_stop(f"Conversion of {source_raw_path}")
    return wk_mag
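# Example invocation of convert_raw (a sketch; the paths, shape, and dtype
# below are illustrative assumptions, not values from this repository):
#
#   convert_raw(
#       source_raw_path=Path("data/volume.raw"),
#       target_path=Path("output/my_dataset"),
#       layer_name="color",
#       input_dtype="uint8",
#       shape=(1024, 1024, 512),
#       data_format=DataFormat.WKW,
#       chunk_size=Vec3Int.full(32),
#       chunks_per_shard=Vec3Int.full(32),
#   )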
def main(args: argparse.Namespace) -> None:
    # TODO: Use the skeleton API to read the bounding boxes once
    # https://github.com/scalableminds/webknossos-libs/issues/482 is done.
    nml_regex = re.compile(
        r'<userBoundingBox .*name="Limits of flood-fill \(source_id=(\d+), target_id=(\d+), seed=([\d,]+), timestamp=(\d+)\)".*topLeftX="(\d+)" topLeftY="(\d+)" topLeftZ="(\d+)" width="(\d+)" height="(\d+)" depth="(\d+)" />'
    )

    bboxes: List[FloodFillBbox] = []
    with open(args.nml_path, "r", encoding="utf-8") as nml_file:
        lines = nml_file.readlines()

    for line in lines:
        matches = nml_regex.findall(line)
        for match in matches:
            # Each match is a tuple of (source_id, target_id, seed, timestamp,
            # top_left_x, top_left_y, top_left_z, width, height, depth).
            bboxes.append(
                FloodFillBbox(
                    bounding_box=BoundingBox(
                        (int(match[4]), int(match[5]), int(match[6])),
                        (int(match[7]), int(match[8]), int(match[9])),
                    ),
                    seed_position=Vec3Int(*(int(c) for c in match[2].split(","))),
                    source_id=int(match[0]),
                    target_id=int(match[1]),
                    timestamp=int(match[3]),
                )
            )
    bboxes = sorted(bboxes, key=lambda bbox: bbox.timestamp)

    time_start("Merge with fallback layer")
    data_mag = merge_with_fallback_layer(
        args.output_path,
        args.volume_path,
        args.segmentation_layer_path,
    )
    time_stop("Merge with fallback layer")

    time_start("All floodfills")
    for floodfill in bboxes:
        time_start("Floodfill")
        execute_floodfill(
            data_mag,
            floodfill.seed_position,
            floodfill.bounding_box,
            floodfill.source_id,
            floodfill.target_id,
        )
        time_stop("Floodfill")
    time_stop("All floodfills")

    time_start("Recompute downsampled mags")
    data_mag.layer.redownsample()
    time_stop("Recompute downsampled mags")
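# `FloodFillBbox` is assumed to be a small record type defined alongside this
# script. A minimal sketch matching the fields used above, assuming
# `from typing import NamedTuple`:
class FloodFillBbox(NamedTuple):
    bounding_box: BoundingBox
    seed_position: Vec3Int
    source_id: int
    target_id: int
    timestamp: int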
def tile_cubing_job(
    args: Tuple[
        View,
        List[int],
        str,
        int,
        Tuple[int, int, int],
        Dict[str, int],
        Dict[str, int],
        Dict[str, int],
        str,
        int,
    ]
) -> int:
    (
        target_view,
        z_batches,
        input_path_pattern,
        batch_size,
        tile_size,
        min_dimensions,
        max_dimensions,
        decimal_lengths,
        dtype,
        num_channels,
    ) = args
    largest_value_in_chunk = 0  # Used to compute the largest_segmentation_id for segmentation layers

    # Iterate over the z batches.
    # Batching is useful to utilize IO more efficiently.
    for z_batch in get_chunks(z_batches, batch_size):
        try:
            time_start(f"Cubing of z={z_batch[0]}-{z_batch[-1]}")
            for x in range(min_dimensions["x"], max_dimensions["x"] + 1):
                for y in range(min_dimensions["y"], max_dimensions["y"] + 1):
                    # Allocate a large buffer for all images in this batch.
                    # Shape will be (channel_count, x, y, z). Fortran order
                    # prevents the data from having to be copied in the Rust
                    # wkw backend.
                    buffer_shape = [
                        num_channels,
                        tile_size[0],
                        tile_size[1],
                        len(z_batch),
                    ]
                    buffer = np.empty(buffer_shape, dtype=dtype, order="F")
                    for z in z_batch:
                        # Read the file if it exists, otherwise use zeros.
                        file_name = find_file_with_dimensions(
                            input_path_pattern, x, y, z, decimal_lengths
                        )
                        if file_name:
                            image = read_image_file(
                                file_name,
                                target_view.header.voxel_type,
                                z,
                                None,
                                None,
                            )
                        else:
                            image = np.zeros(
                                tile_size + (1,),
                                dtype=target_view.header.voxel_type,
                            )
                        # An image may be smaller than the buffer if the tile
                        # is at the bottom/right border.
                        buffer[
                            :, : image.shape[0], : image.shape[1], z - z_batch[0]
                        ] = image.transpose((2, 0, 1, 3))[:, :, :, 0]

                    if np.any(buffer != 0):
                        offset = (
                            (x - min_dimensions["x"]) * tile_size[0],
                            (y - min_dimensions["y"]) * tile_size[1],
                            z_batch[0] - target_view.global_offset.z,
                        )
                        target_view.write(data=buffer, offset=offset)
                        largest_value_in_chunk = max(
                            largest_value_in_chunk, np.max(buffer)
                        )
            time_stop(f"Cubing of z={z_batch[0]}-{z_batch[-1]}")
        except Exception as exc:
            logging.error(
                f"Cubing of z={z_batch[0]}-{z_batch[-1]} failed with: {exc}"
            )
            raise exc
    return largest_value_in_chunk
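# `get_chunks` is assumed to be a simple batching helper used by the cubing
# jobs above and below. A minimal sketch, assuming `Iterator` and `Any` from
# typing and list-like inputs:
def get_chunks(arr: List[Any], chunk_size: int) -> Iterator[List[Any]]:
    # Yield successive slices of at most `chunk_size` elements.
    for i in range(0, len(arr), chunk_size):
        yield arr[i : i + chunk_size]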
def merge_with_fallback_layer(
    output_path: Path,
    volume_annotation_path: Path,
    segmentation_layer_path: Path,
) -> MagView:
    assert not output_path.exists(), f"Dataset at {output_path} already exists"

    # Prepare the output dataset by creating a shallow copy of the dataset
    # containing segmentation_layer_path, but do a deep copy of
    # segmentation_layer_path itself (so that we can mutate it).
    input_segmentation_dataset = wk.Dataset.open(segmentation_layer_path.parent)
    time_start("Prepare output dataset")
    output_dataset = input_segmentation_dataset.shallow_copy_dataset(
        output_path,
        name=output_path.name,
        make_relative=True,
        layers_to_ignore=[segmentation_layer_path.name],
    )
    output_layer = output_dataset.add_copy_layer(
        segmentation_layer_path, segmentation_layer_path.name
    )
    time_stop("Prepare output dataset")

    input_segmentation_mag = input_segmentation_dataset.get_layer(
        segmentation_layer_path.name
    ).get_finest_mag()
    with temporary_annotation_view(volume_annotation_path) as input_annotation_layer:
        input_annotation_mag = input_annotation_layer.get_finest_mag()
        bboxes = [
            bbox.in_mag(input_annotation_mag._mag)
            for bbox in input_annotation_mag.get_bounding_boxes_on_disk()
        ]
        output_mag = output_layer.get_mag(input_segmentation_mag.mag)

        cube_size = (
            output_mag.info.chunk_size[0] * output_mag.info.chunks_per_shard[0]
        )
        chunks_with_bboxes = BoundingBox.group_boxes_with_aligned_mag(
            bboxes, Mag(cube_size)
        )

        assert (
            input_annotation_mag.info.chunks_per_shard == Vec3Int.ones()
        ), "volume annotation must have file_len=1"
        assert (
            input_annotation_mag.info.voxel_type
            == input_segmentation_mag.info.voxel_type
        ), "Volume annotation must have same dtype as fallback layer"

        chunk_count = 0
        for chunk, chunk_bboxes in chunks_with_bboxes.items():
            chunk_count += 1
            logger.info(f"Processing chunk {chunk_count}...")

            time_start("Read chunk")
            data_buffer = output_mag.read(chunk.topleft, chunk.size)[0, :, :, :]
            time_stop("Read chunk")

            time_start("Read/merge bboxes")
            for bbox in chunk_bboxes:
                # Strip the channel dimension before merging into the 3D buffer.
                read_data = input_annotation_mag.read(bbox.topleft, bbox.size)[
                    0, :, :, :
                ]
                data_buffer[bbox.offset(-chunk.topleft).to_slices()] = read_data
            time_stop("Read/merge bboxes")

            time_start("Write chunk")
            output_mag.write(data_buffer, chunk.topleft)
            time_stop("Write chunk")
    return output_mag
def execute_floodfill(
    data_mag: MagView,
    seed_position: Vec3Int,
    already_processed_bbox: BoundingBox,
    source_id: int,
    target_id: int,
) -> None:
    cube_size = data_mag.info.shard_size
    cube_bbox = BoundingBox(Vec3Int(0, 0, 0), cube_size)
    chunk_with_relative_seed: List[Tuple[Vec3Int, Vec3Int]] = [
        get_chunk_pos_and_offset(seed_position, cube_size)
    ]

    # `is_visited` tracks which parts of the already processed bbox were
    # already traversed. Outside of that bounding box, the actual data is
    # itself an indicator of whether the flood fill has reached a voxel.
    is_visited = np.zeros(already_processed_bbox.size.to_tuple(), dtype=np.uint8)
    chunk_count = 0
    while len(chunk_with_relative_seed) > 0:
        chunk_count += 1
        if chunk_count % 10000 == 0:
            logger.info(f"Handled {chunk_count} seed positions")

        dirty_bucket = False
        current_cube, relative_seed = chunk_with_relative_seed.pop()
        global_seed = current_cube + relative_seed

        # Reading only the seed voxel first can be up to 30,000 times faster,
        # which is very relevant since the chunk does not need to be traversed
        # at all if the seed voxel is already covered.
        value_at_seed_position = data_mag.read(global_seed, (1, 1, 1))

        if value_at_seed_position == source_id or (
            already_processed_bbox.contains(global_seed)
            and value_at_seed_position == target_id
            and not is_visited[global_seed - already_processed_bbox.topleft]
        ):
            logger.info(
                f"Handling chunk {chunk_count} with current cube {current_cube}"
            )
            time_start("read data")
            cube_data = data_mag.read(current_cube, cube_size)
            cube_data = cube_data[0, :, :, :]
            time_stop("read data")

            seeds_in_current_chunk: Set[Vec3Int] = set()
            seeds_in_current_chunk.add(relative_seed)

            time_start("traverse cube")
            while len(seeds_in_current_chunk) > 0:
                current_relative_seed = seeds_in_current_chunk.pop()
                current_global_seed = current_cube + current_relative_seed
                if already_processed_bbox.contains(current_global_seed):
                    is_visited[
                        current_global_seed - already_processed_bbox.topleft
                    ] = 1

                if cube_data[current_relative_seed] != target_id:
                    cube_data[current_relative_seed] = target_id
                    dirty_bucket = True

                # Check the neighbors of the current seed.
                for neighbor in NEIGHBORS:
                    neighbor_pos = current_relative_seed + neighbor
                    global_neighbor_pos = current_cube + neighbor_pos
                    if already_processed_bbox.contains(global_neighbor_pos):
                        if is_visited[
                            global_neighbor_pos - already_processed_bbox.topleft
                        ]:
                            continue
                    if cube_bbox.contains(neighbor_pos):
                        # The neighbor is in the current cube: traverse it here.
                        if cube_data[neighbor_pos] == source_id or (
                            already_processed_bbox.contains(global_neighbor_pos)
                            and cube_data[neighbor_pos] == target_id
                        ):
                            seeds_in_current_chunk.add(neighbor_pos)
                    else:
                        # The neighbor is in another cube: enqueue it as a seed.
                        chunk_with_relative_seed.append(
                            get_chunk_pos_and_offset(global_neighbor_pos, cube_size)
                        )
            time_stop("traverse cube")

            if dirty_bucket:
                time_start("write chunk")
                data_mag.write(cube_data, current_cube)
                time_stop("write chunk")
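# The flood fill relies on two module-level helpers. Minimal sketches,
# assuming a 6-connected neighborhood and shard-aligned chunk addressing
# (the actual definitions in the surrounding module may differ):
NEIGHBORS = [
    Vec3Int(1, 0, 0),
    Vec3Int(-1, 0, 0),
    Vec3Int(0, 1, 0),
    Vec3Int(0, -1, 0),
    Vec3Int(0, 0, 1),
    Vec3Int(0, 0, -1),
]


def get_chunk_pos_and_offset(
    global_position: Vec3Int, chunk_shape: Vec3Int
) -> Tuple[Vec3Int, Vec3Int]:
    # Split a global voxel position into the top-left corner of its
    # containing chunk and the position relative to that corner.
    offset = global_position % chunk_shape
    return global_position - offset, offset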
def cubing_job(
    args: Tuple[
        View,
        Mag,
        InterpolationModes,
        List[str],
        int,
        bool,
        Optional[int],
        Optional[int],
        str,
        int,
    ]
) -> Any:
    (
        target_view,
        target_mag,
        interpolation_mode,
        source_file_batches,
        batch_size,
        pad,
        channel_index,
        sample_index,
        dtype,
        num_channels,
    ) = args

    downsampling_needed = target_mag != Mag(1)
    largest_value_in_chunk = 0  # Used to compute the largest_segmentation_id for segmentation layers
    max_image_size = (target_view.size[0], target_view.size[1])

    # Iterate over batches of contiguous z sections.
    # The batches have a maximum size of `batch_size`.
    # Batched iteration utilizes IO more efficiently.
    first_z_idx = target_view.global_offset.z
    for source_file_batch in get_chunks(source_file_batches, batch_size):
        try:
            time_start(
                f"Cubing of z={first_z_idx}-{first_z_idx + len(source_file_batch)}"
            )
            # Allocate a large buffer for all images in this batch.
            # Shape will be (channel_count, x, y, z). Fortran order prevents
            # the data from having to be copied in the Rust wkw backend.
            buffer_shape = (
                [num_channels] + list(max_image_size) + [len(source_file_batch)]
            )
            buffer = np.empty(buffer_shape, dtype=dtype, order="F")

            # Iterate over each z section in the batch.
            for i, file_name in enumerate(source_file_batch):
                z = first_z_idx + i
                # Image shape will be (x, y, channel_count, z=1).
                image = read_image_file(
                    file_name,
                    target_view.info.voxel_type,
                    z,
                    channel_index,
                    sample_index,
                )

                if pad:
                    image = np.pad(
                        image,
                        mode="constant",
                        pad_width=[
                            (0, max_image_size[0] - image.shape[0]),
                            (0, max_image_size[1] - image.shape[1]),
                            (0, 0),
                            (0, 0),
                        ],
                    )
                else:
                    assert image.shape[0:2] == max_image_size, (
                        f"Section z={z} has the wrong dimensions: {image.shape} "
                        f"(expected {max_image_size}). Consider using --pad."
                    )
                buffer[:, :, :, i] = image.transpose((2, 0, 1, 3))[:, :, :, 0]
                del image

            if downsampling_needed:
                buffer = downsample_unpadded_data(
                    buffer, target_mag, interpolation_mode
                )
            buffer_z_offset = (
                first_z_idx - target_view.global_offset.z
            ) // target_mag.z
            target_view.write(offset=(0, 0, buffer_z_offset), data=buffer)
            largest_value_in_chunk = max(largest_value_in_chunk, np.max(buffer))
            time_stop(
                f"Cubing of z={first_z_idx}-{first_z_idx + len(source_file_batch)}"
            )
            first_z_idx += len(source_file_batch)
        except Exception as exc:
            logging.error(
                f"Cubing of z={first_z_idx}-"
                f"{first_z_idx + len(source_file_batch)} failed with {exc}"
            )
            raise exc
    return largest_value_in_chunk
def convert_nifti(
    source_nifti_path: Path,
    target_path: Path,
    layer_name: str,
    dtype: str,
    voxel_size: Tuple[float, ...],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    is_segmentation_layer: bool = False,
    bbox_to_enforce: Optional[BoundingBox] = None,
    use_orientation_header: bool = False,
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
) -> None:
    shard_size = chunk_size * chunks_per_shard
    time_start(f"Conversion of {source_nifti_path}")

    source_nifti = nib.load(str(source_nifti_path.resolve()))

    if use_orientation_header:
        # Get the canonical representation of the data to incorporate
        # encoded transformations. Needs to be flipped later to match
        # the coordinate system of WKW.
        source_nifti = nib.funcs.as_closest_canonical(source_nifti, enforce_diag=False)

    cube_data = np.array(source_nifti.get_fdata())

    category_type: LayerCategoryType = (
        "segmentation" if is_segmentation_layer else "color"
    )
    logging.debug(f"Assuming {category_type} as layer type for {layer_name}")

    if len(source_nifti.shape) == 3:
        cube_data = cube_data.reshape((1,) + source_nifti.shape)
    elif len(source_nifti.shape) == 4:
        cube_data = np.transpose(cube_data, (3, 0, 1, 2))
    else:
        logging.warning(
            f"Conversion of {source_nifti_path} failed! Too many or too few dimensions"
        )
        return

    if use_orientation_header:
        # Flip y and z to transform the data into wkw's coordinate system.
        cube_data = np.flip(cube_data, (2, 3))

    if flip_axes:
        cube_data = np.flip(cube_data, flip_axes)

    if voxel_size is None:
        voxel_size = tuple(map(float, source_nifti.header["pixdim"][:3]))

    logging.info(f"Using voxel_size: {voxel_size}")
    cube_data = to_target_datatype(cube_data, dtype, is_segmentation_layer)

    # Pad or crop the data to the enforced bounding box, if one is given.
    if bbox_to_enforce is not None:
        target_topleft = np.array((0,) + tuple(bbox_to_enforce.topleft))
        target_size = np.array((1,) + tuple(bbox_to_enforce.size))

        cube_data = pad_or_crop_to_size_and_topleft(
            cube_data, target_size, target_topleft
        )

    # Writing compressed wkw requires files of shape
    # (shard_size, shard_size, shard_size). Pad the data accordingly
    # (no padding if the shape is already shard-aligned).
    padding_offset = (-np.array(cube_data.shape[1:4])) % np.array(
        shard_size.to_tuple()
    )
    cube_data = np.pad(
        cube_data,
        (
            (0, 0),
            (0, int(padding_offset[0])),
            (0, int(padding_offset[1])),
            (0, int(padding_offset[2])),
        ),
    )

    wk_ds = Dataset(
        target_path,
        voxel_size=cast(Tuple[float, float, float], voxel_size or (1, 1, 1)),
        exist_ok=True,
    )
    wk_layer = (
        wk_ds.get_or_add_layer(
            layer_name,
            category_type,
            dtype_per_layer=np.dtype(dtype),
            data_format=data_format,
            largest_segment_id=int(np.max(cube_data) + 1),
        )
        if is_segmentation_layer
        else wk_ds.get_or_add_layer(
            layer_name,
            category_type,
            data_format=data_format,
            dtype_per_layer=np.dtype(dtype),
        )
    )
    wk_mag = wk_layer.get_or_add_mag(
        "1", chunk_size=chunk_size, chunks_per_shard=chunks_per_shard
    )
    wk_mag.write(cube_data)
    time_stop(f"Conversion of {source_nifti_path}")
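# Example invocation of convert_nifti (a sketch; the paths and parameter
# values are illustrative assumptions, not values from this repository):
#
#   convert_nifti(
#       source_nifti_path=Path("data/brain.nii.gz"),
#       target_path=Path("output/my_dataset"),
#       layer_name="color",
#       dtype="uint8",
#       voxel_size=(11.24, 11.24, 25.0),
#       data_format=DataFormat.WKW,
#       chunk_size=Vec3Int.full(32),
#       chunks_per_shard=Vec3Int.full(32),
#   )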