def upsample_cube_job(
    args: Tuple[View, View, int],
    mag_factors: List[float],
    buffer_shape: Vec3Int,
) -> None:
    (source_view, target_view, _i) = args

    assert all(
        1 >= f for f in mag_factors
    ), f"mag_factors ({mag_factors}) for upsampling must not be greater than 1"

    try:
        num_channels = target_view.info.num_channels
        target_size = target_view.bounding_box.in_mag(target_view.mag).size
        shape = (num_channels,) + target_size.to_tuple()
        file_buffer = np.zeros(shape, target_view.get_dtype())

        tiles = product(
            *[
                list(range(0, math.ceil(length)))
                for length in target_size.to_np() / buffer_shape.to_np()
            ]
        )

        for tile in tiles:
            target_offset = Vec3Int(tile) * buffer_shape
            source_offset = _vec3int_mulf(target_offset, mag_factors)
            source_size = source_view.bounding_box.in_mag(source_view.mag).size
            source_size = _vec3int_mulf(buffer_shape, mag_factors).pairmin(
                source_size - source_offset
            )

            bbox = BoundingBox(source_offset, source_size)
            cube_buffer_channels = source_view.read(
                relative_bounding_box=bbox.from_mag_to_mag1(source_view.mag),
            )

            for channel_index in range(num_channels):
                cube_buffer = cube_buffer_channels[channel_index]

                if not np.all(cube_buffer == 0):
                    # Upsample the buffer
                    inverse_factors = [int(1 / f) for f in mag_factors]
                    data_cube = upsample_cube(cube_buffer, inverse_factors)

                    buffer_offset = target_offset
                    buffer_end = buffer_offset + data_cube.shape

                    file_buffer[
                        channel_index,
                        buffer_offset[0] : buffer_end[0],
                        buffer_offset[1] : buffer_end[1],
                        buffer_offset[2] : buffer_end[2],
                    ] = data_cube

        # Write the upsampled buffer to target
        if source_view.info.num_channels == 1:
            file_buffer = file_buffer[0]  # remove channel dimension
        target_view.write(file_buffer)

    except Exception as exc:
        logging.error(
            f"Upsampling of target {target_view.bounding_box} failed with {exc}"
        )
        raise exc
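# `_vec3int_mulf` and `upsample_cube` are used above but not part of this excerpt.
# The following is a minimal sketch of what compatible helpers could look like
# (assumed, not the library's actual implementation): component-wise float scaling
# of a Vec3Int, and nearest-neighbor upsampling by repeating each voxel per axis.
def _vec3int_mulf(vec: Vec3Int, factors: List[float]) -> Vec3Int:
    # Scale each component by the corresponding factor and truncate back to int.
    return Vec3Int(tuple(int(v * f) for v, f in zip(vec.to_list(), factors)))


def upsample_cube(cube_buffer: np.ndarray, factors: List[int]) -> np.ndarray:
    # Repeat every voxel `factor` times along each axis (nearest-neighbor upsampling).
    for axis, factor in enumerate(factors):
        cube_buffer = np.repeat(cube_buffer, factor, axis=axis)
    return cube_buffer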
def _parse_bounding_box(cls, bounding_box_element: Element) -> BoundingBox:
    topleft = (
        int(bounding_box_element.get("topLeftX", 0)),
        int(bounding_box_element.get("topLeftY", 0)),
        int(bounding_box_element.get("topLeftZ", 0)),
    )
    size = (
        int(bounding_box_element.get("width", 0)),
        int(bounding_box_element.get("height", 0)),
        int(bounding_box_element.get("depth", 0)),
    )
    color = None
    if bounding_box_element.get("color.r"):  # also checks for empty strings
        color = (
            float(enforce_not_null(bounding_box_element.get("color.r"))),
            float(enforce_not_null(bounding_box_element.get("color.g"))),
            float(enforce_not_null(bounding_box_element.get("color.b"))),
            float(enforce_not_null(bounding_box_element.get("color.a"))),
        )

    return BoundingBox(
        topleft,
        size,
        name=bounding_box_element.get("name"),
        is_visible=bounding_box_element.get("isVisible", "true") == "true",
        id=bounding_box_element.get("id"),
        color=color,
    )
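# For reference, a user bounding box element carries exactly the attributes read
# above; an illustrative (not verbatim) example of such an element:
#
#   <userBoundingBox id="1" name="Bounding box 1" isVisible="true"
#                    color.r="0.27" color.g="0.64" color.b="0.19" color.a="1.0"
#                    topLeftX="2371" topLeftY="4063" topLeftZ="1676"
#                    width="891" height="579" depth="232" />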
def test_buffered_slice_reader_along_different_axis(tmp_path: Path) -> None:
    test_cube = (np.random.random((3, 13, 13, 13)) * 100).astype(np.uint8)
    cube_size_without_channel = Vec3Int(test_cube.shape[1:])
    offset = Vec3Int(5, 10, 20)

    for dim in [0, 1, 2]:
        ds = Dataset(tmp_path / f"buffered_slice_reader_{dim}", voxel_size=(1, 1, 1))
        mag_view = ds.add_layer("color", COLOR_CATEGORY, num_channels=3).add_mag(1)
        mag_view.write(test_cube, absolute_offset=offset)

        with mag_view.get_buffered_slice_reader(
            buffer_size=5, dimension=dim
        ) as reader_a, mag_view.get_buffered_slice_reader(
            absolute_bounding_box=BoundingBox(offset, cube_size_without_channel),
            buffer_size=5,
            dimension=dim,
        ) as reader_b:
            i = 0
            for slice_data_a, slice_data_b in zip(reader_a, reader_b):
                if dim == 0:
                    original_slice = test_cube[:, i, :, :]
                elif dim == 1:
                    original_slice = test_cube[:, :, i, :]
                else:  # dim == 2
                    original_slice = test_cube[:, :, :, i]
                i += 1

                assert np.array_equal(slice_data_a, original_slice)
                assert np.array_equal(slice_data_b, original_slice)
def bounding_box(self, bbox: BoundingBox) -> None:
    """
    Updates the offset and size of the bounding box of this layer in the properties.
    """
    self.dataset._ensure_writable()
    assert (
        bbox.topleft.is_positive()
    ), f"Updating the bounding box of layer {self} to {bbox} failed, topleft must not contain negative dimensions."
    self._properties.bounding_box = bbox
    self.dataset._export_as_json()
    for mag in self.mags.values():
        mag._array.ensure_size(
            bbox.align_with_mag(mag.mag).in_mag(mag.mag).bottomright
        )
def main(args: argparse.Namespace) -> None:
    # Use the skeleton API to read the bounding boxes once
    # https://github.com/scalableminds/webknossos-libs/issues/482 is done.
    nml_regex = re.compile(
        r'<userBoundingBox .*name="Limits of flood-fill \(source_id=(\d+), target_id=(\d+), seed=([\d,]+), timestamp=(\d+)\)".*topLeftX="(\d+)" topLeftY="(\d+)" topLeftZ="(\d+)" width="(\d+)" height="(\d+)" depth="(\d+)" />'
    )

    bboxes: List[FloodFillBbox] = []
    with open(args.nml_path, "r", encoding="utf-8") as nml_file:
        lines = nml_file.readlines()

    for line in lines:
        matches = nml_regex.findall(line)
        for match in matches:
            # each match is a tuple of (source_id, target_id, seed, timestamp,
            # top_left_x, top_left_y, top_left_z, width, height, depth)
            bboxes.append(
                FloodFillBbox(
                    bounding_box=BoundingBox(
                        (match[4], match[5], match[6]),
                        (match[7], match[8], match[9]),
                    ),
                    seed_position=Vec3Int(match[2].split(",")),
                    source_id=int(match[0]),
                    target_id=int(match[1]),
                    timestamp=int(match[3]),
                )
            )
    bboxes = sorted(bboxes, key=lambda x: x.timestamp)

    time_start("Merge with fallback layer")
    data_mag = merge_with_fallback_layer(
        args.output_path,
        args.volume_path,
        args.segmentation_layer_path,
    )
    time_stop("Merge with fallback layer")

    time_start("All floodfills")
    for floodfill in bboxes:
        time_start("Floodfill")
        execute_floodfill(
            data_mag,
            floodfill.seed_position,
            floodfill.bounding_box,
            floodfill.source_id,
            floodfill.target_id,
        )
        time_stop("Floodfill")
    time_stop("All floodfills")

    time_start("Recompute downsampled mags")
    data_mag.layer.redownsample()
    time_stop("Recompute downsampled mags")
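# `FloodFillBbox` is referenced above but not defined in this excerpt. A minimal
# sketch of a compatible container (assumed; the field names follow the usage above):
from dataclasses import dataclass


@dataclass(frozen=True)
class FloodFillBbox:
    bounding_box: BoundingBox
    seed_position: Vec3Int
    source_id: int
    target_id: int
    timestamp: int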
def check_properties(annotation: wk.Annotation) -> None:
    assert len(annotation.user_bounding_boxes) == 2
    assert annotation.user_bounding_boxes[0].topleft.x == 2371
    assert annotation.user_bounding_boxes[0].name == "Bounding box 1"
    assert annotation.user_bounding_boxes[0].is_visible
    assert annotation.user_bounding_boxes[0].id == "1"
    assert annotation.user_bounding_boxes[1] == BoundingBox(
        (371, 4063, 1676), (891, 579, 232)
    )
    assert annotation.user_bounding_boxes[1].name == "Bounding box 2"
    assert not annotation.user_bounding_boxes[1].is_visible
    assert annotation.user_bounding_boxes[1].color == (
        0.2705882489681244,
        0.6470588445663452,
        0.19607843458652496,
        1.0,
    )
def _get_slice_generator(self) -> Generator[np.ndarray, None, None]:
    for batch in get_chunks(
        list(
            range(
                self.bbox_current_mag.topleft[self.dimension],
                self.bbox_current_mag.bottomright[self.dimension],
            )
        ),
        self.buffer_size,
    ):
        n_slices = len(batch)
        batch_start_idx = batch[0]
        assert (
            n_slices <= self.buffer_size
        ), f"n_slices should be at most buffer_size, but {n_slices} > {self.buffer_size}"

        bbox_offset = self.bbox_current_mag.topleft
        bbox_size = self.bbox_current_mag.size

        buffer_bounding_box = BoundingBox.from_tuple2(
            (
                bbox_offset[: self.dimension]
                + (batch_start_idx,)
                + bbox_offset[self.dimension + 1 :],
                bbox_size[: self.dimension]
                + (n_slices,)
                + bbox_size[self.dimension + 1 :],
            )
        )

        if self.use_logging:
            info(
                f"({getpid()}) Reading {n_slices} slices at position {batch_start_idx}."
            )
        data = self.view.read(
            absolute_bounding_box=buffer_bounding_box.from_mag_to_mag1(self.view.mag)
        )

        # The '+1' is important because the first dimension is the channel.
        for current_slice in np.rollaxis(data, self.dimension + 1):
            yield current_slice
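# `get_chunks` is referenced above but not part of this excerpt. A minimal sketch of
# a compatible helper (assumed): split a sequence into consecutive batches of at most
# `chunk_size` elements, which is what the slice generator relies on.
def get_chunks(arr: List[int], chunk_size: int) -> Iterator[List[int]]:
    for start_index in range(0, len(arr), chunk_size):
        yield arr[start_index : start_index + chunk_size]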
def __init__(
    self,
    view: "View",
    offset: Optional[Vec3IntLike] = None,
    size: Optional[Vec3IntLike] = None,
    # buffer_size specifies how many slices are aggregated before they are flushed.
    buffer_size: int = 32,
    dimension: int = 2,  # z
    *,
    relative_bounding_box: Optional[BoundingBox] = None,  # in mag1
    absolute_bounding_box: Optional[BoundingBox] = None,  # in mag1
    use_logging: bool = False,
) -> None:
    """see `View.get_buffered_slice_reader()`"""

    self.view = view
    self.buffer_size = buffer_size
    self.dtype = self.view.get_dtype()
    assert 0 <= dimension <= 2
    self.dimension = dimension
    self.use_logging = use_logging

    if offset is not None and size is not None:
        warnings.warn(
            "[DEPRECATION] Using offset and size for a buffered slice reader is deprecated. "
            + "Please use the parameter relative_bounding_box or absolute_bounding_box in Mag(1) instead.",
            DeprecationWarning,
        )
        assert relative_bounding_box is None and absolute_bounding_box is None
        absolute_bounding_box = BoundingBox(offset, size).from_mag_to_mag1(view.mag)
        offset = None
        size = None

    assert (
        offset is None and size is None
    ), "You have to set both offset and size or neither of them."

    if relative_bounding_box is None and absolute_bounding_box is None:
        absolute_bounding_box = view.bounding_box
    if relative_bounding_box is not None:
        assert absolute_bounding_box is None
        absolute_bounding_box = relative_bounding_box.offset(view.bounding_box.topleft)

    assert absolute_bounding_box is not None
    self.bbox_current_mag = absolute_bounding_box.in_mag(view.mag)
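# Given the conversion above, the deprecated offset/size pair (in the view's mag) and
# an absolute_bounding_box in Mag(1) select the same data. Illustrative only, assuming
# `View.get_buffered_slice_reader()` forwards these parameters unchanged:
#
#   view.get_buffered_slice_reader(offset=(0, 0, 0), size=(512, 512, 32))
#   view.get_buffered_slice_reader(
#       absolute_bounding_box=BoundingBox((0, 0, 0), (512, 512, 32)).from_mag_to_mag1(view.mag)
#   )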
def merge_with_fallback_layer(
    output_path: Path,
    volume_annotation_path: Path,
    segmentation_layer_path: Path,
) -> MagView:
    assert not output_path.exists(), f"Dataset at {output_path} already exists"

    # Prepare the output dataset by creating a shallow copy of the dataset
    # determined by segmentation_layer_path, but do a deep copy of
    # segmentation_layer_path itself (so that we can mutate it).
    input_segmentation_dataset = wk.Dataset.open(segmentation_layer_path.parent)
    time_start("Prepare output dataset")
    output_dataset = input_segmentation_dataset.shallow_copy_dataset(
        output_path,
        name=output_path.name,
        make_relative=True,
        layers_to_ignore=[segmentation_layer_path.name],
    )
    output_layer = output_dataset.add_copy_layer(
        segmentation_layer_path, segmentation_layer_path.name
    )
    time_stop("Prepare output dataset")

    input_segmentation_mag = input_segmentation_dataset.get_layer(
        segmentation_layer_path.name
    ).get_finest_mag()
    with temporary_annotation_view(volume_annotation_path) as input_annotation_layer:
        input_annotation_mag = input_annotation_layer.get_finest_mag()
        bboxes = [
            bbox.in_mag(input_annotation_mag._mag)
            for bbox in input_annotation_mag.get_bounding_boxes_on_disk()
        ]
        output_mag = output_layer.get_mag(input_segmentation_mag.mag)

        cube_size = (
            output_mag.info.chunk_size[0] * output_mag.info.chunks_per_shard[0]
        )
        chunks_with_bboxes = BoundingBox.group_boxes_with_aligned_mag(
            bboxes, Mag(cube_size)
        )

        assert (
            input_annotation_mag.info.chunks_per_shard == Vec3Int.ones()
        ), "volume annotation must have file_len=1"
        assert (
            input_annotation_mag.info.voxel_type
            == input_segmentation_mag.info.voxel_type
        ), "Volume annotation must have same dtype as fallback layer"

        chunk_count = 0
        for chunk, bboxes in chunks_with_bboxes.items():
            chunk_count += 1
            logger.info(f"Processing chunk {chunk_count}...")

            time_start("Read chunk")
            data_buffer = output_mag.read(chunk.topleft, chunk.size)[0, :, :, :]
            time_stop("Read chunk")

            time_start("Read/merge bboxes")
            for bbox in bboxes:
                read_data = input_annotation_mag.read(bbox.topleft, bbox.size)
                data_buffer[bbox.offset(-chunk.topleft).to_slices()] = read_data
            time_stop("Read/merge bboxes")

            time_start("Write chunk")
            output_mag.write(data_buffer, chunk.topleft)
            time_stop("Write chunk")
    return output_mag
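# `temporary_annotation_view` is used above but not part of this excerpt. A hedged
# sketch of a compatible context manager, assuming the annotation file can be loaded
# with `wk.Annotation.load` and exposes `temporary_volume_layer_copy()`:
from contextlib import contextmanager


@contextmanager
def temporary_annotation_view(volume_annotation_path: Path) -> Iterator[wk.Layer]:
    # Materialize the volume annotation as a temporary layer so it can be read
    # like any other segmentation layer.
    annotation = wk.Annotation.load(volume_annotation_path)
    with annotation.temporary_volume_layer_copy() as layer:
        yield layer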
def execute_floodfill(
    data_mag: MagView,
    seed_position: Vec3Int,
    already_processed_bbox: BoundingBox,
    source_id: int,
    target_id: int,
) -> None:
    cube_size = data_mag.info.shard_size
    cube_bbox = BoundingBox(Vec3Int(0, 0, 0), cube_size)
    chunk_with_relative_seed: List[Tuple[Vec3Int, Vec3Int]] = [
        get_chunk_pos_and_offset(seed_position, cube_size)
    ]

    # `is_visited` tracks which parts of the already processed bbox have been
    # traversed. Outside of that bounding box, the data itself already indicates
    # whether the flood-fill has reached a voxel.
    is_visited = np.zeros(already_processed_bbox.size.to_tuple(), dtype=np.uint8)
    chunk_count = 0

    while len(chunk_with_relative_seed) > 0:
        chunk_count += 1
        if chunk_count % 10000 == 0:
            logger.info(f"Handled {chunk_count} seed positions")

        dirty_bucket = False
        current_cube, relative_seed = chunk_with_relative_seed.pop()
        global_seed = current_cube + relative_seed

        # Reading only one voxel for the seed can be up to 30,000 times faster,
        # which is very relevant since the chunk doesn't need to be traversed
        # if the seed voxel was already covered.
        value_at_seed_position = data_mag.read(current_cube + relative_seed, (1, 1, 1))

        if value_at_seed_position == source_id or (
            already_processed_bbox.contains(global_seed)
            and value_at_seed_position == target_id
            and not is_visited[global_seed - already_processed_bbox.topleft]
        ):
            logger.info(
                f"Handling chunk {chunk_count} with current cube {current_cube}"
            )
            time_start("read data")
            cube_data = data_mag.read(current_cube, cube_size)
            cube_data = cube_data[0, :, :, :]
            time_stop("read data")

            seeds_in_current_chunk: Set[Vec3Int] = set()
            seeds_in_current_chunk.add(relative_seed)

            time_start("traverse cube")
            while len(seeds_in_current_chunk) > 0:
                current_relative_seed = seeds_in_current_chunk.pop()
                current_global_seed = current_cube + current_relative_seed
                if already_processed_bbox.contains(current_global_seed):
                    is_visited[current_global_seed - already_processed_bbox.topleft] = 1

                if cube_data[current_relative_seed] != target_id:
                    cube_data[current_relative_seed] = target_id
                    dirty_bucket = True

                # check neighbors
                for neighbor in NEIGHBORS:
                    neighbor_pos = current_relative_seed + neighbor
                    global_neighbor_pos = current_cube + neighbor_pos

                    if already_processed_bbox.contains(global_neighbor_pos):
                        if is_visited[
                            global_neighbor_pos - already_processed_bbox.topleft
                        ]:
                            continue
                    if cube_bbox.contains(neighbor_pos):
                        if cube_data[neighbor_pos] == source_id or (
                            already_processed_bbox.contains(global_neighbor_pos)
                            and cube_data[neighbor_pos] == target_id
                        ):
                            seeds_in_current_chunk.add(neighbor_pos)
                    else:
                        chunk_with_relative_seed.append(
                            get_chunk_pos_and_offset(global_neighbor_pos, cube_size)
                        )
            time_stop("traverse cube")

            if dirty_bucket:
                time_start("write chunk")
                data_mag.write(cube_data, current_cube)
                time_stop("write chunk")
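# `NEIGHBORS` and `get_chunk_pos_and_offset` are used above but not shown in this
# excerpt. A minimal sketch under the assumption of 6-connectivity and shard-aligned
# chunks (not necessarily the script's exact implementation):
NEIGHBORS = [
    Vec3Int(1, 0, 0),
    Vec3Int(-1, 0, 0),
    Vec3Int(0, 1, 0),
    Vec3Int(0, -1, 0),
    Vec3Int(0, 0, 1),
    Vec3Int(0, 0, -1),
]


def get_chunk_pos_and_offset(
    global_position: Vec3Int, chunk_shape: Vec3Int
) -> Tuple[Vec3Int, Vec3Int]:
    # Return the top-left corner of the chunk containing `global_position` and the
    # position of that voxel relative to the corner.
    offset = Vec3Int(
        tuple(p % s for p, s in zip(global_position.to_list(), chunk_shape.to_list()))
    )
    return global_position - offset, offset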
def download_dataset(
    dataset_name: str,
    organization_id: str,
    sharing_token: Optional[str] = None,
    bbox: Optional[BoundingBox] = None,
    layers: Optional[List[str]] = None,
    mags: Optional[List[Mag]] = None,
    path: Optional[Union[PathLike, str]] = None,
    exist_ok: bool = False,
) -> Dataset:
    context = _get_context()
    client = context.generated_client

    dataset_info_response = dataset_info.sync_detailed(
        organization_name=organization_id,
        data_set_name=dataset_name,
        client=client,
        sharing_token=sharing_token,
    )
    assert dataset_info_response.status_code == 200, dataset_info_response
    parsed = dataset_info_response.parsed
    assert parsed is not None

    datastore_client = context.get_generated_datastore_client(parsed.data_store.url)
    optional_datastore_token = sharing_token or context.datastore_token

    actual_path = Path(dataset_name) if path is None else Path(path)
    if actual_path.exists():
        logger.warning(f"{actual_path} already exists, skipping download.")
        return Dataset.open(actual_path)

    voxel_size = cast(Tuple[float, float, float], tuple(parsed.data_source.scale))
    data_layers = parsed.data_source.data_layers
    dataset = Dataset(
        actual_path, name=parsed.name, voxel_size=voxel_size, exist_ok=exist_ok
    )
    for layer_name in layers or [i.name for i in data_layers]:
        response_layers = [i for i in data_layers if i.name == layer_name]
        assert (
            len(response_layers) > 0
        ), f"The provided layer name {layer_name} could not be found in the requested dataset."
        assert (
            len(response_layers) == 1
        ), f"The provided layer name {layer_name} was found multiple times in the requested dataset."
        response_layer = response_layers[0]
        category = cast(LayerCategoryType, response_layer.category)
        layer = dataset.add_layer(
            layer_name=layer_name,
            category=category,
            dtype_per_layer=response_layer.element_class,
            num_channels=3 if response_layer.element_class == "uint24" else 1,
            largest_segment_id=response_layer.additional_properties.get(
                "largestSegmentId", None
            ),
        )

        default_view_configuration_dict = None
        if not isinstance(response_layer.default_view_configuration, Unset):
            default_view_configuration_dict = (
                response_layer.default_view_configuration.to_dict()
            )

        if default_view_configuration_dict is not None:
            default_view_configuration = dataset_converter.structure(
                default_view_configuration_dict, LayerViewConfiguration
            )
            layer.default_view_configuration = default_view_configuration

        if bbox is None:
            response_bbox = response_layer.bounding_box
            layer.bounding_box = BoundingBox(
                response_bbox.top_left,
                (response_bbox.width, response_bbox.height, response_bbox.depth),
            )
        else:
            assert isinstance(
                bbox, BoundingBox
            ), f"Expected a BoundingBox object for the bbox parameter but got {type(bbox)}"
            layer.bounding_box = bbox

        if mags is None:
            mags = [Mag(mag) for mag in response_layer.resolutions]

        for mag in mags:
            mag_view = layer.get_or_add_mag(
                mag,
                compress=True,
                chunk_size=Vec3Int.full(32),
                chunks_per_shard=_DOWNLOAD_CHUNK_SIZE // 32,
            )
            aligned_bbox = layer.bounding_box.align_with_mag(mag, ceil=True)
            download_chunk_size_in_mag = _DOWNLOAD_CHUNK_SIZE * mag.to_vec3_int()
            for chunk in track(
                list(
                    aligned_bbox.chunk(
                        download_chunk_size_in_mag, download_chunk_size_in_mag
                    )
                ),
                description=f"Downloading layer={layer.name} mag={mag}",
            ):
                chunk_in_mag = chunk.in_mag(mag)
                response = dataset_download.sync_detailed(
                    organization_name=organization_id,
                    data_set_name=dataset_name,
                    data_layer_name=layer_name,
                    mag=mag.to_long_layer_name(),
                    client=datastore_client,
                    token=optional_datastore_token,
                    x=chunk.topleft.x,
                    y=chunk.topleft.y,
                    z=chunk.topleft.z,
                    width=chunk_in_mag.size.x,
                    height=chunk_in_mag.size.y,
                    depth=chunk_in_mag.size.z,
                )
                assert response.status_code == 200, response
                assert (
                    response.headers["missing-buckets"] == "[]"
                ), f"Download contained missing buckets {response.headers['missing-buckets']}."
                data = np.frombuffer(
                    response.content, dtype=layer.dtype_per_channel
                ).reshape(layer.num_channels, *chunk_in_mag.size, order="F")
                mag_view.write(data, absolute_offset=chunk.topleft)
    return dataset
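# Example invocation (illustrative values; requires an authenticated webknossos
# context and network access, so it is shown commented out):
#
#   dataset = download_dataset(
#       "my_dataset",
#       "my_organization",
#       bbox=BoundingBox((0, 0, 0), (512, 512, 512)),
#       layers=["color"],
#       mags=[Mag(1)],
#       path="downloaded_dataset",
#   )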
class DatasetProperties:
    id: Dict[str, str]
    scale: Tuple[float, float, float]
    data_layers: List[Union[SegmentationLayerProperties, LayerProperties]]
    default_view_configuration: Optional[DatasetViewConfiguration] = None


# --- Converter --------------------

dataset_converter = cattr.Converter()

# register (un-)structure hooks for non-attr-classes
bbox_to_wkw: Callable[[BoundingBox], dict] = lambda o: o.to_wkw_dict()
dataset_converter.register_unstructure_hook(BoundingBox, bbox_to_wkw)
dataset_converter.register_structure_hook(
    BoundingBox, lambda d, _: BoundingBox.from_wkw_dict(d)
)


def mag_unstructure(mag: Mag) -> List[int]:
    return mag.to_list()


dataset_converter.register_unstructure_hook(Mag, mag_unstructure)
dataset_converter.register_structure_hook(Mag, lambda d, _: Mag(d))

vec3int_to_array: Callable[[Vec3Int], List[int]] = lambda o: o.to_list()
dataset_converter.register_unstructure_hook(Vec3Int, vec3int_to_array)
dataset_converter.register_structure_hook(
    Vec3Int, lambda d, _: Vec3Int.full(d) if isinstance(d, int) else Vec3Int(d)
)
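# Example round-trip through the converter (a minimal sketch; assumes BoundingBox
# instances compare equal when their topleft and size match):
example_bbox = BoundingBox((0, 0, 0), (128, 128, 128))
serialized = dataset_converter.unstructure(example_bbox)  # dict in the wkw format
assert dataset_converter.structure(serialized, BoundingBox) == example_bbox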