def convert_cube_job(
    source_knossos_info: KnossosDatasetInfo, args: Tuple[View, int]
) -> None:
    target_view, _ = args

    time_start(f"Converting of {target_view.bounding_box}")
    cube_size = cast(Tuple[int, int, int], (CUBE_EDGE_LEN,) * 3)

    offset = target_view.bounding_box.in_mag(target_view.mag).topleft
    size = target_view.bounding_box.in_mag(target_view.mag).size
    buffer = np.zeros(size.to_tuple(), dtype=target_view.get_dtype())
    with open_knossos(source_knossos_info) as source_knossos:
        for x in range(0, size.x, CUBE_EDGE_LEN):
            for y in range(0, size.y, CUBE_EDGE_LEN):
                for z in range(0, size.z, CUBE_EDGE_LEN):
                    cube_data = source_knossos.read(
                        (offset + Vec3Int(x, y, z)).to_tuple(), cube_size
                    )
                    buffer[
                        x : (x + CUBE_EDGE_LEN),
                        y : (y + CUBE_EDGE_LEN),
                        z : (z + CUBE_EDGE_LEN),
                    ] = cube_data
    target_view.write(buffer)

    time_stop(f"Converting of {target_view.bounding_box}")
Example 2
def convert_raw(
    source_raw_path: Path,
    target_path: Path,
    layer_name: str,
    input_dtype: str,
    shape: Tuple[int, int, int],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    order: str = "F",
    voxel_size: Optional[Tuple[float, float, float]] = (1.0, 1.0, 1.0),
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
    compress: bool = True,
    executor_args: Optional[argparse.Namespace] = None,
) -> MagView:
    assert order in ("C", "F")
    time_start(f"Conversion of {source_raw_path}")

    if voxel_size is None:
        voxel_size = (1.0, 1.0, 1.0)
    wk_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    wk_layer = wk_ds.get_or_add_layer(
        layer_name,
        "color",
        dtype_per_layer=np.dtype(input_dtype),
        num_channels=1,
        data_format=data_format,
    )
    wk_layer.bounding_box = BoundingBox((0, 0, 0), shape)
    wk_mag = wk_layer.get_or_add_mag("1",
                                     chunk_size=chunk_size,
                                     chunks_per_shard=chunks_per_shard,
                                     compress=compress)

    # Parallel chunk conversion
    with get_executor_for_args(executor_args) as executor:
        wait_and_ensure_success(
            executor.map_to_futures(
                partial(
                    _raw_chunk_converter,
                    source_raw_path=source_raw_path,
                    target_mag_view=wk_mag,
                    input_dtype=input_dtype,
                    shape=shape,
                    order=order,
                    flip_axes=flip_axes,
                ),
                wk_layer.bounding_box.chunk(chunk_size=chunk_size *
                                            chunks_per_shard),
            ))

    time_stop(f"Conversion of {source_raw_path}")
    return wk_mag
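
A hypothetical invocation of convert_raw; the paths, shape, dtype, and voxel size below are placeholders rather than values from the original project, and DataFormat.WKW is assumed as the target data format.

from pathlib import Path

mag_view = convert_raw(
    source_raw_path=Path("input/volume_1024x1024x512_uint8.raw"),
    target_path=Path("output/my_dataset"),
    layer_name="color",
    input_dtype="uint8",
    shape=(1024, 1024, 512),
    data_format=DataFormat.WKW,
    chunk_size=Vec3Int(32, 32, 32),
    chunks_per_shard=Vec3Int(32, 32, 32),
    voxel_size=(11.24, 11.24, 25.0),
)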
Example 3
def main(args: argparse.Namespace) -> None:

    # TODO: Read the bounding boxes via the skeleton API once
    # https://github.com/scalableminds/webknossos-libs/issues/482 is done.
    nml_regex = re.compile(
        r'<userBoundingBox .*name="Limits of flood-fill \(source_id=(\d+), target_id=(\d+), seed=([\d,]+), timestamp=(\d+)\)".*topLeftX="(\d+)" topLeftY="(\d+)" topLeftZ="(\d+)" width="(\d+)" height="(\d+)" depth="(\d+)" />'
    )

    bboxes: List[FloodFillBbox] = []
    with open(args.nml_path, "r", encoding="utf-8") as nml_file:
        lines = nml_file.readlines()
    for line in lines:
        matches = nml_regex.findall(line)
        for match in matches:
            # Each match is a tuple of (source_id, target_id, seed, timestamp,
            # top_left_x, top_left_y, top_left_z, width, height, depth)
            bboxes.append(
                FloodFillBbox(
                    bounding_box=BoundingBox(
                        (int(match[4]), int(match[5]), int(match[6])),
                        (int(match[7]), int(match[8]), int(match[9])),
                    ),
                    seed_position=Vec3Int(*(int(x) for x in match[2].split(","))),
                    source_id=int(match[0]),
                    target_id=int(match[1]),
                    timestamp=int(match[3]),
                ))
    bboxes = sorted(bboxes, key=lambda x: x.timestamp)

    time_start("Merge with fallback layer")
    data_mag = merge_with_fallback_layer(
        args.output_path,
        args.volume_path,
        args.segmentation_layer_path,
    )
    time_stop("Merge with fallback layer")

    time_start("All floodfills")
    for floodfill in bboxes:
        time_start("Floodfill")
        execute_floodfill(
            data_mag,
            floodfill.seed_position,
            floodfill.bounding_box,
            floodfill.source_id,
            floodfill.target_id,
        )
        time_stop("Floodfill")
    time_stop("All floodfills")

    time_start("Recompute downsampled mags")
    data_mag.layer.redownsample()
    time_stop("Recompute downsampled mags")
Example 4
def tile_cubing_job(
    args: Tuple[View, List[int], str, int, Tuple[int, int, int],
                Dict[str, int], Dict[str, int], Dict[str, int], str, int]
) -> int:
    (
        target_view,
        z_batches,
        input_path_pattern,
        batch_size,
        tile_size,
        min_dimensions,
        max_dimensions,
        decimal_lengths,
        dtype,
        num_channels,
    ) = args
    largest_value_in_chunk = 0  # This is used to compute the largest_segmentation_id if it is a segmentation layer

    # Iterate over the z batches
    # Batching is useful to utilize IO more efficiently
    for z_batch in get_chunks(z_batches, batch_size):
        try:
            time_start(f"Cubing of z={z_batch[0]}-{z_batch[-1]}")
            for x in range(min_dimensions["x"], max_dimensions["x"] + 1):
                for y in range(min_dimensions["y"], max_dimensions["y"] + 1):
                    # Allocate a large buffer for all images in this batch.
                    # Shape will be (channel_count, x, y, z).
                    # Using Fortran order for the buffer prevents the data
                    # from being copied in the Rust backend.
                    buffer_shape = [
                        num_channels,
                        tile_size[0],
                        tile_size[1],
                        len(z_batch),
                    ]
                    buffer = np.empty(buffer_shape, dtype=dtype, order="F")
                    for z in z_batch:
                        # Read the file if it exists, otherwise use zeros
                        file_name = find_file_with_dimensions(
                            input_path_pattern, x, y, z, decimal_lengths)
                        if file_name:
                            # read the image
                            image = read_image_file(
                                file_name,
                                target_view.header.voxel_type,
                                z,
                                None,
                                None,
                            )
                        else:
                            # add zeros instead
                            image = np.zeros(
                                tile_size + (1, ),
                                dtype=target_view.header.voxel_type,
                            )
                        # The size of an image might be smaller than the buffer if the tile is at the bottom/right border
                        buffer[:, :image.shape[0], :image.shape[1],
                               z - z_batch[0]] = image.transpose(
                                   (2, 0, 1, 3))[:, :, :, 0]

                    if np.any(buffer != 0):
                        offset = (
                            (x - min_dimensions["x"]) * tile_size[0],
                            (y - min_dimensions["y"]) * tile_size[1],
                            z_batch[0] - target_view.global_offset.z,
                        )
                        target_view.write(data=buffer, offset=offset)
                        largest_value_in_chunk = max(largest_value_in_chunk,
                                                     np.max(buffer))
            time_stop(f"Cubing of z={z_batch[0]}-{z_batch[-1]}")
        except Exception as exc:
            logging.error("Cubing of z={}-{} failed with: {}".format(
                z_batch[0], z_batch[-1], exc))
            raise exc
    return largest_value_in_chunk
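
tile_cubing_job relies on a get_chunks helper to batch the z sections. The sketch below shows what such a helper can plausibly look like, assuming simple fixed-size batching; the actual wkcuber utility may differ in details.

from typing import Iterator, List, TypeVar

T = TypeVar("T")

def get_chunks(arr: List[T], chunk_size: int) -> Iterator[List[T]]:
    # Yield consecutive batches of at most `chunk_size` elements.
    for i in range(0, len(arr), chunk_size):
        yield arr[i : i + chunk_size]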
Example 5
def merge_with_fallback_layer(
    output_path: Path,
    volume_annotation_path: Path,
    segmentation_layer_path: Path,
) -> MagView:

    assert not output_path.exists(), f"Dataset at {output_path} already exists"

    # Prepare output dataset by creating a shallow copy of the dataset
    # determined by segmentation_layer_path, but do a deep copy of
    # segmentation_layer_path itself (so that we can mutate it).
    input_segmentation_dataset = wk.Dataset.open(
        segmentation_layer_path.parent)
    time_start("Prepare output dataset")
    output_dataset = input_segmentation_dataset.shallow_copy_dataset(
        output_path,
        name=output_path.name,
        make_relative=True,
        layers_to_ignore=[segmentation_layer_path.name],
    )
    output_layer = output_dataset.add_copy_layer(segmentation_layer_path,
                                                 segmentation_layer_path.name)
    time_stop("Prepare output dataset")

    input_segmentation_mag = input_segmentation_dataset.get_layer(
        segmentation_layer_path.name).get_finest_mag()
    with temporary_annotation_view(
            volume_annotation_path) as input_annotation_layer:
        input_annotation_mag = input_annotation_layer.get_finest_mag()
        bboxes = [
            bbox.in_mag(input_annotation_mag._mag)
            for bbox in input_annotation_mag.get_bounding_boxes_on_disk()
        ]
        output_mag = output_layer.get_mag(input_segmentation_mag.mag)

        cube_size = output_mag.info.chunk_size[
            0] * output_mag.info.chunks_per_shard[0]
        chunks_with_bboxes = BoundingBox.group_boxes_with_aligned_mag(
            bboxes, Mag(cube_size))

        assert (input_annotation_mag.info.chunks_per_shard == Vec3Int.ones()
                ), "volume annotation must have file_len=1"
        assert (input_annotation_mag.info.voxel_type ==
                input_segmentation_mag.info.voxel_type
                ), "Volume annotation must have same dtype as fallback layer"

        chunk_count = 0
        for chunk, bboxes in chunks_with_bboxes.items():
            chunk_count += 1
            logger.info(f"Processing chunk {chunk_count}...")

            time_start("Read chunk")
            data_buffer = output_mag.read(chunk.topleft,
                                          chunk.size)[0, :, :, :]
            time_stop("Read chunk")

            time_start("Read/merge bboxes")
            for bbox in bboxes:
                read_data = input_annotation_mag.read(bbox.topleft, bbox.size)
                data_buffer[bbox.offset(
                    -chunk.topleft).to_slices()] = read_data
            time_stop("Read/merge bboxes")

            time_start("Write chunk")
            output_mag.write(data_buffer, chunk.topleft)
            time_stop("Write chunk")
    return output_mag
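
The per-chunk merge above works purely in chunk-local coordinates: each annotation bbox is shifted by -chunk.topleft and pasted into the chunk buffer. A small standalone illustration using the same BoundingBox calls (the coordinates and dtype are chosen arbitrarily):

import numpy as np

chunk = BoundingBox((1024, 1024, 1024), (512, 512, 512))
annotation_bbox = BoundingBox((1100, 1200, 1300), (10, 10, 10))

data_buffer = np.zeros(chunk.size.to_tuple(), dtype=np.uint32)
read_data = np.full(annotation_bbox.size.to_tuple(), 42, dtype=np.uint32)

# Same indexing as in the merge loop: shift the bbox into chunk-local coordinates.
data_buffer[annotation_bbox.offset(-chunk.topleft).to_slices()] = read_data
assert data_buffer[76, 176, 276] == 42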
Example 6
def execute_floodfill(
    data_mag: MagView,
    seed_position: Vec3Int,
    already_processed_bbox: BoundingBox,
    source_id: int,
    target_id: int,
) -> None:
    cube_size = data_mag.info.shard_size
    cube_bbox = BoundingBox(Vec3Int(0, 0, 0), cube_size)
    chunk_with_relative_seed: List[Tuple[Vec3Int, Vec3Int]] = [
        get_chunk_pos_and_offset(seed_position, cube_size)
    ]

    # The `is_visited` array tracks which parts of the already processed bbox
    # were already traversed. Outside of that bounding box, the actual data
    # itself indicates whether the flood-fill has reached a voxel.
    is_visited = np.zeros(already_processed_bbox.size.to_tuple(),
                          dtype=np.uint8)
    chunk_count = 0

    while len(chunk_with_relative_seed) > 0:
        chunk_count += 1
        if chunk_count % 10000 == 0:
            logger.info(f"Handled seed positions {chunk_count}")

        dirty_bucket = False
        current_cube, relative_seed = chunk_with_relative_seed.pop()
        global_seed = current_cube + relative_seed

        # Reading only one voxel for the seed can be up to 30,000 times faster,
        # which is very relevant since the chunk doesn't need to be traversed
        # if the seed voxel was already covered.
        value_at_seed_position = data_mag.read(current_cube + relative_seed,
                                               (1, 1, 1))

        if value_at_seed_position == source_id or (
                already_processed_bbox.contains(global_seed)
                and value_at_seed_position == target_id and
                not is_visited[global_seed - already_processed_bbox.topleft]):
            logger.info(
                f"Handling chunk {chunk_count} with current cube {current_cube}"
            )
            time_start("read data")
            cube_data = data_mag.read(current_cube, cube_size)
            cube_data = cube_data[0, :, :, :]
            time_stop("read data")

            seeds_in_current_chunk: Set[Vec3Int] = set()
            seeds_in_current_chunk.add(relative_seed)

            time_start("traverse cube")
            while len(seeds_in_current_chunk) > 0:
                current_relative_seed = seeds_in_current_chunk.pop()
                current_global_seed = current_cube + current_relative_seed
                if already_processed_bbox.contains(current_global_seed):
                    is_visited[current_global_seed -
                               already_processed_bbox.topleft] = 1

                if cube_data[current_relative_seed] != target_id:
                    cube_data[current_relative_seed] = target_id
                    dirty_bucket = True

                # check neighbors
                for neighbor in NEIGHBORS:
                    neighbor_pos = current_relative_seed + neighbor

                    global_neighbor_pos = current_cube + neighbor_pos
                    if already_processed_bbox.contains(global_neighbor_pos):
                        if is_visited[global_neighbor_pos -
                                      already_processed_bbox.topleft]:
                            continue
                    if cube_bbox.contains(neighbor_pos):
                        if cube_data[neighbor_pos] == source_id or (
                                already_processed_bbox.contains(
                                    global_neighbor_pos)
                                and cube_data[neighbor_pos] == target_id):
                            seeds_in_current_chunk.add(neighbor_pos)
                    else:
                        chunk_with_relative_seed.append(
                            get_chunk_pos_and_offset(global_neighbor_pos,
                                                     cube_size))
            time_stop("traverse cube")

            if dirty_bucket:
                time_start("write chunk")
                data_mag.write(cube_data, current_cube)
                time_stop("write chunk")
Example 7
def cubing_job(
    args: Tuple[View, Mag, InterpolationModes, List[str], int, bool,
                Optional[int], Optional[int], str, int]
) -> Any:
    (
        target_view,
        target_mag,
        interpolation_mode,
        source_file_batches,
        batch_size,
        pad,
        channel_index,
        sample_index,
        dtype,
        num_channels,
    ) = args

    downsampling_needed = target_mag != Mag(1)
    largest_value_in_chunk = 0  # This is used to compute the largest_segmentation_id if it is a segmentation layer

    max_image_size = (target_view.size[0], target_view.size[1])

    # Iterate over batches of contiguous z sections
    # The batches have a maximum size of `batch_size`
    # Batched iteration allows utilizing IO more efficiently
    first_z_idx = target_view.global_offset.z
    for source_file_batch in get_chunks(source_file_batches, batch_size):
        try:
            time_start(
                f"Cubing of z={first_z_idx}-{first_z_idx + len(source_file_batch)}"
            )
            # Allocate a large buffer for all images in this batch.
            # Shape will be (channel_count, x, y, z).
            # Using Fortran order for the buffer prevents the data
            # from being copied in the Rust backend.
            buffer_shape = ([num_channels] + list(max_image_size) +
                            [len(source_file_batch)])
            buffer = np.empty(buffer_shape, dtype=dtype, order="F")

            # Iterate over each z section in the batch
            for i, file_name in enumerate(source_file_batch):
                z = first_z_idx + i
                # Image shape will be (x, y, channel_count, z=1)
                image = read_image_file(
                    file_name,
                    target_view.info.voxel_type,
                    z,
                    channel_index,
                    sample_index,
                )

                if pad:
                    image = np.pad(
                        image,
                        mode="constant",
                        pad_width=[
                            (0, max_image_size[0] - image.shape[0]),
                            (0, max_image_size[1] - image.shape[1]),
                            (0, 0),
                            (0, 0),
                        ],
                    )
                else:
                    assert (
                        image.shape[0:2] == max_image_size
                    ), "Section z={} has the wrong dimensions: {} (expected {}). Consider using --pad.".format(
                        z, image.shape, max_image_size)
                buffer[:, :, :, i] = image.transpose((2, 0, 1, 3))[:, :, :, 0]
            del image

            if downsampling_needed:
                buffer = downsample_unpadded_data(buffer, target_mag,
                                                  interpolation_mode)
            buffer_z_offset = (first_z_idx -
                               target_view.global_offset.z) // target_mag.z
            target_view.write(offset=(0, 0, buffer_z_offset), data=buffer)
            largest_value_in_chunk = max(largest_value_in_chunk,
                                         np.max(buffer))
            time_stop(
                f"Cubing of z={first_z_idx}-{first_z_idx + len(source_file_batch)}"
            )
            first_z_idx += len(source_file_batch)

        except Exception as exc:
            logging.error("Cubing of z={}-{} failed with {}".format(
                first_z_idx, first_z_idx + len(source_file_batch), exc))
            raise exc

    return largest_value_in_chunk
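
When pad is set, each section is zero-padded on the right/bottom up to the maximum section size before being copied into the batch buffer. A small standalone illustration of that np.pad call (the shapes are made up):

import numpy as np

max_image_size = (512, 512)
# A section that is smaller than the target size; shape is (x, y, channels, z=1).
image = np.ones((500, 480, 1, 1), dtype=np.uint8)

image = np.pad(
    image,
    mode="constant",
    pad_width=[
        (0, max_image_size[0] - image.shape[0]),  # pad x up to 512
        (0, max_image_size[1] - image.shape[1]),  # pad y up to 512
        (0, 0),
        (0, 0),
    ],
)
assert image.shape == (512, 512, 1, 1)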
Example 8
def convert_nifti(
    source_nifti_path: Path,
    target_path: Path,
    layer_name: str,
    dtype: str,
    voxel_size: Optional[Tuple[float, ...]],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    is_segmentation_layer: bool = False,
    bbox_to_enforce: Optional[BoundingBox] = None,
    use_orientation_header: bool = False,
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
) -> None:
    shard_size = chunk_size * chunks_per_shard
    time_start(f"Converting of {source_nifti_path}")

    source_nifti = nib.load(str(source_nifti_path.resolve()))

    if use_orientation_header:
        # Get canonical representation of data to incorporate
        # encoded transformations. Needs to be flipped later
        # to match the coordinate system of WKW.
        source_nifti = nib.funcs.as_closest_canonical(source_nifti,
                                                      enforce_diag=False)

    cube_data = np.array(source_nifti.get_fdata())

    category_type: LayerCategoryType = ("segmentation"
                                        if is_segmentation_layer else "color")
    logging.debug(f"Assuming {category_type} as layer type for {layer_name}")

    if len(source_nifti.shape) == 3:
        cube_data = cube_data.reshape((1, ) + source_nifti.shape)

    elif len(source_nifti.shape) == 4:
        cube_data = np.transpose(cube_data, (3, 0, 1, 2))

    else:
        logging.warning(
            "Conversion of {} failed! Too many or too few dimensions".format(
                source_nifti_path))

        return

    if use_orientation_header:
        # Flip y and z to transform data into wkw's coordinate system.
        cube_data = np.flip(cube_data, (2, 3))

    if flip_axes:
        cube_data = np.flip(cube_data, flip_axes)

    if voxel_size is None:
        voxel_size = tuple(map(float, source_nifti.header["pixdim"][:3]))

    logging.info(f"Using voxel_size: {voxel_size}")
    cube_data = to_target_datatype(cube_data, dtype, is_segmentation_layer)

    # If a bounding box is enforced, pad or crop the data to its size and position
    if bbox_to_enforce is not None:
        target_topleft = np.array((0, ) + tuple(bbox_to_enforce.topleft))
        target_size = np.array((1, ) + tuple(bbox_to_enforce.size))

        cube_data = pad_or_crop_to_size_and_topleft(cube_data, target_size,
                                                    target_topleft)

    # Writing compressed wkw requires the data to be aligned to multiples of the shard size.
    # Pad the data accordingly (no padding is added if the shape is already aligned).
    padding_offset = (-np.array(cube_data.shape[1:4])) % np.array(shard_size)
    cube_data = np.pad(
        cube_data,
        (
            (0, 0),
            (0, int(padding_offset[0])),
            (0, int(padding_offset[1])),
            (0, int(padding_offset[2])),
        ),
    )

    wk_ds = Dataset(
        target_path,
        voxel_size=cast(Tuple[float, float, float], voxel_size or (1, 1, 1)),
        exist_ok=True,
    )
    wk_layer = (wk_ds.get_or_add_layer(
        layer_name,
        category_type,
        dtype_per_layer=np.dtype(dtype),
        data_format=data_format,
        largest_segment_id=int(np.max(cube_data) + 1),
    ) if is_segmentation_layer else wk_ds.get_or_add_layer(
        layer_name,
        category_type,
        data_format=data_format,
        dtype_per_layer=np.dtype(dtype),
    ))
    wk_mag = wk_layer.get_or_add_mag("1",
                                     chunk_size=chunk_size,
                                     chunks_per_shard=chunks_per_shard)
    wk_mag.write(cube_data)

    time_stop(f"Converting of {source_nifti_path}")