def test_copysegmentation_dvid_to_zarr(setup_dvid_to_zarr):
    template_dir, config, volume, dvid_address, repo_uuid, output_file = setup_dvid_to_zarr

    # Modify the config from above to compute pyramid scales,
    # and choose a bounding box that is aligned with the bricks even at scale 2
    # (just for easier testing).
    box_zyx = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = box_zyx
    config["copysegmentation"]["pyramid-depth"] = 2

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, _workflow = launch_flow(template_dir, 1)

    box_zyx = np.array(box_zyx)

    scale_0_vol = volume[box_to_slicing(*box_zyx)]
    scale_1_vol = downsample_labels(scale_0_vol, 2, True)
    scale_2_vol = downsample_labels(scale_1_vol, 2, True)

    store = zarr.NestedDirectoryStore(f"{execution_dir}/{output_file}")
    f = zarr.open(store, 'r')
    output_0_vol = f['s0'][box_to_slicing(*(box_zyx // 1))]
    output_1_vol = f['s1'][box_to_slicing(*(box_zyx // 2))]
    output_2_vol = f['s2'][box_to_slicing(*(box_zyx // 4))]

    assert (output_0_vol == scale_0_vol).all(), \
        "Scale 0: Written vol does not match expected"
    assert (output_1_vol == scale_1_vol).all(), \
        "Scale 1: Written vol does not match expected"
    assert (output_2_vol == scale_2_vol).all(), \
        "Scale 2: Written vol does not match expected"
Example #2
    def get_subvolume(self, box_zyx, scale=0):
        box_zyx = np.array(box_zyx)
        orig_box = box_zyx.copy()
        box_zyx -= (self._global_offset // (2**scale))

        clipped_box = box_intersection(box_zyx, [(0,0,0), self.zarr_dataset(scale).shape])
        if (clipped_box == box_zyx).all():
            return self.zarr_dataset(scale)[box_to_slicing(*box_zyx.tolist())]

        # Note that this message shows the true zarr storage bounds,
        # and doesn't show the logical bounds according to global_offset (if any).
        msg = f"Zarr Request is out-of-bounds (XYZ): {orig_box[:, ::-1].tolist()}"
        if self._out_of_bounds_access in ("permit", "permit-empty"):
            logger.warning(msg)
        else:
            msg += "\nAdd 'out-of-bounds-access' to your config to allow such requests"
            raise RuntimeError(msg)

        if (clipped_box[1] - clipped_box[0] <= 0).any():
            # request is completely out-of-bounds; just return zeros
            return np.zeros(box_zyx[1] - box_zyx[0], self.dtype)

        # Request is partially out-of-bounds; read what we can, zero-fill for the rest.
        clipped_vol = self.zarr_dataset(scale)[box_to_slicing(*clipped_box.tolist())]
        result = np.zeros(box_zyx[1] - box_zyx[0], self.dtype)
        localbox = clipped_box - box_zyx[0]
        result[box_to_slicing(*localbox)] = clipped_vol
        return result
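
The box_intersection helper used above can be sketched as an elementwise max/min over the two boxes (again an assumption inferred from usage on this page); a stop at or below its start on any axis signals an empty intersection:

import numpy as np

def box_intersection_sketch(box_a, box_b):
    # Intersect two (start, stop) boxes, axis by axis.
    return np.array([np.maximum(box_a[0], box_b[0]),
                     np.minimum(box_a[1], box_b[1])])

# A request hanging off the edge of a (128,128,128) dataset:
req = np.array([[100, 100, 100], [160, 160, 160]])
clipped = box_intersection_sketch(req, [(0, 0, 0), (128, 128, 128)])
assert (clipped == [[100, 100, 100], [128, 128, 128]]).all()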
Example #3
def test_write(volume_setup):
    tmpdir = tempfile.mkdtemp()
    config, volume = volume_setup
    global_offset = config["zarr"]["global-offset"][::-1]

    config["zarr"]["path"] = f"{tmpdir}/test_zarr_service_testvol_WRITE.zarr"
    if os.path.exists(config["zarr"]["path"]):
        os.unlink(config["zarr"]["path"])

    # Can't initialize service if file doesn't exist
    with pytest.raises(RuntimeError) as excinfo:
        ZarrVolumeService(config)
    assert 'create-if-necessary' in str(excinfo.value)

    assert not os.path.exists(config["zarr"]["path"])
    config["zarr"]["create-if-necessary"] = True
    config["zarr"]["creation-settings"] = {
        "shape": [*volume.shape][::-1],
        "dtype": str(volume.dtype),
        "chunk-shape": [32, 32, 32],
        "max-scale": 0
    }

    # Write some data
    box = [(30, 40, 50), (50, 60, 70)]
    box = np.array(box)
    subvol = volume[box_to_slicing(*box)]
    service = ZarrVolumeService(config)
    service.write_subvolume(subvol, box[0] + global_offset)

    # Read it back.
    subvol = service.get_subvolume(box + global_offset)
    assert (subvol == volume[box_to_slicing(*box)]).all()

    # Write some out-of-bounds zeros
    oob_box = box.copy()
    oob_box[1, 2] = 500
    subvol = np.zeros(oob_box[1] - oob_box[0], int)
    service.write_subvolume(subvol, oob_box[0] + global_offset)

    # Read it back.
    readback = service.get_subvolume(oob_box + global_offset)
    assert (readback == subvol).all()

    # Try writing something other than zeros -- should fail
    subvol[:, :, -1] = 1
    with pytest.raises(RuntimeError):
        service.write_subvolume(subvol, oob_box[0] + global_offset)
Example #4
def uncompress_volume(method,
                      encoded_data,
                      dtype,
                      encoded_box_zyx,
                      box_zyx=None):
    """
    Uncompress the given encoded data using the specified scheme.
    If the data was encoded into a box that is larger than the box of interest,
    specify a separate box_zyx for the subvolume of interest.
    """
    if method == 'gzip_labelarray':
        volume = decode_labelarray_volume(encoded_box_zyx, encoded_data)
    elif method == 'lz4':
        shape = encoded_box_zyx[1] - encoded_box_zyx[0]
        buf = lz4.frame.decompress(encoded_data)
        volume = np.frombuffer(buf, dtype).reshape(shape)
    elif method == 'lz4_2x':
        shape = encoded_box_zyx[1] - encoded_box_zyx[0]
        buf = lz4.frame.decompress(encoded_data)
        buf = lz4.frame.decompress(buf)
        volume = np.frombuffer(buf, dtype).reshape(shape)
    else:
        raise ValueError(f"Unrecognized compression method: {method}")

    if box_zyx is None or (box_zyx == encoded_box_zyx).all():
        return volume
    else:
        assert (box_zyx[0] >= encoded_box_zyx[0]).all() and (box_zyx[1] <= encoded_box_zyx[1]).all(), \
            f"box_zyx ({box_zyx.tolist()}) must be contained within encoded_box_zyx ({encoded_box_zyx.tolist()})"
        vol_box = box_zyx - encoded_box_zyx[0]
        return volume[box_to_slicing(*vol_box)]
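
A hypothetical round trip for the 'lz4' scheme, assuming the encoder is just lz4.frame.compress over the C-contiguous bytes of the volume:

import lz4.frame
import numpy as np

vol = np.random.randint(100, size=(64, 64, 64), dtype=np.uint64)
encoded_box = np.array([(0, 0, 0), (64, 64, 64)])
encoded = lz4.frame.compress(np.ascontiguousarray(vol).tobytes())

# Decode the full box, then just a sub-box of interest.
assert (uncompress_volume('lz4', encoded, np.uint64, encoded_box) == vol).all()
sub_box = np.array([(10, 10, 10), (20, 20, 20)])
sub = uncompress_volume('lz4', encoded, np.uint64, encoded_box, sub_box)
assert (sub == vol[10:20, 10:20, 10:20]).all()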
Example #5
def deserialize_uint64_blocks(compressed_blocks, shape):
    """
    Reconstitute a volume that was serialized with serialize_uint64_blocks(), above.
    
    NOTE: If the volume is not 64-px aligned, then the output will NOT be C-contiguous.
    """
    if (np.array(shape) % 64).any():
        padding = 64 - ( np.array(shape) % 64 )
        aligned_shape = shape + padding
    else:
        aligned_shape = shape

    aligned_volume = np.empty( aligned_shape, dtype=np.uint64 )
    block_view = view_as_blocks( aligned_volume, (64,64,64) )
    
    for bi, (zi, yi, xi) in enumerate(np.ndindex(*block_view.shape[:3])):
        compressed_block = compressed_blocks[bi]
        
        # (See note above regarding recompression with LZ4)
        encoded_block = lz4.frame.decompress( compressed_block )
        block = decode_label_block( encoded_block )
        block_view[zi,yi,xi] = block
    
    if shape == tuple(aligned_shape):
        volume = aligned_volume
    else:
        # Trim
        volume = np.asarray(aligned_volume[box_to_slicing((0,0,0), shape)], order='C')

    return volume
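
A quick illustration of the NOTE in the docstring, using plain numpy semantics: trimming the padded volume yields a non-contiguous view, which np.asarray(..., order='C') turns into a contiguous copy.

import numpy as np

aligned = np.empty((128, 128, 128), np.uint64)
trimmed = aligned[:100, :70, :64]   # same trim as box_to_slicing((0,0,0), (100,70,64))
assert not trimmed.flags.c_contiguous
assert np.asarray(trimmed, order='C').flags.c_contiguous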
def write_brick(output_service, scale, brick):
    shape = np.array(brick.volume.shape)
    assert (shape[0:2] == output_service.block_width).all()
    assert shape[2] % output_service.block_width == 0
    
    # Omit leading/trailing empty blocks
    block_width = output_service.block_width
    assert (np.array(brick.volume.shape) % block_width == 0).all()
    blockwise_view = view_as_blocks( brick.volume, brick.volume.shape[0:2] + (block_width,) )
    
    # blockwise view has shape (1,1,X/bx, bz, by, bx)
    assert blockwise_view.shape[0:2] == (1,1)
    blockwise_view = blockwise_view[0,0] # drop singleton axes
    
    block_maxes = blockwise_view.max( axis=(1,2,3) )
    assert block_maxes.ndim == 1
    
    nonzero_block_indexes = np.nonzero(block_maxes)[0]
    if len(nonzero_block_indexes) == 0:
        return # brick is completely empty
    
    first_nonzero_block = nonzero_block_indexes[0]
    last_nonzero_block = nonzero_block_indexes[-1]
    
    nonzero_start = (0, 0, block_width*first_nonzero_block)
    nonzero_stop = ( brick.volume.shape[0:2] + (block_width*(last_nonzero_block+1),) )
    nonzero_subvol = brick.volume[box_to_slicing(nonzero_start, nonzero_stop)]
    nonzero_subvol = np.asarray(nonzero_subvol, order='C')

    output_service.write_subvolume(nonzero_subvol, brick.physical_box[0] + nonzero_start, scale)
def _run_to_dvid(setup, check_scale_0=True):
    template_dir, config, volume, dvid_address, repo_uuid, output_grayscale_name = setup

    yaml = YAML()
    yaml.default_flow_style = False

    # re-dump config in case it's been changed by a specific test
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    box_zyx = box_xyz[:, ::-1]

    output_vol = fetch_raw(dvid_address, repo_uuid, output_grayscale_name,
                           box_zyx)
    expected_vol = volume[box_to_slicing(*box_zyx)]

    if check_scale_0:
        assert (output_vol == expected_vol).all(), \
            "Written vol does not match expected"

    return box_zyx, expected_vol
Example #8
def setup_transposed_volume_service():
    test_dir = tempfile.mkdtemp()
    test_file = f'{test_dir}/scaled-volume-test.h5'

    full_volume = np.random.randint(255, size=(256, 256, 256))
    with h5py.File(test_file, 'w') as f:
        f['volume'] = full_volume

    box_zyx = np.array([[0, 0, 0], [100, 200, 256]])
    box_xyz = box_zyx[:, ::-1]

    RAW_VOLUME_DATA = full_volume[box_to_slicing(*box_zyx)]

    VOLUME_CONFIG = {
        "hdf5": {
            "path": test_file,
            "dataset": "volume"
        },
        "geometry": {
            "bounding-box": box_xyz.tolist(),
            "available-scales": [0]  # Ensure only the first scale is used.
        }
    }

    # First, hdf5 alone
    h5_reader = Hdf5VolumeService(VOLUME_CONFIG)
    assert (h5_reader.bounding_box_zyx == box_zyx).all()
    full_from_h5 = h5_reader.get_subvolume(h5_reader.bounding_box_zyx)
    assert full_from_h5.shape == (*(box_zyx[1] - box_zyx[0]), )
    assert (full_from_h5 == RAW_VOLUME_DATA).all()

    return RAW_VOLUME_DATA, VOLUME_CONFIG, full_from_h5, h5_reader
    def test_get_union_mask_for_bodies(self):
        union_mask, box, blocksize = sparkdvid.get_union_block_mask_for_bodies(
            TEST_DVID_SERVER, self.uuid, self.instance, [1, 2])
        expected, _ = downsample_binary_3d_suppress_zero(
            self.labels.astype(bool), 64)
        assert blocksize == (64, 64, 64)
        assert (expected[box_to_slicing(*box)] == union_mask).all()
Example #11
    def check_vol(box_zyx, scale):
        # raw volume handle
        spec = dict(config["tensorstore"]["spec"])
        spec['scale_index'] = scale
        context = ts.Context(config["tensorstore"]["context"])
        store = ts.open(spec, read=True, write=False, context=context).result()
        store_box = np.array([
            store.spec().domain.inclusive_min[:3][::-1],
            store.spec().domain.exclusive_max[:3][::-1]
        ])

        # Just verify that the 'service' wrapper is consistent with the low-level handle
        assert service.dtype == store.dtype.numpy_dtype
        assert (service.bounding_box_zyx // (2**scale) == store_box).all(), \
            f"{service.bounding_box_zyx.tolist()} != {store_box.tolist()}"

        if scale == 0:
            # Service INSERTS geometry into config if necessary
            assert config["geometry"][
                "bounding-box"] == store_box[:, ::-1].tolist()

        store_subvol = store[box_to_slicing(*box_zyx[:, ::-1])].read(
            order='F').result().transpose()
        assert store_subvol.any(
        ), "Volume from raw API is all zeros; this is a bad test"

        subvol = service.get_subvolume(box_zyx, scale)
        assert subvol.any(), "Volume from service is all zeros"

        assert (subvol.shape == (box_zyx[1] - box_zyx[0])).all()
        assert (subvol == store_subvol).all()
    def write_subvolume(self, subvolume, offset_zyx, scale=0):
        assert scale == 0

        box = np.array([offset_zyx, offset_zyx])
        box[1] += subvolume.shape

        self.dataset[box_to_slicing(*box)] = subvolume
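
The box construction above is just (offset, offset + shape); with hypothetical numbers:

import numpy as np

offset_zyx = np.array([30, 40, 50])
subvolume = np.zeros((20, 20, 20), np.uint8)
box = np.array([offset_zyx, offset_zyx])
box[1] += subvolume.shape
assert (box == [[30, 40, 50], [50, 60, 70]]).all()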
Example #13
    def write_subvolume(self, subvolume, offset_zyx, scale=0):
        offset_zyx = np.array(offset_zyx)
        offset_zyx -= self._global_offset // (2**scale)
        box = np.array([offset_zyx, offset_zyx + subvolume.shape])

        stored_bounding_box = (self._bounding_box_zyx - self._global_offset) // (2**scale)
        if (box[0] >= 0).all() and (box[1] <= stored_bounding_box[1]).all():
            # Box is fully contained within the Zarr volume bounding box.
            self.zarr_dataset(scale)[box_to_slicing(*box)] = subvolume
        else:
            msg = (
                "Box extends beyond Zarr volume bounds (XYZ): "
                f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}"
            )

            if self._out_of_bounds_access == 'forbid':
                # Note that this message shows the true zarr storage bounds,
                # and doesn't show the logical bounds according to global_offset (if any).
                msg = "Cannot write subvolume. " + msg
                msg += "\nAdd permit-out-of-bounds to your config to allow such writes,"
                msg += " assuming the out-of-bounds portion is completely empty."
                raise RuntimeError(msg)

            clipped_box = box_intersection(box, stored_bounding_box)

            # If any of the out-of-bounds portion is non-empty, that's an error.
            subvol_copy = subvolume.copy()
            subvol_copy[box_to_slicing(*(clipped_box - box[0]))] = 0
            if self._out_of_bounds_access == 'permit-empty' and subvol_copy.any():
                # Note that this message shows the true zarr storage bounds,
                # and doesn't show the logical bounds according to global_offset (if any).
                msg = (
                    "Cannot write subvolume. Box extends beyond Zarr volume storage bounds (XYZ): "
                    f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}\n"
                    "and the out-of-bounds portion is not empty (contains non-zero values).\n"
                )
                raise RuntimeError(msg)

            logger.warning(msg)
            clipped_subvolume = subvolume[box_to_slicing(*(clipped_box - box[0]))]
            self.zarr_dataset(scale)[box_to_slicing(*clipped_box)] = clipped_subvolume
    def place_test_object(label, corner, height):
        corner = np.array(corner)
        object_vol = create_test_object(height).astype(np.uint64)
        object_vol *= label
        object_box = np.array([corner, corner + object_vol.shape])

        testvol_view = test_volume[box_to_slicing(*object_box)]
        testvol_view[:] = np.where(object_vol, object_vol, testvol_view)
        return object_box, (object_vol != 0).sum()
    def get_subvolume(self, box_zyx, scale=0):
        assert scale == 0, "Slice File reader only supports scale 0"
        z_offset = box_zyx[0, 0]
        yx_box = box_zyx[:, 1:]
        output = np.ndarray(shape=(box_zyx[1] - box_zyx[0]), dtype=self.dtype)
        for z in range(*box_zyx[:, 0]):
            slice_path = self._slice_fmt.format(z)
            slice_data = np.array(Image.open(slice_path).convert("L"))
            output[z - z_offset] = slice_data[box_to_slicing(*yx_box)]
        return output
Example #16
    def get_subvolume(self, box_zyx, scale=0):
        """
        Extract the subvolume, specified in new (scaled) coordinates from the
        original volume service, then scale result accordingly before returning it.
        """
        box_zyx = np.asarray(box_zyx)
        true_scale = scale + self.scale_delta

        if true_scale in self.original_volume_service.available_scales:
            # The original source already has the data at the necessary scale.
            return self.original_volume_service.get_subvolume(
                box_zyx, true_scale)

        # Start with the closest scale we've got
        base_scales = np.array(self.original_volume_service.available_scales)
        i_best = np.abs(base_scales - true_scale).argmin()
        best_base_scale = base_scales[i_best]

        delta_from_best = true_scale - best_base_scale

        if delta_from_best > 0:
            orig_box_zyx = box_zyx * 2**delta_from_best
            orig_data = self.original_volume_service.get_subvolume(
                orig_box_zyx, best_base_scale)

            if self.method:
                #print(f"orig_data.shape: {scale}, {box_zyx}, {orig_box_zyx}, {orig_data.shape}, {delta_from_best}")
                downsampled_data = downsample(orig_data, 2**delta_from_best,
                                              self.method)
            elif np.dtype(self.dtype) == np.uint64:
                # Assume that uint64 means labels.

                ## FIXME: Our C++ method for downsampling ('labels')
                ##        seems to have a bad build at the moment (it segfaults and/or produces zeros)
                ##        For now, we use the 'labels-numba' method
                downsampled_data = downsample(orig_data, 2**delta_from_best,
                                              'labels-numba')
            else:
                downsampled_data = downsample(orig_data, 2**delta_from_best,
                                              'block-mean')
            return downsampled_data
        else:
            upsample_factor = int(2**-delta_from_best)
            orig_box_zyx = downsample_box(box_zyx,
                                          np.array(3 * (upsample_factor, )))
            orig_data = self.original_volume_service.get_subvolume(
                orig_box_zyx, best_base_scale)

            upsampled_data = upsample(orig_data, upsample_factor)
            relative_box = box_zyx - upsample_factor * orig_box_zyx[0]
            requested_data = upsampled_data[box_to_slicing(*relative_box)]

            # Force contiguous so caller doesn't have to worry about it.
            return np.asarray(requested_data, order='C')
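
As a worked example of the arithmetic above (hypothetical scales): requesting scale 3 when only scales [0, 1] are available resolves to best_base_scale=1 and delta_from_best=2, so the box is fetched 4x larger per axis at scale 1 and then downsampled by 4.

import numpy as np

available_scales = np.array([0, 1])
true_scale = 3
best_base_scale = available_scales[np.abs(available_scales - true_scale).argmin()]
delta_from_best = true_scale - best_base_scale
assert (best_base_scale, delta_from_best) == (1, 2)

box_zyx = np.array([[0, 0, 0], [64, 64, 64]])
orig_box_zyx = box_zyx * 2**delta_from_best
assert (orig_box_zyx == [[0, 0, 0], [256, 256, 256]]).all()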
Example #17
def test_copygrayscale_from_hdf5_to_slices(disable_auto_retry):
    template_dir = tempfile.mkdtemp(suffix="copygrayscale-from-hdf5-template")

    # Create volume, write to HDF5
    volume = np.random.randint(10, size=TESTVOL_SHAPE, dtype=np.uint8)
    volume_path = f"{template_dir}/volume.h5"
    with h5py.File(volume_path, 'w') as f:
        f['volume'] = volume

    SLICE_FMT = 'slices/{:04d}.png'

    config_text = textwrap.dedent(f"""\
        workflow-name: copygrayscale
        cluster-type: {CLUSTER_TYPE}
        
        input:
          hdf5:
            path: {volume_path}
            dataset: volume
          
          geometry:
            message-block-shape: [64,64,256]
            bounding-box: [[0,0,100], [256,200,256]]

          adapters:
            # Enable multi-scale, since otherwise
            # Hdf5VolumeService doesn't support it out of the box
            rescale-level: 0

        output:
          slice-files:
            slice-path-format: "{SLICE_FMT}"
            dtype: uint8
        
        copygrayscale:
          max-pyramid-scale: 0
          slab-depth: 128
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    box_zyx = box_xyz[:, ::-1]

    output_vol = SliceFilesVolumeService(final_config['output']).get_subvolume(
        [[100, 0, 0], [256, 200, 256]])
    expected_vol = volume[box_to_slicing(*box_zyx)]

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"
def _fill_gaps(mask, mask_box, analysis_scale, dilation_radius_s0, dilation_box):
    """
    Fill gaps between segments in the mask by dilating each segment
    and keeping the voxels that were covered by more than one dilation.
    """
    # Perform light dilation on the mask to fix gaps in the
    # segmentation due to hot knife seams, downsampling, etc.
    if dilation_radius_s0 == 0:
        return mask

    # We limit the dilation repair to a central box, to avoid joining
    # dendrites that just barely enter the volume in multiple places.
    # We only want to make repairs that aren't near the volume edge.
    dilation_box = box_intersection(mask_box, dilation_box)
    if (dilation_box[1] - dilation_box[0] <= 0).any():
        return mask

    # Perform dilation on each connected component independently,
    # and mark the areas where two dilated components overlap.
    # We'll add those overlapping voxels to the mask, to span
    # small gap defects in the segmentation.
    cc = labelMultiArrayWithBackground((mask != 0).view(np.uint8))
    cc_max = cc.max()
    if cc_max <= 1:
        return mask

    central_box = dilation_box - mask_box[0]
    cc_central = cc[box_to_slicing(*central_box)]

    dilation_radius = dilation_radius_s0 // (2**analysis_scale)
    dilated_intersections = np.zeros(cc_central.shape, bool)
    dilated_all = vigra.filters.multiBinaryDilation((cc_central == 1), dilation_radius)
    for i in range(2, cc_max+1):
        cc_dilated = vigra.filters.multiBinaryDilation((cc_central == i), dilation_radius)
        dilated_intersections[:] |= (dilated_all & cc_dilated)
        dilated_all[:] |= cc_dilated

    # Return a new array; don't modify the original in-place.
    mask = mask.astype(bool, copy=True)
    mask[box_to_slicing(*central_box)] |= dilated_intersections
    return mask.view(np.uint8)
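
A toy one-row illustration of the overlap idea, swapping vigra's multiBinaryDilation for scipy.ndimage.binary_dilation purely to keep the sketch self-contained: two components separated by a 3-px gap each dilate by 2, and only the overlap of the dilations is added to the mask.

import numpy as np
from scipy.ndimage import binary_dilation, label

mask = np.zeros((1, 9), np.uint8)
mask[0, :3] = 1   # component 1
mask[0, 6:] = 1   # component 2, separated by a 3-px gap

cc, cc_max = label(mask)
d1 = binary_dilation(cc == 1, iterations=2)
d2 = binary_dilation(cc == 2, iterations=2)
filled = mask.astype(bool) | (d1 & d2)
assert filled[0, 4]   # the center of the gap is now spanned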
    def get_subvolume(self, box_zyx, scale=0):
        box_zyx = np.array(box_zyx)
        assert scale == 0, "Slice File reader only supports scale 0"
        z_offset = box_zyx[0, 0]
        yx_box = box_zyx[:, 1:] - self.slice_corner_yx

        output = np.ndarray(shape=(box_zyx[1] - box_zyx[0]), dtype=self.dtype)
        for z in range(*box_zyx[:, 0]):
            slice_path = self._slice_fmt.format(z)
            slice_data = np.array(Image.open(slice_path).convert("L"))
            output[z - z_offset] = slice_data[box_to_slicing(*yx_box)]
        return output
Example #20
def clip_to_logical( brick ):
    """
    Truncate the given brick so that its volume does not exceed the bounds of its logical_box.
    (Useful if the brick was originally constructed with a halo.)
    """
    intersection = box_intersection(brick.physical_box, brick.logical_box)
    assert (intersection[1] > intersection[0]).all(), \
        f"physical_box ({brick.physical_box}) does not intersect logical_box ({brick.logical_box})"
    
    intersection_within_physical = intersection - brick.physical_box[0]
    new_vol = brick.volume[ box_to_slicing(*intersection_within_physical) ]
    return Brick( brick.logical_box, intersection, new_vol )
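
For instance, with a 1-voxel halo (hypothetical numbers): logical_box [[0,0,0],[64,64,64]] and physical_box [[-1,-1,-1],[65,65,65]] intersect at the logical box itself, and the slicing coordinates within the physical volume are intersection - physical_box[0]:

import numpy as np

logical_box = np.array([[0, 0, 0], [64, 64, 64]])
physical_box = np.array([[-1, -1, -1], [65, 65, 65]])
intersection = np.array([np.maximum(logical_box[0], physical_box[0]),
                         np.minimum(logical_box[1], physical_box[1])])
within_physical = intersection - physical_box[0]
assert (within_physical == [[1, 1, 1], [65, 65, 65]]).all()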
def test_subvolume_no_scaling(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    box = np.array([[13, 15, 20], [100, 101, 91]])
    subvol_from_h5 = full_from_h5[box_to_slicing(*box)].copy('C')

    scaled_reader = ScaledVolumeService(h5_reader, 0)
    subvol_scaled = scaled_reader.get_subvolume(box)

    assert (subvol_scaled.shape == box[1] - box[0]).all()
    assert subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {subvol_from_h5.shape}"
    assert (subvol_scaled == subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
Example #22
def test_read(volume_setup):
    config, volume = volume_setup
    global_offset = config["zarr"]["global-offset"][::-1]

    service = ZarrVolumeService(config)
    assert (service.bounding_box_zyx - global_offset == [(0, 0, 0),
                                                         volume.shape]).all()
    assert service.dtype == volume.dtype

    # Service INSERTS geometry into config if necessary
    assert (config["geometry"]["bounding-box"] ==
            service.bounding_box_zyx[:, ::-1]).all()

    box = np.array([(30, 40, 50), (50, 60, 70)])
    subvol = service.get_subvolume(box + global_offset)
    assert (subvol == volume[box_to_slicing(*box)]).all()

    # Check out-of-bounds read (should be zeros)
    oob_box = box.copy()
    oob_box[1, 2] = 500
    subvol = service.get_subvolume(oob_box + global_offset)

    # In-bounds portion should match
    assert (subvol[box_to_slicing(*(box - box[0]))] == volume[box_to_slicing(*box)]).all()

    # Everything else should be zeros
    assert (subvol[:, :, 128:] == 0).all()

    #
    # Check sample_labels()
    #
    points = [np.random.randint(d, size=(10, )) for d in volume.shape]
    points = np.transpose(points)
    global_points = points + global_offset
    labels = service.sample_labels(global_points)
    assert (labels == volume[(*points.transpose(), )]).all()
def test_read_slab(read_slices_setup):
    volume, config = read_slices_setup
    box = np.array([(0, 0, 0), volume.shape])

    # Slab from z=64 to z=128
    box[:, 0] = [64, 128]

    slab_from_raw = volume[box_to_slicing(*box)]

    reader = SliceFilesVolumeService(config)
    slab_from_slices = reader.get_subvolume(box)

    assert slab_from_slices.shape == slab_from_raw.shape, \
        f"Wrong shape: Expected {slab_from_raw.shape}, Got {slab_from_slices.shape}"
    assert (slab_from_slices == slab_from_raw).all()
def test_write(volume_setup):
    config, volume = volume_setup
    config["hdf5"]["path"] = "/tmp/test_hdf5_service_testvol_WRITE.h5"
    if os.path.exists(config["hdf5"]["path"]):
        os.unlink(config["hdf5"]["path"])

    # Can't initialize service if file doesn't exist
    with pytest.raises(RuntimeError) as excinfo:
        Hdf5VolumeService(config)
    assert 'writable' in str(excinfo.value)

    # After setting writable=true, we can initialize the service.    
    assert not os.path.exists(config["hdf5"]["path"])
    config["hdf5"]["writable"] = True

    # Write some data
    box = [(30,40,50), (50,60,70)]
    subvol = volume[box_to_slicing(*box)]
    service = Hdf5VolumeService(config)
    service.write_subvolume(subvol, box[0])

    # Read it back.    
    subvol = service.get_subvolume(box)
    assert (subvol == volume[box_to_slicing(*box)]).all()
def test_read(volume_setup):
    config, volume = volume_setup
    
    service = Hdf5VolumeService(config)    
    assert (service.bounding_box_zyx == [(0,0,0),volume.shape]).all()
    assert service.dtype == volume.dtype

    # Service INSERTS geometry into config if necessary
    assert config["geometry"]["bounding-box"] == [[0,0,0], list(volume.shape[::-1])]
    assert config["hdf5"]["dtype"] == volume.dtype.name
    
    box = [(30,40,50), (50,60,70)]
    subvol = service.get_subvolume(box)
    
    assert (subvol == volume[box_to_slicing(*box)]).all()
Example #26
def write_brick(output_service, scale, brick):
    # For most outputs, we just write the whole brick.
    if not isinstance(output_service.base_service, DvidVolumeService):
        output_service.write_subvolume(brick.volume, brick.physical_box[0],
                                       scale)

    # For dvid outputs, implement a special optimization.
    # We trim empty blocks from the left/right of the brick.
    else:
        # Typically, users will prefer bricks of shape (64,64,N).
        # However, if the bricks are wider than 64, this code still works,
        # but all blocks for a given X must be empty for the brick to be trimmed.

        block_width = output_service.block_width
        assert np.array(brick.volume.shape)[2] % block_width == 0, \
            "Brick X-dimension is not a multiple of the DVID block-shape"

        # Omit leading/trailing empty blocks
        assert (np.array(brick.volume.shape) % block_width == 0).all()
        blockwise_view = view_as_blocks(
            brick.volume, brick.volume.shape[0:2] + (block_width, ))

        # blockwise view has shape (1,1,X/bx, bz, by, bx)
        assert blockwise_view.shape[0:2] == (1, 1)
        blockwise_view = blockwise_view[0, 0]  # drop singleton axes

        # Compute max in each block to determine the non-empty blocks
        block_maxes = blockwise_view.max(axis=(1, 2, 3))
        assert block_maxes.ndim == 1

        nonzero_block_indexes = np.nonzero(block_maxes)[0]
        if len(nonzero_block_indexes) == 0:
            return  # brick is completely empty

        first_nonzero_block = nonzero_block_indexes[0]
        last_nonzero_block = nonzero_block_indexes[-1]

        nonzero_start = (0, 0, block_width * first_nonzero_block)
        nonzero_stop = (brick.volume.shape[0:2] + (block_width *
                                                   (last_nonzero_block + 1), ))
        nonzero_subvol = brick.volume[box_to_slicing(nonzero_start,
                                                     nonzero_stop)]
        nonzero_subvol = np.asarray(nonzero_subvol, order='C')

        output_service.write_subvolume(nonzero_subvol,
                                       brick.physical_box[0] + nonzero_start,
                                       scale)
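
To make the trimming concrete, consider a hypothetical brick of shape (64, 64, 256) whose last 64-px block is the only non-empty one (view_as_blocks is the same skimage.util function used above):

import numpy as np
from skimage.util import view_as_blocks

brick_vol = np.zeros((64, 64, 256), np.uint8)
brick_vol[:, :, 192:] = 1   # only the 4th block along X is non-empty

bv = view_as_blocks(brick_vol, (64, 64, 64))
assert bv.shape == (1, 1, 4, 64, 64, 64)
block_maxes = bv[0, 0].max(axis=(1, 2, 3))
nonzero = np.nonzero(block_maxes)[0]
assert (nonzero[0], nonzero[-1]) == (3, 3)   # trim to the X range [192, 256)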
Example #27
    def get_subvolume(self, box, scale=0):
        req_bytes = 8 * np.prod(box[1] - box[0])
        with self._resource_manager_client.access_context(
                'brainmaps', True, 1, req_bytes):
            if not self._fetch_blockwise:
                return self._brainmaps_client.get_subvolume(box, scale)
            else:
                block_shape = 3 * (self._block_width, )
                subvol = np.zeros(box[1] - box[0], self.dtype)
                for block_box in boxes_from_grid(box, block_shape, clipped=True):
                    block = self._brainmaps_client.get_subvolume(block_box, scale)
                    outbox = block_box - box[0]
                    subvol[box_to_slicing(*outbox)] = block
                return subvol
def test_subvolume_downsample_1(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    down_box = np.array([[13, 15, 20], [20, 40, 41]])
    up_box = 2 * down_box
    up_subvol_from_h5 = full_from_h5[box_to_slicing(*up_box)]
    down_subvol_from_h5 = downsample(up_subvol_from_h5, 2, 'block-mean')

    # Scale 1
    scaled_reader = ScaledVolumeService(h5_reader, 1)
    subvol_scaled = scaled_reader.get_subvolume(down_box)

    assert (subvol_scaled.shape == down_box[1] - down_box[0]).all()
    assert down_subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {down_subvol_from_h5.shape}"
    assert (subvol_scaled == down_subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
def test_subvolume_upsample_1(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    up_box = np.array([[13, 15, 20], [100, 101, 91]])
    full_upsampled_vol = np.empty(2 * np.array(full_from_h5.shape),
                                  dtype=h5_reader.dtype)
    up_view = view_as_blocks(full_upsampled_vol, (2, 2, 2))
    up_view[:] = full_from_h5[:, :, :, None, None, None]
    up_subvol_from_h5 = full_upsampled_vol[box_to_slicing(*up_box)]

    # Scale -1
    scaled_reader = ScaledVolumeService(h5_reader, -1)
    subvol_scaled = scaled_reader.get_subvolume(up_box)

    assert (subvol_scaled.shape == up_box[1] - up_box[0]).all()
    assert up_subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {up_subvol_from_h5.shape}"
    assert (subvol_scaled == up_subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
    def get_subvolume(self, box_zyx, scale=0):
        """
        Extract the subvolume, specified in new (scaled) coordinates from the
        original volume service, then scale result accordingly before returning it.
        """
        true_scale = scale + self.scale_delta
        
        if true_scale in self.original_volume_service.available_scales:
            # The original source already has the data at the necessary scale.
            return self.original_volume_service.get_subvolume( box_zyx, true_scale )

        # Start with the closest scale we've got
        base_scales = np.array(self.original_volume_service.available_scales)
        i_best = np.abs(base_scales - true_scale).argmin()
        best_base_scale = base_scales[i_best]
        
        delta_from_best = true_scale - best_base_scale

        if delta_from_best > 0:
            orig_box_zyx = box_zyx * 2**delta_from_best
            orig_data = self.original_volume_service.get_subvolume(orig_box_zyx, best_base_scale)

            if self.dtype == np.uint64:
                # Assume that uint64 means labels.
                downsampled_data, _ = downsample_labels_3d( orig_data, 2**delta_from_best )
            else:
                downsampled_data = downsample_raw( orig_data, delta_from_best )[-1]
            return downsampled_data
        else:
            upsample_factor = int(2**-delta_from_best)
            orig_box_zyx = downsample_box(box_zyx, np.array(3*(upsample_factor,)))
            orig_data = self.original_volume_service.get_subvolume(orig_box_zyx, best_base_scale)

            orig_shape = np.array(orig_data.shape)
            upsampled_data = np.empty( orig_shape * upsample_factor, dtype=self.dtype )
            v = view_as_blocks(upsampled_data, 3*(upsample_factor,))
            v[:] = orig_data[:,:,:,None, None, None]

            relative_box = box_zyx - upsample_factor*orig_box_zyx[0]
            requested_data = upsampled_data[box_to_slicing(*relative_box)]

            # Force contiguous so caller doesn't have to worry about it.
            return np.asarray(requested_data, order='C')
def setup_labelmap_test():
    test_dir = tempfile.mkdtemp()
    test_file = f'{test_dir}/mapped-volume-test.h5'

    full_volume = np.random.randint(100, size=(256, 256, 256), dtype=np.uint64)
    with h5py.File(test_file, 'w') as f:
        f['volume'] = full_volume

    box_zyx = np.array([[0, 0, 0], [100, 200, 256]])
    box_xyz = box_zyx[:, ::-1]

    RAW_VOLUME_DATA = full_volume[box_to_slicing(*box_zyx)]

    VOLUME_CONFIG = {
        "hdf5": {
            "path": test_file,
            "dataset": "volume"
        },
        "geometry": {
            "bounding-box": box_xyz.tolist(),
            "available-scales": [0]  # Ensure only the first scale is used.
        }
    }

    # First, hdf5 alone
    h5_reader = Hdf5VolumeService(VOLUME_CONFIG)
    assert (h5_reader.bounding_box_zyx == box_zyx).all()
    full_from_h5 = h5_reader.get_subvolume(h5_reader.bounding_box_zyx)
    assert full_from_h5.shape == (*(box_zyx[1] - box_zyx[0]), )
    assert (full_from_h5 == RAW_VOLUME_DATA).all()

    mapping_path = f'{test_dir}/mapping.csv'
    mapping = pd.DataFrame({
        'orig': np.arange(100),
        'body': np.arange(100) + 1000
    })
    mapping.to_csv(mapping_path, index=False, header=True)

    labelmap_config = {"file": mapping_path, "file-type": "label-to-body"}

    expected_vol = RAW_VOLUME_DATA + 1000

    return RAW_VOLUME_DATA, expected_vol, labelmap_config, full_from_h5, h5_reader
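
The mapping file above assigns body = orig + 1000 to each label; applying such a mapping densely (e.g., with dvidutils.LabelMapper, the same class used in stats_df_from_brick later on this page) looks roughly like this:

import numpy as np
from dvidutils import LabelMapper

orig = np.arange(100, dtype=np.uint64)
body = orig + 1000
vol = np.random.randint(100, size=(4, 4, 4), dtype=np.uint64)
mapped = LabelMapper(orig, body).apply(vol)
assert (mapped == vol + 1000).all()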
Example #32
def fetch_supervoxel_mask(server, uuid, instance, sv, max_box_volume):
    """
    Fetch a mask for the given supervoxel.
    The mask will be downloaded at a scale which is chosen such that the
    mask's bounding box will not exceed the given volume.
    """
    coarse_coords = fetch_sparsevol_coarse(server,
                                           uuid,
                                           instance,
                                           sv,
                                           supervoxels=True)

    # (Note: sparsevol-coarse is returned at scale 6)
    box = (2**6) * np.array(
        [coarse_coords.min(axis=0), 1 + coarse_coords.max(axis=0)])
    shape = box[1] - box[0]
    scale = 0

    # Select a scale
    while np.prod(shape) > max_box_volume:
        scale += 1
        box //= 2
        shape = box[1] - box[0]

    # Fetch sparse masks
    ns = DVIDNodeService(server, uuid)
    block_coords, block_masks = ns.get_sparselabelmask(sv,
                                                       instance,
                                                       scale,
                                                       supervoxels=True)

    fetched_box = np.array(
        [block_coords.min(axis=0), 64 + block_coords.max(axis=0)])
    fetched_shape = fetched_box[1] - fetched_box[0]

    # Combine sparse masks into a single array
    full_mask = np.zeros(fetched_shape, dtype=bool)
    for coord, mask in zip(block_coords, block_masks):
        mask_box = np.array([coord, coord + 64]) - fetched_box[0]
        full_mask[box_to_slicing(*mask_box)] = mask

    return full_mask, scale, fetched_box
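
The scale-selection loop halves the box per axis (an 8x volume reduction) each iteration. For example, a (512, 512, 512) bounding box with max_box_volume = 10**7 lands at scale 2 (hypothetical numbers):

import numpy as np

box = np.array([[0, 0, 0], [512, 512, 512]])
max_box_volume = 10**7
scale = 0
shape = box[1] - box[0]
while np.prod(shape) > max_box_volume:
    scale += 1
    box //= 2
    shape = box[1] - box[0]
assert scale == 2 and tuple(shape) == (128, 128, 128)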
Example #33
def test_encode_nonaligned_labelarray_volume():
    nonaligned_box = np.array([(520, 1050, 2050), (620, 1150, 2150)])
    nonaligned_vol = np.random.randint(1000,
                                       2000,
                                       size=(100, 100, 100),
                                       dtype=np.uint64)

    aligned_start = np.array((512, 1024, 2048))
    aligned_box = np.array([aligned_start, aligned_start + 128])
    aligned_vol = np.zeros((128, 128, 128), dtype=np.uint64)
    aligned_vol[box_to_slicing(*(nonaligned_box -
                                 aligned_box[0]))] = nonaligned_vol

    encoded_box, encoded_vol = encode_nonaligned_labelarray_volume(
        nonaligned_box[0], nonaligned_vol)
    inflated = DVIDNodeService.inflate_labelarray_blocks3D_from_raw(
        encoded_vol, (128, 128, 128), aligned_start)

    assert (encoded_box == aligned_box).all()
    assert (inflated == aligned_vol).all()
    def get_subvolume(self, box_zyx, scale=0):
        req_bytes = 8 * np.prod(box_zyx[1] - box_zyx[0])
        try:
            resource_name = self.volume_config['tensorstore']['spec'][
                'kvstore']['bucket']
        except KeyError:
            resource_name = self.volume_config['tensorstore']['spec']['path']

        with self._resource_manager_client.access_context(
                resource_name, True, 1, req_bytes):
            store = self.store(scale)

            # Tensorstore uses X,Y,Z conventions, so it's best to
            # request a Fortran array and transpose it ourselves.
            box_xyz = box_zyx[:, ::-1]
            vol_xyzc = store[box_to_slicing(*box_xyz)].read(order='F').result()
            vol_xyz = vol_xyzc[..., 0]
            vol_zyx = vol_xyz.transpose()

            assert (vol_zyx.shape == (box_zyx[1] - box_zyx[0])).all(), \
                f"Fetched volume_zyx shape ({vol_zyx.shape}) doesn't match box_zyx {box_zyx.tolist()}"
            return vol_zyx
def serialize_uint64_blocks(volume):
    """
    Compress and serialize a volume of uint64.
    
    Preconditions:
      - volume.dtype == np.uint64
      - volume.ndim == 3
      
    NOTE: If volume.shape is NOT divisible by 64, the input will be copied and padded.
    
    Returns compressed_blocks, where the blocks are a flat list, in scan-order
    """
    assert volume.dtype == np.uint64
    assert volume.ndim == 3

    if (np.array(volume.shape) % 64).any():
        padding = 64 - ( np.array(volume.shape) % 64 )
        aligned_shape = volume.shape + padding
        aligned_volume = np.zeros( aligned_shape, dtype=np.uint64 )
        aligned_volume[box_to_slicing((0,0,0), volume.shape)] = volume
    else:
        aligned_volume = volume
    
    assert (np.array(aligned_volume.shape) % 64 == 0).all()
    
    block_view = view_as_blocks( aligned_volume, (64,64,64) )
    compressed_blocks = []
    for zi, yi, xi in np.ndindex(*block_view.shape[:3]):
        block = block_view[zi,yi,xi].copy('C')
        encoded_block = encode_label_block(block)

        # We compress AGAIN, with LZ4, because this seems to provide
        # an additional 2x size reduction for nearly no slowdown.
        compressed_block = lz4.frame.compress( encoded_block )
        compressed_blocks.append( compressed_block )
        del block
    
    return compressed_blocks
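
A round trip through this pair of functions (deserialize_uint64_blocks appears earlier on this page); a non-aligned shape exercises both the padding and trimming paths:

import numpy as np

vol = np.random.randint(1000, size=(100, 70, 64), dtype=np.uint64)
blocks = serialize_uint64_blocks(vol)
assert (deserialize_uint64_blocks(blocks, vol.shape) == vol).all()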
def block_stats_from_brick(block_shape, brick):
    """
    Get the count of voxels for each segment (excluding segment 0)
    in each block within the given brick, returned as a DataFrame.
    
    Returns a DataFrame with the following columns:
        ['segment_id', 'z', 'y', 'x', 'count']
        where z,y,x are the starting coordinates of each block.
    """
    block_grid = Grid(block_shape)
    
    block_dfs = []
    block_boxes = boxes_from_grid(brick.physical_box, block_grid)
    for box in block_boxes:
        clipped_box = box_intersection(box, brick.physical_box) - brick.physical_box[0]
        block_vol = brick.volume[box_to_slicing(*clipped_box)]
        counts = pd.Series(block_vol.reshape(-1)).value_counts(sort=False)
        segment_ids = counts.index.values
        counts = counts.values.astype(np.uint32)

        box = box.astype(np.int32)

        block_df = pd.DataFrame( { 'segment_id': segment_ids,
                                   'count': counts,
                                   'z': box[0][0],
                                   'y': box[0][1],
                                   'x': box[0][2] } )

        # Exclude segment 0 from output        
        block_df = block_df[block_df['segment_id'] != 0]

        block_dfs.append(block_df)

    brick_df = pd.concat(block_dfs, ignore_index=True)
    brick_df = brick_df[['segment_id', 'z', 'y', 'x', 'count']]
    assert list(brick_df.columns) == list(BLOCK_STATS_DTYPES.keys())
    return brick_df
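
The per-block tallying is just pandas value_counts over the flattened block; for a tiny hypothetical block:

import numpy as np
import pandas as pd

block_vol = np.array([[0, 0, 7], [7, 7, 3]], dtype=np.uint64)
counts = pd.Series(block_vol.reshape(-1)).value_counts(sort=False)
df = pd.DataFrame({'segment_id': counts.index.values,
                   'count': counts.values.astype(np.uint32)})
df = df[df['segment_id'] != 0]   # exclude segment 0, as above
assert set(zip(df['segment_id'], df['count'])) == {(7, 3), (3, 1)}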
    def get_subvolume(self, box_zyx, scale=0):
        box_zyx = np.asarray(box_zyx)
        return self.n5_dataset(scale)[box_to_slicing(*box_zyx.tolist())]
Example #39
def stats_df_from_brick(column_names, brick, exclude_zero=True, exclude_halo=True):
    """
    For a given brick, return a DataFrame of statistics for the segments it contains.
    
    Args:
    
        column_names (list):
            Which statistics to compute. Anything from COLUMNS_INFO
            is permitted, except compressed_bytes.
            The 'segment' column must be first in the list.
        
        brick (Brick):
            The brick to process
        
        exclude_zero (bool):
            Discard statistics for segment=0.
        
        exclude_halo (bool):
            Exclude voxels that lie outside the Brick's logical_box.
    
    Returns:
        pd.DataFrame, with df.columns == column_names
    """
    import pandas as pd
    assert column_names[0] == 'segment'

    volume = brick.volume
    if exclude_halo and (brick.physical_box != brick.logical_box).any():
        internal_box = box_intersection( brick.logical_box, brick.physical_box ) - brick.physical_box[0]
        volume = volume[box_to_slicing(*internal_box)]
        volume = np.asarray(volume, order='C')

    # We always compute segment and voxel_count
    TRIVIAL_COLUMNS = set(['segment', 'voxel_count'])
    counts = pd.Series(volume.ravel('K')).value_counts(sort=False)
    segment_ids = counts.index.values
    assert segment_ids.dtype == volume.dtype
    
    # Other columns are computed only if needed
    if set(column_names) - TRIVIAL_COLUMNS:
        # Must remap to consecutive segments before calling extractRegionFeatures()
        remapped_ids = np.arange(len(segment_ids), dtype=np.uint32)
        mapper = dvidutils.LabelMapper( segment_ids, remapped_ids )
        remapped_vol = mapper.apply(volume)
        assert remapped_vol.dtype == np.uint32
        remapped_vol = vigra.taggedView( remapped_vol, 'zyx' )

        # Compute (local) bounding boxes.
        acc = vigra.analysis.extractRegionFeatures( np.zeros(remapped_vol.shape, np.float32), remapped_vol,
                                                    ["Count", "Coord<Minimum >", "Coord<Maximum >"]  )
        assert (acc["Count"] == counts.values).all()
        
        # Use int64: int32 is dangerous because multiplying them together quickly overflows
        local_bb_starts = acc["Coord<Minimum >"].astype(np.int64)
        local_bb_stops = (1 + acc["Coord<Maximum >"]).astype(np.int64)

        global_bb_starts = local_bb_starts + brick.physical_box[0]
        global_bb_stops = local_bb_stops + brick.physical_box[0]

        if 'block_list' in column_names:
            block_lists = []
            for remapped_id, start, stop in zip(remapped_ids, local_bb_starts, local_bb_stops):
                local_box = np.array((start, stop))
                binary = (remapped_vol[box_to_slicing(*local_box)] == remapped_id)
                
                # This downsample function respects block-alignment, since we're providing the local_box
                reduced, block_bb = downsample_binary_3d_suppress_zero(binary, BLOCK_WIDTH, local_box)
                
                local_block_indexes = np.transpose(reduced.nonzero())
                local_block_starts = BLOCK_WIDTH * (block_bb[0] + local_block_indexes)
                global_block_starts = brick.physical_box[0] + local_block_starts
                block_lists.append(global_block_starts)
    
    # Segment is always first.
    df = pd.DataFrame(columns=column_names)
    df['segment'] = segment_ids

    # Append columns in-order
    for column in column_names:
        if column == 'voxel_count':
            df['voxel_count'] = counts.values
        
        if column == 'block_list':
            df['block_list'] = block_lists
        
        if column == 'bounding_box_start':
            df['bounding_box_start'] = list(global_bb_starts) # Must convert to list or pandas complains about non-1D-data.
        
        if column == 'bounding_box_stop':
            df['bounding_box_stop'] = list(global_bb_stops) # ditto

        if column in ('z0', 'y0', 'x0'):
            df[column] = global_bb_starts[:, ('z0', 'y0', 'x0').index(column)]

        if column in ('z1', 'y1', 'x1'):
            df[column] = global_bb_stops[:, ('z1', 'y1', 'x1').index(column)]
        
        if column == 'compressed_bytes':
            raise RuntimeError("Can't compute compressed_bytes in this function.")

    if exclude_zero:
        df.drop(df.index[df.segment == 0], inplace=True)

    return df