def test_downsample_with_offset():
    """Downsample a layer created at voxel offset (3, 7, 11) and verify it.

    Checks that every mip level exists with the expected volume size, that
    the offset at mip 3 is (0, 0, 11), and that cutouts at each level equal
    the source data averaged down by [2, 2, 1, 1] per level.
    """
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4

    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3)

    cv.refresh_info()

    assert len(cv.available_mips) == 4

    # Only x and y are halved per level; z keeps its full extent.
    expected_sizes = (
        [ 512, 512, 128 ],
        [ 256, 256, 128 ],
        [ 128, 128, 128 ],
        [ 64, 64, 128 ],
    )
    for level, size in enumerate(expected_sizes):
        assert np.array_equal(cv.mip_volume_size(level), size)

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))

    # (cutout in volume coordinates, matching region of the reference array)
    cutouts = (
        (np.s_[3:67, 7:71, 11:75], np.s_[0:64, 0:64, 0:64]),
        (np.s_[1:33, 3:35, 11:75], np.s_[0:32, 0:32, 0:64]),
        (np.s_[0:16, 1:17, 11:75], np.s_[0:16, 0:16, 0:64]),
        (np.s_[0:8, 0:8, 11:75], np.s_[0:8,0:8,0:64]),
    )

    reference = data
    for level, (cv_region, ref_region) in enumerate(cutouts):
        if level > 0:
            # Each level is the previous one averaged down in x and y.
            reference = downsample.downsample_with_averaging(reference, factor=[2, 2, 1, 1])
        cv.mip = level
        assert np.all(cv[cv_region] == reference[ref_region])
def test_downsample_w_missing():
    """Downsample a layer whose data was deleted, with and without fill_missing.

    The layer is created and then deleted again before its info is
    re-committed. Downsampling with ``fill_missing=False`` is allowed to
    raise ``EmptyVolumeException``; with ``fill_missing=True`` it must run,
    and the resulting mip 0 cutout must be all zeros.
    """
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4
    delete_layer()

    cv.commit_info()

    common = dict(mip=0, num_mips=3)

    # An EmptyVolumeException is tolerated here, not required.
    try:
        create_downsampling_tasks(MockTaskQueue(), storage.layer_path, fill_missing=False, **common)
    except EmptyVolumeException:
        pass

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, fill_missing=True, **common)

    cv.refresh_info()

    assert len(cv.available_mips) == 4

    expected_sizes = (
        [ 512, 512, 128 ],
        [ 256, 256, 128 ],
        [ 128, 128, 128 ],
        [ 64, 64, 128 ],
    )
    for level, size in enumerate(expected_sizes):
        assert np.array_equal(cv.mip_volume_size(level), size)

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))

    cv.mip = 0
    cv.fill_missing = True
    cutout = cv[3:67, 7:71, 11:75]
    assert np.count_nonzero(cutout) == 0
def create_quantize_tasks(src_layer, dest_layer, shape, mip=0, fill_missing=False, chunk_size=(128, 128, 64), encoding='raw', bounds=None):
    """Prepare the destination layer and return an iterator of QuantizeTask factories.

    The destination info is built with ``create_quantized_affinity_info`` and
    committed, downsample scales are created for it, and the region to
    process is resolved: the whole mip when ``bounds`` is None, otherwise
    ``bounds`` converted from mip 0 to ``mip`` and expanded to the chunk
    grid.

    Returns a ``QuantizeTasksIterator`` built from the resolved bounds and
    ``shape``. Its ``task`` method yields ``partial(QuantizeTask, ...)``
    objects and its ``on_finish`` method records provenance on the
    destination volume (presumably both are invoked by
    ``FinelyDividedTaskIterator`` -- confirm against that base class).
    """
    shape = Vec(*shape)

    info = create_quantized_affinity_info(
        src_layer, dest_layer, shape, mip, chunk_size, encoding
    )
    destvol = CloudVolume(dest_layer, info=info, mip=mip)
    destvol.commit_info()

    downsample_scales.create_downsample_scales(
        dest_layer, mip=mip, ds_shape=shape,
        chunk_size=chunk_size, encoding=encoding
    )

    if bounds is not None:
        # Caller-supplied bounds are given at mip 0 and snapped to the chunk grid.
        at_mip = destvol.bbox_to_mip(bounds, mip=0, to_mip=mip)
        bounds = at_mip.expand_to_chunk_size(
            destvol.mip_chunk_size(mip), destvol.mip_voxel_offset(mip)
        )
    else:
        bounds = destvol.mip_bounds(mip)

    class QuantizeTasksIterator(FinelyDividedTaskIterator):
        def task(self, shape, offset):
            task_kwargs = dict(
                source_layer_path=src_layer,
                dest_layer_path=dest_layer,
                shape=shape.tolist(),
                offset=offset.tolist(),
                fill_missing=fill_missing,
                mip=mip,
            )
            return partial(QuantizeTask, **task_kwargs)

        def on_finish(self):
            destvol.provenance.sources = [src_layer]
            # ``shape`` here is the enclosing function's Vec, not a task shape.
            method = {
                'task': 'QuantizeTask',
                'source_layer_path': src_layer,
                'dest_layer_path': dest_layer,
                'shape': shape.tolist(),
                'fill_missing': fill_missing,
                'mip': mip,
            }
            destvol.provenance.processing.append({
                'method': method,
                'by': operator_contact(),
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            })
            destvol.commit_provenance()

    return QuantizeTasksIterator(bounds, shape)
def write_layer(path, mode, layer, flip_xy, z_start, mip, factor):
    """Write a stack of images into a CloudVolume layer at MIP level 0.

    Parameters
    ----------
    path : str
        Location of the CloudVolume layer to write to.
    mode : str
        Layer mode. Chunk compression is enabled only when this equals
        ``'segmentation'``.
    layer : numpy.ndarray
        3-D image data. It is transposed with axes ``[1, 2, 0]`` when
        ``flip_xy`` is true and ``[2, 1, 0]`` otherwise, so that the first
        input axis becomes the last (z) axis.
    flip_xy : bool
        Selects which of the two transpositions above is applied.
    z_start
        Index of the first slice of ``layer`` relative to the start of the
        volume; the volume's voxel offset is added to it.
    mip
        Accepted for interface compatibility. Only level 0 is written by
        this function.
    factor
        Per-axis reduction factor between consecutive mip levels.

    Raises
    ------
    ValueError
        If the computed z range lies entirely outside the volume.
    """
    # Transpose the axes to match the CloudVolume order
    if flip_xy:
        layer = np.transpose(layer, axes=[1, 2, 0])
    else:
        layer = np.transpose(layer, axes=[2, 1, 0])

    cv_args = dict(bounded=True,
                   fill_missing=True,
                   autocrop=False,
                   cache=False,
                   compress_cache=None,
                   cdn_cache=False,
                   progress=False,
                   info=None,
                   provenance=None,
                   compress=(mode == 'segmentation'),
                   non_aligned_writes=True,
                   parallel=1)

    # Only the base level is written; the loop shape is kept so that more
    # levels can be enabled by widening the range.
    for m in range(1):
        # Report the level that is actually written, not the requested depth.
        LOGGER.info('Writing images {}-{} to MIP level {}'.format(
            z_start, z_start + layer.shape[-1], m))

        # Access the CloudVolume
        cv = CloudVolume(path, mip=m, **cv_args)

        # Compute the index of this layer in the CloudVolume archive
        offset = cv.mip_voxel_offset(m)
        step = np.power(np.array(factor), m)
        cv_z_start = int(z_start // step[2] + offset[2])

        # cv_z_start already includes the voxel offset, so the upper limit
        # must include it as well (cv.shape[-2] alone is only the z extent).
        cv_z_limit = int(offset[2] + cv.shape[-2])
        cv_z_end = int(min(cv_z_start + layer.shape[-1], cv_z_limit))
        if cv_z_end <= cv_z_start:
            raise ValueError(
                'z range {}-{} lies outside the volume (z limit {}).'.format(
                    cv_z_start, cv_z_start + layer.shape[-1], cv_z_limit))

        # Trim the data to the clamped range so both sides of the assignment
        # have the same z extent.
        cv[:, :, cv_z_start:cv_z_end] = layer[:, :, :cv_z_end - cv_z_start]

        # Reduce the size of the layer to match the next mip level
        layer = layer[::factor[0], ::factor[1], ::factor[2]]
def create_bounding_boxes(chunk_size: tuple,
                          chunk_overlap: tuple = (0, 0, 0),
                          roi_start: tuple = None,
                          roi_stop: tuple = None,
                          layer_path: str = None,
                          mip: int = 0,
                          grid_size: tuple = None,
                          verbose: bool = True):
    """Tile a region of interest with overlapping chunk bounding boxes.

    Parameters
    ----------
    chunk_size : tuple
        Size of every bounding box.
    chunk_overlap : tuple
        Overlap between neighbouring boxes; the stride between box origins
        is ``chunk_size - chunk_overlap``.
    roi_start, roi_stop : tuple, optional
        Region to tile. When ``layer_path`` is given, a missing ``roi_stop``
        defaults to the dataset end and a missing ``roi_start`` to the
        dataset offset minus ``chunk_overlap``. Without ``layer_path``,
        ``roi_start`` is required and ``roi_stop`` is derived from
        ``grid_size``.
    layer_path : str, optional
        CloudVolume layer used to look up dataset size and offset, which are
        reversed from the order CloudVolume returns (the code comments this
        as z, y, x).
    mip : int
        Mip level used for that lookup.
    grid_size : tuple, optional
        Number of boxes per axis. When omitted, the smallest grid that
        satisfies ``roi_start + stride * grid_size + chunk_overlap >=
        roi_stop`` is used, with at least one box per axis.
    verbose : bool
        Print the resolved tiling parameters.

    Returns
    -------
    list
        One ``Bbox`` per grid position, in ``itertools.product`` order.

    Raises
    ------
    ValueError
        If neither ``roi_stop`` nor ``grid_size`` can be determined.
    """
    chunk_size = Vec(*chunk_size)
    chunk_overlap = Vec(*chunk_overlap)
    stride = chunk_size - chunk_overlap

    if layer_path:
        vol = CloudVolume(layer_path, mip=mip)
        # dataset shape as z,y,x
        dataset_size = vol.mip_shape(mip)[:3][::-1]
        dataset_offset = vol.mip_voxel_offset(mip)[::-1]

        if roi_stop is None:
            roi_stop = Vec(
                *[o + s for o, s in zip(dataset_offset, dataset_size)])
        if roi_start is None:
            # note that we normally start from -overlap to keep the chunks aligned!
            roi_start = dataset_offset - chunk_overlap

    if isinstance(grid_size, tuple):
        grid_size = Vec(*grid_size)

    assert roi_start is not None
    if isinstance(roi_start, tuple):
        roi_start = Vec(*roi_start)

    if roi_stop is None:
        if grid_size is None:
            # Previously this fell through to ``stride * None`` (TypeError).
            raise ValueError(
                'roi_stop cannot be determined: provide roi_stop, grid_size '
                'or layer_path.')
        roi_stop = roi_start + stride * grid_size + chunk_overlap
    elif isinstance(roi_stop, tuple):
        roi_stop = Vec(*roi_stop)

    roi_size = roi_stop - roi_start

    if grid_size is None:
        # Ceiling division inverts the roi_stop relation above. Using
        # ``floor(...) + 1`` adds a surplus box whenever the ROI divides
        # evenly. A non-positive stride cannot advance, so it gets one box.
        grid_size = Vec(*[
            max(1, -(-extent // step)) if step > 0 else 1
            for extent, step in zip(roi_size - chunk_overlap, stride)
        ])

    # the stride should not be zero if there is more than one chunks
    for g, s in zip(grid_size, stride):
        if g > 1:
            assert s > 0

    final_output_stop = roi_start + (grid_size - 1) * stride + chunk_size
    if verbose:
        print('\nroi start: ', roi_start)
        print('stride: ', stride)
        print('grid size: ', grid_size)
        print('final output stop: ', final_output_stop)

    return [
        Bbox.from_delta(roi_start + Vec(z, y, x) * stride, chunk_size)
        for (z, y, x) in product(range(grid_size[0]),
                                 range(grid_size[1]),
                                 range(grid_size[2]))
    ]
def create_bounding_boxes(chunk_size:tuple, overlap: tuple=(0,0,0),
                          start:tuple=None, layer_path: str=None,
                          mip:int=0, grid_size: tuple=None,
                          verbose: bool=True):
    """Tile a dataset with overlapping chunk bounding boxes.

    Parameters
    ----------
    chunk_size : tuple
        Size of every bounding box.
    overlap : tuple
        Overlap between neighbouring boxes; the stride between box origins
        is ``chunk_size - overlap``.
    start : tuple, optional
        Origin of the first box. Defaults to the dataset offset minus
        ``overlap``, which requires ``layer_path``.
    layer_path : str, optional
        CloudVolume layer used to look up dataset shape and offset, which
        are reversed from the order CloudVolume returns (the code comments
        this as z, y, x).
    mip : int
        Mip level used for that lookup.
    grid_size : tuple, optional
        Number of boxes per axis. When omitted it is the smallest grid whose
        last box reaches the end of the dataset (at least one box per axis),
        which requires ``layer_path``.
    verbose : bool
        Print the resolved tiling parameters.

    Returns
    -------
    list
        One ``Bbox`` per grid position, in ``itertools.product`` order.

    Raises
    ------
    ValueError
        If ``start`` or ``grid_size`` has to be derived from the dataset but
        ``layer_path`` was not given.
    """
    dataset_shape = dataset_offset = None
    if layer_path:
        vol = CloudVolume(layer_path, mip=mip)
        # dataset shape as z,y,x
        dataset_shape = vol.mip_shape(mip)[:3][::-1]
        dataset_offset = vol.mip_voxel_offset(mip)[::-1]

    chunk_size = Vec(*chunk_size)
    overlap = Vec(*overlap)
    stride = chunk_size - overlap

    if start is None:
        if dataset_offset is None:
            # Previously an unbound-local error.
            raise ValueError('start is required when layer_path is not given.')
        # note that we normally start from -overlap to keep the chunks aligned!
        start = dataset_offset - overlap
    else:
        start = Vec(*start)

    if grid_size is None:
        if dataset_shape is None:
            # Previously an unbound-local error.
            raise ValueError(
                'grid_size is required when layer_path is not given.')
        volume_size = dataset_shape - (start - dataset_offset)
        # Smallest grid with start + (g - 1) * stride + chunk_size reaching
        # the dataset end. ``floor(...) + 1`` adds a surplus box whenever the
        # extent divides evenly. A non-positive stride cannot advance, so it
        # gets one box.
        grid_size = Vec(*[
            max(1, -(-extent // step)) if step > 0 else 1
            for extent, step in zip(volume_size - overlap, stride)
        ])

    # the stride should not be zero if there is more than one chunks
    for g, s in zip(grid_size, stride):
        if g > 1:
            assert s > 0

    if verbose:
        print('\nstart: ', start)
        print('stride: ', stride)
        print('grid size: ', grid_size)
        print('chunk_size: ', chunk_size, '\n')

    bboxes = []
    for (z, y, x) in tqdm(product(range(grid_size[0]),
                                  range(grid_size[1]),
                                  range(grid_size[2]))):
        chunk_start = start + Vec(z, y, x) * stride
        bboxes.append(Bbox.from_delta(chunk_start, chunk_size))

    return bboxes
def mpi_cloud_write(f_sublist, c_path, start_z, mip, factor, chunk_size,
                    flip_xy, cast, cv_args):
    """Load batches of image files and write them to every mip level.

    ``f_sublist`` is a List[List[str]]; each inner list is one z batch.
    Every batch is stacked along axis 2, optionally cast to uint32,
    zero-padded in z up to ``chunk_size[2]``, and written to levels
    ``0..mip``. The stack is subsampled by ``factor`` between levels. The z
    position of a batch comes from ``_find_index`` applied to its first
    file, minus ``start_z``.
    """
    # Each image is transposed on load when flip_xy is set.
    if flip_xy:
        def load(fname):
            return np.transpose(io.imread(fname))
    else:
        load = io.imread

    for batch in tqdm(f_sublist):
        volume = np.stack([load(f) for f in tqdm(batch, desc='loading')],
                          axis=2)
        if cast:
            volume = volume.astype(np.uint32)

        # Pad short batches with zeros so they fill a whole chunk in z.
        shortfall = chunk_size[2] - volume.shape[2]
        if shortfall > 0:
            volume = np.pad(volume, ((0, 0), (0, 0), (0, shortfall)),
                            'constant', constant_values=0)

        batch_z = _find_index(batch[0]) - start_z

        for level in range(mip + 1):
            cv = CloudVolume(c_path, mip=level, **cv_args)
            level_offset = cv.mip_voxel_offset(level)
            level_step = np.array(factor)**level
            z_lo = batch_z // level_step[2] + level_offset[2]
            z_hi = z_lo + volume.shape[2]
            cv[:, :, z_lo:z_hi] = volume
            # Subsample for the next level.
            volume = volume[::factor[0], ::factor[1], ::factor[2]]

        # Release the batch before the next one is loaded.
        del volume
    return
def from_manual_setup(cls,
                      chunk_size: Union[Vec, tuple],
                      chunk_overlap: Union[Vec, tuple] = Vec(0, 0, 0),
                      roi_start: Union[Vec, tuple] = None,
                      roi_stop: Union[Vec, tuple] = None,
                      roi_size: Union[Vec, tuple] = None,
                      grid_size: Union[Vec, tuple] = None,
                      respect_chunk_size: bool = True,
                      aligned_block_size: Union[Vec, tuple] = None,
                      layer_path: str = None,
                      mip: int = 0):
    """Build an instance holding a grid of chunk bounding boxes.

    Parameters
    ----------
    chunk_size, chunk_overlap
        Box size and overlap between neighbouring boxes; the stride is
        ``chunk_size - chunk_overlap``.
    roi_start, roi_stop, roi_size, grid_size
        Region to tile. Missing values are filled from ``layer_path`` when
        it is given. Otherwise ``roi_stop`` is derived from ``roi_size`` or
        from ``grid_size``; if none of the three is given, the grid
        defaults to a single box.
    respect_chunk_size
        When false, every box is clipped to ``roi_stop``.
    aligned_block_size
        When given, ``roi_start`` is rounded down and ``roi_stop`` rounded
        up to multiples of it. It must divide ``chunk_size``.
    layer_path
        Either an ``.h5`` file or a CloudVolume layer, used to look up the
        ROI.
    mip
        Mip level used for the CloudVolume lookup.

    Returns
    -------
    ``cls(bboxes)`` with one ``Bbox`` per grid position.

    Caller-supplied ``Vec`` arguments are never modified in place.
    """
    if layer_path:
        if layer_path.endswith('.h5'):
            assert os.path.exists(layer_path)
            with h5py.File(layer_path, mode='r') as file:
                for key in file.keys():
                    if 'offset' in key:
                        roi_start = Vec(*(file[key]))
                    elif 'voxel_size' not in key:
                        # Any other dataset is taken as the volume itself.
                        if roi_size is None:
                            roi_size = Vec(*file[key].shape[-3:])
            if roi_start is None:
                roi_start = Vec(0, 0, 0)
            roi_stop = roi_start + roi_size
        else:
            vol = CloudVolume(layer_path, mip=mip)
            # dataset shape as z,y,x
            dataset_size = vol.mip_shape(mip)[:3][::-1]
            dataset_offset = vol.mip_voxel_offset(mip)[::-1]

            if roi_size is None:
                roi_size = Vec(*dataset_size)
            if roi_stop is None:
                roi_stop = Vec(
                    *[o + s for o, s in zip(dataset_offset, dataset_size)])
            if roi_start is None:
                # note that we normally start from -overlap to keep the chunks aligned!
                roi_start = dataset_offset - chunk_overlap

    assert roi_start is not None

    if roi_size is None and roi_stop is None and grid_size is None:
        grid_size = Vec(1, 1, 1)

    if isinstance(chunk_size, tuple):
        chunk_size = Vec(*chunk_size)
    if isinstance(chunk_overlap, tuple):
        chunk_overlap = Vec(*chunk_overlap)
    if isinstance(roi_start, tuple):
        roi_start = Vec(*roi_start)
    if isinstance(roi_size, tuple):
        roi_size = Vec(*roi_size)
    if isinstance(grid_size, tuple):
        grid_size = Vec(*grid_size)
    if isinstance(roi_stop, tuple):
        roi_stop = Vec(*roi_stop)

    stride = chunk_size - chunk_overlap

    if roi_stop is None:
        if grid_size is not None:
            roi_stop = roi_start + stride * grid_size + chunk_overlap
        else:
            # Only roi_start and roi_size were given. Previously this case
            # evaluated ``stride * None`` and raised TypeError.
            roi_stop = roi_start + roi_size

    if aligned_block_size is not None:
        if not isinstance(aligned_block_size, Vec):
            aligned_block_size = Vec(*aligned_block_size)
        assert np.all(aligned_block_size <= chunk_size)
        # np.alltrue was removed in NumPy 2.0; np.all is the replacement.
        assert np.all(chunk_size % aligned_block_size == 0)
        assert len(aligned_block_size) == 3
        assert len(roi_stop) == 3
        # Build new vectors instead of using -= / item assignment, which
        # would modify Vec objects owned by the caller.
        roi_start = roi_start - roi_start % aligned_block_size
        # Round up: add the distance to the next multiple, or 0 if aligned.
        roi_stop = roi_stop + (
            aligned_block_size - roi_stop % aligned_block_size
        ) % aligned_block_size

    if roi_size is None:
        roi_size = roi_stop - roi_start

    if grid_size is None:
        grid_size = (roi_size - chunk_overlap) / stride
        grid_size = tuple(ceil(x) for x in grid_size)
        grid_size = Vec(*grid_size)

    # the stride should not be zero if there is more than one chunks
    for g, s in zip(grid_size, stride):
        if g > 1:
            assert s > 0

    final_output_stop = roi_start + (grid_size - 1) * stride + chunk_size
    logging.info(f'\nroi start: {roi_start}')
    logging.info(f'stride: {stride}')
    logging.info(f'grid size: {grid_size}')
    logging.info(f'final output stop: {final_output_stop}')

    print('grid size: ', grid_size)

    bboxes = []
    for (gz, gy, gx) in product(range(grid_size[0]),
                                range(grid_size[1]),
                                range(grid_size[2])):
        chunk_start = roi_start + Vec(gz, gy, gx) * stride
        bbox = Bbox.from_delta(chunk_start, chunk_size)
        if not respect_chunk_size:
            bbox.maxpt = np.minimum(bbox.maxpt, roi_stop)
        bboxes.append(bbox)

    return cls(bboxes)
def save_cloudvolume(img, path, mode, origin, mip=0, resolution=None,
                     flip_xy=False, voxel_offset=None, volume_size=None,
                     chunk_size=(64, 64, 64), factor=(2, 2, 2)):
    """Save images to a CloudVolume layer.

    Parameters
    ----------
    img : array_like
        The image/volume to save. It is transposed with axes ``(1, 2, 0)``
        when ``flip_xy`` is true and ``(2, 1, 0)`` otherwise.
    path : str
        Layer location; must begin with a protocol such as ``file://``.
    mode : {'image', 'segmentation'}
        Selects layer type, data type (uint8 / uint32) and encoding
        (raw / compressed_segmentation) of a newly created layer.
    origin
        ``origin[0]`` is the z index of the first slice relative to the
        volume start.
    mip : int
        Highest mip level to write; levels ``0..mip`` are written.
    resolution, voxel_offset, volume_size, chunk_size, factor
        Forwarded to ``CloudVolume.create_new_info`` when the layer info has
        to be created. ``factor`` is also the per-axis subsampling step
        between levels.
    flip_xy : bool
        Selects which of the two transpositions above is applied.

    Returns
    -------
    The CloudVolume opened for the last mip level written.

    Raises
    ------
    ValueError
        If ``mode`` is unknown or ``path`` has no protocol prefix.
    """
    if mode not in ['image', 'segmentation']:
        raise ValueError(
            'Invalid mode {}. Must be one of "image", "segmentation"'.format(
                mode))

    # Match the protocol at the start of the path. Checking only the first
    # path component (e.g. "gs:") against a pattern ending in "://$"
    # rejected every path.
    if not re.match(r'^[a-zA-Z\d]+://', path):
        raise ValueError('No protocol specified in {}.'.format(path))

    base_args = dict(bounded=True,
                     fill_missing=True,
                     autocrop=False,
                     cache=False,
                     compress_cache=None,
                     cdn_cache=False,
                     progress=False,
                     provenance=None,
                     non_aligned_writes=True,
                     parallel=1)

    # NOTE(review): ``path`` carries a protocol prefix, so this local-file
    # test presumably never succeeds and the info is re-created on each
    # call -- confirm the intended existence check.
    if not os.path.isfile(os.path.join(path, 'info')):
        # Only rank 0 creates the layer metadata.
        if MPI.COMM_WORLD.Get_rank() == 0:
            if mode == 'image':
                data_type, encoding = 'uint8', 'raw'
            else:
                data_type, encoding = 'uint32', 'compressed_segmentation'

            # NOTE(review): the channel count is read before ``img`` is
            # transposed -- confirm the last input axis really is channels.
            info = CloudVolume.create_new_info(
                num_channels=img.shape[-1],
                layer_type=mode,
                data_type=data_type,
                encoding=encoding,
                resolution=resolution,
                voxel_offset=voxel_offset,
                volume_size=list(volume_size),
                chunk_size=chunk_size,
                max_mip=mip,
                factor=factor)

            if mode == 'segmentation':
                # Every level reuses the block size chosen for level 0.
                for i in range(1, mip + 1):
                    info['scales'][i]['compressed_segmentation_block_size'] = \
                        info['scales'][0]['compressed_segmentation_block_size']

            cv = CloudVolume(path, mip=0, info=info, compress=True,
                             **base_args)
            cv.commit_info()

    # Every rank reaches the barrier, so none starts writing before rank 0
    # has committed the info and none is left waiting.
    if MPI.COMM_WORLD.Get_size() > 1:
        MPI.COMM_WORLD.barrier()

    if flip_xy:
        img = np.transpose(img, axes=(1, 2, 0))
    else:
        img = np.transpose(img, axes=(2, 1, 0))

    write_args = dict(base_args, info=None,
                      compress=(mode == 'segmentation'))

    for m in range(mip + 1):
        cv = CloudVolume(path, mip=m, **write_args)
        mip_offset = cv.mip_voxel_offset(m)
        step = np.power(np.asarray(factor), m)
        cv_z_start = origin[0] // step[2] + mip_offset[2]
        cv_z_size = img.shape[2]
        cv[:, :, cv_z_start:cv_z_start + cv_z_size] = img
        # Subsample for the next level.
        img = img[::factor[0], ::factor[1], ::factor[2]]

    return cv
def write_subvolume(path, subvolume, flip_xy, z_start, mip, factor):
    """Write an image or image stack into a CloudVolume layer at MIP level 0.

    Parameters
    ----------
    path : str
        Location of the CloudVolume layer to write to.
    subvolume : numpy.ndarray
        2-D or 3-D image data. A 2-D image gets a leading axis of length 1.
        The array is then transposed with axes ``[1, 2, 0]`` when
        ``flip_xy`` is true and ``[2, 1, 0]`` otherwise, and a trailing axis
        of length 1 is appended.
    flip_xy : bool
        Selects which of the two transpositions above is applied.
    z_start
        Index of the first slice of ``subvolume`` relative to the start of
        the volume; the volume's voxel offset is added to it.
    mip
        Accepted for interface compatibility. Only level 0 is written by
        this function.
    factor
        Per-axis reduction factor between consecutive mip levels.

    Raises
    ------
    ValueError
        If the computed z range lies entirely outside the volume.
    """
    if subvolume.ndim == 2:
        subvolume = np.expand_dims(subvolume, 0)

    # Transpose the axes to match the CloudVolume order
    if flip_xy:
        subvolume = np.transpose(subvolume, axes=[1, 2, 0])
    else:
        subvolume = np.transpose(subvolume, axes=[2, 1, 0])

    if subvolume.ndim == 3:
        subvolume = np.expand_dims(subvolume, -1)

    cv_args = dict(bounded=True,
                   fill_missing=True,
                   autocrop=False,
                   cache=False,
                   compress_cache=None,
                   cdn_cache=False,
                   progress=False,
                   info=None,
                   provenance=None,
                   compress=True,
                   non_aligned_writes=True,
                   parallel=1)

    # Only the base level is written; the loop shape is kept so that more
    # levels can be enabled by widening the range.
    for m in range(1):
        # Report the level that is actually written, not the requested depth.
        LOGGER.info('Writing MIP level {}.'.format(m))

        # Access the CloudVolume
        cv = CloudVolume(path, mip=m, **cv_args)

        # Compute the index of this layer in the CloudVolume archive
        offset = cv.mip_voxel_offset(m)
        step = np.power(np.array(factor), m)
        cv_z_start = int(z_start // step[2] + offset[2])

        # cv_z_start already includes the voxel offset, so the upper limit
        # must include it as well (cv.shape[-2] alone is only the z extent).
        # The z axis is second to last because of the trailing channel axis.
        depth = subvolume.shape[-2]
        cv_z_limit = int(offset[2] + cv.shape[-2])
        cv_z_end = int(min(cv_z_start + depth, cv_z_limit))
        if cv_z_end <= cv_z_start:
            raise ValueError(
                'z range {}-{} lies outside the volume (z limit {}).'.format(
                    cv_z_start, cv_z_start + depth, cv_z_limit))

        # Trim the data to the clamped range so both sides of the assignment
        # have the same z extent.
        cv[:, :, cv_z_start:cv_z_end] = subvolume[:, :, :cv_z_end - cv_z_start]

        # Reduce the size of the layer to match the next mip level
        subvolume = subvolume[::factor[0], ::factor[1], ::factor[2]]