Example #1
def main(h5_path: str, n5_path: str, skip_confirmation: bool = False):
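    # Copy ground-truth data from an HDF5 crop into an N5 container,
    # attaching attributes generated from a metadata spreadsheet.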
    crop_name = Path(split_by_suffix(h5_path, ('.h5', ))[0]).parent.name
    num_n5_suffixes = len(
        tuple(filter(lambda v: v.endswith('.n5'),
                     Path(n5_path).parts)))
    if num_n5_suffixes != 1:
        raise ValueError(
            f'The n5 path must have one and only one element ending with ".n5". The path given has {num_n5_suffixes}.'
        )

    dataset_name = Path(split_by_suffix(n5_path, ('.n5', ))[0]).name
    sheet_df = get_sheet_df(credfile, sheetname, sheetpage)
    crop_attrs = generate_crop_attrs(dataset_name, crop_name, sheet_df,
                                     classNameDict)
    gt_data = read(h5_path)
    if not skip_confirmation:
        print(
            f'The data created at {n5_path} will have the following attributes: '
        )
        print(crop_attrs)
        click.confirm('Do you wish to continue?', default=True, abort=True)

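    # Create (or open) the destination N5 array, copy the data into it, and
    # attach the generated attributes.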
    output_array = access(n5_path,
                          shape=gt_data.shape,
                          dtype=gt_data.dtype,
                          mode='a')
    output_array[:] = gt_data
    output_array.attrs.update(**crop_attrs)
    log.info(f'Saved data and attributes to {n5_path}')
    return 0
Example #2
def test_store_blocks():
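    # Round-trip test: store a chunked dask array into a temporary zarr store
    # via store_blocks, then read it back and compare with the source data.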
    data = da.arange(256).reshape(16, 16).rechunk((4, 4))
    store = tempfile.mkdtemp(suffix='.zarr')
    atexit.register(shutil.rmtree, store)
    z = zarr.open(store, mode='w', shape=data.shape, chunks=data.chunksize)
    dask.delayed(store_blocks(data, z)).compute()
    assert np.array_equal(read(store)[:], data.compute())
Example #3
def test_accessing_array_zarr_n5():
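    # Write random uint8 values to a temporary N5 store, then check that both
    # read() and read_dask() return the same data.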
    store = tempfile.mkdtemp(suffix='.n5')
    atexit.register(shutil.rmtree, store)
    data = np.random.randint(0, 255, size=(100, ), dtype='uint8')
    z = zarr.open(store, mode='w', shape=data.shape, chunks=10)
    z[:] = data
    assert np.array_equal(read(store)[:], data)

    darr = read_dask(store, chunks=(10, ))
    assert (darr.compute() == data).all()
Example #4
def test_accessing_array_zarr_n5():
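    # Same round trip as above, but against a fixed on-disk N5 path that is
    # cleaned up manually at the end instead of via atexit.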
    store = 'data/array.n5'
    data = np.random.randint(0, 255, size=(100, ), dtype='uint8')
    z = zarr.open(store, mode='w', shape=data.shape, chunks=10)
    z[:] = data
    assert np.array_equal(read(store)[:], data)

    darr = daskify(store, chunks=(10, ))
    assert (darr.compute() == data).all()
    shutil.rmtree(store)
Example #5
def adjust_coordinates_multiscale(
    url: str,
    new_scale: Optional[Dict[str, float]] = None,
    new_translate: Optional[Dict[str, float]] = None,
    new_units: Optional[Dict[str, str]] = None,
) -> List[Dict[str, Union[float, str]]]:

    multiscales_index = 0
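    # Parse the multiscale metadata stored on the group at `url` and collect
    # each dataset's spatial transform, keyed by its path.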
    multiscales_meta = [
        cosem.MultiscaleMeta(**entry) for entry in read(url).attrs["multiscales"]
    ]
    array_metas = {
        entry.path: entry.transform
        for entry in multiscales_meta[multiscales_index].datasets
    }

    # update the transform of the first element of multiscales
    base = array_metas[multiscales_meta[multiscales_index].datasets[0].path]

    return base
Example #6
def copy_crop(gt_path: str, cosemSheet: pd.DataFrame,
              classNameDict: Dict[str, Any]):
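    # Copy a ground-truth crop to its updated location, regenerating the label
    # list, pixel resolution, and spatial transform attributes from the sheet.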
    dataset_name = Path(gt_path).parts[3]
    cropname_matches = cropname_matcher.findall(gt_path)
    if len(cropname_matches) == 0:
        raise ValueError(
            f'Could not infer the crop name from {gt_path}. Crop names should take the form `crop130`.'
        )
    else:
        crop_name = cropname_matches[0]

    labelList = createLabelList(dataset_name, crop_name, cosemSheet,
                                classNameDict)
    offset = getOffset(dataset_name, crop_name, cosemSheet)
    base_resolution = getBaseResolution(dataset_name, crop_name, cosemSheet)
    annotation_resolution = getAnnotationResolution(dataset_name, crop_name,
                                                    cosemSheet)
    transform = SpatialTransform(
        scale=annotation_resolution[::-1],
        translate=[r * o for r, o in zip(base_resolution, offset)][::-1],
        axes=('x', 'y', 'z')[::-1],
        units=('nm', 'nm', 'nm'))
    pixr = PixelResolution(unit='nm', dimensions=annotation_resolution)
    new_path = update_path(gt_path)

    old_array = read(gt_path)
    new_attrs = {
        'pixelResolution': pixr.dict(),
        'transform': transform.dict(),
        **labelList
    }
    new_array = access(new_path,
                       shape=old_array.shape,
                       dtype=old_array.dtype,
                       mode='a')
    new_array[:] = old_array[:]
    new_array.attrs.update(**new_attrs)
Example #7
num_workers = 120


def rescale_and_invert(arr, min, max):
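    # Map the input intensity range (min, max) onto an inverted 8-bit range:
    # the brightest inputs become 0 and the darkest become 255.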
    return arr.map_blocks(rescale_intensity,
                          in_range=(min, max),
                          out_range=(255, 0),
                          dtype='uint8')


if __name__ == '__main__':

    source = '/groups/cellmap/cellmap/data/jrc_ctl-id8-1/jrc_ctl-id8-1.n5/volumes/raw'
    dest = '/groups/cellmap/cellmap/data/jrc_ctl-id8-1/jrc_ctl-id8-1.n5/volumes/raw_normalized'

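    # Open the source group and the destination group, and mirror the source
    # group's attributes onto the destination.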
    source_group = read(source)
    target_group = access(dest, mode='a')
    target_group.attrs.update(**source_group.attrs)

    with get_cluster() as clust, Client(clust) as cl:
        print(cl.cluster.dashboard_link)
        cl.cluster.scale(num_workers)
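        # Compute the global intensity range of the full-resolution array (s0)
        # on the cluster; these bounds are the input range for rescaling.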
        _arr = da.from_array(source_group['s0'],
                             chunks=[x * 4 for x in source_group['s0'].chunks])
        min, max = cl.compute((_arr.min(), _arr.max()), sync=True)

        for array_path, arr in source_group.arrays():
            arr_dest = access(dest + f'/{array_path}',
                              shape=arr.shape,
                              dtype='uint8',
                              chunks=arr.chunks,
                              mode='a')
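            # Assumed continuation (not in the original snippet): rescale this
            # level into the inverted uint8 range and write it to arr_dest.
            rescaled = rescale_and_invert(
                da.from_array(arr, chunks=arr.chunks), min, max)
            da.store(rescaled, arr_dest, lock=False)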
Example #8
import os

num_workers = 4
tpw = 2
chunk_locking = True
name = f'lsf_nw-{num_workers}_tpw-{tpw}_chunk-locking-{chunk_locking}'
levels = list(range(1, 6))
crop = (slice(8192),) * 3

def reducer(v, **kwargs):
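    # Downsampling reducer: a windowed mean accumulated in float32.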
    return np.mean(v, dtype='float32', **kwargs)

source_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/render/Sec30/v1_acquire_trimmed_align___20210413_194018/s0'
target_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/test_dask_down/'

store_chunks = read(source_path, storage_options={'normalize_keys': False}).chunks
read_chunks = (1024,) * 3

data = read_xarray(source_path, storage_options={'normalize_keys': False}, chunks=read_chunks, name='test_data')[crop]

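# Lazily build the multiscale pyramid by downsampling the cropped volume by a
# factor of 2 along each axis, keeping only the requested levels.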
multi = get(levels, multiscale(data, reducer, (2, 2, 2)))

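# Without chunk locking, ensure every in-memory chunk is at least as large as a
# stored chunk so that no two tasks write to the same chunk of the store.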
if not chunk_locking:
    for m in multi:
        m.data = ensure_minimum_chunksize(m.data, store_chunks)

multi_store = Multiscales(name, {f's{level}': m for level, m in zip(levels, multi)})

if __name__ == '__main__':
    with get_cluster(threads_per_worker=tpw) as cluster, Client(cluster) as cl: