Example #1
def splitblocks(
    h5path_in,
    dset_name,
    dataslices=None,
    blocksize=[500, 500, 500],
    margin=[20, 20, 20],
    blockrange=[],
    usempi=False,
    outputdir='',
    save_steps=False,
    protective=False,
):
    """"Convert a directory of tifs to an hdf5 stack."""

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)

    # Determine the outputpaths.
    basepath, h5path_dset = h5path_in.split('.h5/')
    datadir, fname = os.path.split(basepath)
    postfix = fname.split(dset_name)[-1]
    if not outputdir:
        blockdir = 'blocks_{:04d}'.format(blocksize[0])
        outputdir = os.path.join(datadir, blockdir)
    utils.mkdir_p(outputdir)
    fname = '{}_{}{}.h5'.format(dset_name, '{}', postfix)
    h5path_tpl = os.path.join(outputdir, fname, h5path_dset)

    # Open data for reading.
    h5_info = utils.h5_load(h5path_in, comm=mpi_info['comm'])
    h5file_in, ds_in, elsize, axlab = h5_info

    # Divide the data into a series of blocks.
    blocks = get_blocks(ds_in.shape, blocksize, margin, h5path_tpl, dataslices)
    if blockrange:
        blocks = blocks[blockrange[0]:blockrange[1]]
    series = np.array(range(0, len(blocks)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Write blocks to the outputfile(s).
    for blocknr in series:
        block = blocks[blocknr]
        write_block(ds_in, elsize, axlab, block)

    # Close the input file.
    try:
        h5file_in.close()
    except (ValueError, AttributeError, UnboundLocalError):
        pass
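
A minimal usage sketch (all paths and names below are placeholders, not from the source); with the default outputdir, the blocks land in a 'blocks_0500' directory next to the input file:

# Hypothetical call: split '/data/dataset_stack.h5/stack' into 500-cubed
# blocks with a 20-voxel overlap margin, one hdf5 file per block.
splitblocks(
    h5path_in='/data/dataset_stack.h5/stack',
    dset_name='stack',
    blocksize=[500, 500, 500],
    margin=[20, 20, 20],
)
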
def CC_2Dprops(
        h5path_labels,
        basename,
        map_propnames,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Map the labels/properties."""

    # check output paths
    if '.h5' in h5path_out:
        for propname in map_propnames:
            h5path_prop = os.path.join(h5path_out, propname)
            status, info = utils.h5_check(h5path_prop, protective)
            print(info)
            if status == "CANCELLED":
                return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_labels)

    # prepare mpi
    n_props = len(map_propnames)
    series = np.array(range(0, n_props), dtype=int)
    mpi_info = utils.get_mpi_info(usempi)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    fws = {}
    for i in series:
        propname = map_propnames[i]
        print("processing prop %s" % propname)

        nppath = '{}_{}.npy'.format(basename, propname)
        fws[propname] = np.load(nppath)

        # open data for writing
        h5path_prop = os.path.join(h5path_out, propname)
        h5file_prop, ds_prop = utils.h5_write(None, ds_in.shape,
                                              fws[propname].dtype,
                                              h5path_prop,
                                              element_size_um=elsize,
                                              axislabels=axlab,
                                              comm=mpi_info['comm'])

        ds_prop[:] = fws[propname][ds_in[:]]

        h5file_prop.close()

    # close and return
    h5file_in.close()
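
A minimal usage sketch; it assumes forward-map files named '<basename>_<propname>.npy' already exist (e.g. written by CC_2Dfilter below). Paths and property names are placeholders:

# Hypothetical call: write per-label 'area' and 'eccentricity' maps,
# one output dataset per property under h5path_out.
CC_2Dprops(
    h5path_labels='/data/volume_labels.h5/labels',
    basename='/data/volume_fw',
    map_propnames=['area', 'eccentricity'],
    h5path_out='/data/volume_props.h5',
)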
Example #3
def evaluate_overlaps(
        h5path_in,
        slicedim,
        offsets,
        threshold_overlap,
        do_map_labels=False,
        h5path_mm='',
        min_labelsize=0,
        close=None,
        relabel_from=0,
        usempi=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Check for slicewise overlaps between labels."""

    # prepare mpi  # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)

    # open data for reading
    h5file_in, ds_in, _, _ = utils.h5_load(h5path_in, comm=mpi_info['comm'])

    n_slices = ds_in.shape[slicedim] - offsets
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # merge overlapping neighbours
    labelsets = {}
    for i in series:
        print("processing slice {}".format(i))
        for j in range(1, offsets):

            data_section = utils.get_slice(ds_in, i, slicedim)
            nb_section = utils.get_slice(ds_in, i+j, slicedim)

            labelsets = merge_neighbours(labelsets,
                                         data_section, nb_section,
                                         threshold_overlap)

    # dump the list of overlapping neighbours in a pickle
    h5root = h5file_in.filename.split('.h5')[0]
    ds_out_name = os.path.split(h5path_out)[1]
    mname = "host-{}_rank-{:02d}".format(socket.gethostname(), mpi_info['rank'])
    lsroot = '{}_{}_{}'.format(h5root, ds_out_name, mname)
    utils.write_labelsets(labelsets, lsroot, ['pickle'])

    h5file_in.close()

    # wait for all processes to finish
    if mpi_info['enabled']:
        mpi_info['comm'].Barrier()

    # let one process combine the overlaps found in the separate processes
    if mpi_info['rank'] == 0:
        lsroot = '{}_{}'.format(h5root, ds_out_name)
        match = "{}_host*_rank*.pickle".format(lsroot)
        infiles = glob.glob(match)
        for ppath in infiles:
            with open(ppath, "rb") as f:
                newlabelsets = pickle.load(f)
            for lsk, lsv in newlabelsets.items():
                labelsets = utils.classify_label_set(labelsets, lsv, lsk)

        utils.write_labelsets(labelsets, lsroot, ['txt', 'pickle'])

        if do_map_labels:
            map_labels(h5path_in, h5path_mm,
                       min_labelsize, close, relabel_from,
                       h5path_out, save_steps, protective)
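
A minimal usage sketch; with usempi=True this would typically be launched through mpirun so each rank handles a subset of slices (that launch command is an assumption, not shown in the source). Paths are placeholders:

# Hypothetical call: compare each z-slice with its neighbour (offsets=2
# checks slice i against i+1) and merge labels overlapping by >= 50%.
evaluate_overlaps(
    h5path_in='/data/volume_labels.h5/labels',
    slicedim=0,
    offsets=2,
    threshold_overlap=0.50,
    h5path_out='/data/volume_labels.h5/labels_merged',
)
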
def CC_2Dfilter(
        h5path_labels,
        map_propnames,
        criteria,
        h5path_int='',
        slicedim=0,
        usempi=False,
        outputfile='',
        protective=False,
        ):
    """Get forward mapping of labels/properties filtered by criteria."""

    (min_area,
     max_area,
     max_intensity_mb,
     max_eccentricity,
     min_solidity,
     min_euler_number,
     min_extent) = criteria

    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)

    # TODO: check output path

    # open data for reading
    h5file_mm, ds_mm, _, _ = utils.h5_load(h5path_labels, comm=mpi_info['comm'])
    if h5path_int:
        h5file_mb, ds_mb, _, _ = utils.h5_load(h5path_int, comm=mpi_info['comm'])
    else:
        ds_mb = None
    # mask used as intensity image in the mean_intensity criterion

    # get the maximum labelvalue in the input
    root = h5path_labels.split('.h5')[0]
    maxlabel = get_maxlabel(root, ds_mm)

    # prepare mpi
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]
        if mpi_info['rank'] == 0:
            fws_reduced = np.zeros((maxlabel + 1, len(map_propnames)),
                                   dtype='float')
        else:
            fws_reduced = None

    fws = np.zeros((maxlabel + 1, len(map_propnames)),
                   dtype='float')

    mapall = criteria.count(None) == len(criteria)

    # pick labels observing the constraints
    go2D = ((max_eccentricity is not None) or
            (min_solidity is not None) or
            (min_euler_number is not None) or
            mapall)
    if go2D:

        for i in series:
            slcMM = utils.get_slice(ds_mm, i, slicedim)
            if h5path_int:
                slcMB = utils.get_slice(ds_mb, i, slicedim)  # , 'bool'
            else:
                slcMB = None
            fws = check_constraints(slcMM, fws, map_propnames,
                                    criteria, slcMB, mapall)
        if mpi_info['enabled']:
            mpi_info['comm'].Reduce(fws, fws_reduced, op=MPI.MAX, root=0)
        else:
            fws_reduced = fws

    else:

        if mpi_info['rank'] == 0:
            fws = check_constraints(ds_mm, fws, map_propnames,
                                    criteria, ds_mb, mapall)
            fws_reduced = fws

    # write the forward maps to a numpy vector
    if mpi_info['rank'] == 0:
        slc = n_slices // 2
        slcMM = ds_mm[slc, :, :]
        slcMB = ds_mb[slc, :, :] if h5path_int else None
        datatypes = get_prop_datatypes(slcMM, map_propnames, slcMB)
        for i, propname in enumerate(map_propnames):
            root = outputfile.split('.h5')[0]
            nppath = '{}_{}.npy'.format(root, propname)
            outarray = np.array(fws_reduced[:, i], dtype=datatypes[i])
            np.save(nppath, outarray)

    # close and return
    h5file_mm.close()
    if h5path_int:
        h5file_mb.close()

    if mpi_info['rank'] == 0:
        return outarray
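
A minimal usage sketch; criteria is a fixed-order 7-tuple (min_area, max_area, max_intensity_mb, max_eccentricity, min_solidity, min_euler_number, min_extent), with None disabling a constraint. Property names and paths are placeholders:

# Hypothetical call: keep labels with area in [10, 10000] and
# eccentricity below 0.9; the .npy forward maps are written next
# to outputfile for CC_2Dprops to pick up.
criteria = (10, 10000, None, 0.9, None, None, None)
CC_2Dfilter(
    h5path_labels='/data/volume_labels.h5/labels',
    map_propnames=['label', 'area', 'eccentricity'],
    criteria=criteria,
    outputfile='/data/volume_fw.h5',
)
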
def CC_2D(
        h5path_in,
        h5path_mask='',
        slicedim=0,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Label connected components in all slices."""

    # check output path
    if '.h5' in h5path_out:
        status, info = utils.h5_check(h5path_out, protective)
        print(info)
        if status == "CANCELLED":
            return

    # open data for reading
    h5file_mm, ds_mm, elsize, axlab = utils.h5_load(h5path_in)
    if h5path_mask:
        h5file_md, ds_md, _, _ = utils.h5_load(h5path_mask)

    # prepare mpi  # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_mm.shape, 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # slicewise labeling
    maxlabel = 0
    for i in series:

        slcMM = utils.get_slice(ds_mm, i, slicedim, 'bool')
        if h5path_mask:
            slcMD = utils.get_slice(ds_md, i, slicedim, 'bool')
            labels, num = label(np.logical_and(~slcMM, slcMD), return_num=True)
        else:
            labels, num = label(~slcMM, return_num=True)
        print("found %d labels in slice %d" % (num, i))

        if mpi_info['enabled']:
            # NOTE: assumed max number of labels in slice is 10000
            labels[~slcMM] += 10000 * i
            if i == n_slices - 1:
                maxlabel = np.amax(labels)
        else:
            labels[~slcMM] += maxlabel
            maxlabel += num

        if slicedim == 0:
            ds_out[i, :, :] = labels
        elif slicedim == 1:
            ds_out[:, i, :] = labels
        elif slicedim == 2:
            ds_out[:, :, i] = labels

    # save the maximum labelvalue in the dataset
    print("found %d labels" % (maxlabel))
    if mpi_info['rank'] == mpi_info['size'] - 1:
        root = h5path_out.split('.h5')[0]
        fpath = root + '.npy'
        np.save(fpath, np.array([maxlabel]))

    # close and return
    try:
        h5file_mm.close()
        h5file_out.close()
        if h5path_mask:
            h5file_md.close()
    except (ValueError, AttributeError):
        return ds_out
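
A minimal usage sketch; the input is read as a boolean mask per slice and components are labeled in its complement, with the maximum label value saved alongside the output as a .npy file. Paths are placeholders:

# Hypothetical call: label 2D connected components in every z-slice.
CC_2D(
    h5path_in='/data/volume_mask.h5/mask',
    slicedim=0,
    h5path_out='/data/volume_labels.h5/labels',
)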
Example #6
def downsample_slices(
        inputdir,
        outputdir,
        regex='*.tif',
        ds_factor=4,
        dataslices=None,
        usempi=False,
        protective=False,
        ):
    """Downsample a series of 2D images."""

    if '.h5' in outputdir:
        status, info = utils.h5_check(outputdir, protective)
        print(info)
        if status == "CANCELLED":
            return

    if '.h5' in inputdir:  # FIXME: assumed zyx for now

        h5file_in, ds_in, elsize, axlab = utils.h5_load(inputdir)
        zyxdims = ds_in.shape

    else:

        # Get the list of input filepaths.
        files = sorted(glob.glob(os.path.join(inputdir, regex)))
        zyxdims = [len(files)] + list(io.imread(files[0]).shape)
        axlab = 'zyx'

    if '.h5' in outputdir:

        # NOTE: h5 output currently assumes h5 input (elsize/ds_in from above)
        elsize[1] = elsize[1] / ds_factor
        elsize[2] = elsize[2] / ds_factor
        outsize = [ds_in.shape[0],
                   ds_in.shape[1] // ds_factor,
                   ds_in.shape[2] // ds_factor]
        h5file_out, ds_out = utils.h5_write(None, outsize, ds_in.dtype,
                                            outputdir,
                                            element_size_um=elsize,
                                            axislabels=axlab)

    else:

        # Get the list of output filepaths.
        utils.mkdir_p(outputdir)
        outpaths = []
        for fpath in files:
            root, ext = os.path.splitext(fpath)
            tail = os.path.split(root)[1]
            outpaths.append(os.path.join(outputdir, tail + ext))
        # Check if any output paths already exist.
        status = utils.output_check_dir(outpaths, protective)
        if status == "CANCELLED":
            return

    # Get the slice objects for the input data.
    slices = utils.get_slice_objects_prc(dataslices, zyxdims)
    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(slices[0].start,
                            slices[0].stop,
                            slices[0].step), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Downsample and save the images.
    for slc in series:
        if '.h5' in inputdir:
            sub = ds_in[slc, slices[1], slices[2]]
        else:
            sub = io.imread(files[slc])[slices[1], slices[2]]

        img_ds = resize(sub, (sub.shape[0] // ds_factor,
                              sub.shape[1] // ds_factor))

        if '.h5' in outputdir:
            ds_out[slc, :, :] = img_ds
        else:
            imsave(outpaths[slc], img_ds)

    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError, UnboundLocalError):
        pass
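
A minimal usage sketch for the directory-of-tifs case; paths are placeholders:

# Hypothetical call: downsample every tif in-plane by a factor of 4,
# writing same-named files to the output directory.
downsample_slices(
    inputdir='/data/slices',
    outputdir='/data/slices_ds4',
    regex='*.tif',
    ds_factor=4,
)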
Example #7
def mergeblocks(
    h5paths_in,
    blockoffset=[0, 0, 0],
    blocksize=[],
    margin=[0, 0, 0],
    fullsize=[],
    is_labelimage=False,
    relabel=False,
    neighbourmerge=False,
    save_fwmap=False,
    blockreduce=[],
    func='np.amax',
    datatype='',
    usempi=False,
    h5path_out='',
    save_steps=False,
    protective=False,
):
    """Merge blocks of data into a single hdf5 file."""

    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(0, len(h5paths_in)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # TODO: save_steps
    # check output paths
    outpaths = {'out': h5path_out}
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5paths_in[0],
                                                    comm=mpi_info['comm'])
    try:
        ndim = ds_in.ndim
    except AttributeError:
        ndim = len(ds_in.dims)

    # get the size of the outputfile
    # TODO: option to derive fullsize from dset_names?
    if blockreduce:
        datasize = np.subtract(fullsize, blockoffset)
        outsize = [
            int(np.ceil(d / float(b)))
            for d, b in zip(datasize, blockreduce)
        ]
        elsize = [e * b for e, b in zip(elsize, blockreduce)]
    else:  # FIXME: 'zyx(c)' stack assumed
        outsize = np.subtract(fullsize, blockoffset)

    if ndim == 4:
        outsize = list(outsize) + [ds_in.shape[3]]  # TODO: flexible insert

    datatype = datatype or ds_in.dtype
    chunks = ds_in.chunks or None

    h5file_in.close()

    # open data for writing
    h5file_out, ds_out = utils.h5_write(data=None,
                                        shape=outsize,
                                        dtype=datatype,
                                        h5path_full=h5path_out,
                                        chunks=chunks,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # merge the datasets
    maxlabel = 0
    for i in series:
        h5path_in = h5paths_in[i]
        try:
            maxlabel = process_block(h5path_in, ndim, blockreduce, func,
                                     blockoffset, blocksize, margin, fullsize,
                                     ds_out, is_labelimage, relabel,
                                     neighbourmerge, save_fwmap, maxlabel,
                                     usempi, mpi_info)
            print('processed block {:03d}: {}'.format(i, h5path_in))
        except Exception as e:
            print('failed block {:03d}: {}'.format(i, h5path_in))
            print(e)

    # close and return
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
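
A minimal usage sketch; it assumes blocks written by splitblocks above (the block-file naming is a placeholder) and that fullsize is the zyx shape of the assembled volume:

# Hypothetical call: merge four block files back into one dataset,
# relabeling so label values stay unique across blocks.
blockfiles = ['/data/blocks_0500/stack_{:03d}.h5/stack'.format(b)
              for b in range(4)]
mergeblocks(
    h5paths_in=blockfiles,
    blocksize=[500, 500, 500],
    margin=[20, 20, 20],
    fullsize=[500, 1000, 1000],
    is_labelimage=True,
    relabel=True,
    h5path_out='/data/volume_merged.h5/stack',
)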
Example #8
def series2stack(
        inputdir,
        regex='*.tif',
        element_size_um=[None, None, None],
        outlayout='zyx',
        datatype='',
        chunksize=[20, 20, 20],
        dataslices=None,
        usempi=False,
        outputformats=['.h5'],
        outputpath='',
        save_steps=False,
        protective=False,
        ):
    """"Convert a directory of tifs to an hdf5 stack."""

    # Check if any output paths already exist.
    if '.h5' in outputformats:
        outpaths = {'out': outputpath}
        status = utils.output_check(outpaths, save_steps, protective)
        if status == "CANCELLED":
            return

    # Get the list of input filepaths.
    files = sorted(glob.glob(os.path.join(inputdir, regex)))

    # Get some metadata from the inputfiles
    zyxdims, datatype, element_size_um = get_metadata(files,
                                                      datatype,
                                                      outlayout,
                                                      element_size_um)

    # (plane, row, column) indexing to outlayout (where prc -> zyx).
    in2out = ['zyx'.index(o) for o in outlayout]

    # Get the properties of the output dataset.
    slices = utils.get_slice_objects_prc(dataslices, zyxdims)  # prc-order
    files = files[slices[0]]
    datashape_out_prc = (len(files),
                         len(range(*slices[1].indices(slices[1].stop))),
                         len(range(*slices[2].indices(slices[2].stop))))
    datashape_out = [datashape_out_prc[i] for i in in2out]

    # Reshape the file list into a list of blockwise file lists.
    scs = chunksize[outlayout.index('z')]  # chunksize slice dimension
    files_blocks = list(zip(*[iter(files)] * scs))
    rem = len(files) % scs
    if rem:
        files_blocks += [tuple(files[-rem:])]

    # Get slice objects for every output block.
    slices_out_prc = [[slice(bnr * scs, bnr * scs + scs),
                       slice(0, datashape_out_prc[1]),
                       slice(0, datashape_out_prc[2])]
                      for bnr in range(0, len(files_blocks))]
    slices_out = [[sliceset_prc[i] for i in in2out]
                  for sliceset_prc in slices_out_prc]

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(0, len(files_blocks)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Open the outputfile for writing and create the dataset or output array.
    if '.h5' in outputformats:
        h5file_out, ds_out = utils.h5_write(None, datashape_out, datatype,
                                            outputpath,
                                            element_size_um=element_size_um,
                                            axislabels=outlayout,
                                            chunks=tuple(chunksize),
                                            comm=mpi_info['comm'])
        outdir = os.path.dirname(outputpath.split('.h5')[0])
    else:
        ds_out = None
        outdir = outputpath

    # Write blocks of 2D images to the outputfile(s).
    for blocknr in series:
        if '.h5' in outputformats:
            ds_out = process_block(files_blocks[blocknr], ds_out,
                                   slices, slices_out[blocknr], in2out,
                                   outputformats, outdir)
        else:
            process_slices(files_blocks[blocknr],
                           slices, slices_out[blocknr],
                           outputformats, outdir, datatype)

    # Close the h5 files or return the output array.
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
    except UnboundLocalError:
        pass
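
A minimal usage sketch; element sizes, chunk shape, and paths are placeholders:

# Hypothetical call: stack a directory of tifs into a chunked hdf5
# dataset with zyx layout.
series2stack(
    inputdir='/data/slices',
    regex='*.tif',
    element_size_um=[0.05, 0.007, 0.007],
    outlayout='zyx',
    chunksize=[20, 20, 20],
    outputpath='/data/volume.h5/stack',
)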