def splitblocks(
        h5path_in,
        dset_name,
        dataslices=None,
        blocksize=[500, 500, 500],
        margin=[20, 20, 20],
        blockrange=[],
        usempi=False,
        outputdir='',
        save_steps=False,
        protective=False,
        ):
    """Split an hdf5 dataset into blocks with margins."""

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)

    # Determine the outputpaths.
    basepath, h5path_dset = h5path_in.split('.h5/')
    datadir, fname = os.path.split(basepath)
    postfix = fname.split(dset_name)[-1]
    if not outputdir:
        blockdir = 'blocks_{:04d}'.format(blocksize[0])
        outputdir = os.path.join(datadir, blockdir)
    utils.mkdir_p(outputdir)
    fname = '{}_{}{}.h5'.format(dset_name, '{}', postfix)
    h5path_tpl = os.path.join(outputdir, fname, h5path_dset)

    # Open data for reading.
    h5_info = utils.h5_load(h5path_in, comm=mpi_info['comm'])
    h5file_in, ds_in, elsize, axlab = h5_info

    # Divide the data into a series of blocks.
    blocks = get_blocks(ds_in.shape, blocksize, margin,
                        h5path_tpl, dataslices)
    if blockrange:
        blocks = blocks[blockrange[0]:blockrange[1]]
    series = np.array(range(0, len(blocks)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Write blocks to the outputfile(s).
    for blocknr in series:
        block = blocks[blocknr]
        write_block(ds_in, elsize, axlab, block)

    # Close the h5 files or return the output array.
    try:
        h5file_in.close()
    except (ValueError, AttributeError, UnboundLocalError):
        pass
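
# Example usage (a hedged sketch: the path and dataset name below are
# illustrative assumptions, not files shipped with this module). With the
# defaults, a call like
#
#     splitblocks('/data/stack.h5/raw', 'raw')
#
# would split /data/stack.h5/raw into 500x500x500-voxel blocks with a
# 20-voxel margin on each side, writing files named raw_{blockid}.h5 (the
# blockid is filled in per block) under /data/blocks_0500/. Run under
# mpirun with usempi=True to distribute the blocks over processes.
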
def CC_2Dprops(
        h5path_labels,
        basename,
        map_propnames,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Map the labels/properties."""

    # check output paths (one dataset per property)
    if '.h5' in h5path_out:
        for propname in map_propnames:
            h5path_prop = os.path.join(h5path_out, propname)
            status, info = utils.h5_check(h5path_prop, protective)
            print(info)
            if status == "CANCELLED":
                return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_labels)

    # prepare mpi
    n_props = len(map_propnames)
    series = np.array(range(0, n_props), dtype=int)
    mpi_info = utils.get_mpi_info(usempi)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    fws = {}
    for i in series:
        propname = map_propnames[i]
        print("processing prop %s" % propname)
        nppath = '{}_{}.npy'.format(basename, propname)
        fws[propname] = np.load(nppath)

        # open data for writing
        h5path_prop = os.path.join(h5path_out, propname)
        h5file_prop, ds_prop = utils.h5_write(None, ds_in.shape,
                                              fws[propname].dtype,
                                              h5path_prop,
                                              element_size_um=elsize,
                                              axislabels=axlab,
                                              comm=mpi_info['comm'])

        # apply the forward map and write the property volume
        ds_prop[:] = fws[propname][ds_in[:]]
        h5file_prop.close()

    # close and return
    h5file_in.close()
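
# Example usage (a hedged sketch; the paths and property names are
# illustrative assumptions). Given forward maps saved by CC_2Dfilter as
# <basename>_<propname>.npy, a call like
#
#     CC_2Dprops('labels.h5/labels_2D', 'labels_2D_fw',
#                ['area', 'eccentricity'],
#                h5path_out='labels.h5/props')
#
# would write one dataset per property (props/area, props/eccentricity),
# with each voxel carrying the property value of its label.
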
def evaluate_overlaps(
        h5path_in,
        slicedim,
        offsets,
        threshold_overlap,
        do_map_labels=False,
        h5path_mm='',
        min_labelsize=0,
        close=None,
        relabel_from=0,
        usempi=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Check for slicewise overlaps between labels."""

    # prepare mpi
    # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)

    # open data for reading
    h5file_in, ds_in, _, _ = utils.h5_load(h5path_in, comm=mpi_info['comm'])
    n_slices = ds_in.shape[slicedim] - offsets
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # merge overlapping neighbours
    labelsets = {}
    for i in series:
        print("processing slice {}".format(i))
        data_section = utils.get_slice(ds_in, i, slicedim)
        for j in range(1, offsets):
            nb_section = utils.get_slice(ds_in, i + j, slicedim)
            labelsets = merge_neighbours(labelsets,
                                         data_section, nb_section,
                                         threshold_overlap)

    # dump the list of overlapping neighbours in a pickle
    h5root = h5file_in.filename.split('.h5')[0]
    ds_out_name = os.path.split(h5path_out)[1]
    mname = "host-{}_rank-{:02d}".format(socket.gethostname(),
                                         mpi_info['rank'])
    lsroot = '{}_{}_{}'.format(h5root, ds_out_name, mname)
    utils.write_labelsets(labelsets, lsroot, ['pickle'])

    h5file_in.close()

    # wait for all processes to finish
    if mpi_info['enabled']:
        mpi_info['comm'].Barrier()

    # let one process combine the overlaps found in the separate processes
    if mpi_info['rank'] == 0:
        lsroot = '{}_{}'.format(h5root, ds_out_name)
        match = "{}_host*_rank*.pickle".format(lsroot)
        infiles = glob.glob(match)
        for ppath in infiles:
            with open(ppath, "rb") as f:  # pickles are read in binary mode
                newlabelsets = pickle.load(f)
            for lsk, lsv in newlabelsets.items():
                labelsets = utils.classify_label_set(labelsets, lsv, lsk)

        utils.write_labelsets(labelsets, lsroot, ['txt', 'pickle'])

        if do_map_labels:
            map_labels(h5path_in, h5path_mm,
                       min_labelsize, close, relabel_from,
                       h5path_out, save_steps, protective)
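
# Example usage (a hedged sketch; the paths and threshold are illustrative
# assumptions):
#
#     evaluate_overlaps('labels.h5/labels_2D', 0, 2, 0.50,
#                       h5path_out='labels.h5/labels_3D')
#
# would compare each z-slice (slicedim=0) with the next two slices
# (offsets=2), recording label pairs whose overlap exceeds the 0.50
# threshold in per-process picklefiles that rank 0 then combines.
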
def CC_2Dfilter(
        h5path_labels,
        map_propnames,
        criteria,
        h5path_int='',
        slicedim=0,
        usempi=False,
        outputfile='',
        protective=False,
        ):
    """Get forward mapping of labels/properties filtered by criteria."""

    (min_area,
     max_area,
     max_intensity_mb,
     max_eccentricity,
     min_solidity,
     min_euler_number,
     min_extent) = criteria

    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)

    # TODO: check output path

    # open data for reading
    h5file_mm, ds_mm, _, _ = utils.h5_load(h5path_labels,
                                           comm=mpi_info['comm'])
    if h5path_int:
        h5file_mb, ds_mb, _, _ = utils.h5_load(h5path_int,
                                               comm=mpi_info['comm'])
    else:
        ds_mb = None  # mask used as intensity image in mean_intensity criterion

    # get the maximum labelvalue in the input
    root = h5path_labels.split('.h5')[0]
    maxlabel = get_maxlabel(root, ds_mm)

    # prepare mpi
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]
    if mpi_info['rank'] == 0:
        fws_reduced = np.zeros((maxlabel + 1, len(map_propnames)),
                               dtype='float')
    else:
        fws_reduced = None

    fws = np.zeros((maxlabel + 1, len(map_propnames)), dtype='float')

    mapall = criteria.count(None) == len(criteria)

    # pick labels observing the constraints
    go2D = ((max_eccentricity is not None) or
            (min_solidity is not None) or
            (min_euler_number is not None) or
            mapall)
    if go2D:

        for i in series:
            slcMM = utils.get_slice(ds_mm, i, slicedim)
            if h5path_int:
                slcMB = utils.get_slice(ds_mb, i, slicedim)  # , 'bool'
            else:
                slcMB = None
            fws = check_constraints(slcMM, fws, map_propnames,
                                    criteria, slcMB, mapall)
        if mpi_info['enabled']:
            mpi_info['comm'].Reduce(fws, fws_reduced, op=MPI.MAX, root=0)
        else:
            fws_reduced = fws

    else:

        if mpi_info['rank'] == 0:
            fws = check_constraints(ds_mm, fws, map_propnames,
                                    criteria, ds_mb, mapall)
            fws_reduced = fws

    # write the forward maps to a numpy vector
    if mpi_info['rank'] == 0:
        slc = int(n_slices / 2)
        slcMM = ds_mm[slc, :, :]
        slcMB = ds_mb[slc, :, :] if h5path_int else None
        datatypes = get_prop_datatypes(slcMM, map_propnames, slcMB)
        for i, propname in enumerate(map_propnames):
            root = outputfile.split('.h5')[0]
            nppath = '{}_{}.npy'.format(root, propname)
            outarray = np.array(fws_reduced[:, i], dtype=datatypes[i])
            np.save(nppath, outarray)

    # close and return
    h5file_mm.close()
    if h5path_int:
        h5file_mb.close()
    if mpi_info['rank'] == 0:
        return outarray
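
# Example usage (a hedged sketch; the paths and thresholds are illustrative
# assumptions). criteria is the 7-tuple (min_area, max_area,
# max_intensity_mb, max_eccentricity, min_solidity, min_euler_number,
# min_extent); pass None to skip a criterion:
#
#     criteria = (10, 1500, None, 0.9, None, None, None)
#     CC_2Dfilter('labels.h5/labels_2D', ['label', 'area'], criteria,
#                 outputfile='labels_2D_fw.h5')
#
# writes one <root>_<propname>.npy forward map per property, e.g.
# labels_2D_fw_area.npy.
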
def CC_2D(
        h5path_in,
        h5path_mask='',
        slicedim=0,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Label connected components in all slices."""

    # check output path
    if '.h5' in h5path_out:
        status, info = utils.h5_check(h5path_out, protective)
        print(info)
        if status == "CANCELLED":
            return

    # open data for reading
    h5file_mm, ds_mm, elsize, axlab = utils.h5_load(h5path_in)
    if h5path_mask:
        h5file_md, ds_md, _, _ = utils.h5_load(h5path_mask)

    # prepare mpi
    # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_mm.shape, 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # slicewise labeling
    maxlabel = 0
    for i in series:

        slcMM = utils.get_slice(ds_mm, i, slicedim, 'bool')
        if h5path_mask:
            slcMD = utils.get_slice(ds_md, i, slicedim, 'bool')
            labels, num = label(np.logical_and(~slcMM, slcMD),
                                return_num=True)
        else:
            labels, num = label(~slcMM, return_num=True)
        print("found %d labels in slice %d" % (num, i))

        if mpi_info['enabled']:
            # NOTE: assumed max number of labels per slice is 10000
            labels[~slcMM] += 10000 * i
            if i == n_slices - 1:
                maxlabel = np.amax(labels)
        else:
            labels[~slcMM] += maxlabel
            maxlabel += num

        if slicedim == 0:
            ds_out[i, :, :] = labels
        elif slicedim == 1:
            ds_out[:, i, :] = labels
        elif slicedim == 2:
            ds_out[:, :, i] = labels

    # save the maximum labelvalue in the dataset
    print("found %d labels" % (maxlabel))
    if mpi_info['rank'] == mpi_info['size'] - 1:
        root = h5path_out.split('.h5')[0]
        fpath = root + '.npy'
        np.save(fpath, np.array([maxlabel]))

    # close and return
    try:
        h5file_mm.close()
        h5file_out.close()
        if h5path_mask:
            h5file_md.close()
    except (ValueError, AttributeError):
        return ds_out
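
# Example usage (a hedged sketch; the paths are illustrative assumptions):
#
#     CC_2D('maskMM.h5/stack', h5path_mask='maskDS.h5/stack',
#           h5path_out='labels.h5/labels_2D')
#
# labels the complement of the maskMM volume slice-by-slice along z
# (slicedim=0), restricted to the maskDS region, and saves the maximum
# label value alongside the output as labels.npy.
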
def downsample_slices(
        inputdir,
        outputdir,
        regex='*.tif',
        ds_factor=4,
        dataslices=None,
        usempi=False,
        protective=False,
        ):
    """Downsample a series of 2D images."""

    if '.h5' in outputdir:
        status, info = utils.h5_check(outputdir, protective)
        print(info)
        if status == "CANCELLED":
            return

    if '.h5' in inputdir:  # FIXME: assumed zyx for now
        h5file_in, ds_in, elsize, axlab = utils.h5_load(inputdir)
        zyxdims = ds_in.shape
    else:
        # Get the list of input filepaths.
        files = sorted(glob.glob(os.path.join(inputdir, regex)))
        zyxdims = [len(files)] + list(io.imread(files[0]).shape)
        axlab = 'zyx'

    if '.h5' in outputdir:
        # Downsampling enlarges the physical size of each voxel.
        elsize[1] = elsize[1] * ds_factor
        elsize[2] = elsize[2] * ds_factor
        outsize = [ds_in.shape[0],
                   ds_in.shape[1] // ds_factor,
                   ds_in.shape[2] // ds_factor]
        h5file_out, ds_out = utils.h5_write(None, outsize, ds_in.dtype,
                                            outputdir,
                                            element_size_um=elsize,
                                            axislabels=axlab)
    else:
        # Get the list of output filepaths.
        utils.mkdir_p(outputdir)
        outpaths = []
        for fpath in files:
            root, ext = os.path.splitext(fpath)
            tail = os.path.split(root)[1]
            outpaths.append(os.path.join(outputdir, tail + ext))
        # Check if any output paths already exist.
        status = utils.output_check_dir(outpaths, protective)
        if status == "CANCELLED":
            return

    # Get the slice objects for the input data.
    slices = utils.get_slice_objects_prc(dataslices, zyxdims)

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(slices[0].start, slices[0].stop,
                            slices[0].step), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Downsample and save the images.
    for slc in series:
        if '.h5' in inputdir:
            sub = ds_in[slc, slices[1], slices[2]]
        else:
            sub = io.imread(files[slc])[slices[1], slices[2]]
        img_ds = resize(sub, (sub.shape[0] // ds_factor,
                              sub.shape[1] // ds_factor))
        if '.h5' in outputdir:
            ds_out[slc, :, :] = img_ds
        else:
            imsave(outpaths[slc], img_ds)
            # downsample_image(outpaths[slc], sub, ds_factor)

    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError, UnboundLocalError):
        pass
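
# Example usage (a hedged sketch; the directories are illustrative
# assumptions):
#
#     downsample_slices('tifs/', 'tifs_ds4/', regex='*.tif', ds_factor=4)
#
# reads every tif in tifs/ and writes a 4x in-plane downsampled copy of
# each slice to tifs_ds4/ under the same filename. Both arguments also
# accept '.h5' paths to read from or write to hdf5 stacks instead.
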
def mergeblocks(
        h5paths_in,
        blockoffset=[0, 0, 0],
        blocksize=[],
        margin=[0, 0, 0],
        fullsize=[],
        is_labelimage=False,
        relabel=False,
        neighbourmerge=False,
        save_fwmap=False,
        blockreduce=[],
        func='np.amax',
        datatype='',
        usempi=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Merge blocks of data into a single hdf5 file."""

    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(0, len(h5paths_in)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # TODO: save_steps
    # check output paths
    outpaths = {'out': h5path_out}
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5paths_in[0],
                                                    comm=mpi_info['comm'])
    try:
        ndim = ds_in.ndim
    except AttributeError:
        ndim = len(ds_in.dims)

    # get the size of the outputfile
    # TODO: option to derive fullsize from dset_names?
    if blockreduce:
        datasize = np.subtract(fullsize, blockoffset)
        outsize = [int(np.ceil(d / float(b)))
                   for d, b in zip(datasize, blockreduce)]
        elsize = [e * b for e, b in zip(elsize, blockreduce)]
    else:  # FIXME: 'zyx(c)' stack assumed
        outsize = np.subtract(fullsize, blockoffset)

    if ndim == 4:
        outsize = list(outsize) + [ds_in.shape[3]]  # TODO: flexible insert

    datatype = datatype or ds_in.dtype
    chunks = ds_in.chunks or None

    h5file_in.close()

    # open data for writing
    h5file_out, ds_out = utils.h5_write(data=None,
                                        shape=outsize, dtype=datatype,
                                        h5path_full=h5path_out,
                                        chunks=chunks,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # merge the datasets
    maxlabel = 0
    for i in series:
        h5path_in = h5paths_in[i]
        try:
            maxlabel = process_block(h5path_in, ndim, blockreduce, func,
                                     blockoffset, blocksize, margin,
                                     fullsize, ds_out, is_labelimage,
                                     relabel, neighbourmerge, save_fwmap,
                                     maxlabel, usempi, mpi_info)
            print('processed block {:03d}: {}'.format(i, h5path_in))
        except Exception as e:
            print('failed block {:03d}: {}'.format(i, h5path_in))
            print(e)

    # close and return
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
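
# Example usage (a hedged sketch; the paths and fullsize are illustrative
# assumptions). To stitch blocks produced by splitblocks back into a single
# volume of the original size:
#
#     blockfiles = sorted(glob.glob('blocks_0500/raw_*.h5'))
#     mergeblocks(['{}/raw'.format(b) for b in blockfiles],
#                 margin=[20, 20, 20], fullsize=[100, 2000, 2000],
#                 h5path_out='merged.h5/raw')
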
def series2stack(
        inputdir,
        regex='*.tif',
        element_size_um=[None, None, None],
        outlayout='zyx',
        datatype='',
        chunksize=[20, 20, 20],
        dataslices=None,
        usempi=False,
        outputformats=['.h5'],
        outputpath='',
        save_steps=False,
        protective=False,
        ):
    """Convert a directory of tifs to an hdf5 stack."""

    # Check if any output paths already exist.
    if '.h5' in outputformats:
        outpaths = {'out': outputpath}
        status = utils.output_check(outpaths, save_steps, protective)
        if status == "CANCELLED":
            return

    # Get the list of input filepaths.
    files = sorted(glob.glob(os.path.join(inputdir, regex)))

    # Get some metadata from the inputfiles.
    zyxdims, datatype, element_size_um = get_metadata(files, datatype,
                                                      outlayout,
                                                      element_size_um)

    # (plane, row, column) indexing to outlayout (where prc -> zyx).
    in2out = ['zyx'.index(o) for o in outlayout]

    # Get the properties of the output dataset.
    slices = utils.get_slice_objects_prc(dataslices, zyxdims)  # prc-order
    files = files[slices[0]]
    datashape_out_prc = (len(files),
                         len(range(*slices[1].indices(slices[1].stop))),
                         len(range(*slices[2].indices(slices[2].stop))))
    datashape_out = [datashape_out_prc[i] for i in in2out]

    # Reshape the file list into a list of blockwise file lists.
    scs = chunksize[outlayout.index('z')]  # chunksize slice dimension
    files_blocks = list(zip(*[iter(files)] * scs))
    rem = len(files) % scs
    if rem:
        files_blocks += [tuple(files[-rem:])]

    # Get slice objects for every output block.
    slices_out_prc = [[slice(bnr * scs, bnr * scs + scs),
                       slice(0, datashape_out_prc[1]),
                       slice(0, datashape_out_prc[2])]
                      for bnr in range(0, len(files_blocks))]
    slices_out = [[sliceset_prc[i] for i in in2out]
                  for sliceset_prc in slices_out_prc]

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(0, len(files_blocks)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Open the outputfile for writing and create the dataset or output array.
    if '.h5' in outputformats:
        h5file_out, ds_out = utils.h5_write(None, datashape_out, datatype,
                                            outputpath,
                                            element_size_um=element_size_um,
                                            axislabels=outlayout,
                                            chunks=tuple(chunksize),
                                            comm=mpi_info['comm'])
        outdir = os.path.dirname(outputpath.split('.h5')[0])
    else:
        ds_out = None
        outdir = outputpath

    # Write blocks of 2D images to the outputfile(s).
    for blocknr in series:
        if '.h5' in outputformats:
            ds_out = process_block(files_blocks[blocknr], ds_out,
                                   slices, slices_out[blocknr], in2out,
                                   outputformats, outdir)
        else:
            process_slices(files_blocks[blocknr],
                           slices, slices_out[blocknr],
                           outputformats, outdir, datatype)

    # Close the h5 files or return the output array.
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
    except UnboundLocalError:
        pass
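
# Example usage (a hedged sketch; the directory, voxel sizes, and output
# path are illustrative assumptions):
#
#     series2stack('tifs/', regex='*.tif',
#                  element_size_um=[0.1, 0.01, 0.01],
#                  outputpath='stack.h5/raw')
#
# gathers the sorted tifs in tifs/ into a zyx hdf5 stack at stack.h5/raw,
# writing them in blocks of 20 slices (the z-chunksize) at a time.
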