def preprocessing_cluster(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit=(4, 4), num_t_chunks=80,
                          dask_tmp=None, memory_limit=0, is_bz2=False, is_singlePlane=False,
                          down_sample_registration=1):
    """Denoise, register and motion-correct an image time series on a non-local dask cluster.

    Intermediate results are cached under ``save_root`` and each stage is skipped
    when its output already exists:

    1. ``denoised_data.zarr``         -- raw frames passed through ``pixelDenoiseImag``
    2. ``motion_fix_.h5``             -- reference image (mean of up to 100 frames
                                          around the middle of the series)
    3. ``trans_affs.npy``             -- per-frame transforms from ``estimate_rigid2d``
    4. ``motion_corrected_data.zarr`` -- transformed data, rechunked and transposed so
                                          that the first (time) axis becomes the last

    ``denoised_data.zarr`` is deleted once the workers have been terminated.

    Parameters
    ----------
    dir_root : str
        Directory holding the raw data: ``*.h5`` files (dataset ``'default'``), or
        ``*.stack.bz2`` files plus ``ch0.xml`` when ``is_bz2`` is true.
    save_root : str
        Directory receiving all intermediate and final outputs.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        The last two axes are rechunked to ``size // nsplit[i]`` before writing
        the motion-corrected data.
    num_t_chunks : int
        Not used by this function; kept for signature parity with ``preprocessing``.
    dask_tmp, memory_limit
        Forwarded unchanged to ``fdask.setup_workers``.
    is_bz2 : bool
        Read ``*.stack.bz2`` stacks instead of HDF5 files.
    is_singlePlane : bool
        When true, a singleton axis is inserted at position 1 during denoising.
    down_sample_registration : int
        When not 1, transforms are estimated on every n-th frame only and expanded
        to the full length with ``rigid_interp``.

    Returns
    -------
    None
    """
    from ..utils.getCameraInfo import getCameraInfo

    denoised_path = f'{save_root}/denoised_data.zarr'
    ref_path = f'{save_root}/motion_fix_.h5'
    affine_path = f'{save_root}/trans_affs.npy'

    # set worker
    cluster, client = fdask.setup_workers(numCore=200, is_local=False, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)

        if not os.path.exists(denoised_path):
            if not is_bz2:
                files = sorted(glob(dir_root+'/*.h5'))
                # only the shape is needed here, so close the probe handle right away
                with File(files[0], 'r') as h5:
                    chunks = h5['default'].shape
                if not is_singlePlane or len(chunks) == 2:
                    # one file per time point
                    data = da.stack([da.from_array(File(fn, 'r')['default'], chunks=chunks) for fn in files])
                else:
                    # single-plane files holding several frames each: join along the first axis
                    data = da.concatenate([da.from_array(File(fn, 'r')['default'], chunks=(1, chunks[1], chunks[2])) for fn in files], axis=0)
                cameraInfo = getCameraInfo(dir_root)
            else:
                import xml.etree.ElementTree as ET
                from utils import load_bz2file

                # stack dimensions are stored as an 'x'-separated string on an <info> element
                xml_root = ET.parse(dir_root+'/ch0.xml').getroot()
                dim_str = None
                for info in xml_root.findall('info'):
                    if info.get('dimensions'):
                        dim_str = info.get('dimensions')
                if dim_str is None:
                    raise ValueError(f"no <info dimensions=...> entry found in {dir_root}/ch0.xml")
                dims = [int(float(num)) for num in dim_str.split('x')]

                files = sorted(glob(dir_root+'/*.stack.bz2'))
                imread = dask.delayed(lambda v: load_bz2file(v, dims), pure=True)
                lazy_data = [imread(fn) for fn in files]
                # one stack is loaded eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                data = da.stack([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data])

                cameraInfo = getCameraInfo(dir_root)
                # shift the two ROI start coordinates down by one before denoising
                pixel_x0, pixel_x1, pixel_y0, pixel_y1 = [int(_) for _ in cameraInfo['camera_roi'].split('_')]
                cameraInfo['camera_roi'] = '%d_%d_%d_%d' % (pixel_x0-1, pixel_x1, pixel_y0-1, pixel_y1)

            # pixel denoise
            if not is_singlePlane:
                denoised_data = data.map_blocks(lambda v: pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo))
            else:
                denoised_data = data.map_blocks(lambda v: pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo), new_axis=1)
            denoised_data.to_zarr(denoised_path)

        # Always continue from the stored copy: later stages then read the zarr
        # instead of re-running the denoising graph, and `chunks` has the same
        # layout whether the data was just written or found on disk.
        denoised_data = da.from_zarr(denoised_path)
        chunks = denoised_data.shape[1:]

        # save and compute reference image
        print('Compute reference image ---')
        if not os.path.exists(ref_path):
            med_win = denoised_data.shape[0]//2
            # clamp at 0: a negative start would wrap around to the end of the series
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')
        print('--- Done computing reference image')

        # compute affine transform
        print('Registration to reference image ---')
        # create trans_affs file
        if not os.path.exists(affine_path):
            # `Dataset.value` was removed in h5py 3; `[()]` reads the whole dataset
            with File(ref_path, 'r') as h5:
                ref_img = h5['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration == 1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
            else:
                # downsample trans_affine case: register every n-th frame, then interpolate
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affine_path, trans_affine)

        # load trans_affs file
        trans_affine_ = np.load(affine_path)
        trans_affine_ = da.from_array(trans_affine_, chunks=(1, 4, 4))
        print('--- Done registration reference image')

        # apply the transforms, then rechunk/transpose so the time axis comes last
        trans_data_ = da.map_blocks(apply_transform3d, denoised_data, trans_affine_, chunks=(1, *denoised_data.shape[1:]), dtype='float16')
        trans_data_t = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))
        trans_data_t.to_zarr(f'{save_root}/motion_corrected_data.zarr')
    finally:
        # release the workers even when one of the stages above fails
        fdask.terminate_workers(cluster, client)

    print('Remove temporal files of registration')
    if os.path.exists(denoised_path):
        shutil.rmtree(denoised_path)
    return None
def preprocessing(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit=(4, 4), num_t_chunks=80,
                  dask_tmp=None, memory_limit=0, is_bz2=False, is_singlePlane=False, down_sample_registration=1):
    """Denoise, register and motion-correct an image time series on a local dask cluster.

    Intermediate results are cached under ``save_root`` and each stage is skipped
    when its output already exists:

    1. ``denoised_data.zarr`` -- raw frames passed through ``pixelDenoiseImag``
    2. ``motion_fix_.h5``     -- reference image (mean of up to 100 frames around
                                  the middle of the series)
    3. ``trans_affs.npy``     -- per-frame transforms from ``estimate_rigid2d``
    4. ``motion_corrected_data_chunks_NNN.zarr`` -- transformed data, written in
       ``num_t_chunks`` pieces along time, each rechunked and transposed so that
       the time axis comes last

    Finally ``denoised_data.zarr`` is removed and any chunk written to the
    extension drive is moved back into ``save_root``.

    Parameters
    ----------
    dir_root : str
        Directory holding the raw data: ``*.h5`` files (dataset ``'default'``), or
        ``*.stack.bz2`` files plus ``ch0.xml`` when ``is_bz2`` is true.
    save_root : str or list/tuple of two str
        Output directory.  A two-element sequence is read as
        ``(save_root, save_root_ext)``; ``save_root_ext`` is used for a chunk
        whenever the free space on ``save_root`` is below the estimated chunk size.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        The last two axes are rechunked to ``size // nsplit[i]``.
    num_t_chunks : int
        Number of pieces the time axis is split into for stage 4.
    dask_tmp, memory_limit
        Forwarded unchanged to ``fdask.setup_workers``.
    is_bz2 : bool
        Read ``*.stack.bz2`` stacks instead of HDF5 files.
    is_singlePlane : bool
        When true, a singleton axis is inserted at position 1 during denoising.
    down_sample_registration : int
        When not 1, transforms are estimated on every n-th frame only and expanded
        to the full length with ``rigid_interp``.

    Returns
    -------
    None
        Also returned early (after printing the reason) when a chunk cannot be
        written to either drive.
    """
    from ..utils.getCameraInfo import getCameraInfo
    from tqdm import tqdm
    from ..utils.fileio import du

    # An explicit sentinel instead of probing locals(): the cleanup at the end
    # must not fail with a NameError when no extension drive was given.
    save_root_ext = None
    if isinstance(save_root, (list, tuple)):
        save_root_ext = save_root[1]
        save_root = save_root[0]

    denoised_path = f'{save_root}/denoised_data.zarr'
    ref_path = f'{save_root}/motion_fix_.h5'
    affine_path = f'{save_root}/trans_affs.npy'

    # set worker
    cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)

        print(f'Tmp files will be saved to {save_root}')
        if save_root_ext is not None:
            print(f'With extended drive to {save_root_ext}')

        if not os.path.exists(denoised_path):
            print('========================')
            print('Getting data infos')
            if not is_bz2:
                files = sorted(glob(dir_root+'/*.h5'))
                # only the shape is needed here, so close the probe handle right away
                with File(files[0], 'r') as h5:
                    chunks = h5['default'].shape
                if not is_singlePlane or len(chunks) == 2:
                    # one file per time point
                    data = da.stack([da.from_array(File(fn, 'r')['default'], chunks=chunks) for fn in files])
                else:
                    # single-plane files holding several frames each: join along the first axis
                    data = da.concatenate([da.from_array(File(fn, 'r')['default'], chunks=(1, chunks[1], chunks[2])) for fn in files], axis=0)
                cameraInfo = getCameraInfo(dir_root)
            else:
                import xml.etree.ElementTree as ET
                from utils import load_bz2file

                # stack dimensions are stored as an 'x'-separated string on an <info> element
                xml_root = ET.parse(dir_root+'/ch0.xml').getroot()
                dim_str = None
                for info in xml_root.findall('info'):
                    if info.get('dimensions'):
                        dim_str = info.get('dimensions')
                if dim_str is None:
                    raise ValueError(f"no <info dimensions=...> entry found in {dir_root}/ch0.xml")
                dims = [int(float(num)) for num in dim_str.split('x')]

                files = sorted(glob(dir_root+'/*.stack.bz2'))
                imread = dask.delayed(lambda v: load_bz2file(v, dims), pure=True)
                lazy_data = [imread(fn) for fn in files]
                # one stack is loaded eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                data = da.stack([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data])

                cameraInfo = getCameraInfo(dir_root)
                # shift the two ROI start coordinates down by one before denoising
                pixel_x0, pixel_x1, pixel_y0, pixel_y1 = [int(_) for _ in cameraInfo['camera_roi'].split('_')]
                cameraInfo['camera_roi'] = '%d_%d_%d_%d' % (pixel_x0-1, pixel_x1, pixel_y0-1, pixel_y1)

            # pixel denoise
            print('========================')
            print('Denoising camera noise')
            if not is_singlePlane:
                denoised_data = data.map_blocks(lambda v: pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo))
            else:
                denoised_data = data.map_blocks(lambda v: pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo), new_axis=1)
            print('Denoising camera noise -- save data')
            denoised_data.to_zarr(denoised_path)

        # continue from the stored copy so later stages do not re-run the denoising graph
        print('Denoising camera noise -- load saved data')
        denoised_data = da.from_zarr(denoised_path)
        chunks = denoised_data.shape[1:]
        num_t = denoised_data.shape[0]

        # save and compute reference image
        print('Compute reference image ---')
        if not os.path.exists(ref_path):
            med_win = num_t//2
            # clamp at 0: a negative start would wrap around to the end of the series
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')
        print('--- Done computing reference image')

        # compute affine transform
        print('Registration to reference image ---')
        # create trans_affs file
        if not os.path.exists(affine_path):
            # `Dataset.value` was removed in h5py 3; `[()]` reads the whole dataset
            with File(ref_path, 'r') as h5:
                ref_img = h5['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration == 1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
            else:
                # downsample trans_affine case: register every n-th frame, then interpolate
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affine_path, trans_affine)

        # load trans_affs file
        trans_affine_ = np.load(affine_path)
        trans_affine_ = da.from_array(trans_affine_, chunks=(1, 4, 4))
        print('--- Done registration reference image')

        # apply affine transform
        if not os.path.exists(f'{save_root}/motion_corrected_data.zarr'):
            # The time axis is processed in pieces so that the transpose never
            # needs the whole series in memory on a local machine.
            splits_ = np.array_split(np.arange(num_t).astype('int'), num_t_chunks)
            print(f'Processing total {num_t_chunks} chunks in time.......')
            # estimate size of data to store
            used_ = du(f'{save_root}/denoised_data.zarr/')
            est_data_size = int(used_.decode('utf-8'))//(2**20*num_t_chunks*2)+5  # kb to Gb
            for nz, n_split in enumerate(splits_):
                chunk_name = '/motion_corrected_data_chunks_%03d.zarr' % (nz)
                # skip chunks already present on either drive
                if os.path.exists(save_root+chunk_name):
                    continue
                if save_root_ext is not None and os.path.exists(save_root_ext+chunk_name):
                    continue

                print('Apply registration to rechunk layer %03d' % (nz))
                trans_data_ = da.map_blocks(apply_transform3d, denoised_data[n_split], trans_affine_[n_split], chunks=(1, *denoised_data.shape[1:]), dtype='float16')
                print('Starting to rechunk layer %03d' % (nz))
                trans_data_t_z = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))

                # check space availablity
                _, _, free_ = shutil.disk_usage(f'{save_root}/')
                free_gb = free_//(2**30)
                if free_gb > est_data_size:
                    print(f'Remaining space {free_gb} GB..... -- start to save at {save_root}')
                    trans_data_t_z.to_zarr(save_root+chunk_name)
                elif save_root_ext is None:
                    # nowhere else to write: stop instead of filling the drive
                    print(f'Remaining space {free_gb} GB..... -- not enough space at {save_root} and no extended drive given')
                    return None
                else:
                    try:
                        print(f'Remaining space {free_gb} GB..... -- start to save at {save_root_ext}')
                        trans_data_t_z.to_zarr(save_root_ext+chunk_name)
                    except Exception as e:
                        # if any error -- break the code (workers are released in `finally`)
                        print(e)
                        return None

                del trans_data_t_z
                gc.collect()
                print('finishing rechunking time chunk -- %03d of %03d' % (nz, num_t_chunks))

        print('Remove temporal files of registration')
        if os.path.exists(denoised_path):
            shutil.rmtree(denoised_path)
        if save_root_ext is not None:
            for ext_files in tqdm(glob(save_root_ext+'/motion_corrected_data_chunks_*.zarr')):
                print(f'Moving file {ext_files} to Tmp-file folder.....')
                shutil.move(ext_files, save_root+'/')
    finally:
        # release the workers on every exit path, including errors and early returns
        fdask.terminate_workers(cluster, client)
    return None
def preprocessing(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit=(4, 4), num_t_chunks=80,
                  dask_tmp=None, memory_limit=0, is_singlePlane=False, down_sample_registration=1):
    """Denoise, register and motion-correct an HDF5 image series on a local dask cluster.

    The work is done in three phases, each with its own freshly started set of
    workers.  Progress messages are printed and also appended to
    ``{save_root}/processing.tmp``.

    Phase 1 (skipped when ``denoised_data.zarr`` exists)
        The input files are split into groups; each group is read, passed through
        ``pixelDenoiseImag`` and stored as ``denoised_data_NNN.zarr``.  The groups
        are then concatenated into ``denoised_data.zarr`` and the per-group
        stores are deleted.
    Phase 2
        Compute the reference image ``motion_fix_.h5`` (mean of up to 100 frames
        around the middle of the series) and the per-frame transforms
        ``trans_affs.npy`` (``estimate_rigid2d``); each is skipped when its file
        already exists.
    Phase 3 (skipped when ``motion_corrected_data.zarr`` exists)
        Apply the transforms in ``num_t_chunks`` pieces along time and write each
        piece, rechunked and transposed so that time is the last axis, to
        ``motion_corrected_data_chunks_NNN.zarr``.

    Finally ``denoised_data.zarr`` is removed and any chunk written to the
    extension drive is moved back into ``save_root``.

    Parameters
    ----------
    dir_root : str
        Directory holding the raw ``*.h5`` files (dataset ``'default'``).
    save_root : str or list/tuple of two str
        Output directory.  A two-element sequence is read as
        ``(save_root, save_root_ext)``; ``save_root_ext`` is used for a chunk
        whenever the free space on ``save_root`` is below the estimated chunk size.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        The last two axes are rechunked to ``size // nsplit[i]``.
    num_t_chunks : int
        Number of pieces used for phase 3, and upper bound on the number of file
        groups in phase 1.
    dask_tmp, memory_limit
        Forwarded unchanged to ``fdask.setup_workers``.
    is_singlePlane : bool
        Selects how per-file arrays are combined: stacked on a new first axis, or
        (single-plane files that are not 2-D) concatenated along the first axis.
    down_sample_registration : int
        When not 1, transforms are estimated on every n-th frame only and expanded
        to the full length with ``rigid_interp``.

    Returns
    -------
    None
        Also returned early (after printing the reason) when a chunk cannot be
        written to either drive.
    """
    from ..utils.getCameraInfo import getCameraInfo
    from tqdm import tqdm
    from ..utils.fileio import du

    # An explicit sentinel instead of probing locals(): the cleanup at the end
    # must not fail with a NameError when no extension drive was given.
    save_root_ext = None
    if isinstance(save_root, (list, tuple)):
        save_root_ext = save_root[1]
        save_root = save_root[0]

    denoised_path = f'{save_root}/denoised_data.zarr'
    ref_path = f'{save_root}/motion_fix_.h5'
    affine_path = f'{save_root}/trans_affs.npy'
    log_path = f'{save_root}/processing.tmp'

    def _log(message):
        """Append ``message`` to the progress log, closing the file even on error."""
        with open(log_path, "a") as f:
            f.write(message)

    print(f'Tmp files will be saved to {save_root}')
    if save_root_ext is not None:
        print(f'With extended drive to {save_root_ext}')
    print(f'is_singlePlane: {is_singlePlane}')
    print(f'nsplit: {nsplit}')

    # ------------------------------------------------------------------
    # Phase 1: denoise the raw files group by group
    # ------------------------------------------------------------------
    if not os.path.exists(denoised_path):
        # set worker
        cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
        try:
            print_client_links(cluster)
            print('========================')
            print('Getting data infos')
            files = sorted(glob(dir_root+'/*.h5'))
            # only the shape is needed here, so close the probe handle right away
            with File(files[0], 'r') as h5:
                chunks = h5['default'].shape
            cameraInfo = getCameraInfo(dir_root)
            print('Stacking data')

            def _load_denoised(fn):
                """Read one file fully, close it, and return the denoised array."""
                # `Dataset.value` was removed in h5py 3; `[()]` reads the whole dataset
                with File(fn, 'r') as h5:
                    raw = h5['default'][()]
                return pixelDenoiseImag(raw, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo)

            imread = dask.delayed(_load_denoised)
            num_files = len(files)
            # never create more groups than files: an empty group has no sample
            # frame to take shape/dtype from
            n_file_chunks = min(num_t_chunks, num_files)
            splits_ = np.array_split(np.arange(num_files).astype('int'), n_file_chunks)
            npfiles = np.array(files)
            for nz, n_split in enumerate(splits_):
                group_path = save_root+'/denoised_data_%03d.zarr' % (nz)
                if os.path.exists(group_path):
                    continue
                print('Apply denoising to file chunk %03d' % (nz))
                lazy_data = [imread(fn) for fn in npfiles[n_split]]
                # one file is processed eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                if not is_singlePlane or len(chunks) == 2:
                    denoised_data = da.stack([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data])
                else:
                    denoised_data = da.concatenate([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data], axis=0).rechunk((1, -1, -1))
                denoised_data.to_zarr(group_path)
                print('finishing denoising chunk -- %03d of %03d' % (nz, n_file_chunks))
                _log('finishing denoised data chunk -- %03d of %03d \n' % (nz, n_file_chunks))

            denoised_data = da.concatenate([da.from_zarr(save_root+'/denoised_data_%03d.zarr' % (nz)) for nz in range(n_file_chunks)])
            denoised_data.to_zarr(denoised_path)

            def rm_tmp(nz, save_root=save_root):
                """Delete one per-group store; ``nz`` arrives as a one-element array block."""
                # take the scalar explicitly: formatting a 1-element ndarray with
                # %d relies on a conversion that NumPy has deprecated
                idx = int(np.asarray(nz).ravel()[0])
                if os.path.exists(f'{save_root}/denoised_data_%03d.zarr' % (idx)):
                    print('Remove temporal files of denoise at %03d' % (idx))
                    shutil.rmtree(f'{save_root}/denoised_data_%03d.zarr' % (idx))
                return np.array([1])

            # run the deletions on the workers, one group per block
            nz_list = da.from_array(np.arange(n_file_chunks), chunks=(1))
            da.map_blocks(rm_tmp, nz_list).compute()
        finally:
            fdask.terminate_workers(cluster, client)
        time.sleep(30)

    # ------------------------------------------------------------------
    # Phase 2: reference image and registration
    # ------------------------------------------------------------------
    # set worker
    cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)
        print('Denoising camera noise -- load saved data')
        _log('Denoising camera noise -- load saved data \n')
        denoised_data = da.from_zarr(denoised_path)
        if denoised_data.ndim == 3:
            # insert a singleton axis at position 1 so the data is always 4-D
            denoised_data = denoised_data[:, None, :, :]
        chunks = denoised_data.shape[1:]
        num_t = denoised_data.shape[0]

        # save and compute reference image
        print('Compute reference image ---')
        _log('Compute reference image --- \n')
        if not os.path.exists(ref_path):
            med_win = num_t//2
            # clamp at 0: a negative start would wrap around to the end of the series
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')
        print('--- Done computing reference image')
        _log('--- Done computing reference image \n')

        # compute affine transform
        print('Registration to reference image ---')
        _log('Registration to reference image --- \n')
        # create trans_affs file
        if not os.path.exists(affine_path):
            with File(ref_path, 'r') as h5:
                ref_img = h5['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration == 1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
            else:
                # downsample trans_affine case: register every n-th frame, then interpolate
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=3, chunks=(1, 4, 4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affine_path, trans_affine)

        # load trans_affs file; the trailing singleton axis gives it the same
        # number of dimensions as the 4-D image data
        trans_affine_ = np.load(affine_path)
        trans_affine_ = da.from_array(np.expand_dims(trans_affine_, 3), chunks=(1, 4, 4, 1))
        print('--- Done registration reference image')
        _log('--- Done registration reference image \n')
    finally:
        fdask.terminate_workers(cluster, client)
    time.sleep(30)

    # ------------------------------------------------------------------
    # Phase 3: apply affine transform
    # ------------------------------------------------------------------
    if not os.path.exists(f'{save_root}/motion_corrected_data.zarr'):
        # The time axis is processed in pieces so that the transpose never
        # needs the whole series in memory on a local machine.
        # set worker
        cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
        try:
            print_client_links(cluster)
            splits_ = np.array_split(np.arange(num_t).astype('int'), num_t_chunks)
            print(f'Processing total {num_t_chunks} chunks in time.......')
            # estimate size of data to store
            used_ = du(f'{save_root}/denoised_data.zarr/')
            est_data_size = int(used_.decode('utf-8'))//(2**20*num_t_chunks*2)+5  # kb to Gb
            for nz, n_split in enumerate(splits_):
                chunk_name = '/motion_corrected_data_chunks_%03d.zarr' % (nz)
                # skip chunks already present on either drive
                if os.path.exists(save_root+chunk_name):
                    continue
                if save_root_ext is not None and os.path.exists(save_root_ext+chunk_name):
                    continue

                print('Apply registration to rechunk layer %03d' % (nz))
                # each split is a contiguous index range, so plain slices suffice
                t_start = n_split[0]
                t_end = n_split[-1]+1
                trans_data_ = da.map_blocks(apply_transform3d, denoised_data[t_start:t_end], trans_affine_[t_start:t_end], chunks=(1, *denoised_data.shape[1:]), dtype='float16')
                print('Starting to rechunk layer %03d' % (nz))
                trans_data_t_z = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))

                # check space availablity
                _, _, free_ = shutil.disk_usage(f'{save_root}/')
                free_gb = free_//(2**30)
                if free_gb > est_data_size:
                    print(f'Remaining space {free_gb} GB..... -- start to save at {save_root}')
                    trans_data_t_z.to_zarr(save_root+chunk_name)
                elif save_root_ext is None:
                    # nowhere else to write: stop instead of filling the drive
                    print(f'Remaining space {free_gb} GB..... -- not enough space at {save_root} and no extended drive given')
                    return None
                else:
                    try:
                        print(f'Remaining space {free_gb} GB..... -- start to save at {save_root_ext}')
                        trans_data_t_z.to_zarr(save_root_ext+chunk_name)
                    except Exception as e:
                        # if any error -- break the code (workers are released in `finally`)
                        print(e)
                        return None

                del trans_data_t_z
                gc.collect()
                print('finishing rechunking time chunk -- %03d of %03d' % (nz, num_t_chunks))
                _log('finishing rechunking time chunk -- %03d of %03d \n' % (nz, num_t_chunks))
        finally:
            fdask.terminate_workers(cluster, client)

    print('Remove temporal files of registration')
    if os.path.exists(denoised_path):
        shutil.rmtree(denoised_path)
    if save_root_ext is not None:
        for ext_files in tqdm(glob(save_root_ext+'/motion_corrected_data_chunks_*.zarr')):
            print(f'Moving file {ext_files} to Tmp-file folder.....')
            shutil.move(ext_files, save_root+'/')
    # presumably gives the terminated workers time to shut down before the caller continues
    time.sleep(60)
    return None