示例#1
0
def preprocessing_cluster(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit = (4, 4), num_t_chunks = 80,\
                  dask_tmp=None, memory_limit=0, is_bz2=False, is_singlePlane=False, down_sample_registration=1):
    """Denoise, register and rechunk an imaging time series on a non-local dask cluster.

    Every stage is skipped when its output already exists under ``save_root``,
    except the final write of the motion-corrected data:

    1. Load the raw stacks found in ``dir_root`` (``*.h5``, or ``*.stack.bz2``
       when ``is_bz2``), apply ``pixelDenoiseImag`` block-wise and write
       ``denoised_data.zarr``.
    2. Average up to 100 frames around the middle of the recording and save
       the result as ``motion_fix_.h5``.
    3. Run ``estimate_rigid2d`` for each time block against the maximum of the
       reference over its first axis and save the result as ``trans_affs.npy``.
    4. Apply the transforms with ``apply_transform3d``, rechunk, move the time
       axis last and write ``motion_corrected_data.zarr``.
    5. Delete ``denoised_data.zarr``.

    Parameters
    ----------
    dir_root : str
        Folder with the raw files; also passed to ``getCameraInfo``.
    save_root : str
        Folder that receives every intermediate and final output.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        Divisors applied to the last two spatial axes when rechunking.
    num_t_chunks : int
        Not used by this function; kept so the signature matches the local
        variant.
    dask_tmp, memory_limit
        Forwarded to ``fdask.setup_workers``.
    is_bz2 : bool
        Read ``*.stack.bz2`` files, taking the stack dimensions from
        ``ch0.xml``.
    is_singlePlane : bool
        Insert a new axis at position 1 while denoising.
    down_sample_registration : int
        When not 1, estimate the transform on every n-th frame only and fill
        the remaining frames with ``rigid_interp``.

    Returns
    -------
    None
    """
    from ..utils.getCameraInfo import getCameraInfo

    denoised_path = f'{save_root}/denoised_data.zarr'
    ref_path = f'{save_root}/motion_fix_.h5'
    affs_path = f'{save_root}/trans_affs.npy'

    # set worker
    cluster, client = fdask.setup_workers(numCore=200, is_local=False, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)

        if not os.path.exists(denoised_path):
            if not is_bz2:
                files = sorted(glob(dir_root+'/*.h5'))
                # only the shape is needed here, so close this handle right away
                with File(files[0],'r') as h5_first:
                    file_shape = h5_first['default'].shape
                if not is_singlePlane or len(file_shape)==2:
                    # one file per time point
                    data = da.stack([da.from_array(File(fn,'r')['default'], chunks=file_shape) for fn in files])
                else:
                    # single-plane files that already carry a leading axis are joined along it
                    data = da.concatenate([da.from_array(File(fn,'r')['default'], chunks=(1, file_shape[1], file_shape[2])) for fn in files], axis=0)
                cameraInfo = getCameraInfo(dir_root)
            else:
                import xml.etree.ElementTree as ET
                from utils import load_bz2file
                dims = ET.parse(dir_root+'/ch0.xml')
                root = dims.getroot()
                for info in root.findall('info'):
                    if info.get('dimensions'):
                        dims = info.get('dimensions')
                dims = dims.split('x')
                dims = [int(float(num)) for num in dims]
                files = sorted(glob(dir_root+'/*.stack.bz2'))
                imread = dask.delayed(lambda v: load_bz2file(v, dims), pure=True)
                lazy_data = [imread(fn) for fn in files]
                # one stack is loaded eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                data = da.stack([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data])
                cameraInfo = getCameraInfo(dir_root)
                # shift the two lower ROI bounds down by one before denoising
                pixel_x0, pixel_x1, pixel_y0, pixel_y1 = [int(_) for _ in cameraInfo['camera_roi'].split('_')]
                pixel_x0 = pixel_x0-1
                pixel_y0 = pixel_y0-1
                cameraInfo['camera_roi'] = '%d_%d_%d_%d'%(pixel_x0, pixel_x1, pixel_y0, pixel_y1)

            # pixel denoise
            def denoise_block(v):
                return pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo)

            if not is_singlePlane:
                denoised_data = data.map_blocks(denoise_block)
            else:
                denoised_data = data.map_blocks(denoise_block, new_axis=1)
            denoised_data.to_zarr(denoised_path)

        # Always continue from the stored copy so later stages do not re-run
        # the denoising graph on the raw files.
        denoised_data = da.from_zarr(denoised_path)
        # Taken from the denoised array (not the raw file shape) so the
        # indices used for rechunking exist for 2-D single-plane input too.
        chunks = denoised_data.shape[1:]

        # save and compute reference image
        print('Compute reference image ---')
        if not os.path.exists(ref_path):
            med_win = len(denoised_data)//2
            # clamp: a negative start would wrap around to the end of the recording
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')

        print('--- Done computing reference image')

        # compute affine transform
        print('Registration to reference image ---')
        # create trans_affs file
        if not os.path.exists(affs_path):
            # Dataset.value no longer exists in h5py >= 3.0; [()] reads the full dataset
            with File(ref_path, 'r') as h5_ref:
                ref_img = h5_ref['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration==1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
            else:
                #### downsample trans_affine case
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affs_path, trans_affine)
        # load trans_affs file
        trans_affine_ = np.load(affs_path)
        trans_affine_ = da.from_array(trans_affine_, chunks=(1,4,4))
        print('--- Done registration reference image')

        # NOTE(review): trans_affine_ is chunked as a 3-D array while
        # denoised_data is 4-D -- confirm dask pairs the blocks along time as intended.
        trans_data_ = da.map_blocks(apply_transform3d, denoised_data, trans_affine_, chunks=(1, *denoised_data.shape[1:]), dtype='float16')
        trans_data_t = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))
        trans_data_t.to_zarr(f'{save_root}/motion_corrected_data.zarr')
    finally:
        # release the cluster even when a stage fails
        fdask.terminate_workers(cluster, client)

    print('Remove temporal files of registration')
    if os.path.exists(denoised_path):
        shutil.rmtree(denoised_path)
    return None
示例#2
0
def preprocessing(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit = (4, 4), num_t_chunks = 80,\
                  dask_tmp=None, memory_limit=0, is_bz2=False, is_singlePlane=False, down_sample_registration=1):
    """Denoise, register and rechunk an imaging time series on local dask workers.

    Stages whose output already exists under ``save_root`` are skipped:

    1. Load the raw stacks found in ``dir_root`` (``*.h5``, or ``*.stack.bz2``
       when ``is_bz2``), apply ``pixelDenoiseImag`` block-wise and write
       ``denoised_data.zarr``.
    2. Average up to 100 frames around the middle of the recording and save
       the result as ``motion_fix_.h5``.
    3. Run ``estimate_rigid2d`` for each time block against the maximum of the
       reference over its first axis and save the result as ``trans_affs.npy``.
    4. Unless ``motion_corrected_data.zarr`` exists, split the time axis into
       ``num_t_chunks`` pieces, apply ``apply_transform3d`` to each, rechunk,
       move time last and write ``motion_corrected_data_chunks_NNN.zarr``.
       A piece goes to the extended drive when the primary one reports too
       little free space. Afterwards ``denoised_data.zarr`` is deleted and
       pieces on the extended drive are moved back to ``save_root``.

    Parameters
    ----------
    dir_root : str
        Folder with the raw files; also passed to ``getCameraInfo``.
    save_root : str or list
        Output folder, or ``[primary, extended]`` where the second entry is
        used as overflow storage.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        Divisors applied to the last two spatial axes when rechunking.
    num_t_chunks : int
        Number of pieces the time axis is split into for stage 4.
    dask_tmp, memory_limit
        Forwarded to ``fdask.setup_workers``.
    is_bz2 : bool
        Read ``*.stack.bz2`` files, taking the stack dimensions from
        ``ch0.xml``.
    is_singlePlane : bool
        Insert a new axis at position 1 while denoising.
    down_sample_registration : int
        When not 1, estimate the transform on every n-th frame only and fill
        the remaining frames with ``rigid_interp``.

    Returns
    -------
    None
        Also returned early when a piece cannot be written anywhere.
    """
    from ..utils.getCameraInfo import getCameraInfo
    from tqdm import tqdm
    from ..utils.fileio import du
    # set worker
    cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)

        # An explicit None (instead of probing locals()) keeps every later use
        # well-defined when no extended drive was given.
        save_root_ext = None
        if isinstance(save_root, list):
            save_root_ext = save_root[1]
            save_root = save_root[0]

        print(f'Tmp files will be saved to {save_root}')
        if save_root_ext is not None:
            print(f'With extended drive to {save_root_ext}')

        denoised_path = f'{save_root}/denoised_data.zarr'
        ref_path = f'{save_root}/motion_fix_.h5'
        affs_path = f'{save_root}/trans_affs.npy'

        if not os.path.exists(denoised_path):
            print('========================')
            print('Getting data infos')
            if not is_bz2:
                files = sorted(glob(dir_root+'/*.h5'))
                # only the shape is needed here, so close this handle right away
                with File(files[0],'r') as h5_first:
                    file_shape = h5_first['default'].shape
                if not is_singlePlane or len(file_shape)==2:
                    # one file per time point
                    data = da.stack([da.from_array(File(fn,'r')['default'], chunks=file_shape) for fn in files])
                else:
                    # single-plane files that already carry a leading axis are joined along it
                    data = da.concatenate([da.from_array(File(fn,'r')['default'], chunks=(1, file_shape[1], file_shape[2])) for fn in files], axis=0)
                cameraInfo = getCameraInfo(dir_root)
            else:
                import xml.etree.ElementTree as ET
                from utils import load_bz2file
                dims = ET.parse(dir_root+'/ch0.xml')
                root = dims.getroot()
                for info in root.findall('info'):
                    if info.get('dimensions'):
                        dims = info.get('dimensions')
                dims = dims.split('x')
                dims = [int(float(num)) for num in dims]
                files = sorted(glob(dir_root+'/*.stack.bz2'))
                imread = dask.delayed(lambda v: load_bz2file(v, dims), pure=True)
                lazy_data = [imread(fn) for fn in files]
                # one stack is loaded eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                data = da.stack([da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data])
                cameraInfo = getCameraInfo(dir_root)
                # shift the two lower ROI bounds down by one before denoising
                pixel_x0, pixel_x1, pixel_y0, pixel_y1 = [int(_) for _ in cameraInfo['camera_roi'].split('_')]
                pixel_x0 = pixel_x0-1
                pixel_y0 = pixel_y0-1
                cameraInfo['camera_roi'] = '%d_%d_%d_%d'%(pixel_x0, pixel_x1, pixel_y0, pixel_y1)
            # pixel denoise
            print('========================')
            print('Denoising camera noise')

            def denoise_block(v):
                return pixelDenoiseImag(v, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo)

            if not is_singlePlane:
                denoised_data = data.map_blocks(denoise_block)
            else:
                denoised_data = data.map_blocks(denoise_block, new_axis=1)
            print('Denoising camera noise -- save data')
            denoised_data.to_zarr(denoised_path)

        print('Denoising camera noise -- load saved data')
        denoised_data = da.from_zarr(denoised_path)
        chunks = denoised_data.shape[1:]
        num_t = denoised_data.shape[0]

        # save and compute reference image
        print('Compute reference image ---')
        if not os.path.exists(ref_path):
            med_win = len(denoised_data)//2
            # clamp: a negative start would wrap around to the end of the recording
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')

        print('--- Done computing reference image')

        # compute affine transform
        print('Registration to reference image ---')
        # create trans_affs file
        if not os.path.exists(affs_path):
            # Dataset.value no longer exists in h5py >= 3.0; [()] reads the full dataset
            with File(ref_path, 'r') as h5_ref:
                ref_img = h5_ref['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration==1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
            else:
                #### downsample trans_affine case
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affs_path, trans_affine)
        # load trans_affs file
        trans_affine_ = np.load(affs_path)
        trans_affine_ = da.from_array(trans_affine_, chunks=(1,4,4))
        print('--- Done registration reference image')

        # apply affine transform
        if not os.path.exists(f'{save_root}/motion_corrected_data.zarr'):
            # fix memory issue to load data all together for transpose on local machine
            # load data
            # swap axes
            splits_ = np.array_split(np.arange(num_t).astype('int'), num_t_chunks)
            print(f'Processing total {num_t_chunks} chunks in time.......')
            # estimate size of data to store
            used_ = du(denoised_path+'/')
            est_data_size = int(used_.decode('utf-8'))//(2**20*num_t_chunks*2)+5 #kb to Gb
            for nz, n_split in enumerate(splits_):
                chunk_name = '/motion_corrected_data_chunks_%03d.zarr'%(nz)
                # skip pieces that were already written to either drive
                if os.path.exists(save_root+chunk_name):
                    continue
                if save_root_ext is not None and os.path.exists(save_root_ext+chunk_name):
                    continue
                print('Apply registration to rechunk layer %03d'%(nz))
                # NOTE(review): trans_affine_ is chunked as a 3-D array while
                # denoised_data is 4-D -- confirm dask pairs the blocks along time as intended.
                trans_data_ = da.map_blocks(apply_transform3d, denoised_data[n_split], trans_affine_[n_split], chunks=(1, *denoised_data.shape[1:]), dtype='float16')
                print('Starting to rechunk layer %03d'%(nz))
                trans_data_t_z = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))
                # check space availablity
                _, _, free_ = shutil.disk_usage(f'{save_root}/')
                if (free_//(2**30)) > est_data_size:
                    print(f'Remaining space {free_//(2**30)} GB..... -- start to save at {save_root}')
                    trans_data_t_z.to_zarr(save_root+chunk_name)
                elif save_root_ext is None:
                    # nowhere left to write: stop here (workers are released in ``finally``)
                    print(f'Remaining space {free_//(2**30)} GB..... -- not enough space at {save_root} and no extended drive was given')
                    return None
                else:
                    try:
                        print(f'Remaining space {free_//(2**30)} GB..... -- start to save at {save_root_ext}')
                        trans_data_t_z.to_zarr(save_root_ext+chunk_name)
                    except Exception as e:
                        # if any error -- break the code
                        print(e)
                        return None
                del trans_data_t_z
                gc.collect()
                print('finishing rechunking time chunk -- %03d of %03d'%(nz, num_t_chunks))

            print('Remove temporal files of registration')
            if os.path.exists(denoised_path):
                shutil.rmtree(denoised_path)
            # pieces that overflowed to the extended drive are gathered back
            if save_root_ext is not None:
                for ext_files in tqdm(glob(save_root_ext+'/motion_corrected_data_chunks_*.zarr')):
                    print(f'Moving file {ext_files} to Tmp-file folder.....')
                    shutil.move(ext_files, save_root+'/')
    finally:
        # release the workers on every exit path, including errors and early returns
        fdask.terminate_workers(cluster, client)
    return None
def preprocessing(dir_root, save_root, cameraNoiseMat=cameraNoiseMat, nsplit = (4, 4), num_t_chunks = 80,\
                  dask_tmp=None, memory_limit=0, is_singlePlane=False, down_sample_registration=1):
    """Denoise, register and rechunk an imaging time series on local dask workers.

    The work is done in three worker sessions, each started and terminated
    separately with a pause in between. Progress lines are appended to
    ``processing.tmp`` under ``save_root``.

    1. Unless ``denoised_data.zarr`` exists: split the ``*.h5`` files in
       ``dir_root`` into ``num_t_chunks`` groups, read and denoise each group
       with ``pixelDenoiseImag`` into ``denoised_data_NNN.zarr``, concatenate
       the groups into ``denoised_data.zarr`` and delete the per-group stores.
    2. Compute the reference image (``motion_fix_.h5``) and the per-frame
       transforms (``trans_affs.npy``) unless they already exist.
    3. Unless ``motion_corrected_data.zarr`` exists: split the time axis into
       ``num_t_chunks`` pieces, apply ``apply_transform3d`` to each, rechunk,
       move time last and write ``motion_corrected_data_chunks_NNN.zarr``.
       A piece goes to the extended drive when the primary one reports too
       little free space. Afterwards ``denoised_data.zarr`` is deleted and
       pieces on the extended drive are moved back to ``save_root``.

    Parameters
    ----------
    dir_root : str
        Folder with the raw ``*.h5`` files; also passed to ``getCameraInfo``.
    save_root : str or list
        Output folder, or ``[primary, extended]`` where the second entry is
        used as overflow storage.
    cameraNoiseMat
        Forwarded unchanged to ``pixelDenoiseImag``.
    nsplit : tuple of int
        Divisors applied to the last two spatial axes when rechunking.
    num_t_chunks : int
        Number of groups used for both the denoising and the registration loop.
    dask_tmp, memory_limit
        Forwarded to ``fdask.setup_workers``.
    is_singlePlane : bool
        Selects how the per-file arrays are combined (stacked, or concatenated
        along the first axis when the files are not 2-D).
    down_sample_registration : int
        When not 1, estimate the transform on every n-th frame only and fill
        the remaining frames with ``rigid_interp``.

    Returns
    -------
    None
        Also returned early when a piece cannot be written anywhere.
    """
    from ..utils.getCameraInfo import getCameraInfo
    from tqdm import tqdm
    from ..utils.fileio import du

    # An explicit None (instead of probing locals()) keeps every later use
    # well-defined when no extended drive was given.
    save_root_ext = None
    if isinstance(save_root, list):
        save_root_ext = save_root[1]
        save_root = save_root[0]

    def write_log(text):
        # append one progress entry; ``with`` closes the file even if the write fails
        with open(f'{save_root}/processing.tmp', "a") as log_file:
            log_file.write(text)

    print(f'Tmp files will be saved to {save_root}')
    if save_root_ext is not None:
        print(f'With extended drive to {save_root_ext}')
    print(f'is_singlePlane: {is_singlePlane}')
    print(f'nsplit: {nsplit}')

    denoised_path = f'{save_root}/denoised_data.zarr'
    ref_path = f'{save_root}/motion_fix_.h5'
    affs_path = f'{save_root}/trans_affs.npy'

    if not os.path.exists(denoised_path):
        # set worker
        cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
        try:
            print_client_links(cluster)
            print('========================')
            print('Getting data infos')
            files = sorted(glob(dir_root+'/*.h5'))
            # only the shape is needed here, so close this handle right away
            with File(files[0],'r') as h5_first:
                chunks = h5_first['default'].shape
            cameraInfo = getCameraInfo(dir_root)
            print('Stacking data')

            def read_and_denoise(fn):
                # Dataset.value no longer exists in h5py >= 3.0; [()] reads the full dataset
                with File(fn,'r') as h5_in:
                    raw = h5_in['default'][()]
                return pixelDenoiseImag(raw, cameraNoiseMat=cameraNoiseMat, cameraInfo=cameraInfo)

            imread = dask.delayed(read_and_denoise)

            num_files = len(files)
            splits_ = np.array_split(np.arange(num_files).astype('int'), num_t_chunks)
            npfiles=np.array(files)
            # array_split yields empty groups when there are fewer files than
            # num_t_chunks; those groups have nothing to denoise or concatenate
            filled_ = [nz for nz, n_split in enumerate(splits_) if len(n_split)>0]

            for nz in filled_:
                group_path = save_root+'/denoised_data_%03d.zarr'%(nz)
                if os.path.exists(group_path):
                    continue
                print('Apply denoising to file chunk %03d'%(nz))
                lazy_data = [imread(fn) for fn in npfiles[splits_[nz]]]
                # one file is processed eagerly to learn shape and dtype
                sample = lazy_data[0].compute()
                lazy_arrays = [da.from_delayed(fn, shape=sample.shape, dtype=sample.dtype) for fn in lazy_data]
                if not is_singlePlane or len(chunks)==2:
                    denoised_data = da.stack(lazy_arrays)
                else:
                    denoised_data = da.concatenate(lazy_arrays, axis=0).rechunk((1,-1,-1))
                denoised_data.to_zarr(group_path)
                print('finishing denoising chunk -- %03d of %03d'%(nz, num_t_chunks))
                write_log('finishing denoised data chunk -- %03d of %03d \n'%(nz, num_t_chunks))
            denoised_data = da.concatenate([da.from_zarr(save_root+'/denoised_data_%03d.zarr'%(nz)) for nz in filled_])
            denoised_data.to_zarr(denoised_path)

            def rm_tmp(nz, save_root=save_root):
                # nz is one block of the index array below, i.e. a single chunk number
                nz = np.ravel(nz)
                if nz.size == 0:
                    # dask may call the function on an empty block to infer the output type
                    return np.array([1])
                idx = int(nz[0])
                if os.path.exists(f'{save_root}/denoised_data_%03d.zarr'%(idx)):
                    print('Remove temporal files of denoise at %03d'%(idx))
                    shutil.rmtree(f'{save_root}/denoised_data_%03d.zarr'%(idx))
                return np.array([1])

            # the per-group stores are deleted through the workers, one task per group
            nz_list = da.from_array(np.arange(num_t_chunks), chunks=(1))
            da.map_blocks(rm_tmp, nz_list).compute()
        finally:
            # release the workers even when denoising fails
            fdask.terminate_workers(cluster, client)
        time.sleep(30)

    # set worker
    cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
    try:
        print_client_links(cluster)
        print('Denoising camera noise -- load saved data')
        write_log('Denoising camera noise -- load saved data \n')
        denoised_data = da.from_zarr(denoised_path)
        if denoised_data.ndim==3:
            # give 3-D data a length-1 second axis so the 4-D code below applies
            denoised_data = denoised_data[:, None, :, :]
        chunks = denoised_data.shape[1:]
        num_t = denoised_data.shape[0]

        # save and compute reference image
        print('Compute reference image ---')
        write_log('Compute reference image --- \n')
        if not os.path.exists(ref_path):
            med_win = len(denoised_data)//2
            # clamp: a negative start would wrap around to the end of the recording
            win_start = max(med_win-50, 0)
            ref_img = denoised_data[win_start:med_win+50].mean(axis=0).compute()
            save_h5(ref_path, ref_img, dtype='float16')

        print('--- Done computing reference image')
        write_log('--- Done computing reference image \n')

        # compute affine transform
        print('Registration to reference image ---')
        write_log('Registration to reference image --- \n')
        # create trans_affs file
        if not os.path.exists(affs_path):
            with File(ref_path, 'r') as h5_ref:
                ref_img = h5_ref['default'][()]
            ref_img = ref_img.max(axis=0, keepdims=True)
            if down_sample_registration==1:
                trans_affine = denoised_data.map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
            else:
                #### downsample trans_affine case
                trans_affine = denoised_data[0::down_sample_registration].map_blocks(lambda x: estimate_rigid2d(x, fixed=ref_img), dtype='float32', drop_axis=(3), chunks=(1,4,4)).compute()
                len_dat = denoised_data.shape[0]
                trans_affine = rigid_interp(trans_affine, down_sample_registration, len_dat)
            # save trans_affs file
            np.save(affs_path, trans_affine)
        # load trans_affs file
        trans_affine_ = np.load(affs_path)
        # a trailing length-1 axis gives the transforms the same rank as denoised_data
        trans_affine_ = da.from_array(np.expand_dims(trans_affine_, 3), chunks=(1,4,4,1))
        print('--- Done registration reference image')
        write_log('--- Done registration reference image \n')
    finally:
        # release the workers even when registration fails
        fdask.terminate_workers(cluster, client)
    time.sleep(30)

    # apply affine transform
    if not os.path.exists(f'{save_root}/motion_corrected_data.zarr'):
        # fix memory issue to load data all together for transpose on local machine
        # load data
        # swap axes

        # set worker
        cluster, client = fdask.setup_workers(is_local=True, dask_tmp=dask_tmp, memory_limit=memory_limit)
        try:
            print_client_links(cluster)

            splits_ = np.array_split(np.arange(num_t).astype('int'), num_t_chunks)
            print(f'Processing total {num_t_chunks} chunks in time.......')
            # estimate size of data to store
            used_ = du(denoised_path+'/')
            est_data_size = int(used_.decode('utf-8'))//(2**20*num_t_chunks*2)+5 #kb to Gb
            for nz, n_split in enumerate(splits_):
                if len(n_split)==0:
                    # fewer frames than num_t_chunks: nothing to write for this piece
                    continue
                chunk_name = '/motion_corrected_data_chunks_%03d.zarr'%(nz)
                # skip pieces that were already written to either drive
                if os.path.exists(save_root+chunk_name):
                    continue
                if save_root_ext is not None and os.path.exists(save_root_ext+chunk_name):
                    continue
                print('Apply registration to rechunk layer %03d'%(nz))
                t_start = n_split[0]
                t_end = n_split[-1]+1
                trans_data_ = da.map_blocks(apply_transform3d, denoised_data[t_start:t_end], trans_affine_[t_start:t_end], chunks=(1, *denoised_data.shape[1:]), dtype='float16')
                print('Starting to rechunk layer %03d'%(nz))
                trans_data_t_z = trans_data_.rechunk((-1, 1, chunks[1]//nsplit[0], chunks[2]//nsplit[1])).transpose((1, 2, 3, 0))
                # check space availablity
                _, _, free_ = shutil.disk_usage(f'{save_root}/')
                if (free_//(2**30)) > est_data_size:
                    print(f'Remaining space {free_//(2**30)} GB..... -- start to save at {save_root}')
                    trans_data_t_z.to_zarr(save_root+chunk_name)
                elif save_root_ext is None:
                    # nowhere left to write: stop here (workers are released in ``finally``)
                    print(f'Remaining space {free_//(2**30)} GB..... -- not enough space at {save_root} and no extended drive was given')
                    return None
                else:
                    try:
                        print(f'Remaining space {free_//(2**30)} GB..... -- start to save at {save_root_ext}')
                        trans_data_t_z.to_zarr(save_root_ext+chunk_name)
                    except Exception as e:
                        # if any error -- break the code
                        print(e)
                        return None
                del trans_data_t_z
                gc.collect()
                print('finishing rechunking time chunk -- %03d of %03d'%(nz, num_t_chunks))
                write_log('finishing rechunking time chunk -- %03d of %03d \n'%(nz, num_t_chunks))

            print('Remove temporal files of registration')
            if os.path.exists(denoised_path):
                shutil.rmtree(denoised_path)
            # pieces that overflowed to the extended drive are gathered back
            if save_root_ext is not None:
                for ext_files in tqdm(glob(save_root_ext+'/motion_corrected_data_chunks_*.zarr')):
                    print(f'Moving file {ext_files} to Tmp-file folder.....')
                    shutil.move(ext_files, save_root+'/')
        finally:
            # release the workers on every exit path, including errors and early returns
            fdask.terminate_workers(cluster, client)
        time.sleep(60)
    return None