Example #1
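The examples below assume module-level imports along these lines (a sketch; utils, io, counting, stitching, hdf5preparation, tilejoining, pairwisesingle, hdf5_utils, nd2 and filtering_and_counting_experiment are project-local modules and helpers of the source repository, not shown here):

import argparse
import glob
import logging
import multiprocessing
import os
import re
import shutil

import h5py
import numpy as np
from dask import delayed
from dask.distributed import Client, LocalCluster
from skimage import filters

# Assumed project-local imports (names as used in the examples):
# utils, io, counting, stitching, hdf5preparation, tilejoining,
# pairwisesingle, hdf5_utils, nd2, filtering_and_counting_experiment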
def preprocessing_script():
    """
    This script processes all the hybridization folders contained in a
    processing folder. The input parameters are passed using argparse.

    Parameters:
    -----------
    
    scheduler: string
        TCP address of the dask.distributed scheduler (e.g. tcp://192.168.0.4:7003).
        Default = False. If False, the processing runs on the local machine
        using nCPUs - 1 cores.

    path: string
        Path to the processing directory


    """


    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()
    
    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler
    
    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)

    else:
        # Start the dask client on the local machine. It will use all the
        # available cores minus one

        # number of cores to use
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that must be skipped
    # during the analysis
    blocked_directories = ['_logs']

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list if el not in blocked_directories ]

    for processing_hyb in processing_hyb_list:
    
        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
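        # e.g. a folder named 'EXP1_hyb2' (hypothetical) yields
        # hybridization_number = '2' and hybridization = 'Hybridization2'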
        hyb_dir = processing_directory + processing_hyb + add_slash
        
        # Parse the Experimental metadata file (serial)
        experiment_infos,image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters =\
        utils.experimental_metadata_parser(hyb_dir)
        
        # Parse the configuration file 
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)
        
        
        # ----------------- .nd2 FILE CONVERSION ------------------------------

        # Create the temporary subdirectory tree (serial)
        tmp_dir_path, tmp_gene_dirs = utils.create_subdirectory_tree(hyb_dir,
                    hybridization, hybridizations_infos, processing_hyb,
                    suffix='tmp', add_slash=add_slash)

        # Get the list of the nd2 files to process inside the directory
        files_list = glob.glob(hyb_dir+processing_hyb+'_raw_data'+add_slash+'*.nd2')

        # Get the list of genes that are analyzed in the current hybridization
        gene_list = list(hybridizations_infos[hybridization].keys())

        # Organize the files to process in a list whose order matches
        # gene_list, for parallel processing
        organized_files_list = [f for gene in gene_list for f in files_list if gene + '.nd2' in f]
        organized_tmp_dir_list = [f for gene in gene_list for f in tmp_gene_dirs if gene in f]

        # Each .nd2 file will be processed by a worker on a different node.
        # Get the address of one process per node to use for the conversion
        node_addresses = utils.identify_nodes(client)
        workers_conversion = [list(el.items())[0][1] for key,el in node_addresses.items()]

        # Run the conversion; pass the organized tmp dir list so that the
        # target directories stay aligned with gene_list
        futures_processes = client.map(io.nd2_to_npy, gene_list, organized_files_list,
                                       organized_tmp_dir_list, processing_hyb=processing_hyb,
                                       use_ram=flt_rawcnt_config['use_ram'],
                                       max_ram=flt_rawcnt_config['max_ram'],
                                       workers=workers_conversion)
        client.gather(futures_processes)

        

        # ---------------------------------------------------------------------
        
        
        # ----------------- FILTERING AND RAW COUNTING ------------------------
        
        # Create directories 

        # Create the directory where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,
                            processing_hyb,suffix,add_slash,analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,
                            processing_hyb,suffix,add_slash,analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,processing_hyb,
                            suffix,add_slash,flt_rawcnt_config['skip_tags_counting'],
                            flt_rawcnt_config['skip_genes_counting'],
                            analysis_name=flt_rawcnt_config['analysis_name'])


        if flt_rawcnt_config['illumination_correction']:

            # Create the directory where to save the illumination functions
            suffix = 'illumination_funcs'
            illumination_func_dir_path, illumination_func_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,processing_hyb,
                                                suffix,add_slash,analysis_name=flt_rawcnt_config['analysis_name'])

            # Loop through the genes and calculate the illumination function
            for gene in hybridizations_infos[hybridization].keys():
                
                flist_img_to_filter=glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')

                logger.debug('Create average image for gene %s', gene)

                # Chunking the image list
                num_chunks = sum(list(client.ncores().values()))
                chunked_list = utils.list_chunking(flist_img_to_filter,num_chunks)

                # Scatter the image sublists to process in parallel
                futures = client.scatter(chunked_list)

                # Create dask processing graph
                output = []
                for future in futures:
                    ImgMean = delayed(utils.partial_image_mean)(future)
                    output.append(ImgMean)
                ImgMean_all = delayed(sum)(output)
                ImgMean_all = ImgMean_all/float(len(futures))
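                # The mean of the partial means matches the global mean only
                # when the chunks are (close to) equally sized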

                # Compute the graph
                ImgMean = ImgMean_all.compute()

                logger.debug('Create illumination function for gene %s',gene)
                # Create illumination function
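                # The large in-plane sigma smooths away the biological signal,
                # leaving only the slowly varying illumination profile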
                Illumination=filters.gaussian(ImgMean,sigma=(20,300,300))

                # Normalization of the illumination
                Illumination_flat=np.amax(Illumination,axis=0)
                Illumination_norm=Illumination_flat/np.amax(Illumination_flat)

                logger.debug('Save illumination function for gene %s',gene)
                # Save the illumination function
                illumination_path = [ill_path for ill_path in illumination_func_gene_dirs if gene in ill_path][0]
                illumination_fname=illumination_path+gene+'_illumination_func.npy'
                np.save(illumination_fname,Illumination_norm,allow_pickle=False)  

                # Broadcast the illumination function to all the cores
                client.scatter(Illumination_norm, broadcast=True)

                logger.debug('Filtering %s',gene)
                # Filtering and counting
                futures_processes=client.map(counting.filtering_and_counting_ill_correction,flist_img_to_filter, \
                                illumination_function=Illumination_norm,\
                                filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,\
                                filtered_img_gene_dirs =filtered_img_gene_dirs,\
                                counting_gene_dirs=counting_gene_dirs,plane_keep=flt_rawcnt_config['plane_keep'], \
                                min_distance=flt_rawcnt_config['min_distance'], stringency=flt_rawcnt_config['stringency'],\
                                skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
                client.gather(futures_processes)
               

        else:
            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter=glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
                # filtering
                logger.debug('Filtering without illumination correction %s',gene)

                futures_processes=client.map(counting.filtering_and_counting,flist_img_to_filter, \
                                        filtered_png_img_gene_dirs=filtered_png_img_gene_dirs, \
                                        filtered_img_gene_dirs=filtered_img_gene_dirs, \
                                        counting_gene_dirs=counting_gene_dirs, \
                                        plane_keep=flt_rawcnt_config['plane_keep'], min_distance=flt_rawcnt_config['min_distance'],\
                                        stringency=flt_rawcnt_config['stringency'],\
                                        skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

                client.gather(futures_processes)
                
        # ---------------------------------------------------------------------
        
        # # ----------------- COMBINE THE FILTERED DATA IN .ppf.hdf5 ------------------------
        # # Combine the filtered data in one single .ppf file for each hybridization
        # # This step will run in serial mode and will not need to shuffle data
        # #  between cores because everything is on the common file system

        # logger.debug('Create .ppf.hdf5 file')

        # # Create the ppf.hdf5 file that contains the filtered data in uint16
        # preprocessing_file_path = hdf5_utils.hdf5_create_preprocessing_file(hybridizations_infos,processing_hyb,
        #                                 hybridization,flt_rawcnt_config['analysis_name'], hyb_dir,converted_positions,image_properties)

        # logger.debug('Write the .npy filtered files into the .ppf file')
        # # Load and write the .npy tmp images into the hdf5 file

        # # open the hdf5 file
        # with h5py.File(preprocessing_file_path) as f_hdl:
        #     # Loop through each gene
        #     for gene in hybridizations_infos[hybridization].keys():

        #         logger.debug('Writing %s images in .ppf.hdf5',gene)
        #         # list of the files to transfer
        #         filtered_gene_dir = [fdir for fdir in filtered_img_gene_dirs if gene in fdir][0]
        #         filtered_files_list = glob.glob(filtered_gene_dir+'*.npy')

        #         # loop through the list of file
        #         for f_file in filtered_files_list:
        #             pos = f_file.split('/')[-1].split('_')[-1].split('.')[0]
        #             f_hdl[gene]['FilteredData'][pos][:] =np.load(f_file)
        #             f_hdl.flush()
        
        # # ---------------------------------------------------------------------
        
        # # ----------------- STITCHING ------------------------
        # # Load the stitching parameters from the .yaml file

        # # Stitch the image in 2D or 3D (3D needs more work/testing)
        # nr_dim = flt_rawcnt_config['nr_dim']

        # # Estimated overlapping between images according to the Nikon software
        # est_overlap = image_properties['Overlapping_percentage']

        # # Number of peaks to use for the alignment
        # nr_peaks = flt_rawcnt_config['nr_peaks']

        # # Determine if the coords need to be flipped

        # y_flip = flt_rawcnt_config['y_flip']

        # # Method to use for blending
        # # can be 'linear' or 'non linear'
        # # The method that performs best is 'non linear'

        # blend = flt_rawcnt_config['blend']

        # # Reference gene for stitching
        # reference_gene = flt_rawcnt_config['reference_gene']

        # pixel_size = image_properties['PixelSize']

        # # Get the list of the filtered files of the reference gene
        # filtered_gene_dir = [gene_dir for gene_dir in filtered_img_gene_dirs if reference_gene in gene_dir][0]
        # filtered_files_list = glob.glob(filtered_gene_dir+'*.npy')

        # # Create pointer of the hdf5 file that will store the stitched reference image
        # # for the current hybridization
        # # Writing
        # tile_file_base_name = flt_rawcnt_config['analysis_name']+'_'+ processing_hyb
        # data_name   = (tile_file_base_name
        #                 + '_' + reference_gene
        #                 + '_stitching_data')

        # stitching_file_name = tile_file_base_name + '.sf.hdf5'
        # stitching_file= h5py.File(hyb_dir+stitching_file_name,'w',libver='latest')  # replace with 'a' as soon as you fix the error


        # # Determine the tiles organization
        # tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(image_properties,converted_positions, hybridization,
        #                         est_overlap = est_overlap, y_flip = False, nr_dim = 2)



        # # Align the tiles 
        # futures_processes=client.map(pairwisesingle.align_single_pair_npy,contig_tuples,
        #                             filtered_files_list=filtered_files_list,micData=micData, 
        #                         nr_peaks=nr_peaks)

        # # Gather the futures
        # data = client.gather(futures_processes)


        # # In this case the order of the returned tuples matches the order
        # # of the input contig_tuples

        # # P_all = [el for data_single in data for el in data_single[0]]
        # P_all =[data_single[0] for data_single in data ]
        # P_all = np.array(P_all)
        # P_all = P_all.flat[:]
        # covs_all = [data_single[1] for data_single in data]
        # alignment = {'P': P_all,
        #             'covs': covs_all}


        # # Calculates a shift in global coordinates for each tile (global
        # # alignment) and then applies these shifts to the  corner coordinates
        # # of each tile and returns and saves these shifted corner coordinates.
        # joining = stitching.get_place_tile_input(hyb_dir, tiles, contig_tuples,
        #                                             micData, nr_pixels, z_count,
        #                                             alignment, data_name,
        #                                             nr_dim=nr_dim)

        # # Create the hdf5 file structure
        # stitched_group, linear_blending, blend =  hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels,
        #                                 reference_gene, blend = 'non linear')

        # # Fill the hdf5 containing the stitched image with empty data and
        # # create the blending mask
        # stitched_group['final_image'][:]= np.zeros(joining['final_image_shape'],dtype=np.float64)
        # if blend is not None:
        #     # make mask
        #     stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:],dtype=np.float64)
        #     tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

            
        # # Create the subdirectory used to save the blended tiles
        # suffix = 'blended_tiles'
        # blended_tiles_directory = utils.create_single_directory(hyb_dir,reference_gene, hybridization,processing_hyb,suffix,add_slash,
        #                                 analysis_name=flt_rawcnt_config['analysis_name'])

        # # Get the directory with the filtered npy images of the reference_gene to use for stitching
        # stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if reference_gene in npy_dir][0]


        # # Create the tmp directory where to save the masks
        # suffix = 'masks'
        # masked_tiles_directory = utils.create_single_directory(hyb_dir,reference_gene, hybridization,processing_hyb,suffix,add_slash,
        #                                 analysis_name=flt_rawcnt_config['analysis_name'])

        # # Create and save the mask files
        # for corn_value,corner_coords in joining['corner_list']:
        #     if not(np.isnan(corner_coords[0])):
        #         cur_mask = stitched_group['blending_mask'][int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
        #                             int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]

        #         fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] +'_'+processing_hyb+'_'+reference_gene+'_masks_joining_pos_'+str(corn_value)
        #         np.save(fname,cur_mask)


        # # Blend all the tiles and save them in a directory
        # futures_processes = client.map(tilejoining.generate_blended_tile_npy,joining['corner_list'],
        #                             stitching_files_dir = stitching_files_dir,
        #                             blended_tiles_directory = blended_tiles_directory,
        #                             masked_tiles_directory = masked_tiles_directory,
        #                             analysis_name = flt_rawcnt_config['analysis_name'],
        #                             processing_hyb = processing_hyb,reference_gene = reference_gene,
        #                             micData = micData,tiles = tiles,nr_pixels=nr_pixels,
        #                             linear_blending=linear_blending)



        # _ = client.gather(futures_processes)


        # # Write the stitched image
        # tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory, tiles,reference_gene, nr_pixels)

        # # close the hdf5 file
        # stitching_file.close()


        # # Delete the directories with blended tiles and masks
        # shutil.rmtree(blended_tiles_directory)
        # shutil.rmtree(masked_tiles_directory)

        # ----------------- DELETE FILES ------------------------
        # Don't delete the *.npy files here because they can be used to
        # create the final images with the apply-stitching function

    client.close()
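
The illumination-averaging step in the example above uses a scatter plus a delayed reduction. Below is a minimal self-contained sketch of the same pattern (hypothetical file names; equally sized chunks of same-shape stacks are assumed):

import numpy as np
from dask import delayed
from dask.distributed import Client, LocalCluster

def partial_image_mean(file_sublist):
    # Mean of one sublist of .npy image stacks
    return np.mean([np.load(f) for f in file_sublist], axis=0)

if __name__ == '__main__':
    client = Client(LocalCluster(n_workers=2))
    chunked_list = [['img_0.npy', 'img_1.npy'], ['img_2.npy', 'img_3.npy']]
    futures = client.scatter(chunked_list)  # one future per sublist
    partial_means = [delayed(partial_image_mean)(f) for f in futures]
    # Mean of the partial means; exact only for equally sized chunks
    img_mean = (delayed(sum)(partial_means) / len(futures)).compute()
    client.close()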
Example #2
def apply_stitching():

    """
    Script to apply the registration to all the osmFISH channels. It will
    create a stitched image in an hdf5 file.

    All the parameters are entered via argparse

    Parameters:
    -----------

    experiment_path: string
        Path to the folder with the hybridizations
    reference_files_path: string
        Path to the folder with the _reg_data.pkl files
    scheduler: string
        TCP address of the dask.distributed scheduler (e.g. tcp://192.168.0.4:7003).
        Default = False. If False, the processing runs on the local machine
        using nCPUs - 1 cores.

    """

    parser = argparse.ArgumentParser(description='Create the stitched images \
                                    after registration')

    parser.add_argument('-experiment_path', help='path to the folder with the hybridizations')
    parser.add_argument('-reference_files_path', help='path to the folder with the \
                        _reg_data.pkl files')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    args = parser.parse_args()

    processing_experiment_directory = args.experiment_path
    stitched_reference_files_dir = args.reference_files_path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)

    else:
        # Start the dask client on the local machine. It will use all the
        # available cores minus one

        # number of cores to use
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)


    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directories
    processing_experiment_directory = utils.check_trailing_slash(processing_experiment_directory, os_windows)

    stitched_reference_files_dir = utils.check_trailing_slash(stitched_reference_files_dir, os_windows)

    # Starting logger
    utils.init_file_logger(processing_experiment_directory)
    logger = logging.getLogger()


    # Collect the infos of the experiment and the processing
    # Parse the Experimental metadata file (serial)
    experiment_infos,image_properties, hybridizations_infos, \
    converted_positions, microscope_parameters =\
    utils.experimental_metadata_parser(processing_experiment_directory)

    # Parse the configuration file 
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)

    # Get the reference gene used
    reference_gene = flt_rawcnt_config['reference_gene']

    # Stitch the image in 2D or 3D (3D needs more work/testing)
    nr_dim = flt_rawcnt_config['nr_dim']

    # Determine the hybridizations to process
    if isinstance(flt_rawcnt_config['hybs_to_stitch'], list):
        hybridizations_to_process = flt_rawcnt_config['hybs_to_stitch']
    elif flt_rawcnt_config['hybs_to_stitch'] == 'All':
        hybridizations_to_process = list(hybridizations_infos.keys())
    else:
        raise ValueError('Error in the hybridizations to stitch')
    


    for hybridization in hybridizations_to_process:
        
        # Determine the genes to stitch in the processing hybridization
        genes_processing = list(hybridizations_infos[hybridization].keys())


        hyb_short = re.sub('Hybridization','hyb',hybridization)
        processing_hyb = experiment_infos['ExperimentName']+'_'+hyb_short
        hyb_dir = processing_experiment_directory+processing_hyb+add_slash
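        # e.g. (hypothetical) hybridization = 'Hybridization3' gives
        # hyb_short = 'hyb3' and processing_hyb = '<ExperimentName>_hyb3'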

        # Create a pointer to the hdf5 file that will store the stitched images
        # for the current hybridization

        tile_file_base_name = flt_rawcnt_config['analysis_name']+'_'+experiment_infos['ExperimentName']+'_'+hyb_short
        stitching_file_name = tile_file_base_name + '.reg.sf.hdf5'

        data_name = (tile_file_base_name
                            + '_' + reference_gene
                            + '_stitching_data_reg')

        stitching_file= h5py.File(stitched_reference_files_dir+stitching_file_name,'w',libver='latest')  # replace with 'a' as soon as you fix the error

        # Determine the tiles organization
        joining, tiles, nr_pixels, z_count, micData = stitching.get_place_tile_input_apply_npy(hyb_dir,stitched_reference_files_dir,data_name,image_properties,nr_dim)

        for gene in genes_processing:
        
            # Create the hdf5 file structure
            stitched_group, linear_blending, blend =  hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels,
                                            gene, blend = 'non linear')

            # Fill the hdf5 containing the stitched image with empty data and
            # create the blending mask
            stitched_group['final_image'][:]= np.zeros(joining['final_image_shape'],dtype=np.uint16)
            if blend is not None:
                # make mask
                stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:],dtype=np.uint16)
                tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

            filtered_img_gene_dirs_path = hyb_dir+flt_rawcnt_config['analysis_name']+'_'+processing_hyb +'_filtered_npy'+add_slash
            filtered_img_gene_dirs = glob.glob(filtered_img_gene_dirs_path+'*')

            # Create the subdirectory used to save the blended tiles
            suffix = 'blended_tiles'
            blended_tiles_directory = utils.create_single_directory(hyb_dir,gene, hybridization,processing_hyb,suffix,add_slash,
                                            analysis_name=flt_rawcnt_config['analysis_name'])

            # Get the directory with the filtered npy images of the current
            # gene to use for stitching
            stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if gene in npy_dir][0]
            stitching_files_dir = stitching_files_dir + add_slash

            # Create the tmp directory where to save the masks
            suffix = 'masks'
            masked_tiles_directory = utils.create_single_directory(hyb_dir,gene,hybridization,processing_hyb,suffix,add_slash,
                                            analysis_name=flt_rawcnt_config['analysis_name'])

            # Create and save the mask files
            for corn_value,corner_coords in joining['corner_list']:
                if not(np.isnan(corner_coords[0])):
                    cur_mask = stitched_group['blending_mask'][int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
                                        int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]

                    fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] +'_'+processing_hyb+'_'+gene+'_masks_joining_pos_'+str(corn_value)
                    np.save(fname,cur_mask)


            # Blend all the tiles and save them in a directory
            futures_processes = client.map(tilejoining.generate_blended_tile_npy,joining['corner_list'],
                                        stitching_files_dir = stitching_files_dir,
                                        blended_tiles_directory = blended_tiles_directory,
                                        masked_tiles_directory = masked_tiles_directory,
                                        analysis_name = flt_rawcnt_config['analysis_name'],
                                        processing_hyb = processing_hyb,reference_gene = gene,
                                        micData = micData,tiles = tiles,nr_pixels=nr_pixels,
                                        linear_blending=linear_blending)



            _ = client.gather(futures_processes)


            # Write the stitched image
            tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory, tiles,gene, nr_pixels)
            stitching_file.flush()

            # Remove directories with blended tiles and masks
            shutil.rmtree(blended_tiles_directory)
            shutil.rmtree(masked_tiles_directory)


        stitching_file.close()

    
    client.close()
Example #3
def filtering_speed():
    """
    This script processes all the hybridization folders contained in a
    processing folder. The input parameters are passed using argparse.

    Parameters:
    -----------
    
    scheduler: string
        TCP address of the dask.distributed scheduler (e.g. tcp://192.168.0.4:7003).
        Default = False. If False, the processing runs on the local machine
        using nCPUs - 1 cores.

    path: string
        Path to the processing directory


    """


    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()
    
    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler
    
    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)

    else:
        # Start the dask client on the local machine. It will use all the
        # available cores minus one

        # number of cores to use
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that must be skipped
    # during the analysis
    blocked_directories = ['_logs']

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list if el not in blocked_directories ]

    for processing_hyb in processing_hyb_list:
    
        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash
        
        # Parse the Experimental metadata file (serial)
        experiment_infos,image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters =\
        utils.experimental_metadata_parser(hyb_dir)
        
        # Parse the configuration file 
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)
        
    
        
        # ----------------- FILTERING AND RAW COUNTING ------------------------
        
        # Create directories 

        # Create the directory where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,
                            processing_hyb,suffix,add_slash,analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,
                            processing_hyb,suffix,add_slash,analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,processing_hyb,
                            suffix,add_slash,flt_rawcnt_config['skip_tags_counting'],
                            flt_rawcnt_config['skip_genes_counting'],
                            analysis_name=flt_rawcnt_config['analysis_name'])


    
        for gene in hybridizations_infos[hybridization].keys():
            flist_img_to_filter=glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
            # filtering
            logger.debug('Filtering without illumination correction %s',gene)

            futures_processes=client.map(counting.filtering_and_counting,flist_img_to_filter, \
                                    filtered_png_img_gene_dirs=filtered_png_img_gene_dirs, \
                                    filtered_img_gene_dirs=filtered_img_gene_dirs, \
                                    counting_gene_dirs=counting_gene_dirs, \
                                    plane_keep=flt_rawcnt_config['plane_keep'], min_distance=flt_rawcnt_config['min_distance'],\
                                    stringency=flt_rawcnt_config['stringency'],\
                                    skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

            client.gather(futures_processes)


        # ----------------- RAW COUNTING ONLY------------------------
        
        skip_genes_counting=flt_rawcnt_config['skip_genes_counting']
        skip_tags_counting=flt_rawcnt_config['skip_tags_counting']

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir,hybridization,hybridizations_infos,processing_hyb,
                            suffix,add_slash,flt_rawcnt_config['skip_tags_counting'],
                            flt_rawcnt_config['skip_genes_counting'],
                            analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        gene_list = list(hybridizations_infos[hybridization].keys())
        analysis_name=flt_rawcnt_config['analysis_name']
        sufx_dir_path = hyb_dir+analysis_name+'_'+processing_hyb+'_'+suffix+add_slash
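        # e.g. (hypothetical) with analysis_name = 'analysis1' and
        # processing_hyb = 'EXP1_hyb2':
        # sufx_dir_path = '/data/exp1/EXP1_hyb2/analysis1_EXP1_hyb2_filtered_npy/'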
        
    
        for gene in hybridizations_infos[hybridization].keys():

            # Count only the genes that are not in the skip list and whose
            # name does not contain any of the skipped tags
            if gene not in skip_genes_counting and not any(tag in gene for tag in skip_tags_counting):
                if analysis_name:
                    filtered_images_directory =  sufx_dir_path+analysis_name+'_'+processing_hyb+'_'+ gene+'_'+suffix+add_slash
                else:
                    filtered_images_directory =  sufx_dir_path +processing_hyb+'_'+ gene +'_'+suffix+add_slash
                
                flist_img_to_filter = glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
                # counting only
                logger.debug('Counting without filtering %s', gene)

                futures_processes=client.map(counting.counting_only,flist_img_to_filter, \
                                        counting_gene_dirs=counting_gene_dirs, \
                                        min_distance=flt_rawcnt_config['min_distance'],\
                                        stringency=flt_rawcnt_config['stringency'])

                client.gather(futures_processes)






    client.close()
def process_standalone_experiment():
    """
    Script to run conversion, filtering and raw counting on a small set of images.
    The analysis runs locally.

    All the parameters are entered via argparse

    Parameters:
    -----------

    path: string
        Path to the experiment to process
    analysis_name: string
        Name of the analysis
    stringency: int
        Value of the stringency to use in the threshold selection. Default=0
    min_distance: int
        Min distance between two peaks. Default=5
    min_plane: int
        Min plane for z-stack cropping. Default=None
    max_plane: int
        Max plane for z-stack cropping. Default=None
    ncores: int
        Number of cores to use for the processing. Default=1


    """

    # input to the function
    parser = argparse.ArgumentParser(
        description='Counting and filtering experiment')
    parser.add_argument('-path', help='path to experiment to analyze')
    parser.add_argument('-analysis_name', help='analysis name')
    parser.add_argument('-stringency', help='stringency', default=0, type=int)
    parser.add_argument('-min_distance',
                        help='min distance between peaks',
                        default=5,
                        type=int)
    parser.add_argument('-min_plane',
                        help='starting plane to consider',
                        default=None,
                        type=int)
    parser.add_argument('-max_plane',
                        help='ending plane to consider',
                        default=None,
                        type=int)
    parser.add_argument('-ncores',
                        help='number of cores to use',
                        default=1,
                        type=int)

    # Parse the input args
    args = parser.parse_args()
    processing_directory = args.path
    analysis_name = args.analysis_name
    stringency = args.stringency
    min_distance = args.min_distance
    min_plane = args.min_plane
    max_plane = args.max_plane
    ncores = args.ncores

    if min_plane is not None and max_plane is not None:
        plane_keep = [min_plane, max_plane]
    else:
        plane_keep = None

    # Determine the os type
    os_windows, add_slash = utils.determine_os()

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    logger.debug('min_plane %s', min_plane)
    logger.debug('max_plane %s', max_plane)
    logger.debug('keep_planes value %s', plane_keep)

    # Start the distributed client
    client = Client(n_workers=ncores, threads_per_worker=1)

    logger.debug('client %s', client)
    logger.debug('check that workers are on the same directory %s',
                 client.run(os.getcwd))

    # Check the trailing slash
    processing_directory = utils.check_trailing_slash(processing_directory,
                                                      os_windows)

    # Determine the experiment name
    exp_name = processing_directory.split(add_slash)[-2]

    logger.debug('Experiment name: %s', exp_name)

    # Create the directories where to save the output
    tmp_dir_path = processing_directory + analysis_name + '_' + exp_name + '_tmp' + add_slash
    filtered_dir_path = processing_directory + analysis_name + '_' + exp_name + '_filtered' + add_slash
    counting_dir_path = processing_directory + analysis_name + '_' + exp_name + '_counting_pkl' + add_slash
    # Create the output directories if they do not exist yet
    for dir_path in (tmp_dir_path, filtered_dir_path, counting_dir_path):
        try:
            os.stat(dir_path)
        except FileNotFoundError:
            os.mkdir(dir_path)
            os.chmod(dir_path, 0o777)

    # Get the list of the nd2 files to process inside the directory
    files_list = glob.glob(processing_directory + '*.nd2')
    logger.debug('files to process %s', files_list)

    # Convert the .nd2 data
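    # Each .nd2 file yields one (z, y, x) uint16 stack per channel and field
    # of view, saved as <exp_name>_<fname>_<channel>_fov_<n>.npy in tmp_dir_path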
    for raw_data_gene_fname in files_list:
        fname = raw_data_gene_fname.split(add_slash)[-1][:-4]
        logger.debug('fname %s', fname)
        with nd2.Nd2(raw_data_gene_fname) as nd2file:
            for channel in nd2file.channels:
                for fov in nd2file.fields_of_view:
                    img_stack = np.empty(
                        [len(nd2file.z_levels), nd2file.height, nd2file.width],
                        dtype='uint16')
                    images = nd2file.select(channels=channel,
                                            fields_of_view=fov,
                                            z_levels=nd2file.z_levels)
                    for idx, im in enumerate(images):
                        img_stack[idx, :, :] = im

                    converted_fname = tmp_dir_path + exp_name + '_' + fname + '_' + channel + '_fov_' + str(
                        fov) + '.npy'
                    np.save(converted_fname, img_stack, allow_pickle=False)

    logger.debug('Finished .nd2 file conversion')

    # Filtering all the data
    # Get list of the files to process
    flist_img_to_filter = glob.glob(tmp_dir_path + '*.npy')

    # logger.debug('files to filter %s',flist_img_to_filter)
    # Parallel process all the data
    futures_processes = client.map(filtering_and_counting_experiment, flist_img_to_filter,
                                   filtered_dir_path=filtered_dir_path,
                                   counting_dir_path=counting_dir_path,
                                   exp_name=exp_name, plane_keep=plane_keep, add_slash=add_slash,
                                   min_distance=min_distance, stringency=stringency)

    client.gather(futures_processes)
    client.close()

    logger.debug('Finished filtering and counting')

    # delete the tmp folder
    shutil.rmtree(tmp_dir_path)
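
A hypothetical entry point, assuming the standalone script is run directly:

if __name__ == '__main__':
    process_standalone_experiment()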