示例#1
0
def reference_registration():

    """
    Register the stitched reference channels of the processed hybridizations.

    The comparison is described as sequential (from Hyb1-->HybN) and not all
    the hybridization steps are required.
    NOTE(review): the registration call below passes compare_in_seq=False;
    confirm whether the sequential comparison is really what is run.

    The output are pickle files with the recalculated corners according to the
    registration.

    The input parameters are entered via argparse. A missing required
    parameter makes argparse print the usage and exit before any processing.

    Parameters:
    -----------

    path: string.
        Exact path to the folder with the stitched .sf.hdf5. Required.
    reference_gene: string.
        Reference gene used for stitching. Required.
    fraction: float.
        Fraction of the image to use for the registration.
        Selection start from the center of the image.
        Default 0.2

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Register the stitched images \
                                    of the reference channels')

    # Both values are passed straight to the project helpers below, so they
    # are mandatory: without them the helpers would receive None.
    parser.add_argument('-path', help='path to the folder with the stitched \
                        XX.sf.hdf5 and XX_data_reg.pkl files',
                        required=True)
    parser.add_argument('-reference_gene', help='Reference gene used for the \
                        stitching',
                        required=True)

    parser.add_argument('-fraction',default=0.2, help='fraction of the picture to use for \
                        registration',type=float)

    args = parser.parse_args()

    # retrieve the parameters
    processing_directory = args.path
    reference_gene = args.reference_gene
    fraction = args.fraction

    # Determine the operating system running the code
    # (only the windows flag is needed in this script)
    os_windows, _ = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory,
                                                      os_windows)

    # Register using only the registration data: the result is applied to the
    # corners (apply_to_corners=True) and no warping is applied.
    hr.register_final_images_reg_data_only(processing_directory, gene=reference_gene,
                          sub_pic_frac=fraction, use_MPI=False,
                          apply_to_corners=True, apply_warping = False,
                          region=None, compare_in_seq=False)
示例#2
0
def add_coords_to_experimental_metadata():
    """
    Script used to add the coords to the Experimental_metadata.yaml file.
    In order to work:

    - The Experimental_metadata.yaml file needs to have the processing hybridization
    and the coords at point 0 initialized: ex:

    TilesPositions:
    Hybridization1:
        0:

    - The coords file extracted from the Nikon microscope need to be in a text file
    ex. Hyb1_Coords.txt. The Coords tag is necessary and Hyb is the hybridization_key

    - The input parameters are passed via argparse. A missing required
    parameter makes argparse print the usage and exit before any processing.

    Parameters:
    -----------
    hybridization_number: string
        Hybridization number (ex. 3) describing the hybridization to process.
        Required.

    path: string
        Exact path to the folder containing the text file with the coordinates.
        Required.

    hybridization_key: string
        Possible values 'Hyb' or 'Strip'. To add coordinates for stripping if necessary.
        Default 'Hyb'.

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Add microscope coords to \
                                    experimental_metadata.yaml file')
    # The hybridization number and the path are handed directly to
    # utils.add_coords_to_yaml, so they are mandatory.
    parser.add_argument('-hybridization_number',
                        required=True,
                        help='hybridization to add the coords to')
    parser.add_argument(
        '-path',
        required=True,
        help='path to the folder with the experimental_metadata.yaml file')
    parser.add_argument('-hybridization_key',
                        default='Hyb',
                        help='String to identify the specific \
                        coords file')
    args = parser.parse_args()

    # retrieve the parameters
    processing_directory = args.path
    hyb_nr = args.hybridization_number
    hyb_key = args.hybridization_key

    # Determine the operating system running the code
    # (only the windows flag is needed in this script)
    os_windows, _ = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory,
                                                      os_windows)

    utils.add_coords_to_yaml(processing_directory, hyb_nr, hyb_key)
def preprocessing_script():
    """
    Preprocess all the hybridization folders combined in a processing folder.

    Every subdirectory of the processing directory (except the blocked ones)
    is handled in turn: .nd2 conversion, then filtering and raw counting,
    optionally with an illumination correction computed for each gene.
    The input parameters are passed using argparse. A missing required
    parameter makes argparse print the usage and exit before the dask client
    is started.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer
        using nCPUs-1 workers (at least one).

    path: string
        Path to the processing directory. Required.


    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', required=True, help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)

    else:
        # Start dask client on local machine. It will use all the available
        # cores -1, but never fewer than one worker (cpu_count()-1 is 0 on a
        # single core machine).
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    try:
        # Subdirectories of the processing_directory that need to be skipped
        # for the analysis
        blocked_directories = ['_logs']

        # Starting logger
        utils.init_file_logger(processing_directory)
        logger = logging.getLogger()

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory,
                                                          os_windows)

        # Get a list of the hybridization to process (first level
        # subdirectories only)
        processing_hyb_list = next(os.walk(processing_directory))[1]

        # Remove the blocked directories from the directories to process
        processing_hyb_list = [el for el in processing_hyb_list
                               if el not in blocked_directories]

        for processing_hyb in processing_hyb_list:
            _preprocess_hybridization(client, logger, processing_directory,
                                      processing_hyb, add_slash)
    finally:
        # Always release the dask client, also when a step fails
        client.close()


def _preprocess_hybridization(client, logger, processing_directory,
                              processing_hyb, add_slash):
    """
    Convert, filter and count the images of a single hybridization folder.

    Parameters:
    -----------

    client: dask.distributed Client
        Client used to run the parallel steps.
    logger: logging.Logger
        Logger used for the debug messages.
    processing_directory: string
        Processing directory (with trailing slash).
    processing_hyb: string
        Name of the hybridization subdirectory to process.
    add_slash: string
        Second value returned by utils.determine_os(), used here as path
        separator when building the paths.

    """

    # Determine the hyb number from the name
    hybridization_number = processing_hyb.split('_hyb')[-1]
    hybridization = 'Hybridization' + hybridization_number
    hyb_dir = processing_directory + processing_hyb + add_slash

    # Parse the Experimental metadata file (serial)
    (experiment_infos, image_properties, hybridizations_infos,
     converted_positions, microscope_parameters) = \
        utils.experimental_metadata_parser(hyb_dir)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

    # ----------------- .nd2 FILE CONVERSION ------------------------------

    # Create the temporary subdirectory tree (serial)
    tmp_dir_path, tmp_gene_dirs = utils.create_subdirectory_tree(
        hyb_dir, hybridization, hybridizations_infos, processing_hyb,
        suffix='tmp', add_slash=add_slash)

    # Get the list of the nd2 files to process inside the directory
    files_list = glob.glob(hyb_dir + processing_hyb + '_raw_data' + add_slash + '*.nd2')

    # Get the list of genes that are analyzed in the current hybridization
    gene_list = list(hybridizations_infos[hybridization].keys())

    # Organize the file to process in a list which order match the gene_list
    # for parallel processing
    organized_files_list = [f for gene in gene_list for f in files_list
                            if gene + '.nd2' in f]

    # NOTE(review): the original code also built a gene-ordered copy of
    # tmp_gene_dirs but never used it; tmp_gene_dirs is passed as returned,
    # so it presumably already follows the gene_list order - confirm.

    # Each .nd2 file will be processed in a worker part of a different node
    # Get the addresses of one process/node to use for conversion
    node_addresses = utils.identify_nodes(client)
    workers_conversion = [list(el.items())[0][1]
                          for el in node_addresses.values()]

    # Run the conversion
    futures_processes = client.map(io.nd2_to_npy, gene_list, organized_files_list,
                                   tmp_gene_dirs, processing_hyb=processing_hyb,
                                   use_ram=flt_rawcnt_config['use_ram'],
                                   max_ram=flt_rawcnt_config['max_ram'],
                                   workers=workers_conversion)
    client.gather(futures_processes)

    # ---------------------------------------------------------------------

    # ----------------- FILTERING AND RAW COUNTING ------------------------

    # Create directories

    # Create the directory where to save the filtered images
    suffix = 'filtered_png'
    filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    suffix = 'filtered_npy'
    filtered_img_dir_path, filtered_img_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    # Create the directory where to save the counting
    suffix = 'counting'
    counting_dir_path, counting_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       flt_rawcnt_config['skip_tags_counting'],
                                       flt_rawcnt_config['skip_genes_counting'],
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    if flt_rawcnt_config['illumination_correction']:

        # Create the directory where to save the illumination functions
        suffix = 'illumination_funcs'
        illumination_func_dir_path, illumination_func_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        # Loop through channels and calculate illumination
        for gene in hybridizations_infos[hybridization].keys():

            flist_img_to_filter = glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')

            logger.debug('Create average image for gene %s', gene)

            # Chunking the image list: one chunk for each core reported by
            # the client
            num_chunks = sum(client.ncores().values())
            chunked_list = utils.list_chunking(flist_img_to_filter, num_chunks)

            # Scatter the images sublists to process in parallel
            futures = client.scatter(chunked_list)

            # Create dask processing graph: one partial mean for each chunk,
            # then the average of the partial means
            partial_means = [delayed(utils.partial_image_mean)(future)
                             for future in futures]
            img_mean_all = delayed(sum)(partial_means)
            img_mean_all = img_mean_all/float(len(futures))

            # Compute the graph
            img_mean = img_mean_all.compute()

            logger.debug('Create illumination function for gene %s', gene)
            # Create illumination function
            illumination = filters.gaussian(img_mean, sigma=(20, 300, 300))

            # Normalization of the illumination: maximum along the first
            # axis, then scaling by the overall maximum
            illumination_flat = np.amax(illumination, axis=0)
            illumination_norm = illumination_flat/np.amax(illumination_flat)

            logger.debug('Save illumination function for gene %s', gene)
            # Save the illumination function
            illumination_path = [ill_path for ill_path in illumination_func_gene_dirs
                                 if gene in ill_path][0]
            illumination_fname = illumination_path+gene+'_illumination_func.npy'
            np.save(illumination_fname, illumination_norm, allow_pickle=False)

            # Broadcast the illumination function to all the cores
            # NOTE(review): the future returned by scatter is discarded and
            # the array itself is passed to client.map below - confirm that
            # this is intended.
            client.scatter(illumination_norm, broadcast=True)

            logger.debug('Filtering %s', gene)
            # Filtering and counting
            futures_processes = client.map(
                counting.filtering_and_counting_ill_correction,
                flist_img_to_filter,
                illumination_function=illumination_norm,
                filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                filtered_img_gene_dirs=filtered_img_gene_dirs,
                counting_gene_dirs=counting_gene_dirs,
                plane_keep=flt_rawcnt_config['plane_keep'],
                min_distance=flt_rawcnt_config['min_distance'],
                stringency=flt_rawcnt_config['stringency'],
                skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
            client.gather(futures_processes)

    else:
        for gene in hybridizations_infos[hybridization].keys():
            flist_img_to_filter = glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
            # filtering
            logger.debug('Filtering without illumination correction %s', gene)

            futures_processes = client.map(
                counting.filtering_and_counting,
                flist_img_to_filter,
                filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                filtered_img_gene_dirs=filtered_img_gene_dirs,
                counting_gene_dirs=counting_gene_dirs,
                plane_keep=flt_rawcnt_config['plane_keep'],
                min_distance=flt_rawcnt_config['min_distance'],
                stringency=flt_rawcnt_config['stringency'],
                skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

            client.gather(futures_processes)

    # ---------------------------------------------------------------------

    # The stages below (.ppf.hdf5 creation and stitching) are disabled in
    # this script and are kept commented out for reference.

    # # ----------------- COMBINE THE FILTERED DATA IN .ppf.hdf5 ------------------------
    # # Combine the filter data in one single .ppf for each hybridization
    # # This step will run in serial mode and will not need to shuffle data
    # #  between cores because everything is on the common file system

    # logger.debug('Create .ppf.hdf5 file')

    # # Create the ppf.hdf5 file that contains the filtered data in uint16
    # preprocessing_file_path = hdf5_utils.hdf5_create_preprocessing_file(hybridizations_infos,processing_hyb,
    #                                 hybridization,flt_rawcnt_config['analysis_name'], hyb_dir,converted_positions,image_properties)

    # logger.debug('Write the .npy filtered files into the .ppf file')
    # # Load and write the .npy tmp images into the hdf5 file

    # # open the hdf5 file
    # with h5py.File(preprocessing_file_path) as f_hdl:
    #     # Loop through each gene
    #     for gene in hybridizations_infos[hybridization].keys():

    #         logger.debug('Writing %s images in .ppf.hdf5',gene)
    #         # list of the files to transfer
    #         filtered_gene_dir = [fdir for fdir in filtered_img_gene_dirs if gene in fdir][0]
    #         filtered_files_list = glob.glob(filtered_gene_dir+'*.npy')

    #         # loop through the list of file
    #         for f_file in filtered_files_list:
    #             pos = f_file.split('/')[-1].split('_')[-1].split('.')[0]
    #             f_hdl[gene]['FilteredData'][pos][:] =np.load(f_file)
    #             f_hdl.flush()

    # # ---------------------------------------------------------------------

    # # ----------------- STITCHING ------------------------
    # # Load the stitching parameters from the .yaml file

    # # Stitch the image in 2D or 3D (3D need more work/testing)
    # nr_dim = flt_rawcnt_config['nr_dim']

    # # Estimated overlapping between images according to the Nikon software
    # est_overlap = image_properties['Overlapping_percentage']

    # # Number of peaks to use for the alignment
    # nr_peaks = flt_rawcnt_config['nr_peaks']

    # # Determine if the coords need to be flipped

    # y_flip = flt_rawcnt_config['y_flip']

    # # Method to use for blending
    # # can be 'linear' or 'non linear'
    # # The methods that performs the best is the 'non linear'

    # blend = flt_rawcnt_config['blend']

    # # Reference gene for stitching
    # reference_gene = flt_rawcnt_config['reference_gene']

    # pixel_size = image_properties['PixelSize']

    # # Get the list of the filtered files of the reference gene
    # filtered_gene_dir = [gene_dir for gene_dir in filtered_img_gene_dirs if reference_gene in gene_dir][0]
    # filtered_files_list = glob.glob(filtered_gene_dir+'*.npy')

    # # Create pointer of the hdf5 file that will store the stitched reference image
    # # for the current hybridization
    # # Writing
    # tile_file_base_name = flt_rawcnt_config['analysis_name']+'_'+ processing_hyb
    # data_name   = (tile_file_base_name
    #                 + '_' + reference_gene
    #                 + '_stitching_data')

    # stitching_file_name = tile_file_base_name + '.sf.hdf5'
    # stitching_file= h5py.File(hyb_dir+stitching_file_name,'w',libver='latest')  # replace with 'a' as soon as you fix the error


    # # Determine the tiles organization
    # tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(image_properties,converted_positions, hybridization,
    #                         est_overlap = est_overlap, y_flip = False, nr_dim = 2)



    # # Align the tiles
    # futures_processes=client.map(pairwisesingle.align_single_pair_npy,contig_tuples,
    #                             filtered_files_list=filtered_files_list,micData=micData,
    #                         nr_peaks=nr_peaks)

    # # Gather the futures
    # data = client.gather(futures_processes)


    # # In this case the order of the returned contingency tuples is with
    # # the order of the input contig_tuples

    # # P_all = [el for data_single in data for el in data_single[0]]
    # P_all =[data_single[0] for data_single in data ]
    # P_all = np.array(P_all)
    # P_all = P_all.flat[:]
    # covs_all = [data_single[1] for data_single in data]
    # alignment = {'P': P_all,
    #             'covs': covs_all}


    # # Calculates a shift in global coordinates for each tile (global
    # # alignment) and then applies these shifts to the  corner coordinates
    # # of each tile and returns and saves these shifted corner coordinates.
    # joining = stitching.get_place_tile_input(hyb_dir, tiles, contig_tuples,
    #                                             micData, nr_pixels, z_count,
    #                                             alignment, data_name,
    #                                             nr_dim=nr_dim)

    # # Create the hdf5 file structure
    # stitched_group, linear_blending, blend =  hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels,
    #                                 reference_gene, blend = 'non linear')

    # # Fill the hdf5 containing the stitched image with empty data and
    # # create the blending mask
    # stitched_group['final_image'][:]= np.zeros(joining['final_image_shape'],dtype=np.float64)
    # if blend is not None:
    #     # make mask
    #     stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:],dtype=np.float64)
    #     tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])


    # # Create the subdirectory used to save the blended tiles
    # suffix = 'blended_tiles'
    # blended_tiles_directory = utils.create_single_directory(hyb_dir,reference_gene, hybridization,processing_hyb,suffix,add_slash,
    #                                 analysis_name=flt_rawcnt_config['analysis_name'])

    # # Get the directory with the filtered npy images of the reference_gene to use for stitching
    # stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if reference_gene in npy_dir][0]


    # # Create the tmp directory where to save the masks
    # suffix = 'masks'
    # masked_tiles_directory = utils.create_single_directory(hyb_dir,reference_gene, hybridization,processing_hyb,suffix,add_slash,
    #                                 analysis_name=flt_rawcnt_config['analysis_name'])

    # # Create and save the mask files
    # for corn_value,corner_coords in joining['corner_list']:
    #     if not(np.isnan(corner_coords[0])):
    #         cur_mask = stitched_group['blending_mask'][int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
    #                             int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]

    #         fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] +'_'+processing_hyb+'_'+reference_gene+'_masks_joining_pos_'+str(corn_value)
    #         np.save(fname,cur_mask)


    # # Blend all the tiles and save them in a directory
    # futures_processes = client.map(tilejoining.generate_blended_tile_npy,joining['corner_list'],
    #                             stitching_files_dir = stitching_files_dir,
    #                             blended_tiles_directory = blended_tiles_directory,
    #                             masked_tiles_directory = masked_tiles_directory,
    #                             analysis_name = flt_rawcnt_config['analysis_name'],
    #                             processing_hyb = processing_hyb,reference_gene = reference_gene,
    #                             micData = micData,tiles = tiles,nr_pixels=nr_pixels,
    #                             linear_blending=linear_blending)



    # _ = client.gather(futures_processes)


    # # Write the stitched image
    # tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory, tiles,reference_gene, nr_pixels)

    # # close the hdf5 file
    # stitching_file.close()


    # # Delete the directories with blended tiles and masks
    # shutil.rmtree(blended_tiles_directory)
    # shutil.rmtree(masked_tiles_directory)

    # ----------------- DELETE FILES ------------------------
    # Don't delete the *.npy files here because can be used to
    # create the final images using the apply stitching related function
示例#4
0
def filtering_speed():
    """
    Run the filtering/counting and the counting-only steps on all the
    hybridization folders combined in a processing folder.

    For every subdirectory of the processing directory (except the blocked
    ones) each gene is first processed with counting.filtering_and_counting
    and then with counting.counting_only. Both steps read the .npy files
    found in the _tmp directory of the hybridization; the .nd2 conversion is
    not run by this script.
    The input parameters are passed using argparse. A missing required
    parameter makes argparse print the usage and exit before the dask client
    is started.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer
        using nCPUs-1 workers (at least one).

    path: string
        Path to the processing directory. Required.


    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', required=True, help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)

    else:
        # Start dask client on local machine. It will use all the available
        # cores -1, but never fewer than one worker (cpu_count()-1 is 0 on a
        # single core machine).
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    try:
        # Subdirectories of the processing_directory that need to be skipped
        # for the analysis
        blocked_directories = ['_logs']

        # Starting logger
        utils.init_file_logger(processing_directory)
        logger = logging.getLogger()

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory,
                                                          os_windows)

        # Get a list of the hybridization to process (first level
        # subdirectories only)
        processing_hyb_list = next(os.walk(processing_directory))[1]

        # Remove the blocked directories from the directories to process
        processing_hyb_list = [el for el in processing_hyb_list
                               if el not in blocked_directories]

        for processing_hyb in processing_hyb_list:
            _filtering_speed_hybridization(client, logger, processing_directory,
                                           processing_hyb, add_slash)
    finally:
        # Always release the dask client, also when a step fails
        client.close()


def _filtering_speed_hybridization(client, logger, processing_directory,
                                   processing_hyb, add_slash):
    """
    Run filtering/counting and then counting only on one hybridization folder.

    Parameters:
    -----------

    client: dask.distributed Client
        Client used to run the parallel steps.
    logger: logging.Logger
        Logger used for the debug messages.
    processing_directory: string
        Processing directory (with trailing slash).
    processing_hyb: string
        Name of the hybridization subdirectory to process.
    add_slash: string
        Second value returned by utils.determine_os(), used here as path
        separator when building the paths.

    """

    # Determine the hyb number from the name
    hybridization_number = processing_hyb.split('_hyb')[-1]
    hybridization = 'Hybridization' + hybridization_number
    hyb_dir = processing_directory + processing_hyb + add_slash

    # Parse the Experimental metadata file (serial); only the hybridization
    # infos are needed in this script
    _, _, hybridizations_infos, _, _ = utils.experimental_metadata_parser(hyb_dir)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

    # ----------------- FILTERING AND RAW COUNTING ------------------------

    # Create directories

    # Create the directory where to save the filtered images
    suffix = 'filtered_png'
    filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    suffix = 'filtered_npy'
    filtered_img_dir_path, filtered_img_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    # Create the directory where to save the counting
    suffix = 'counting'
    counting_dir_path, counting_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       flt_rawcnt_config['skip_tags_counting'],
                                       flt_rawcnt_config['skip_genes_counting'],
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    for gene in hybridizations_infos[hybridization].keys():
        flist_img_to_filter = glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
        # filtering
        logger.debug('Filtering without illumination correction %s', gene)

        futures_processes = client.map(
            counting.filtering_and_counting,
            flist_img_to_filter,
            filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
            filtered_img_gene_dirs=filtered_img_gene_dirs,
            counting_gene_dirs=counting_gene_dirs,
            plane_keep=flt_rawcnt_config['plane_keep'],
            min_distance=flt_rawcnt_config['min_distance'],
            stringency=flt_rawcnt_config['stringency'],
            skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
            skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

        client.gather(futures_processes)

    # ----------------- RAW COUNTING ONLY------------------------

    skip_genes_counting = flt_rawcnt_config['skip_genes_counting']
    skip_tags_counting = flt_rawcnt_config['skip_tags_counting']

    # Create the directory where to save the counting (same call as above,
    # kept as in the original script)
    suffix = 'counting'
    counting_dir_path, counting_gene_dirs = \
        utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                       processing_hyb, suffix, add_slash,
                                       flt_rawcnt_config['skip_tags_counting'],
                                       flt_rawcnt_config['skip_genes_counting'],
                                       analysis_name=flt_rawcnt_config['analysis_name'])

    # NOTE(review): the original code also built the path of the filtered_npy
    # directory of each gene but never used it (and building it failed when
    # analysis_name was None). The counting below runs on the .npy files of
    # the _tmp directory, as before - confirm that the filtered images were
    # not the intended input.

    for gene in hybridizations_infos[hybridization].keys():

        # Select the genes to count
        # NOTE(review): this condition is true when the gene is not in
        # skip_genes_counting OR when at least one skip tag is not contained
        # in the gene name; kept unchanged - confirm that 'or' is intended.
        if gene not in skip_genes_counting or [tag for tag in skip_tags_counting if tag not in gene]:

            flist_img_to_filter = glob.glob(hyb_dir+processing_hyb+'_tmp/'+processing_hyb+'_'+gene+'_tmp/*.npy')
            # counting only (the original debug message was copied from the
            # filtering step)
            logger.debug('Raw counting only %s', gene)

            futures_processes = client.map(
                counting.counting_only,
                flist_img_to_filter,
                counting_gene_dirs=counting_gene_dirs,
                min_distance=flt_rawcnt_config['min_distance'],
                stringency=flt_rawcnt_config['stringency'])

            client.gather(futures_processes)
def _group_connected_couples(couples):
    """
    Group couples of labels into connected components.

    Two couples end up in the same group when they share a label, directly
    or through a chain of other couples.

    Parameters:
    -----------

    couples: list
        Tuples of hashable labels (ex. [(1, 2), (2, 3), (7, 8)]).

    Returns:
    -----------

    groups: list
        One list of couples for each connected component
        (ex. [[(1, 2), (2, 3)], [(7, 8)]]).

    """

    # Union-find forest: every label points to its parent, roots point to
    # themselves
    parent = dict()

    def find_root(label):
        root = label
        while parent[root] != root:
            root = parent[root]
        # Path compression: re-attach the visited labels directly to the root
        while parent[label] != root:
            parent[label], label = root, parent[label]
        return root

    for couple in couples:
        if not couple:
            continue
        for label in couple:
            parent.setdefault(label, label)
        anchor = find_root(couple[0])
        for label in couple[1:]:
            parent[find_root(label)] = anchor

    groups = dict()
    for couple in couples:
        if couple:
            groups.setdefault(find_root(couple[0]), list()).append(couple)

    return list(groups.values())


def staining_segmentation():
    """
    This script will segment the selected staining and output the identified
    objects.

    The image is split in padded chunks that are segmented in parallel with
    dask. The objects that intersect in the regions shared by adjacent chunks
    are consolidated and the properties of all the objects are saved in
    segmented_<segmentation_staining>_all_objs_properties.pkl in the
    processing directory.

    The segmentation settings are read from Staining_segmentation.config.yaml
    stored in the processing directory.

    All the parameters are entered via argparse.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer using nCPUs-1
    path: string
        Path to the processing directory
    processing_file: string
        Path to the file with the staining to process (loaded with io.imread)
    segmentation_staining: string
        Staining to be segmented

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Segmentation script')
    parser.add_argument(
        '-scheduler',
        default=False,
        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    parser.add_argument('-processing_file',
                        help='path to the file with the \
                        staning to process')
    parser.add_argument('-segmentation_staining',
                        help='staining to be \
                        segmented')

    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # File to process
    processing_file = args.processing_file

    # staining to segment
    segmentation_staining = args.segmentation_staining

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)

    else:
        # Start dask client on local machine. Leave one core free but always
        # start at least one worker, otherwise on a single core machine the
        # submitted tasks would never be executed
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    try:
        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory,
                                                          os_windows)

        segmentation_parameters = utils.general_yaml_parser(
            processing_directory + 'Staining_segmentation.config.yaml')
        staining_parameters = segmentation_parameters[segmentation_staining]

        # Chunking parameters
        chunking_parameters = staining_parameters['image_chunking_parameters']
        chunk_size = chunking_parameters['chunk_size']
        percent_padding = chunking_parameters['percent_padding']

        # Segmentation parameters
        object_parameters = staining_parameters['segmentation_parameters']
        trimming = object_parameters['trimming']
        min_object_size = object_parameters['min_object_size']
        disk_radium_rank_filer = object_parameters['disk_radium_rank_filer']
        min_distance = object_parameters['min_distance']
        threshold_rel = object_parameters['threshold_rel']

        # Load the image (will be modified after the change to hdf5 input)
        img = io.imread(processing_file)

        # Image chunking. Only the number of chunks and the coords of the
        # padded chunks are used below
        nr_chunks,nc_chunks,Coords_Chunks_list, Coords_Padded_Chunks_list,r_coords_tl_all_padded,\
                    c_coords_tl_all_padded,r_coords_br_all_padded,c_coords_br_all_padded = \
                    object_based_segmentation.image_chunking(img,chunk_size,percent_padding)

        # Arrange the linear chunks idx according to the position of the
        # chunks in the chunked image (row-major order)
        Chunks_idxs = np.arange(len(Coords_Padded_Chunks_list),
                                dtype='int32').reshape(nr_chunks, nc_chunks)

        # Calculate coords of the overlapping chunks. The coords of a chunk
        # are used as (row_start, row_end, col_start, col_end)
        Overlapping_chunks_coords = list()

        # Overlap between each chunk and the chunk on its right
        for row in range(nr_chunks):
            for col in range(nc_chunks - 1):
                Coords_left = Coords_Padded_Chunks_list[Chunks_idxs[row, col]]
                Coords_right = Coords_Padded_Chunks_list[Chunks_idxs[row, col + 1]]
                Overlapping_chunks_coords.append(
                    (Coords_left[0], Coords_left[1],
                     Coords_right[2], Coords_left[3]))

        # Overlap between each chunk and the chunk below
        for col in range(nc_chunks):
            for row in range(nr_chunks - 1):
                Coords_top = Coords_Padded_Chunks_list[Chunks_idxs[row, col]]
                Coords_bottom = Coords_Padded_Chunks_list[Chunks_idxs[row + 1, col]]
                Overlapping_chunks_coords.append(
                    (Coords_bottom[0], Coords_top[1],
                     Coords_top[2], Coords_top[3]))

        # Now i use this approach for testing. If the image gets to big to fit in RAM
        # then save the files and load them separately in each node
        chunked_image_seq = [
            img[coords[0]:coords[1], coords[2]:coords[3]]
            for coords in Coords_Padded_Chunks_list
        ]

        # Run the segmentation
        futures_processes = client.map(
            object_based_segmentation.polyT_segmentation,
            chunked_image_seq,
            min_object_size=min_object_size,
            min_distance=min_distance,
            disk_radium_rank_filer=disk_radium_rank_filer,
            threshold_rel=threshold_rel,
            trimming=trimming)

        Objects_list = client.gather(futures_processes)

        # Recalculate labels and coords: every object gets a label that is
        # unique in the whole image and the (row_start, col_start) of its
        # padded chunk is added to its coords
        # (presumably an elementwise shift to image coords, it depends on the
        # type returned by polyT_segmentation)
        total_data_dict = dict()
        max_starting_label = 0
        for idx, objs_chunk in enumerate(Objects_list):
            coords = Coords_Padded_Chunks_list[idx][0::2]
            for cvalues in objs_chunk.values():
                max_starting_label += 1
                total_data_dict[max_starting_label] = cvalues + coords

        # Calculate all the intersecting objects
        futures_processes = client.map(
            object_based_segmentation.OverlappingCouples,
            Overlapping_chunks_coords,
            TotalDataDict=total_data_dict)

        All_intersecting = client.gather(futures_processes)

        # Combine the results from the parallel processing and remove
        # the duplicates
        flatten_couple = list(
            set(el for grp in All_intersecting for el in grp))

        # Put together the couples that share a label so that objects
        # intersecting in multiple regions are processed as one group.
        # Every group is kept, including the last one identified.
        SavedCombinations = _group_connected_couples(flatten_couple)

        # Remove all the duplicated labeled that intersect
        # in this case the labeled are merged. It will be nice to run an extra
        # segmentation on the merged objects
        # If it is too slow this step can be parallelised
        CleanedDict = total_data_dict.copy()
        for couple in SavedCombinations:
            SaveLab, RemoveLabs, NewCoords = object_based_segmentation.IntersectionCouples(
                couple, total_data_dict)
            for lab in RemoveLabs:
                del CleanedDict[lab]
            CleanedDict[SaveLab] = NewCoords

        # Calculate all objects properties
        all_objects_list = list(CleanedDict.items())

        futures_processes = client.map(
            object_based_segmentation.obj_properties_calculator, all_objects_list)

        all_objects_properties_list = client.gather(futures_processes)

        # convert the list to a dictionary
        all_objects_properties_dict = {
            k: v
            for d in all_objects_properties_list for k, v in d.items()
        }

        # Save all the objects
        segmented_objs_fname = processing_directory + 'segmented_' + segmentation_staining + '_all_objs_properties.pkl'
        with open(segmented_objs_fname, 'wb') as segmented_objs_file:
            pickle.dump(all_objects_properties_dict, segmented_objs_file)

    finally:
        # Release the connection to the scheduler also when the processing fails
        client.close()
def dots_coords_correction():
    """
    This script is used to collect all the raw countings from the different
    hybridization, correct the coords according to the registration of the 
    reference gene and remove the dots that overlap in the overlapping
    regions between the images. Save the aggregate coords and also the coords after dots processing

    The output is saved in the experiment folder as
    <ExperimentName>_all_cleaned_raw_counting_data.pkl

    Input via argparse

    Parameters:
    -----------

    path: string. 
        Exact path to the experiment folder
    pixel_radius: int 
        Radius of pixel used to create the neighbourhood (nhood) used to define 
        when two dots are the same
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003). 
        default = False. If False the process will run on the local computer using nCPUs-1
    
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Dots coords consolidation \
                                    and correction')

    parser.add_argument('-path', help='path to the experiment folder')
    parser.add_argument('-pixel_radius', help='adius of pixel used to create the nhood \
                            that is used to define that two pixels are the same', 
                            type=int)
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    
    args = parser.parse_args()

    # retrieve the parameters
    processing_experiment_directory = args.path
    pxl = args.pixel_radius

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client=Client(scheduler_address)

    else:
        # Start dask client on local machine. Leave one core free but always
        # start at least one worker, otherwise on a single core machine the
        # submitted tasks would never be executed
        ncores = max(1, multiprocessing.cpu_count()-1)
        cluster = LocalCluster(n_workers=ncores)
        client=Client(cluster)

    try:
        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the experiment directory
        processing_experiment_directory=utils.check_trailing_slash(processing_experiment_directory,os_windows)

        stitched_reference_files_dir = processing_experiment_directory + 'stitched_reference_files'

        # Check trailing slash in the stitched reference directory
        stitched_reference_files_dir=utils.check_trailing_slash(stitched_reference_files_dir,os_windows)

        # Collect the infos of the experiment and the processing
        # Parse the Experimental metadata file (serial)
        experiment_infos,image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters =\
        utils.experimental_metadata_parser(processing_experiment_directory)

        # Parse the configuration file 
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)

        # get the reference gene
        reference_gene = flt_rawcnt_config['reference_gene']

        # get the overlapping percentage and image_size
        overlapping_percentage = image_properties['Overlapping_percentage']

        # Consider a square image
        image_size = image_properties['HybImageSize']['columns']

        # Combine all counts
        all_raw_counts = dots_coords_calculations.combine_raw_counting_results(flt_rawcnt_config,
                                        hybridizations_infos,experiment_infos,
                                        processing_experiment_directory,stitched_reference_files_dir,
                                        reference_gene,add_slash)

        # Create a dictionary with only the selected peaks coords after alignment
        aligned_peaks_dict = all_raw_counts['selected_peaks_coords_aligned']

        # Create list of tuples to process each hybridization/gene on a different core
        combinations = dots_coords_calculations.processing_combinations(list(hybridizations_infos.keys()),aligned_peaks_dict)

        # Add the corresponding registration_data and the corresponding coords to
        # each combination in order to reduce the size of the info transferred
        # in the network
        added_combinations = list()
        for combination in combinations:
            hybridization = combination[0]
            gene = combination[1]
            added_combinations.append({
                'hybridization': hybridization,
                'gene': gene,
                'reg_data_combination': all_raw_counts['registration_data'][hybridization],
                'aligned_peaks_dict_gene': aligned_peaks_dict[hybridization][gene]
            })

        # Process each gene in parallel. The tasks are submitted only once,
        # after all the combinations have been collected, so each combination
        # is processed a single time and the result is defined also when
        # there are no combinations
        futures_processes = client.map(dots_coords_calculations.function_to_run_dots_removal_parallel,added_combinations,
                            overlapping_percentage = overlapping_percentage,
                            image_size = image_size,pxl = pxl)

        cleaned_dots_list = client.gather(futures_processes)

        # Convert the list of dictionaries in one single dictionary
        # The saved dictionary contains all the dots, the reference to the tile pos
        # has been removed during the overlapping dots removal step
        all_countings = {
            'all_coords_cleaned': dict(),
            'all_coords': dict(),
            'removed_coords': dict()
        }

        for el in cleaned_dots_list:
            # Only the first hybridization and the first gene of each
            # result are read
            hybridization = list(el.keys())[0]
            gene = list(el[hybridization].keys())[0]

            renamed_gene = gene + '_' + hybridization

            gene_countings = el[hybridization][gene]
            for coords_type in ('all_coords_cleaned', 'all_coords', 'removed_coords'):
                all_countings[coords_type][renamed_gene] = gene_countings[coords_type]

        # Save all the data
        counting_data_name = processing_experiment_directory +experiment_infos['ExperimentName']+'_all_cleaned_raw_counting_data.pkl'
        with open(counting_data_name,'wb') as counting_data_file:
            pickle.dump(all_countings,counting_data_file)

    finally:
        # Release the connection to the scheduler also when the processing fails
        client.close()
示例#7
0
def apply_stitching():

    """
    Script to apply the registration to all the osmFISH channels. It will create
    a stitched image in an hdf5 file

    One <analysis_name>_<ExperimentName>_<hyb>.reg.sf.hdf5 file is written in
    the reference files folder for each processed hybridization. The
    hybridizations to process are selected by 'hybs_to_stitch' in the
    configuration file (a list of hybridizations or 'All').

    All the parameters are entered via argparse

    Parameters:
    -----------

    experiment_path: string
        Path to the folder with the hybridizations
    reference_files_path: string
        Path to the folder with the _reg_data.pkl files
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003). 
        default = False. If False the process will run on the local computer using nCPUs-1

    """

    parser = argparse.ArgumentParser(description='Create the stitched images \
                                    after registration')

    parser.add_argument('-experiment_path', help='path to the folder with the hybridizations')
    parser.add_argument('-reference_files_path', help='path to the folder with the \
                        _reg_data.pkl files')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    args = parser.parse_args()

    processing_experiment_directory = args.experiment_path
    stitched_reference_files_dir = args.reference_files_path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client=Client(scheduler_address)

    else:
        # Start dask client on local machine. Leave one core free but always
        # start at least one worker, otherwise on a single core machine the
        # submitted tasks would never be executed
        ncores = max(1, multiprocessing.cpu_count()-1)
        cluster = LocalCluster(n_workers=ncores)
        client=Client(cluster)

    try:
        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_experiment_directory=utils.check_trailing_slash(processing_experiment_directory,os_windows)

        stitched_reference_files_dir=utils.check_trailing_slash(stitched_reference_files_dir,os_windows)

        # Starting logger
        utils.init_file_logger(processing_experiment_directory)

        # Collect the infos of the experiment and the processing
        # Parse the Experimental metadata file (serial)
        experiment_infos,image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters =\
        utils.experimental_metadata_parser(processing_experiment_directory)

        # Parse the configuration file 
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)

        analysis_name = flt_rawcnt_config['analysis_name']

        # Get the reference gene used
        reference_gene = flt_rawcnt_config['reference_gene']

        # Stitch the image in 2D or 3D (3D need more work/testing)
        nr_dim = flt_rawcnt_config['nr_dim']

        # Determine the hybridizations to process
        hybs_to_stitch = flt_rawcnt_config['hybs_to_stitch']
        if isinstance(hybs_to_stitch,list):
            hybridizations_to_process = hybs_to_stitch
        elif hybs_to_stitch == 'All':
            hybridizations_to_process = list(hybridizations_infos.keys())
        else:
            raise ValueError('Error in the hybridizations to stitch')

        for hybridization in hybridizations_to_process:

            # Determine the genes to stitch in the processing hybridization
            genes_processing = list(hybridizations_infos[hybridization].keys())

            hyb_short = re.sub('Hybridization','hyb',hybridization)
            processing_hyb = experiment_infos['ExperimentName']+'_'+hyb_short
            hyb_dir = processing_experiment_directory+processing_hyb+add_slash

            tile_file_base_name = analysis_name+'_'+experiment_infos['ExperimentName']+'_'+hyb_short
            stitching_file_name = tile_file_base_name + '.reg.sf.hdf5'

            data_name = (tile_file_base_name
                                + '_' + reference_gene
                                + '_stitching_data_reg')

            # Directory that contains the filtered images of all the genes
            filtered_img_gene_dirs_path = hyb_dir+analysis_name+'_'+processing_hyb +'_filtered_npy'+add_slash

            # Create pointer of the hdf5 file that will store the stitched images
            # for the current hybridization. The context manager closes the
            # file also when the processing of a gene fails
            # replace 'w' with 'a' as soon as you fix the error
            with h5py.File(stitched_reference_files_dir+stitching_file_name,'w',libver='latest') as stitching_file:

                # Determine the tiles organization
                joining, tiles, nr_pixels, z_count, micData = stitching.get_place_tile_input_apply_npy(hyb_dir,stitched_reference_files_dir,data_name,image_properties,nr_dim)

                for gene in genes_processing:

                    # Create the hdf5 file structure
                    stitched_group, linear_blending, blend =  hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels,
                                                    gene, blend = 'non linear')

                    # Fill the hdf5 containing the stitched image with empty data and
                    # create the blending mask
                    stitched_group['final_image'][:]= np.zeros(joining['final_image_shape'],dtype=np.uint16)
                    if blend is not None:
                        # make mask
                        stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:],dtype=np.uint16)
                        tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

                    filtered_img_gene_dirs = glob.glob(filtered_img_gene_dirs_path+'*')

                    # Create the subdirectory used to save the blended tiles
                    suffix = 'blended_tiles'
                    blended_tiles_directory = utils.create_single_directory(hyb_dir,gene, hybridization,processing_hyb,suffix,add_slash,
                                                    analysis_name=analysis_name)

                    # Get the directory with the filtered npy images of the gene to use for stitching
                    # NOTE(review): the match is a substring test on the whole path and
                    # the first hit is used: a gene name contained in another gene name
                    # (or in a parent directory name) can select the wrong directory and
                    # a missing directory raises IndexError - confirm the naming scheme
                    stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if gene in npy_dir][0]
                    stitching_files_dir= stitching_files_dir+add_slash

                    # Create the tmp directory where to save the masks
                    suffix = 'masks'
                    masked_tiles_directory = utils.create_single_directory(hyb_dir,gene,hybridization,processing_hyb,suffix,add_slash,
                                                    analysis_name=analysis_name)

                    # Create and save the mask files. The tiles with nan
                    # corner coords are skipped
                    for corn_value,corner_coords in joining['corner_list']:
                        if not(np.isnan(corner_coords[0])):
                            row_start = int(corner_coords[0])
                            col_start = int(corner_coords[1])
                            cur_mask = stitched_group['blending_mask'][row_start:row_start + int(nr_pixels),
                                                col_start:col_start + int(nr_pixels)]

                            fname = masked_tiles_directory + analysis_name +'_'+processing_hyb+'_'+gene+'_masks_joining_pos_'+str(corn_value)
                            np.save(fname,cur_mask)

                    # Blend all the tiles and save them in a directory
                    futures_processes = client.map(tilejoining.generate_blended_tile_npy,joining['corner_list'],
                                                stitching_files_dir = stitching_files_dir,
                                                blended_tiles_directory = blended_tiles_directory,
                                                masked_tiles_directory = masked_tiles_directory,
                                                analysis_name = analysis_name,
                                                processing_hyb = processing_hyb,reference_gene = gene,
                                                micData = micData,tiles = tiles,nr_pixels=nr_pixels,
                                                linear_blending=linear_blending)

                    # Wait for all the tiles, the returned values are not used
                    _ = client.gather(futures_processes)

                    # Write the stitched image
                    tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory, tiles,gene, nr_pixels)
                    stitching_file.flush()

                    # Remove directories with blended tiles and masks
                    shutil.rmtree(blended_tiles_directory)
                    shutil.rmtree(masked_tiles_directory)

    finally:
        # Release the connection to the scheduler also when the processing fails
        client.close()
示例#8
0
def run_stitching_reference_only():
    """
    This script will stitch the filtered data of the hybridizations
    in experiment directory. The inputs parameters are passed using arparse

    Only the reference gene defined in the configuration file of each
    hybridization is stitched. The stitched image is saved in the
    hybridization directory as <analysis_name>_<processing_hyb>.sf.hdf5.
    All the subdirectories of the experiment directory, with the exception
    of '_logs', are processed as hybridizations.

    Parameters:
    -----------
    
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003). 
        default = False. If False the process will run on the local computer using nCPUs-1

    path: string
        Path to the experiment directory

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument(
        '-scheduler',
        default=False,
        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)

    else:
        # Start dask client on local machine. Leave one core free but always
        # start at least one worker, otherwise on a single core machine the
        # submitted tasks would never be executed
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    try:
        # Subdirectories of the processing_directory that need to be skipped for the
        # analysis
        blocked_directories = ['_logs']

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory,
                                                          os_windows)

        # Get a list of the hybridization to process (first level
        # subdirectories) without the blocked directories
        processing_hyb_list = [
            el for el in next(os.walk(processing_directory))[1]
            if el not in blocked_directories
        ]

        for processing_hyb in processing_hyb_list:

            # Determine the hyb number from the name
            hybridization_number = processing_hyb.split('_hyb')[-1]
            hybridization = 'Hybridization' + hybridization_number
            hyb_dir = processing_directory + processing_hyb + add_slash

            # Parse the Experimental metadata file (serial)
            experiment_infos,image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters =\
            utils.experimental_metadata_parser(hyb_dir)

            # Parse the configuration file
            flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

            # ----------------- STITCHING ------------------------

            # Determine the directory of the filtered images
            suffix = 'filtered_npy'
            analysis_name = flt_rawcnt_config['analysis_name']
            sufx_dir_path = hyb_dir + analysis_name + '_' + processing_hyb + '_' + suffix + add_slash

            # Reference gene for stitching
            reference_gene = flt_rawcnt_config['reference_gene']

            filtered_gene_dir = sufx_dir_path + analysis_name + '_' + processing_hyb + '_' + reference_gene + '_' + suffix + add_slash

            # Stitch the image in 2D or 3D (3D need more work/testing)
            nr_dim = flt_rawcnt_config['nr_dim']

            # Estimated overlapping between images according to the Nikon software
            # np.float64 is used because the np.float_ alias is not available
            # in NumPy >= 2.0
            est_overlap = np.float64(image_properties['Overlapping_percentage'])

            # Number of peaks to use for the alignment
            nr_peaks = flt_rawcnt_config['nr_peaks']

            # Get the list of the filtered files of the reference gene
            filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')

            tile_file_base_name = analysis_name + '_' + processing_hyb
            data_name = (tile_file_base_name + '_' + reference_gene +
                         '_stitching_data')

            stitching_file_name = tile_file_base_name + '.sf.hdf5'

            # Create pointer of the hdf5 file that will store the stitched reference image
            # for the current hybridization. The context manager closes the
            # file also when the stitching fails
            # replace 'w' with 'a' as soon as you fix the error
            with h5py.File(hyb_dir + stitching_file_name, 'w',
                           libver='latest') as stitching_file:

                # Determine the tiles organization
                # NOTE(review): y_flip and nr_dim are hard-coded here, the
                # 'y_flip' and 'nr_dim' values of the configuration file are
                # not used for this step - confirm that this is intended
                tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(
                    image_properties,
                    converted_positions,
                    hybridization,
                    est_overlap,
                    y_flip=False,
                    nr_dim=2)

                # Align the tiles
                futures_processes = client.map(pairwisesingle.align_single_pair_npy,
                                               contig_tuples,
                                               filtered_files_list=filtered_files_list,
                                               micData=micData,
                                               nr_peaks=nr_peaks)

                # Gather the futures
                data = client.gather(futures_processes)

                # In this case the order of the returned contingency tuples is with
                # the order of the input contig_tuples
                # The first element of each result is collected in a single
                # flattened array, the second in a list
                P_all = np.array([data_single[0] for data_single in data])
                P_all = P_all.flat[:]
                covs_all = [data_single[1] for data_single in data]
                alignment = {'P': P_all, 'covs': covs_all}

                # Calculates a shift in global coordinates for each tile (global
                # alignment) and then applies these shifts to the  corner coordinates
                # of each tile and returns and saves these shifted corner coordinates.
                joining = stitching.get_place_tile_input(hyb_dir,
                                                         tiles,
                                                         contig_tuples,
                                                         micData,
                                                         nr_pixels,
                                                         z_count,
                                                         alignment,
                                                         data_name,
                                                         nr_dim=nr_dim)

                # Create the hdf5 file structure
                # NOTE(review): the blending method is hard-coded to
                # 'non linear', the 'blend' value of the configuration file
                # is not used - confirm that this is intended
                stitched_group, linear_blending, blend = hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                    stitching_file,
                    joining,
                    nr_pixels,
                    reference_gene,
                    blend='non linear')

                # Fill the hdf5 containing the stitched image with empty data and
                # create the blending mask
                stitched_group['final_image'][:] = np.zeros(
                    joining['final_image_shape'], dtype=np.float64)
                if blend is not None:
                    # make mask
                    stitched_group['blending_mask'][:] = np.zeros(
                        joining['final_image_shape'][-2:], dtype=np.float64)
                    tilejoining.make_mask(joining, nr_pixels,
                                          stitched_group['blending_mask'])

                # Create the subdirectory used to save the blended tiles
                suffix = 'blended_tiles'
                blended_tiles_directory = utils.create_single_directory(
                    hyb_dir,
                    reference_gene,
                    hybridization,
                    processing_hyb,
                    suffix,
                    add_slash,
                    analysis_name=analysis_name)

                # Create the tmp directory where to save the masks
                suffix = 'masks'
                masked_tiles_directory = utils.create_single_directory(
                    hyb_dir,
                    reference_gene,
                    hybridization,
                    processing_hyb,
                    suffix,
                    add_slash,
                    analysis_name=analysis_name)

                # Create and save the mask files. The tiles with nan corner
                # coords are skipped
                for corn_value, corner_coords in joining['corner_list']:
                    if not (np.isnan(corner_coords[0])):
                        row_start = int(corner_coords[0])
                        col_start = int(corner_coords[1])
                        cur_mask = stitched_group['blending_mask'][
                            row_start:row_start + int(nr_pixels),
                            col_start:col_start + int(nr_pixels)]

                        fname = masked_tiles_directory + analysis_name + '_' + processing_hyb + '_' + reference_gene + '_masks_joining_pos_' + str(
                            corn_value)
                        np.save(fname, cur_mask)

                # Blend all the tiles and save them in a directory
                futures_processes = client.map(
                    tilejoining.generate_blended_tile_npy,
                    joining['corner_list'],
                    stitching_files_dir=filtered_gene_dir,
                    blended_tiles_directory=blended_tiles_directory,
                    masked_tiles_directory=masked_tiles_directory,
                    analysis_name=analysis_name,
                    processing_hyb=processing_hyb,
                    reference_gene=reference_gene,
                    micData=micData,
                    tiles=tiles,
                    nr_pixels=nr_pixels,
                    linear_blending=linear_blending)

                # Wait for all the tiles, the returned values are not used
                _ = client.gather(futures_processes)

                # Write the stitched image
                tilejoining.make_final_image_npy(joining, stitching_file,
                                                 blended_tiles_directory, tiles,
                                                 reference_gene, nr_pixels)

            # The hdf5 file is closed at this point

            # Delete the directories with blended tiles and masks
            shutil.rmtree(blended_tiles_directory)
            shutil.rmtree(masked_tiles_directory)

    finally:
        # Release the connection to the scheduler also when the processing fails
        client.close()
def process_standalone_experiment():
    """
    Script to run conversion, filtering and raw counting on a small set of images.
    The analysis runs locally.

    All the parameters are entered with argparse

    The .nd2 files found in the experiment folder are converted to .npy
    stacks (one per channel and field of view) in a temporary directory,
    then every stack is filtered and counted in parallel. Three directories
    are created inside the experiment folder if they do not exist yet:
    <analysis_name>_<exp_name>_tmp, <analysis_name>_<exp_name>_filtered and
    <analysis_name>_<exp_name>_counting_pkl. The _tmp directory is removed
    at the end of the processing.

    Parameters:
    -----------

    path: string
        Path to the experiment to process
    analysis_name: string
        Name of the analysis
    stringency: int
        Value of the stringency to use in the threshold selection. Default=0
    min_distance: int
        Min distance between two peaks. Default=5
    min_plane: int
        Min plane for z-stack cropping. Default=None
    max_plane: int
        Max plane for z-stack cropping. Default=None
    ncores: int
        Number of cores to use for the processing. Default=1

    Notes:
    ------
    The z-stack cropping is applied only when both min_plane and max_plane
    are provided; if either one is missing no cropping range is passed on.

    """

    # input to the function
    parser = argparse.ArgumentParser(
        description='Counting and filtering experiment')
    parser.add_argument('-path', help='path to experiment to analyze')
    parser.add_argument('-analysis_name', help='analysis name')
    parser.add_argument('-stringency', help='stringency', default=0, type=int)
    parser.add_argument('-min_distance',
                        help='min distance between peaks',
                        default=5,
                        type=int)
    parser.add_argument('-min_plane',
                        help='starting plane to consider',
                        default=None,
                        type=int)
    parser.add_argument('-max_plane',
                        help='ending plane to consider',
                        default=None,
                        type=int)
    parser.add_argument('-ncores',
                        help='number of cores to use',
                        default=1,
                        type=int)

    # Parse the input args
    args = parser.parse_args()
    processing_directory = args.path
    analysis_name = args.analysis_name
    stringency = args.stringency
    min_distance = args.min_distance
    min_plane = args.min_plane
    max_plane = args.max_plane
    ncores = args.ncores

    # Cropping requires both boundaries; plane 0 is a valid value so the
    # comparison must be an identity check against None and not a truth test
    if min_plane is not None and max_plane is not None:
        plane_keep = [min_plane, max_plane]
    else:
        plane_keep = None

    # Determine the os type
    os_windows, add_slash = utils.determine_os()

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    logger.debug('min_plane%s', min_plane)
    logger.debug('max_plane %s', max_plane)
    logger.debug('keep_planes value %s', plane_keep)

    # Start the distributed client
    client = Client(n_workers=ncores, threads_per_worker=1)

    # Make sure the client is closed even if one of the processing steps
    # fails, otherwise the worker processes are left running
    try:
        logger.debug('client %s', client)
        logger.debug('check that workers are on the same directory %s',
                     client.run(os.getcwd))

        # Check trail slash
        processing_directory = utils.check_trailing_slash(
            processing_directory, os_windows)

        # Determine the experiment name (last folder of the path, the final
        # element of the split is empty because of the trailing slash)
        exp_name = processing_directory.split(add_slash)[-2]

        logger.debug('Experiment name: %s', exp_name)

        # Create the directories where to save the output
        output_prefix = processing_directory + analysis_name + '_' + exp_name
        tmp_dir_path = output_prefix + '_tmp' + add_slash
        filtered_dir_path = output_prefix + '_filtered' + add_slash
        counting_dir_path = output_prefix + '_counting_pkl' + add_slash

        for dir_path in (tmp_dir_path, filtered_dir_path, counting_dir_path):
            try:
                os.mkdir(dir_path)
            except FileExistsError:
                # Already present (e.g. from a previous run): reuse it and
                # leave its permissions untouched
                pass
            else:
                os.chmod(dir_path, 0o777)

        # Get the list of the nd2 files to process inside the directory
        files_list = glob.glob(processing_directory + '*.nd2')
        logger.debug('files to process %s', files_list)

        # Convert the .nd2 data
        for raw_data_gene_fname in files_list:
            # File name without the directory and without the '.nd2' extension
            fname = raw_data_gene_fname.split(add_slash)[-1][:-4]
            logger.debug('fname %s', fname)
            with nd2.Nd2(raw_data_gene_fname) as nd2file:
                for channel in nd2file.channels:
                    for fov in nd2file.fields_of_view:
                        # One (z, height, width) stack per channel and fov
                        img_stack = np.empty(
                            [len(nd2file.z_levels), nd2file.height,
                             nd2file.width],
                            dtype='uint16')
                        images = nd2file.select(channels=channel,
                                                fields_of_view=fov,
                                                z_levels=nd2file.z_levels)
                        for idx, im in enumerate(images):
                            img_stack[idx, :, :] = im

                        converted_fname = (tmp_dir_path + exp_name + '_' +
                                           fname + '_' + channel + '_fov_' +
                                           str(fov) + '.npy')
                        np.save(converted_fname, img_stack,
                                allow_pickle=False)

        logger.debug('Finished .nd2 file conversion')

        # Filtering all the data
        # Get list of the files to process
        flist_img_to_filter = glob.glob(tmp_dir_path + '*.npy')

        # Parallel process all the data
        futures_processes = client.map(filtering_and_counting_experiment,
                                       flist_img_to_filter,
                                       filtered_dir_path=filtered_dir_path,
                                       counting_dir_path=counting_dir_path,
                                       exp_name=exp_name,
                                       plane_keep=plane_keep,
                                       add_slash=add_slash,
                                       min_distance=min_distance,
                                       stringency=stringency)

        # Block until every image has been processed
        client.gather(futures_processes)
    finally:
        client.close()

    logger.debug('Finished filtering and counting')

    # delete the tmp folders
    shutil.rmtree(tmp_dir_path)