import argparse
import glob
import logging
import multiprocessing
import os
import re
import shutil

import h5py
import numpy as np
from dask import delayed
from dask.distributed import Client, LocalCluster
from skimage import filters

# Pipeline-local modules. The import paths below are assumed from the calls
# in this file; adjust them to the actual package layout. Note that the
# local `io` module (nd2_to_npy) shadows the stdlib `io`.
import utils
import io
import counting
import stitching
import hdf5_utils
import hdf5preparation
import pairwisesingle
import tilejoining
from counting import filtering_and_counting_experiment

# .nd2 reading in process_standalone_experiment() uses the nd2reader v2 API
# (Nd2, channels, fields_of_view, z_levels, select); the alias is assumed.
import nd2reader as nd2


def preprocessing_script():
    """
    This script processes all the hybridization folders combined in a
    processing folder. The input parameters are passed using argparse.

    Parameters:
    -----------
    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the processing directory.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)
    else:
        # Start the dask client on the local machine, using all the
        # available cores minus one
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that must be skipped in the
    # analysis
    blocked_directories = ['_logs']

    # Start the logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list if el not in blocked_directories]

    for processing_hyb in processing_hyb_list:

        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash

        # Parse the experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(hyb_dir)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

        # ----------------- .nd2 FILE CONVERSION ------------------------------

        # Create the temporary subdirectory tree (serial)
        tmp_dir_path, tmp_gene_dirs = utils.create_subdirectory_tree(
            hyb_dir, hybridization, hybridizations_infos, processing_hyb,
            suffix='tmp', add_slash=add_slash)

        # Get the list of the .nd2 files to process inside the directory
        files_list = glob.glob(hyb_dir + processing_hyb + '_raw_data' + add_slash + '*.nd2')

        # Get the list of the genes analyzed in the current hybridization
        gene_list = list(hybridizations_infos[hybridization].keys())

        # Organize the files to process in lists whose order matches the
        # gene_list, for parallel processing
        organized_files_list = [f for gene in gene_list for f in files_list
                                if gene + '.nd2' in f]
        organized_tmp_dir_list = [f for gene in gene_list for f in tmp_gene_dirs
                                  if gene in f]

        # Each .nd2 file will be processed in a worker on a different node.
        # Get the address of one process per node to use for the conversion
        node_addresses = utils.identify_nodes(client)
        workers_conversion = [list(el.items())[0][1] for key, el in node_addresses.items()]

        # Run the conversion
        futures_processes = client.map(io.nd2_to_npy, gene_list,
                                       organized_files_list, tmp_gene_dirs,
                                       processing_hyb=processing_hyb,
                                       use_ram=flt_rawcnt_config['use_ram'],
                                       max_ram=flt_rawcnt_config['max_ram'],
                                       workers=workers_conversion)
        client.gather(futures_processes)

        # ---------------------------------------------------------------------

        # ----------------- FILTERING AND RAW COUNTING ------------------------

        # Create the directories where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           flt_rawcnt_config['skip_tags_counting'],
                                           flt_rawcnt_config['skip_genes_counting'],
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        if flt_rawcnt_config['illumination_correction']:

            # Create the directory where to save the illumination functions
            suffix = 'illumination_funcs'
            illumination_func_dir_path, illumination_func_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                               processing_hyb, suffix, add_slash,
                                               analysis_name=flt_rawcnt_config['analysis_name'])

            # Loop through the channels and calculate the illumination
            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')

                logger.debug('Create average image for gene %s', gene)

                # Chunk the image list
                num_chunks = sum(list(client.ncores().values()))
                chunked_list = utils.list_chunking(flist_img_to_filter, num_chunks)

                # Scatter the image sublists to process them in parallel
                futures = client.scatter(chunked_list)

                # Build the dask processing graph
                output = []
                for future in futures:
                    ImgMean = delayed(utils.partial_image_mean)(future)
                    output.append(ImgMean)
                ImgMean_all = delayed(sum)(output)
                ImgMean_all = ImgMean_all / float(len(futures))

                # Compute the graph
                ImgMean = ImgMean_all.compute()

                logger.debug('Create illumination function for gene %s', gene)

                # Create the illumination function
                Illumination = filters.gaussian(ImgMean, sigma=(20, 300, 300))

                # Normalize the illumination
                Illumination_flat = np.amax(Illumination, axis=0)
                Illumination_norm = Illumination_flat / np.amax(Illumination_flat)

                logger.debug('Save illumination function for gene %s', gene)

                # Save the illumination function
                illumination_path = [ill_path for ill_path in illumination_func_gene_dirs
                                     if gene in ill_path][0]
                illumination_fname = illumination_path + gene + '_illumination_func.npy'
                np.save(illumination_fname, Illumination_norm, allow_pickle=False)

                # Broadcast the illumination function to all the cores
                client.scatter(Illumination_norm, broadcast=True)

                logger.debug('Filtering %s', gene)

                # Filtering and counting
                futures_processes = client.map(
                    counting.filtering_and_counting_ill_correction,
                    flist_img_to_filter,
                    illumination_function=Illumination_norm,
                    filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                    filtered_img_gene_dirs=filtered_img_gene_dirs,
                    counting_gene_dirs=counting_gene_dirs,
                    plane_keep=flt_rawcnt_config['plane_keep'],
                    min_distance=flt_rawcnt_config['min_distance'],
                    stringency=flt_rawcnt_config['stringency'],
                    skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                    skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
                client.gather(futures_processes)

        else:
            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')

                # Filtering
                logger.debug('Filtering without illumination correction %s', gene)

                futures_processes = client.map(
                    counting.filtering_and_counting,
                    flist_img_to_filter,
                    filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                    filtered_img_gene_dirs=filtered_img_gene_dirs,
                    counting_gene_dirs=counting_gene_dirs,
                    plane_keep=flt_rawcnt_config['plane_keep'],
                    min_distance=flt_rawcnt_config['min_distance'],
                    stringency=flt_rawcnt_config['stringency'],
                    skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                    skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
                client.gather(futures_processes)

        # ---------------------------------------------------------------------

        # ----------------- COMBINE THE FILTERED DATA IN .ppf.hdf5 -------------
        # # Combine the filtered data in one single .ppf file for each
        # # hybridization. This step runs in serial mode and does not need to
        # # shuffle data between cores because everything is on the common
        # # file system
        # logger.debug('Create .ppf.hdf5 file')

        # # Create the .ppf.hdf5 file that contains the filtered data in uint16
        # preprocessing_file_path = hdf5_utils.hdf5_create_preprocessing_file(
        #     hybridizations_infos, processing_hyb, hybridization,
        #     flt_rawcnt_config['analysis_name'], hyb_dir,
        #     converted_positions, image_properties)

        # logger.debug('Write the .npy filtered files into the .ppf file')

        # # Load and write the .npy tmp images into the hdf5 file
        # with h5py.File(preprocessing_file_path) as f_hdl:
        #     # Loop through each gene
        #     for gene in hybridizations_infos[hybridization].keys():
        #         logger.debug('Writing %s images in .ppf.hdf5', gene)
        #         # List of the files to transfer
        #         filtered_gene_dir = [fdir for fdir in filtered_img_gene_dirs
        #                              if gene in fdir][0]
        #         filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')
        #         # Loop through the list of files
        #         for f_file in filtered_files_list:
        #             pos = f_file.split('/')[-1].split('_')[-1].split('.')[0]
        #             f_hdl[gene]['FilteredData'][pos][:] = np.load(f_file)
        #             f_hdl.flush()
        # ---------------------------------------------------------------------

        # ----------------- STITCHING ------------------------
        # # Load the stitching parameters from the .yaml file

        # # Stitch the image in 2D or 3D (3D needs more work/testing)
        # nr_dim = flt_rawcnt_config['nr_dim']

        # # Estimated overlap between images according to the Nikon software
        # est_overlap = image_properties['Overlapping_percentage']

        # # Number of peaks to use for the alignment
        # nr_peaks = flt_rawcnt_config['nr_peaks']

        # # Determine if the coords need to be flipped
        # y_flip = flt_rawcnt_config['y_flip']

        # # Method to use for blending: can be 'linear' or 'non linear'.
        # # The method that performs best is 'non linear'
        # blend = flt_rawcnt_config['blend']

        # # Reference gene for the stitching
        # reference_gene = flt_rawcnt_config['reference_gene']

        # pixel_size = image_properties['PixelSize']

        # # Get the list of the filtered files of the reference gene
        # filtered_gene_dir = [gene_dir for gene_dir in filtered_img_gene_dirs
        #                      if reference_gene in gene_dir][0]
        # filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')

        # # Create a pointer to the hdf5 file that will store the stitched
        # # reference image for the current hybridization
        # tile_file_base_name = flt_rawcnt_config['analysis_name'] + '_' + processing_hyb
        # data_name = (tile_file_base_name
        #              + '_' + reference_gene
        #              + '_stitching_data')
        # stitching_file_name = tile_file_base_name + '.sf.hdf5'
        # # Replace 'w' with 'a' as soon as you fix the error
        # stitching_file = h5py.File(hyb_dir + stitching_file_name, 'w', libver='latest')

        # # Determine the tiles organization
        # tiles, contig_tuples, nr_pixels, z_count, micData = \
        #     stitching.get_pairwise_input_npy(image_properties, converted_positions,
        #                                      hybridization, est_overlap=est_overlap,
        #                                      y_flip=False, nr_dim=2)

        # # Align the tiles
        # futures_processes = client.map(pairwisesingle.align_single_pair_npy,
        #                                contig_tuples,
        #                                filtered_files_list=filtered_files_list,
        #                                micData=micData, nr_peaks=nr_peaks)

        # # Gather the futures
        # data = client.gather(futures_processes)

        # # In this case the order of the returned contingency tuples matches
        # # the order of the input contig_tuples
        # # P_all = [el for data_single in data for el in data_single[0]]
        # P_all = [data_single[0] for data_single in data]
        # P_all = np.array(P_all)
        # P_all = P_all.flat[:]
        # covs_all = [data_single[1] for data_single in data]
        # alignment = {'P': P_all,
        #              'covs': covs_all}

        # # Calculate a shift in global coordinates for each tile (global
        # # alignment), then apply these shifts to the corner coordinates of
        # # each tile and return and save the shifted corner coordinates
        # joining = stitching.get_place_tile_input(hyb_dir, tiles, contig_tuples,
        #                                          micData, nr_pixels, z_count,
        #                                          alignment, data_name,
        #                                          nr_dim=nr_dim)

        # # Create the hdf5 file structure
        # stitched_group, linear_blending, blend = \
        #     hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
        #         stitching_file, joining, nr_pixels, reference_gene,
        #         blend='non linear')

        # # Fill the hdf5 containing the stitched image with empty data and
        # # create the blending mask
        # stitched_group['final_image'][:] = np.zeros(joining['final_image_shape'],
        #                                             dtype=np.float64)
        # if blend is not None:
        #     # Make the mask
        #     stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:],
        #                                                   dtype=np.float64)
        #     tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

        # # Create the subdirectory used to save the blended tiles
        # suffix = 'blended_tiles'
        # blended_tiles_directory = utils.create_single_directory(
        #     hyb_dir, reference_gene, hybridization, processing_hyb, suffix,
        #     add_slash, analysis_name=flt_rawcnt_config['analysis_name'])

        # # Get the directory with the filtered npy images of the
        # # reference_gene to use for the stitching
        # stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs
        #                        if reference_gene in npy_dir][0]

        # # Create the tmp directory where to save the masks
        # suffix = 'masks'
        # masked_tiles_directory = utils.create_single_directory(
        #     hyb_dir, reference_gene, hybridization, processing_hyb, suffix,
        #     add_slash, analysis_name=flt_rawcnt_config['analysis_name'])

        # # Create and save the mask files
        # for corn_value, corner_coords in joining['corner_list']:
        #     if not np.isnan(corner_coords[0]):
        #         cur_mask = stitched_group['blending_mask'][
        #             int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
        #             int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]
        #         fname = (masked_tiles_directory + flt_rawcnt_config['analysis_name']
        #                  + '_' + processing_hyb + '_' + reference_gene
        #                  + '_masks_joining_pos_' + str(corn_value))
        #         np.save(fname, cur_mask)

        # # Blend all the tiles and save them in a directory
        # futures_processes = client.map(tilejoining.generate_blended_tile_npy,
        #                                joining['corner_list'],
        #                                stitching_files_dir=stitching_files_dir,
        #                                blended_tiles_directory=blended_tiles_directory,
        #                                masked_tiles_directory=masked_tiles_directory,
        #                                analysis_name=flt_rawcnt_config['analysis_name'],
        #                                processing_hyb=processing_hyb,
        #                                reference_gene=reference_gene,
        #                                micData=micData, tiles=tiles,
        #                                nr_pixels=nr_pixels,
        #                                linear_blending=linear_blending)
        # _ = client.gather(futures_processes)

        # # Write the stitched image
        # tilejoining.make_final_image_npy(joining, stitching_file,
        #                                  blended_tiles_directory, tiles,
        #                                  reference_gene, nr_pixels)

        # # Close the hdf5 file
        # stitching_file.close()

        # # Delete the directories with the blended tiles and masks
        # shutil.rmtree(blended_tiles_directory)
        # shutil.rmtree(masked_tiles_directory)

        # ----------------- DELETE FILES ------------------------
        # Don't delete the *.npy files here because they can be used to
        # create the final images using the apply-stitching-related function

    client.close()
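# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the chunked averaging pattern that
# preprocessing_script() uses to build the illumination function.
# `_partial_image_mean_sketch` and the chunking below are stand-ins for
# utils.partial_image_mean / utils.list_chunking, and random arrays replace
# the converted .npy stacks, so the snippet runs on its own. As in the code
# above, averaging the per-chunk means equals the exact global mean only
# when the chunks have equal size.

def _partial_image_mean_sketch(img_list):
    # Mean of a sublist of image stacks (stand-in for utils.partial_image_mean)
    return np.mean(np.stack(img_list), axis=0)


def chunked_image_mean_sketch(images, num_chunks):
    # Split the image list into num_chunks sublists and average the partial
    # means with a small dask.delayed graph, mirroring the loop above
    chunks = [images[i::num_chunks] for i in range(num_chunks)]
    partial_means = [delayed(_partial_image_mean_sketch)(chunk) for chunk in chunks]
    return (delayed(sum)(partial_means) / float(num_chunks)).compute()

# Example: 12 fake z-stacks of shape (4, 16, 16) averaged over 3 equal chunks
# img_mean = chunked_image_mean_sketch([np.random.rand(4, 16, 16) for _ in range(12)], 3)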
def apply_stitching():
    """
    Script to apply the registration to all the osmFISH channels. It creates
    a stitched image in an hdf5 file. All the parameters are entered via
    argparse.

    Parameters:
    -----------
    experiment_path: string
        Path to the folder with the hybridizations.
    reference_files_path: string
        Path to the folder with the _reg_data.pkl files.
    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). default = False. If False the process
        will run on the local computer using nCPUs - 1.
    """

    parser = argparse.ArgumentParser(description='Create the stitched images '
                                                 'after registration')
    parser.add_argument('-experiment_path',
                        help='path to the folder with the hybridizations')
    parser.add_argument('-reference_files_path',
                        help='path to the folder with the _reg_data.pkl files')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')

    args = parser.parse_args()
    processing_experiment_directory = args.experiment_path
    stitched_reference_files_dir = args.reference_files_path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)
    else:
        # Start the dask client on the local machine, using all the
        # available cores minus one
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directories
    processing_experiment_directory = utils.check_trailing_slash(
        processing_experiment_directory, os_windows)
    stitched_reference_files_dir = utils.check_trailing_slash(
        stitched_reference_files_dir, os_windows)

    # Start the logger
    utils.init_file_logger(processing_experiment_directory)
    logger = logging.getLogger()

    # Collect the infos of the experiment and of the processing
    # Parse the experimental metadata file (serial)
    experiment_infos, image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters = \
        utils.experimental_metadata_parser(processing_experiment_directory)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(
        processing_experiment_directory)

    # Get the reference gene used
    reference_gene = flt_rawcnt_config['reference_gene']

    # Stitch the image in 2D or 3D (3D needs more work/testing)
    nr_dim = flt_rawcnt_config['nr_dim']

    # Determine the hybridizations to process
    if isinstance(flt_rawcnt_config['hybs_to_stitch'], list):
        hybridizations_to_process = flt_rawcnt_config['hybs_to_stitch']
    elif flt_rawcnt_config['hybs_to_stitch'] == 'All':
        hybridizations_to_process = list(hybridizations_infos.keys())
    else:
        raise ValueError('Error in the hybridizations to stitch')

    for hybridization in hybridizations_to_process:

        # Determine the genes to stitch in the processing hybridization
        genes_processing = list(hybridizations_infos[hybridization].keys())

        hyb_short = re.sub('Hybridization', 'hyb', hybridization)
        processing_hyb = experiment_infos['ExperimentName'] + '_' + hyb_short
        hyb_dir = processing_experiment_directory + processing_hyb + add_slash

        # Create a pointer to the hdf5 file that will store the stitched
        # images for the current hybridization
        tile_file_base_name = flt_rawcnt_config['analysis_name'] + '_' \
            + experiment_infos['ExperimentName'] + '_' + hyb_short
        stitching_file_name = tile_file_base_name + '.reg.sf.hdf5'
        data_name = (tile_file_base_name
                     + '_' + reference_gene
                     + '_stitching_data_reg')

        # Replace 'w' with 'a' as soon as you fix the error
        stitching_file = h5py.File(stitched_reference_files_dir + stitching_file_name,
                                   'w', libver='latest')

        # Determine the tiles organization
        joining, tiles, nr_pixels, z_count, micData = \
            stitching.get_place_tile_input_apply_npy(hyb_dir,
                                                     stitched_reference_files_dir,
                                                     data_name, image_properties,
                                                     nr_dim)

        for gene in genes_processing:

            # Create the hdf5 file structure
            stitched_group, linear_blending, blend = \
                hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                    stitching_file, joining, nr_pixels, gene, blend='non linear')

            # Fill the hdf5 containing the stitched image with empty data and
            # create the blending mask
            stitched_group['final_image'][:] = np.zeros(joining['final_image_shape'],
                                                        dtype=np.uint16)
            if blend is not None:
                # Make the mask
                stitched_group['blending_mask'][:] = np.zeros(
                    joining['final_image_shape'][-2:], dtype=np.uint16)
                tilejoining.make_mask(joining, nr_pixels,
                                      stitched_group['blending_mask'])

            filtered_img_gene_dirs_path = hyb_dir + flt_rawcnt_config['analysis_name'] \
                + '_' + processing_hyb + '_filtered_npy' + add_slash
            filtered_img_gene_dirs = glob.glob(filtered_img_gene_dirs_path + '*')

            # Create the subdirectory used to save the blended tiles
            suffix = 'blended_tiles'
            blended_tiles_directory = utils.create_single_directory(
                hyb_dir, gene, hybridization, processing_hyb, suffix, add_slash,
                analysis_name=flt_rawcnt_config['analysis_name'])

            # Get the directory with the filtered npy images of the gene to
            # use for the stitching
            stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs
                                   if gene in npy_dir][0]
            stitching_files_dir = stitching_files_dir + add_slash

            # Create the tmp directory where to save the masks
            suffix = 'masks'
            masked_tiles_directory = utils.create_single_directory(
                hyb_dir, gene, hybridization, processing_hyb, suffix, add_slash,
                analysis_name=flt_rawcnt_config['analysis_name'])

            # Create and save the mask files
            for corn_value, corner_coords in joining['corner_list']:
                if not np.isnan(corner_coords[0]):
                    cur_mask = stitched_group['blending_mask'][
                        int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
                        int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]
                    fname = (masked_tiles_directory + flt_rawcnt_config['analysis_name']
                             + '_' + processing_hyb + '_' + gene
                             + '_masks_joining_pos_' + str(corn_value))
                    np.save(fname, cur_mask)

            # Blend all the tiles and save them in a directory
            futures_processes = client.map(tilejoining.generate_blended_tile_npy,
                                           joining['corner_list'],
                                           stitching_files_dir=stitching_files_dir,
                                           blended_tiles_directory=blended_tiles_directory,
                                           masked_tiles_directory=masked_tiles_directory,
                                           analysis_name=flt_rawcnt_config['analysis_name'],
                                           processing_hyb=processing_hyb,
                                           reference_gene=gene,
                                           micData=micData, tiles=tiles,
                                           nr_pixels=nr_pixels,
                                           linear_blending=linear_blending)
            _ = client.gather(futures_processes)

            # Write the stitched image
            tilejoining.make_final_image_npy(joining, stitching_file,
                                             blended_tiles_directory, tiles,
                                             gene, nr_pixels)

            stitching_file.flush()

            # Remove the directories with the blended tiles and masks
            shutil.rmtree(blended_tiles_directory)
            shutil.rmtree(masked_tiles_directory)

        stitching_file.close()

    client.close()
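# ---------------------------------------------------------------------------
# Hedged sketch: inspect the stitched images that apply_stitching() writes to
# the .reg.sf.hdf5 file. The internal group layout is created by
# hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy and is not
# assumed here; instead the sketch walks the whole file and collects every
# 'final_image' dataset. The file path passed in is hypothetical.

def list_stitched_images_sketch(stitching_file_path):
    # Map every .../final_image dataset in the file to its shape
    found = {}
    with h5py.File(stitching_file_path, 'r') as f_hdl:
        def _visit(name, obj):
            if isinstance(obj, h5py.Dataset) and name.endswith('final_image'):
                found[name] = obj.shape
        f_hdl.visititems(_visit)
    return found

# Example (hypothetical path):
# print(list_stitched_images_sketch('/data/reg/analysis_EXP1_hyb1.reg.sf.hdf5'))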
def filtering_speed():
    """
    This script processes all the hybridization folders combined in a
    processing folder. The input parameters are passed using argparse.

    Parameters:
    -----------
    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the processing directory.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start the dask client on a server or cluster
        client = Client(scheduler_address)
    else:
        # Start the dask client on the local machine, using all the
        # available cores minus one
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that must be skipped in the
    # analysis
    blocked_directories = ['_logs']

    # Start the logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list if el not in blocked_directories]

    for processing_hyb in processing_hyb_list:

        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash

        # Parse the experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(hyb_dir)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

        # ----------------- FILTERING AND RAW COUNTING ------------------------

        # Create the directories where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           flt_rawcnt_config['skip_tags_counting'],
                                           flt_rawcnt_config['skip_genes_counting'],
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        for gene in hybridizations_infos[hybridization].keys():
            flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                            + processing_hyb + '_' + gene + '_tmp/*.npy')

            # Filtering
            logger.debug('Filtering without illumination correction %s', gene)

            futures_processes = client.map(
                counting.filtering_and_counting,
                flist_img_to_filter,
                filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                filtered_img_gene_dirs=filtered_img_gene_dirs,
                counting_gene_dirs=counting_gene_dirs,
                plane_keep=flt_rawcnt_config['plane_keep'],
                min_distance=flt_rawcnt_config['min_distance'],
                stringency=flt_rawcnt_config['stringency'],
                skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
            client.gather(futures_processes)

        # ----------------- RAW COUNTING ONLY ------------------------

        skip_genes_counting = flt_rawcnt_config['skip_genes_counting']
        skip_tags_counting = flt_rawcnt_config['skip_tags_counting']

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                                           processing_hyb, suffix, add_slash,
                                           flt_rawcnt_config['skip_tags_counting'],
                                           flt_rawcnt_config['skip_genes_counting'],
                                           analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        gene_list = list(hybridizations_infos[hybridization].keys())
        analysis_name = flt_rawcnt_config['analysis_name']
        sufx_dir_path = hyb_dir + analysis_name + '_' + processing_hyb + '_' \
            + suffix + add_slash

        for gene in hybridizations_infos[hybridization].keys():

            # Select the images according to the gene
            if gene not in skip_genes_counting or \
                    [tag for tag in skip_tags_counting if tag not in gene]:
                if analysis_name:
                    filtered_images_directory = sufx_dir_path + analysis_name + '_' \
                        + processing_hyb + '_' + gene + '_' + suffix + add_slash
                else:
                    filtered_images_directory = sufx_dir_path + processing_hyb + '_' \
                        + gene + '_' + suffix + add_slash

                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')

                # Counting only (no filtering)
                logger.debug('Counting without filtering %s', gene)

                futures_processes = client.map(
                    counting.counting_only,
                    flist_img_to_filter,
                    counting_gene_dirs=counting_gene_dirs,
                    min_distance=flt_rawcnt_config['min_distance'],
                    stringency=flt_rawcnt_config['stringency'])
                client.gather(futures_processes)

    client.close()
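# ---------------------------------------------------------------------------
# The three cluster-aware entry points above share the same client start-up
# logic. A minimal sketch of that pattern, factored out for clarity (the
# helper name is ours, not part of the pipeline):

def make_dask_client_sketch(scheduler_address=False):
    # Connect to a running dask.distributed scheduler when an address is
    # given (ex. tcp://192.168.0.4:7003); otherwise start a local cluster
    # using all the available cores minus one, as the scripts above do
    if scheduler_address:
        return Client(scheduler_address)
    ncores = multiprocessing.cpu_count() - 1
    cluster = LocalCluster(n_workers=ncores)
    return Client(cluster)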
def process_standalone_experiment():
    """
    Script to run conversion, filtering and raw counting on a small set of
    images. The analysis runs locally. All the parameters are entered with
    argparse.

    Parameters:
    -----------
    path: string
        Path to the experiment to process.
    analysis_name: string
        Name of the analysis.
    stringency: int
        Value of the stringency to use in the threshold selection. Default=0.
    min_distance: int
        Min distance between two peaks. Default=5.
    min_plane: int
        Min plane for z-stack cropping. Default=None.
    max_plane: int
        Max plane for z-stack cropping. Default=None.
    ncores: int
        Number of cores to use for the processing. Default=1.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Counting and filtering experiment')
    parser.add_argument('-path', help='path to experiment to analyze')
    parser.add_argument('-analysis_name', help='analysis name')
    parser.add_argument('-stringency', help='stringency', default=0, type=int)
    parser.add_argument('-min_distance', help='min distance between peaks',
                        default=5, type=int)
    parser.add_argument('-min_plane', help='starting plane to consider',
                        default=None, type=int)
    parser.add_argument('-max_plane', help='ending plane to consider',
                        default=None, type=int)
    parser.add_argument('-ncores', help='number of cores to use',
                        default=1, type=int)

    # Parse the input args
    args = parser.parse_args()
    processing_directory = args.path
    analysis_name = args.analysis_name
    stringency = args.stringency
    min_distance = args.min_distance
    min_plane = args.min_plane
    max_plane = args.max_plane
    ncores = args.ncores

    if min_plane is not None and max_plane is not None:
        plane_keep = [min_plane, max_plane]
    else:
        plane_keep = None

    # Determine the os type
    os_windows, add_slash = utils.determine_os()

    # Start the logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()
    logger.debug('min_plane %s', min_plane)
    logger.debug('max_plane %s', max_plane)
    logger.debug('keep_planes value %s', plane_keep)

    # Start the distributed client
    client = Client(n_workers=ncores, threads_per_worker=1)
    logger.debug('client %s', client)
    logger.debug('check that workers are on the same directory %s',
                 client.run(os.getcwd))

    # Check the trailing slash
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Determine the experiment name
    exp_name = processing_directory.split(add_slash)[-2]
    logger.debug('Experiment name: %s', exp_name)

    # Create the directories where to save the output
    tmp_dir_path = processing_directory + analysis_name + '_' + exp_name \
        + '_tmp' + add_slash
    filtered_dir_path = processing_directory + analysis_name + '_' + exp_name \
        + '_filtered' + add_slash
    counting_dir_path = processing_directory + analysis_name + '_' + exp_name \
        + '_counting_pkl' + add_slash

    for dir_path in (tmp_dir_path, filtered_dir_path, counting_dir_path):
        try:
            os.stat(dir_path)
        except FileNotFoundError:
            os.mkdir(dir_path)
            os.chmod(dir_path, 0o777)

    # Get the list of the .nd2 files to process inside the directory
    files_list = glob.glob(processing_directory + '*.nd2')
    logger.debug('files to process %s', files_list)

    # Convert the .nd2 data
    for raw_data_gene_fname in files_list:
        fname = raw_data_gene_fname.split(add_slash)[-1][:-4]
        logger.debug('fname %s', fname)

        with nd2.Nd2(raw_data_gene_fname) as nd2file:
            for channel in nd2file.channels:
                for fov in nd2file.fields_of_view:
                    img_stack = np.empty(
                        [len(nd2file.z_levels), nd2file.height, nd2file.width],
                        dtype='uint16')
                    images = nd2file.select(channels=channel,
                                            fields_of_view=fov,
                                            z_levels=nd2file.z_levels)
                    for idx, im in enumerate(images):
                        img_stack[idx, :, :] = im
                    converted_fname = (tmp_dir_path + exp_name + '_' + fname + '_'
                                       + channel + '_fov_' + str(fov) + '.npy')
                    np.save(converted_fname, img_stack, allow_pickle=False)

    logger.debug('Finished .nd2 file conversion')

    # Filter all the data
    # Get the list of the files to process
    flist_img_to_filter = glob.glob(tmp_dir_path + '*.npy')
    # logger.debug('files to filter %s', flist_img_to_filter)

    # Process all the data in parallel
    futures_processes = client.map(filtering_and_counting_experiment,
                                   flist_img_to_filter,
                                   filtered_dir_path=filtered_dir_path,
                                   counting_dir_path=counting_dir_path,
                                   exp_name=exp_name,
                                   plane_keep=plane_keep,
                                   add_slash=add_slash,
                                   min_distance=min_distance,
                                   stringency=stringency)
    client.gather(futures_processes)
    client.close()

    logger.debug('Finished filtering and counting')

    # Delete the tmp folder
    shutil.rmtree(tmp_dir_path)
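# ---------------------------------------------------------------------------
# Hedged usage example for process_standalone_experiment(). The script name
# and paths are hypothetical; the flags match the argparse definitions above:
#
#   python standalone_experiment.py -path /data/EXP-001/ \
#       -analysis_name test_run -stringency 0 -min_distance 5 \
#       -min_plane 2 -max_plane 18 -ncores 4
#
# A minimal guard so the module can also be executed directly. This is an
# assumption: the entry points above are presumably exposed as console
# scripts, and only the standalone local analysis is wired here.

if __name__ == '__main__':
    process_standalone_experiment()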