def find_contact_sites(cset, knossos_path, filename='cs', n_max_co_processes=None,
                       size=None, offset=None):
    """
    Deprecated. Replaced by :func:`extract_contact_sites`.

    # TODO: add additional chunk-chunking (less number of submitted jobs)

    Parameters
    ----------
    cset : chunky.ChunkDataset
        ChunkDataset in which the contact-site data is stored per chunk.
    knossos_path : str
        Path to the KnossosDataset containing the supervoxel segmentation.
    filename : str
        Name under which the contact-site data is stored in the chunks.
    n_max_co_processes : int or None
        Number of parallel workers.
    size : np.array or None
        Size of the sub-volume to process. If `size` or `offset` is None,
        the whole dataset is processed.
    offset : np.array or None
        Offset of the sub-volume to process.
    """
    log_extraction.warning(
        DeprecationWarning('"find_contact_sites" was replaced by '
                           '"extract_contact_sites".'))
    if size is not None and offset is not None:
        chunk_list, _ = calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]

    os.makedirs(cset.path_head_folder, exist_ok=True)
    multi_params = []
    for chunk_k in chunk_list:
        multi_params.append([cset.chunk_dict[chunk_k], knossos_path, filename])

    if not qu.batchjob_enabled():
        _ = start_multiprocess_imap(_contact_site_detection_thread, multi_params,
                                    debug=False, nb_cpus=n_max_co_processes)
    else:
        _ = qu.QSUB_script(multi_params, "contact_site_detection",
                           n_max_co_processes=n_max_co_processes)
    chunky.save_dataset(cset)
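
# Illustrative usage sketch for the deprecated routine above. The ChunkDataset
# location and the segmentation path are placeholders (assumptions, not part of
# this repository); new pipelines should call `extract_contact_sites` instead.
def _example_find_contact_sites():
    kd_seg_path = "/path/to/seg/knossosdataset/"  # hypothetical segmentation KD
    cd_dir = "/path/to/chunkdatasets/cs/"         # hypothetical ChunkDataset dir
    kd = kd_factory(kd_seg_path)
    cset = chunky.ChunkDataset()
    cset.initialize(kd, kd.boundary, [512, 512, 512], cd_dir,
                    box_coords=[0, 0, 0], fit_box_size=True)
    # restrict the detection to a 1024**3 sub-volume at the dataset origin
    find_contact_sites(cset, kd_seg_path, filename='cs', n_max_co_processes=4,
                       size=np.array([1024, 1024, 1024]),
                       offset=np.zeros(3, dtype=np.int64))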
def extract_agg_contact_sites(cset, working_dir, filename='cs', hdf5name='cs',
                              n_folders_fs=10000, suffix="",
                              n_max_co_processes=None, n_chunk_jobs=2000,
                              size=None, offset=None, log=None):
    """
    Exports the chunk-wise contact-site segmentation stored in `cset` to a
    KnossosDataset and creates the corresponding SegmentationDataset.

    Parameters
    ----------
    cset : chunky.ChunkDataset
        ChunkDataset containing the contact-site segmentation.
    working_dir : str
        Working directory in which the SegmentationDataset is created.
    filename : str
        Name of the chunk data and of the resulting segmentation KnossosDataset.
    hdf5name : str
        Name/label of the dataset inside the chunk HDF5 files.
    n_folders_fs : int
        Number of folders used to organize the supervoxel storage.
    suffix : str
        Suffix for intermediate results.
    n_max_co_processes : int or None
        Number of parallel workers.
    n_chunk_jobs : int
        Number of jobs used during the object extraction.
    size : np.array or None
        Size of the sub-volume to process.
    offset : np.array or None
        Offset of the sub-volume to process.
    log : logging.Logger or None
        Logger; defaults to the module-level extraction logger.
    """
    if log is None:
        log = log_extraction
    chunky.save_dataset(cset)
    # init CS segmentation KD
    kd = kd_factory(global_params.config.kd_seg_path)
    path = "{}/knossosdatasets/{}_seg/".format(
        global_params.config.working_dir, filename)
    if os.path.isdir(path):
        log.debug('Found existing KD at {}. Removing it now.'.format(path))
        shutil.rmtree(path)
    target_kd = knossosdataset.KnossosDataset()
    scale = np.array(global_params.config.entries["Dataset"]["scaling"])
    target_kd.initialize_without_conf(path, kd.boundary, scale,
                                      kd.experiment_name, mags=[1, ])
    target_kd = knossosdataset.KnossosDataset()
    target_kd.initialize_from_knossos_path(path)

    # convert ChunkDataset to KD
    export_cset_to_kd_batchjob(cset, target_kd, '{}'.format(filename),
                               [hdf5name], offset=offset, size=size,
                               stride=[4 * 128, 4 * 128, 4 * 128], as_raw=False,
                               orig_dtype=np.uint64, unified_labels=False,
                               n_max_co_processes=n_max_co_processes)
    log.debug('Finished conversion of ChunkDataset ({}) into KnossosDataset '
              '({})'.format(cset.path_head_folder, target_kd.knossos_path))

    # get CS SD
    from_ids_to_objects(cset, filename, overlaydataset_path=target_kd.conf_path,
                        n_chunk_jobs=n_chunk_jobs, hdf5names=[hdf5name],
                        n_max_co_processes=n_max_co_processes,
                        workfolder=working_dir, n_folders_fs=n_folders_fs,
                        use_combined_extraction=True, suffix=suffix,
                        size=size, offset=offset, log=log)
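
# Illustrative usage sketch: turning the chunk-wise contact-site segmentation
# produced by `find_contact_sites` into a SegmentationDataset. The `cset`
# argument and the working directory are placeholders; `cset` must already
# contain contact-site data stored under `filename`.
def _example_extract_agg_contact_sites(cset):
    working_dir = "/path/to/working_dir/"  # hypothetical
    extract_agg_contact_sites(cset, working_dir, filename='cs', hdf5name='cs',
                              n_folders_fs=10000, n_max_co_processes=4)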
def _pred_dataset(kd_p, kd_pred_p, cd_p, model_p, imposed_patch_size=None,
                  mfp_active=False, gpu_id=0, overwrite=False, i=None, n=None):
    """
    Helper function for dataset prediction. Runs the prediction on the whole
    or a partial KnossosDataset. The imposed patch size has to be given in
    (Z, X, Y)!

    Parameters
    ----------
    kd_p : str
        Path to the KnossosDataset .conf file.
    kd_pred_p : str
        Path to the KnossosDataset head folder which will contain the prediction.
    cd_p : str
        Destination folder for the ChunkDataset containing the prediction.
    model_p : str
        Path to the ELEKTRONN2 model.
    imposed_patch_size : tuple or None
        Patch size (Z, X, Y) of the model.
    mfp_active : bool
        Activate max-fragment pooling (might require changing patch_size).
    gpu_id : int
        The GPU to be used.
    overwrite : bool
        True: predict all chunks from scratch; False: continue an earlier
        prediction and skip chunks which already contain a prediction.
    i : int or None
        Index of this worker; together with `n` it selects every n-th chunk
        starting at `i`.
    n : int or None
        Total number of workers.
    """
    initgpu(gpu_id)
    from elektronn2.neuromancer.model import modelload
    kd = KnossosDataset()
    kd.initialize_from_knossos_path(kd_p, fixed_mag=1)

    m = modelload(model_p, imposed_patch_size=list(imposed_patch_size)
                  if isinstance(imposed_patch_size, tuple) else imposed_patch_size,
                  override_mfp_to_active=mfp_active, imposed_batch_size=1)
    original_do_rates = m.dropout_rates
    m.dropout_rates = [0.0, ] * len(original_do_rates)
    offset = m.target_node.shape.offsets
    offset = np.array([offset[1], offset[2], offset[0]], dtype=np.int)
    cd = ChunkDataset()
    cd.initialize(kd, kd.boundary, [512, 512, 256], cd_p, overlap=offset,
                  box_coords=np.zeros(3), fit_box_size=True)

    ch_dc = cd.chunk_dict
    print('Total number of chunks for GPU/GPUs:', len(ch_dc))

    if i is not None and n is not None:
        chunks = list(ch_dc.values())[i::n]
    else:
        chunks = list(ch_dc.values())
    print("Starting prediction of %d chunks on GPU %d\n" % (len(chunks), gpu_id))

    if not overwrite:
        for chunk in chunks:
            try:
                _ = chunk.load_chunk("pred")[0]
            except Exception:
                chunk_pred(chunk, m)
    else:
        for chunk in chunks:
            try:
                chunk_pred(chunk, m)
            except KeyboardInterrupt as e:
                print("Exiting out from chunk prediction: ", str(e))
                return
    save_dataset(cd)

    # single-GPU processing also exports the cset to a KnossosDataset
    if n is None:
        kd_pred = KnossosDataset()
        kd_pred.initialize_without_conf(kd_pred_p, kd.boundary, kd.scale,
                                        kd.experiment_name, mags=[1, 2, 4, 8])
        cd.export_cset_to_kd(kd_pred, "pred", ["pred"], [4, 4], as_raw=True,
                             stride=[256, 256, 256])
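
# Illustrative usage sketch: splitting the chunk-wise prediction across two
# GPUs by launching one process per GPU, using the `i`/`n` worker indices.
# All paths and the patch size are placeholders; the patch size has to match
# the model and is given in (Z, X, Y).
def _example_pred_dataset_two_gpus():
    from multiprocessing import Process
    kd_p = "/path/to/raw/knossos.conf"           # hypothetical
    kd_pred_p = "/path/to/prediction_kd/"        # hypothetical
    cd_p = "/path/to/prediction_chunkdataset/"   # hypothetical
    model_p = "/path/to/elektronn2_model.mdl"    # hypothetical
    procs = []
    for gpu_id in (0, 1):
        # worker `i=gpu_id` of `n=2` handles every second chunk
        p = Process(target=_pred_dataset, args=(kd_p, kd_pred_p, cd_p, model_p),
                    kwargs=dict(imposed_patch_size=(22, 140, 140),
                                gpu_id=gpu_id, i=gpu_id, n=2))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
    # note: with `n` set, the final export to a KnossosDataset is skipped (see above)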
def extract_contact_sites(n_max_co_processes: Optional[int] = None,
                          chunk_size: Optional[Tuple[int, int, int]] = None,
                          log: Optional[Logger] = None,
                          max_n_jobs: Optional[int] = None,
                          cube_of_interest_bb: Optional[np.ndarray] = None,
                          n_folders_fs: int = 1000):
    """
    Extracts contact sites and their overlap with `sj` objects and stores them
    in a :class:`~syconn.reps.segmentation.SegmentationDataset` of type ``cs``
    and ``syn`` respectively. If synapse type is available, this information
    will be stored as the voxel-ratio per class in the attribute dictionary of
    the ``syn`` objects (keys: ``sym_prop``, ``asym_prop``).

    Notes:
        Replaced ``find_contact_sites``, ``extract_agg_contact_sites``,
        ``syn_gen_via_cset`` and ``extract_synapse_type``.

    Args:
        n_max_co_processes: Number of parallel workers.
        chunk_size: Sub-cube volume which is processed at a time.
        log: Logger.
        max_n_jobs: Maximum number of jobs.
        cube_of_interest_bb: Sub-volume of the data set which is processed.
            Default: Entire data set.
        n_folders_fs: Number of folders used for organizing supervoxel data.
    """
    if extract_cs_syntype is None:
        msg = '`extract_contact_sites` requires the cythonized method ' \
              '`extract_cs_syntype`. Use `find_contact_sites` and others ' \
              'for contact site processing.'
        log_extraction.error(msg)
        raise ImportError(msg)
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    if chunk_size is None:
        chunk_size = (512, 512, 512)
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # Initial contact site extraction
    cd_dir = global_params.config.temp_path + "/chunkdatasets/cs/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cset = chunky.ChunkDataset()
    cset.initialize(kd, kd.boundary, chunk_size, cd_dir,
                    box_coords=[0, 0, 0], fit_box_size=True)

    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if log is None:
        log = log_extraction
    if size is not None and offset is not None:
        chunk_list, _ = calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]
    # shuffle chunk list to get a more balanced work-load
    rand_ixs = np.arange(len(chunk_list))
    np.random.shuffle(rand_ixs)
    chunk_list = np.array(chunk_list)[rand_ixs]
    os.makedirs(cset.path_head_folder, exist_ok=True)
    multi_params = []
    # TODO: currently pickles Chunk objects -> job submission might be slow
    for chunk_k in chunkify(chunk_list, max_n_jobs):
        multi_params.append([[cset.chunk_dict[k] for k in chunk_k],
                             global_params.config.kd_seg_path])

    if not qu.batchjob_enabled():
        results = start_multiprocess_imap(_contact_site_extraction_thread,
                                          multi_params, debug=False,
                                          nb_cpus=n_max_co_processes)
    else:
        path_to_out = qu.QSUB_script(multi_params, "contact_site_extraction",
                                     n_max_co_processes=n_max_co_processes,
                                     log=log)
        out_files = glob.glob(path_to_out + "/*")
        results = []
        for out_file in out_files:
            with open(out_file, 'rb') as f:
                results.append(pkl.load(f))
        shutil.rmtree(os.path.abspath(path_to_out + "/../"), ignore_errors=True)

    # reduce step
    cs_props = [{}, defaultdict(list), {}]
    syn_props = [{}, defaultdict(list), {}]
    tot_sym_cnt = {}
    tot_asym_cnt = {}
    for curr_props, curr_syn_props, asym_cnt, sym_cnt in results:
        merge_prop_dicts([cs_props, curr_props])
        merge_prop_dicts([syn_props, curr_syn_props])
        merge_type_dicts([tot_asym_cnt, asym_cnt])
        merge_type_dicts([tot_sym_cnt, sym_cnt])
    log.info('Finished contact site (#objects: {}) and synapse (#objects: {})'
             ' extraction.'.format(len(cs_props[0]), len(syn_props[0])))
    if len(syn_props[0]) == 0:
        log.critical('WARNING: Did not find any synapses during extraction step.')
    # TODO: extract syn objects! maybe replace sj_0 Segmentation dataset by the
    #  overlapping CS <-> sj objects -> run syn. extraction and sd_generation in
    #  parallel and return mi_0, vc_0 and syn_0 -> use syns as new sjs during
    #  rendering! -> Run CS generation in parallel with mapping to at least get
    #  the syn objects before rendering the neuron views (which need subcellular
    #  structures, there one can then use mi, vc and syn (instead of sj))
    dict_paths = []
    # dump intermediate results
    # TODO: size filter here or during write-out? TODO: use config parameter
    dict_p = "{}/cs_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(cs_props, f)
    del cs_props
    dict_paths.append(dict_p)

    dict_p = "{}/syn_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(syn_props, f)
    del syn_props
    dict_paths.append(dict_p)

    # convert counting dicts to store ratio of syn. type voxels
    dict_p = "{}/cs_sym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_sym_cnt, f)
    del tot_sym_cnt
    dict_paths.append(dict_p)

    dict_p = "{}/cs_asym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_asym_cnt, f)
    del tot_asym_cnt
    dict_paths.append(dict_p)

    # write cs and syn segmentation to KD and SD
    chunky.save_dataset(cset)
    kd = kd_factory(global_params.config.kd_seg_path)
    # convert ChunkDataset to syn and cs KD
    # TODO: spawn in parallel
    for obj_type in ['cs', 'syn']:
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, obj_type)
        if os.path.isdir(path):
            log.debug('Found existing KD at {}. Removing it now.'.format(path))
            shutil.rmtree(path)
        target_kd = knossosdataset.KnossosDataset()
        scale = np.array(global_params.config.entries["Dataset"]["scaling"])
        target_kd.initialize_without_conf(path, kd.boundary, scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        export_cset_to_kd_batchjob(cset, target_kd, obj_type, [obj_type],
                                   offset=offset, size=size, stride=chunk_size,
                                   as_raw=False, orig_dtype=np.uint64,
                                   unified_labels=False,
                                   n_max_co_processes=n_max_co_processes,
                                   log=log)
        log.debug('Finished conversion of ChunkDataset ({}) into KnossosDataset'
                  ' ({})'.format(cset.path_head_folder, target_kd.knossos_path))

    # Write SD
    max_n_jobs = global_params.NNODES_TOTAL * 2
    path = "{}/knossosdatasets/syn_seg/".format(global_params.config.working_dir)
    path_cs = "{}/knossosdatasets/cs_seg/".format(global_params.config.working_dir)
    storage_location_ids = rep_helper.get_unique_subfold_ixs(n_folders_fs)
    multi_params = [(sv_id_block, n_folders_fs, path, path_cs) for sv_id_block in
                    basics.chunkify(storage_location_ids, max_n_jobs)]
    if not qu.batchjob_enabled():
        start_multiprocess_imap(_write_props_to_syn_singlenode_thread,
                                multi_params, nb_cpus=1, debug=False)
    else:
        qu.QSUB_script(multi_params, "write_props_to_syn_singlenode", log=log,
                       n_cores=global_params.NCORES_PER_NODE,
                       n_max_co_processes=global_params.NNODES_TOTAL,
                       remove_jobfolder=True)
    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn', version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)
    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='cs', version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)

    for p in dict_paths:
        os.remove(p)
    shutil.rmtree(cd_dir, ignore_errors=True)
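
# Illustrative usage sketch for the combined contact-site / synapse extraction.
# It assumes `global_params.config` is already set up for the current working
# directory (segmentation KD path, temp path, scaling); the bounding box below
# is an arbitrary example value.
def _example_extract_contact_sites():
    # process only a 2048**3 cube starting at voxel (0, 0, 0)
    bb = np.array([[0, 0, 0], [2048, 2048, 2048]])
    extract_contact_sites(n_max_co_processes=20, chunk_size=(512, 512, 512),
                          max_n_jobs=100, cube_of_interest_bb=bb,
                          n_folders_fs=1000)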
def from_probabilities_to_objects(cset, filename, hdf5names, object_names=None,
                                  overlap="auto", sigmas=None, thresholds=None,
                                  debug=False, swapdata=0, target_kd=None,
                                  offset=None, size=None, prob_kd_path_dict=None,
                                  membrane_filename=None, membrane_kd_path=None,
                                  hdf5_name_membrane=None, n_folders_fs=1000,
                                  suffix="", n_max_co_processes=None,
                                  transform_func=None, func_kwargs=None,
                                  nb_cpus=None, workfolder=None, n_erosion=0,
                                  overlap_thresh=0, stitch_overlap=None,
                                  load_from_kd_overlaycubes=False,
                                  transf_func_kd_overlay=None, log=None):
    """
    Main function for the object extraction step; combines all needed steps.

    # TODO: Merge this method with mapping (e.g. iterate over chunks of cell SV
    #  segm. and over all objects to extract bounding boxes and overlap
    #  (i.e. mapping) at the same time)
    # TODO: change object_names to dataset_names as in other methods

    Parameters
    ----------
    cset : chunky.ChunkDataset
        ChunkDataset instance.
    filename : str
        Filename of the prediction in the ChunkDataset.
    hdf5names : List[str]
        List of names/labels to be extracted and processed from the prediction
        file.
    object_names : list of str
        List of names used for 'object_type' when creating the
        SegmentationDataset. Must have the same length as `hdf5names`.
    overlap : str or np.array
        Defines the overlap with neighbouring chunks that is left for later
        processing steps; if 'auto' the overlap is calculated from the sigma
        and the stitch_overlap (here: [1., 1., 1.]).
    sigmas : List[List] or None
        Defines the sigmas of the gaussian filters applied to the probability
        maps. Has to be the same length as `hdf5names`. If None, no gaussian
        filter is applied.
    thresholds : list of float
        Threshold for cutting the probability map. Has to be the same length as
        `hdf5names`. If None, zeros are used instead (not recommended!).
    debug : bool
        If True, multiprocessed steps only operate on one core using 'map',
        which allows for better error messages.
    swapdata : bool
        If True, an x-z swap is applied to the data prior to processing.
    target_kd : knossosdataset.KnossosDataset or None
        If given, the stitched segmentation is exported to this KnossosDataset
        and the voxel extraction is performed on its overlay cubes.
    offset : np.array
        Offset of the volume to the origin.
    size : np.array
        Size of the volume.
    prob_kd_path_dict : dict or None
        Maps each entry of `hdf5names` to the path of a KnossosDataset holding
        the corresponding probability map; used as data source for the
        segmentation step.
    membrane_filename : str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Filename of the prediction in the ChunkDataset. The
        threshold is currently set at 0.4.
    membrane_kd_path : str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Path to the KnossosDataset containing a membrane
        segmentation. The threshold is currently set at 0.4.
    hdf5_name_membrane : str
        When using `membrane_filename`, this key has to be given to access the
        data in the saved chunk.
    n_folders_fs : int
        Number of folders used to organize the supervoxel storage of the
        resulting SegmentationDataset.
    suffix : str
        Suffix for the intermediate results.
    n_max_co_processes : int or None
        Number of parallel workers.
    transform_func : callable
        Segmentation method which is applied.
    func_kwargs : dict
        Keyword arguments for `transform_func`.
    nb_cpus : int
        Number of cpus used if QSUB is disabled.
    workfolder : str
        Destination where the SegmentationDataset will be stored.
    n_erosion : int
        Number of erosions applied to the segmentation of unique_components0
        to avoid segmentation artefacts caused by start location dependency in
        the chunk data array.
    overlap_thresh : float
        Overlap fraction of objects in different chunks to be considered
        stitched. If zero, this behavior is disabled.
    stitch_overlap : np.array
        Volume evaluated during the stitching procedure.
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlay cubes instead of raw cubes.
    transf_func_kd_overlay : callable
        Method which is applied to the cube data if `load_from_kd_overlaycubes`
        is True.
    log : logging.Logger
    """
    if log is None:
        log = log_extraction
    all_times = []
    step_names = []
    if prob_kd_path_dict is not None:
        kd_keys = list(prob_kd_path_dict.keys())
        assert len(kd_keys) == len(hdf5names)
        for kd_key in kd_keys:
            assert kd_key in hdf5names

    if size is not None and offset is not None:
        chunk_list, chunk_translator = \
            calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_translator = {}
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]
        for ii in range(len(cset.chunk_dict)):
            chunk_translator[ii] = ii

    if thresholds is not None and thresholds[0] <= 1.:
        thresholds = np.array(thresholds)
        thresholds *= 255

    if sigmas is not None and swapdata == 1:
        for nb_sigma in range(len(sigmas)):
            if len(sigmas[nb_sigma]) == 3:
                sigmas[nb_sigma] = \
                    basics.switch_array_entries(sigmas[nb_sigma], [0, 2])

    # --------------------------------------------------------------------------

    time_start = time.time()
    cc_info_list, overlap_info = oes.object_segmentation(
        cset, filename, hdf5names, overlap=overlap, sigmas=sigmas,
        thresholds=thresholds, chunk_list=chunk_list, debug=debug,
        swapdata=swapdata, prob_kd_path_dict=prob_kd_path_dict,
        membrane_filename=membrane_filename, membrane_kd_path=membrane_kd_path,
        hdf5_name_membrane=hdf5_name_membrane, fast_load=True, suffix=suffix,
        transform_func=transform_func, transform_func_kwargs=func_kwargs,
        n_max_co_processes=n_max_co_processes, nb_cpus=nb_cpus,
        load_from_kd_overlaycubes=load_from_kd_overlaycubes,
        transf_func_kd_overlay=transf_func_kd_overlay)
    if stitch_overlap is None:
        stitch_overlap = overlap_info[1]
    else:
        overlap_info[1] = stitch_overlap
    if not np.all(stitch_overlap <= overlap_info[0]):
        msg = "Stitch overlap ({}) has to be <= the chunk overlap ({})." \
              "".format(overlap_info[1], overlap_info[0])
        log.error(msg)
        raise ValueError(msg)
    overlap = overlap_info[0]
    all_times.append(time.time() - time_start)
    step_names.append("connected components")
    basics.write_obj2pkl(cset.path_head_folder.rstrip("/") +
                         "/connected_components.pkl",
                         [cc_info_list, overlap_info])

    # --------------------------------------------------------------------------

    time_start = time.time()
    nb_cc_dict = {}
    max_nb_dict = {}
    max_labels = {}
    for hdf5_name in hdf5names:
        nb_cc_dict[hdf5_name] = np.zeros(len(chunk_list), dtype=np.int32)
        max_nb_dict[hdf5_name] = np.zeros(len(chunk_list), dtype=np.int32)
    for cc_info in cc_info_list:
        nb_cc_dict[cc_info[1]][chunk_translator[cc_info[0]]] = cc_info[2]
    for hdf5_name in hdf5names:
        max_nb_dict[hdf5_name][0] = 0
        for nb_chunk in range(1, len(chunk_list)):
            max_nb_dict[hdf5_name][nb_chunk] = \
                max_nb_dict[hdf5_name][nb_chunk - 1] + \
                nb_cc_dict[hdf5_name][nb_chunk - 1]
        max_labels[hdf5_name] = int(max_nb_dict[hdf5_name][-1] +
                                    nb_cc_dict[hdf5_name][-1])
    all_times.append(time.time() - time_start)
    step_names.append("extracting max labels")
    basics.write_obj2pkl(cset.path_head_folder.rstrip("/") + "/max_labels.pkl",
                         [max_labels])

    # --------------------------------------------------------------------------

    time_start = time.time()
    oes.make_unique_labels(cset, filename, hdf5names, chunk_list, max_nb_dict,
                           chunk_translator, debug, suffix=suffix,
                           n_max_co_processes=n_max_co_processes,
                           nb_cpus=nb_cpus)
    all_times.append(time.time() - time_start)
    step_names.append("unique labels")

    # --------------------------------------------------------------------------

    chunky.save_dataset(cset)  # save dataset to be able to load it during
    # make_stitch_list (this allows loading the ChunkDataset inside the worker
    # instead of pickling it for each job, which slows down the submission
    # process).
time_start = time.time() stitch_list = oes.make_stitch_list(cset, filename, hdf5names, chunk_list, stitch_overlap, overlap, debug, suffix=suffix, n_erosion=n_erosion, n_max_co_processes=n_max_co_processes, overlap_thresh=overlap_thresh) all_times.append(time.time() - time_start) step_names.append("stitch list") basics.write_obj2pkl(cset.path_head_folder.rstrip("/") + "/stitch_list.pkl", [stitch_list]) # # # ------------------------------------------------------------------------ # time_start = time.time() merge_dict, merge_list_dict = oes.make_merge_list(hdf5names, stitch_list, max_labels) all_times.append(time.time() - time_start) step_names.append("merge list") basics.write_obj2pkl(cset.path_head_folder.rstrip("/") + "/merge_list.pkl", [merge_dict, merge_list_dict]) # -------------------------------------------------------------------------- time_start = time.time() oes.apply_merge_list(cset, chunk_list, filename, hdf5names, merge_list_dict, debug, suffix=suffix, nb_cpus=nb_cpus, n_max_co_processes=n_max_co_processes) all_times.append(time.time() - time_start) step_names.append("apply merge list") if target_kd is not None: time_start = time.time() chunky.save_dataset(cset) oes.export_cset_to_kd_batchjob( cset, target_kd, '{}_stitched_components'.format(filename), hdf5names, offset=offset, size=size, stride=[4 * 128, 4 * 128, 4 * 128], as_raw=False, orig_dtype=np.uint64, unified_labels=False, n_max_co_processes=n_max_co_processes) all_times.append(time.time() - time_start) step_names.append("export KD") # -------------------------------------------------------------------------- time_start = time.time() oes.extract_voxels_combined(cset, filename, hdf5names, n_folders_fs=n_folders_fs, chunk_list=chunk_list, suffix=suffix, workfolder=workfolder, overlaydataset_path=target_kd.conf_path, n_max_co_processes=n_max_co_processes) all_times.append(time.time() - time_start) step_names.append("extract and combine voxels") else: time_start = time.time() oes.extract_voxels(cset, filename, hdf5names, chunk_list=chunk_list, suffix=suffix, workfolder=global_params.config.working_dir, n_folders_fs=n_folders_fs, n_max_co_processes=n_max_co_processes) all_times.append(time.time() - time_start) step_names.append("extract voxels") # -------------------------------------------------------------------------- time_start = time.time() oes.combine_voxels(global_params.config.working_dir, hdf5names, n_folders_fs=n_folders_fs, n_chunk_jobs=5000, n_max_co_processes=n_max_co_processes) all_times.append(time.time() - time_start) step_names.append("combine voxels") # -------------------------------------------------------------------------- log.debug("Time overview [from_probabilities_to_objects]:") for ii in range(len(all_times)): log.debug("%s: %.3fs" % (step_names[ii], all_times[ii])) log.debug("--------------------------") log.debug("Total Time: %.1f min" % (np.sum(all_times) / 60)) log.debug("--------------------------")
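
# Illustrative usage sketch for the probability-map based object extraction.
# Dataset names, paths and thresholds are placeholders; `cset` must contain a
# prediction stored under `filename`, or `prob_kd_path_dict` must point to
# KnossosDatasets holding the probability maps for each entry in `hdf5names`.
def _example_from_probabilities_to_objects(cset):
    from_probabilities_to_objects(
        cset, filename='prob_maps', hdf5names=['mi', 'vc', 'sj'],
        thresholds=[0.6, 0.5, 0.45],          # values <= 1 are scaled to 0..255
        sigmas=None, n_folders_fs=1000,
        workfolder="/path/to/working_dir/",   # hypothetical
        n_max_co_processes=20)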