def __init__(self,
             wt_dir: Path,
             mut_dir: Path,
             mask: np.ndarray,
             config: Dict,
             label_info_file: Path,
             lines_to_process: Union[List, None] = None,
             baseline_file: Union[str, None] = None,
             mutant_file: Union[str, None] = None,
             memmap: bool = False):
    """
    Parameters
    ----------
    wt_dir
        Root of the wild type data
    mut_dir
        Root of the mutant data
    mask
        3D mask used to exclude background voxels
    config
        The stats config
    label_info_file
        CSV mapping label numbers to label names
    lines_to_process
        If not None, only process these mutant lines
    baseline_file
        Path to csv containing baseline ids to use. If None, use all baselines
    mutant_file
        Path to file containing mutant ids to use. If None, use all mutants
    memmap
        If True, memory-map the input data to reduce RAM usage
    """
    self.norm_to_mask_volume_on = False

    self.label_info: pd.DataFrame = None
    self.baseline_ids = self.load_ids(baseline_file)

    if mutant_file:
        try:
            self.mutant_ids = common.cfg_load(mutant_file)
        except toml.decoder.TomlDecodeError as e:
            raise ValueError(f'The mutant id file is not correctly formatted\n{e}')
    else:
        self.mutant_ids = None

    if label_info_file:
        self.label_info = pd.read_csv(label_info_file)

    self.wt_dir = wt_dir
    self.mut_dir = mut_dir
    self.config = config
    self.label_info_file = label_info_file
    self.lines_to_process = lines_to_process
    self.mask = mask  # 3D mask
    self.shape = None
    self.normaliser = None
    self.blur_fwhm = config.get('blur', DEFAULT_FWHM)
    self.voxel_size = config.get('voxel_size', DEFAULT_VOXEL_SIZE)
    self.memmap = memmap
def generate_organ_volumes(config: LamaConfig):
    # The inversion order is the reverse of the registration order, so its last entry is the
    # first registration stage, which holds the final propagated labels
    invert_config = config['inverted_transforms'] / INVERT_CONFIG
    first_stage = cfg_load(invert_config)['inversion_order'][-1]

    inverted_label_dir = config['inverted_labels'] / first_stage

    out_path = config['organ_vol_result_csv']

    # Generate the organ volume csv
    label_sizes(inverted_label_dir, out_path)
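# A minimal sketch of what label_sizes() above is assumed to produce: one row per
# specimen, one column per label, values are voxel counts. This helper is purely
# illustrative (not the LAMA implementation); the SimpleITK reader is an assumption.
def _label_sizes_sketch(label_dir: Path, out_csv: Path):
    import numpy as np
    import pandas as pd
    import SimpleITK as sitk

    rows = {}
    for label_path in sorted(label_dir.glob('**/*.nrrd')):
        arr = sitk.GetArrayFromImage(sitk.ReadImage(str(label_path)))
        labels, counts = np.unique(arr[arr != 0], return_counts=True)
        rows[label_path.stem] = dict(zip(labels.astype(int), counts.astype(int)))
    pd.DataFrame.from_dict(rows, orient='index').to_csv(out_csv)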
def __init__(self, config_path: Path, invertable, outdir, threads=None, noclobber=False):
    """
    Inverts a series of volumes. A yaml config file specifies the order of inverted transform parameters to use.
    This config file should be in the root of the directory containing the inverted tform dirs.

    Also need to input a directory containing the volumes/label maps etc. to invert. These need to be in directories
    named with the same name as the corresponding inverted tform file directories

    Parameters
    ----------
    config_path
        path to yaml config containing the order of the inverted directories to use. The directories containing
        the propagation transform files should be in the same directory
    invertable
        dir or path to the object to invert (raw image, mask, label map etc).
        If dir, invert all objects within the subdirectories; if path to a single object (eg. labelmap),
        invert that instead
    outdir
        where to store the inverted volumes
    threads: str/None
        number of threads to use. If None, use all available threads
    noclobber: bool
        if True, do not overwrite already inverted labels
    """
    self.noclobber = noclobber

    common.test_installation('transformix')

    self.config = cfg_load(config_path)

    self.invertables = invertable
    self.config_dir = config_path.parent  # The dir containing the inverted elx param files

    self.threads = threads
    self.out_dir = outdir
    common.mkdir_if_not_exists(self.out_dir)

    self.elx_param_prefix = ELX_PARAM_PREFIX
    self.PROPAGATION_TFORM_NAME = None  # Set in subclasses
    self.last_invert_dir = None  # I think this is used as a way to find volumes to do organ vol calculation on
def _get_reg_order(self, spec_root):
    """
    Get the registration order from the text file in the registrations folder, plus the label
    propagation order if the propagation config is present.
    """
    reg_order = []
    inv_order = []

    with open((spec_root / 'output' / 'registrations' / REG_DIR_ORDER_CFG), 'r') as fh:
        for line in fh:
            if line.strip():
                reg_order.append(line.strip())
    try:
        inv_order_cfg = spec_root / 'output' / 'inverted_transforms' / PROPAGATE_CONFIG
        c = cfg_load(inv_order_cfg)
        for stage in c['label_propagation_order']:
            inv_order.append(stage)
    except FileNotFoundError:
        inv_order = None

    return reg_order, inv_order
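# For reference, the propagation config read above is the yaml written by
# batch_invert_transform_parameters(). With illustrative stage ids it would look
# something like this (stage names here are assumptions, not fixed values):
#
#   label_propagation_order:
#   - deformable
#   - affine
#   - rigid
#   registration_directory: ../registrations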
def test_config_errors():
    """
    Read in the current config, which should work, then break it to check that validation catches the problem.
    """
    config_file = registration_root / 'registration_config.toml'
    config = cfg_load(config_file)

    # Staging = embryo_volume needs at least one similarity/affine stage to work,
    # so remove those stages to trigger a config error
    config['registration_stage_params'][:] = [x for x in config['registration_stage_params']
                                              if x['elastix_parameters']['Transform'] not in
                                              ['SimilarityTransform', 'AffineTransform']]

    cfg = validate_config.LamaConfig(config, config_file)
def run(config_path: Path,
        wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        target_dir: Path,
        treatment_dir: Path = None,
        interaction_dir: Path = None,
        lines_to_process: Union[List, None] = None):
    """
    The entry point to the stats pipeline.
    Read in the stats_config, and iterate over the stats analysis methods and the mutant lines

    Parameters
    ----------
    config_path
        The lama stats_config (in TOML format)
    wt_dir
        Root of the wild type data. Should contain mutant line subfolders
    mut_dir
        Root of the mutant data. Should contain mutant line subfolders
    out_dir
        The root output directory. Will be made if not existing
    target_dir
        Contains the population average, masks, label_maps and label info files.
        All volumes should have been padded to the same size before registration.
    treatment_dir
        Root of the treatment group data (two-way analyses only)
    interaction_dir
        Root of the interaction group data (two-way analyses only)
    lines_to_process
        list: optional mutant line ids to process only.
        None: process all lines
    """
    if not (wt_dir / 'output').is_dir():
        raise FileNotFoundError(f'{wt_dir / "output"} folder with registration results is not present')
    if not (mut_dir / 'output').is_dir():
        raise FileNotFoundError(f'{mut_dir / "output"} folder with registration results is not present')
    try:
        out_dir.mkdir(exist_ok=True)
    except FileNotFoundError:
        raise FileNotFoundError('Cannot create output folder')

    master_log_file = out_dir / f'{common.date_dhm()}_stats.log'
    logzero.logfile(str(master_log_file))
    logging.info(common.git_log())
    logging.info('### Started stats analysis ###')

    stats_config = cfg_load(config_path)

    mask = load_mask(target_dir, stats_config['mask'])
    label_info_file = target_dir / stats_config.get('label_info')  # TODO: what if it does not exist?
    label_map_file = target_dir / stats_config.get('label_map')
    label_map = common.LoadImage(label_map_file).array

    memmap = stats_config.get('memmap')
    if memmap:
        logging.info('Memory mapping input data')

    baseline_file = stats_config.get('baseline_ids')
    if baseline_file:
        baseline_file = config_path.parent / baseline_file

    mutant_file = stats_config.get('mutant_ids')
    if mutant_file:
        mutant_file = config_path.parent / mutant_file

    # Run each data class through the pipeline.
    for stats_type in stats_config['stats_types']:

        logzero.logfile(str(master_log_file))
        logging.info(f"---Doing {stats_type} analysis---")
        gc.collect()

        # Load the required stats object and data loader
        loader_class = DataLoader.factory(stats_type)

        loader = loader_class(wt_dir, mut_dir, mask, stats_config, label_info_file,
                              lines_to_process=lines_to_process,
                              baseline_file=baseline_file,
                              mutant_file=mutant_file,
                              memmap=memmap,
                              treatment_dir=treatment_dir,
                              interaction_dir=interaction_dir)

        # Only affects the organ volume loader
        if not stats_config.get('normalise_organ_vol_to_mask'):
            loader.norm_to_mask_volume_on = False

        if loader_class == JacobianDataLoader:
            if stats_config.get('use_log_jacobians') is False:
                loader.data_folder_name = 'jacobians'

        # Currently only the intensity stats get normalised
        loader.normaliser = Normaliser.factory(stats_config.get('normalise'), stats_type)  # TODO: move this into subclass

        logging.info("Starting iteration through lines")
        common.logMemoryUsageInfo()

        # Use a different iterator if doing a two-way analysis
        if stats_config['two_way']:
            line_iterator = loader.two_way_iterator()
        else:
            line_iterator = loader.line_iterator()
        line_input_data = None

        while True:
            try:
                line_input_data = next(line_iterator)
                logging.info(f"Data for line {line_input_data.line} loaded")
                common.logMemoryUsageInfo()

                line_id = line_input_data.line

                line_stats_out_dir = out_dir / line_id / stats_type
                line_stats_out_dir.mkdir(parents=True, exist_ok=True)
                line_log_file = line_stats_out_dir / f'{common.date_dhm()}_stats.log'
                logzero.logfile(str(line_log_file))

                logging.info(f"Processing line: {line_id}")

                stats_class = Stats.factory(stats_type)
                stats_obj = stats_class(line_input_data, stats_type,
                                        stats_config.get('use_staging', True),
                                        stats_config.get('two_way', False))

                stats_obj.stats_runner = linear_model.lm_r
                stats_obj.run_stats()
                logging.info('Statistical analysis finished.')
                common.logMemoryUsageInfo()

                logging.info('Writing results...')
                rw = ResultsWriter.factory(stats_type)
                writer = rw(stats_obj, mask, line_stats_out_dir, stats_type, label_map,
                            label_info_file, stats_config.get('two_way', False))
                logging.info('Finished writing results.')
                common.logMemoryUsageInfo()

                # if stats_type == 'organ_volumes':
                #     c_data = {spec: data['t'] for spec, data in stats_obj.specimen_results.items()}
                #     c_df = pd.DataFrame.from_dict(c_data)
                #     cluster_plots.tsne_on_raw_data(c_df, line_stats_out_dir)

                if stats_config.get('invert_stats'):
                    if writer.line_heatmap:  # Organ vols will not have this
                        # TODO: how do we sensibly get the path to the invert.yaml?
                        # Get the invert_configs for each specimen in the line
                        logging.info('Writing heatmaps...')
                        logging.info('Propagating the heatmaps back onto the input images')
                        line_heatmap = writer.line_heatmap
                        line_reg_dir = mut_dir / 'output' / line_id
                        invert_heatmaps(line_heatmap, line_stats_out_dir, line_reg_dir, line_input_data)
                        logging.info('Finished writing heatmaps.')

                logging.info(f"Finished processing line: {line_id} - All done")
                common.logMemoryUsageInfo()

            except StopIteration:
                if line_input_data is not None:
                    logging.info("Finished iterating through lines")
                    line_input_data.cleanup()
                    common.logMemoryUsageInfo()
                break
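# Usage sketch for the stats entry point above (all paths are illustrative):
if __name__ == '__main__':
    run(config_path=Path('stats_config.toml'),
        wt_dir=Path('wild_types'),   # must contain an 'output' folder from a registration run
        mut_dir=Path('mutants'),     # must contain an 'output' folder from a registration run
        out_dir=Path('stats_output'),
        target_dir=Path('target'))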
def __init__(self, config: Union[Path, Dict], cfg_path: Path = None, no_validate=False):
    """
    Parameters
    ----------
    config
        path to the lama config file, or a config dictionary
    cfg_path
        Used for testing. If we want to pass in a dict rather than a path we will also need the path of the
        project directory, which is normally the config file's parent directory

    Raises
    ------
    OSError or subclasses thereof if config file cannot be opened
    """
    if isinstance(config, dict):
        if cfg_path is None:
            raise ValueError("Please supply a project root path")
        self.config = config
        config_path = cfg_path
    elif isinstance(config, Path):
        self.config = common.cfg_load(config)
        config_path = config
    else:
        raise ValueError("config must be a Path or Dict")

    self.config_path = Path(config_path)

    # The variable names mapped to the actual names of output directories.
    # If the value is a string, it will be created in the output_dir.
    # If the value is a tuple, [0] is the folder name and the rest are parent folders
    self.output_path_names = OrderedDict({
        # output_dir must always be 'output' as some other modules depend upon this.
        # TODO: add a way to enforce this, as it can currently be overridden in the config
        'output_dir': 'output',
        'target_folder': 'target',
        'qc_dir': 'qc',
        'input_image_histograms': ('input_image_histograms', 'qc'),
        'metric_charts_dir': ('metric_charts', 'qc'),
        'registered_midslice_dir': ('registered_midslices', 'qc'),
        'inverted_label_overlay_dir': ('inverted_label_overlay', 'qc'),
        'cyan_red_dir': ('cyan_red_overlay', 'qc'),
        'average_folder': 'averages',
        'deformations': 'deformations',
        'jacobians': 'jacobians',
        'log_jacobians': 'log_jacobians',
        'jacmat': 'jacobian_matrices',
        'glcm_dir': 'glcms',
        'root_reg_dir': 'registrations',
        'inverted_transforms': 'inverted_transforms',
        'inverted_labels': 'inverted_labels',
        'inverted_stats_masks': 'inverted_stats_masks',
        'organ_vol_result_csv': common.ORGAN_VOLUME_CSV_FILE,
        'additional_seg_dir': 'additional_seg'
    })

    # Options in the config that map to files that can be present in the target folder
    self.target_names = (
        'fixed_mask',
        'stats_mask',
        'fixed_volume',
        'label_map',
        'label_info'
    )

    self.input_options = {
        # Config parameters to be validated (non-elastix related parameters)
        # parameter: ([options...], default)
        # Options can be types to check against, or functions that return True if the value is valid
        'global_elastix_params': ('dict', 'required'),
        'registration_stage_params': ('dict', 'required'),
        'no_qc': ('bool', False),
        'threads': ('int', 4),
        'filetype': ('func', self.validate_filetype),
        'voxel_size': ('float', 14.0),
        'generate_new_target_each_stage': ('bool', False),
        'skip_transform_inversion': ('bool', False),
        'pairwise_registration': ('bool', False),
        'generate_deformation_fields': ('dict', None),
        'staging': ('func', self.validate_staging),
        'data_type': (['uint8', 'int8', 'int16', 'uint16', 'float32'], 'uint8'),
        'glcm': ('bool', False),
        'config_version': ('float', 1.1),
        'stage_targets': (Path, False),
        'fix_folding': (bool, False),
        # 'inverse_transform_method': (['invert_transform', 'reverse_registration'], 'invert_transform')
        'label_propagation': (['invert_transform', 'reverse_registration'], 'reverse_registration'),
        'skip_forward_registration': (bool, False),
        'seg_plugin_dir': (Path, None),

        # The following options are used for saving disk space
        'write_deformation_vectors': (bool, False),
        'delete_inverted_transforms': (bool, False),
        'write_raw_jacobians': (bool, True),
        'write_log_jacobians': (bool, True),
    }

    # The paths to each stage output dir: stage_id: Path
    self.stage_dirs = OrderedDict()

    self.all_keys = list(self.output_path_names.keys()) + list(self.target_names) + list(self.input_options.keys())

    # options is where the final options (either default or from config) are stored.
    # Paths from config or default will have been resolved relative to the config directory
    self.options = {}

    self.config_dir = config_path.parent

    if no_validate:
        return

    # Check if there are any unknown options in the config in order to spot typos
    self.check_for_unknown_options()

    self.convert_image_pyramid()

    self.pairwise_check()

    self.check_paths()

    self.check_options()

    self.check_images()

    self.resolve_output_paths()

    self.check_stages()

    self.check_propagation_options()

    self.check_problematic_elx_params()
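# Usage sketch: LamaConfig accepts either a config file path, or a pre-parsed dict
# plus the project path it came from (the dict form is used in tests). Resolved
# options are then accessed with item lookup, e.g.:
#
#   cfg = LamaConfig(Path('registration_config.toml'))
#   cfg['threads'], cfg['inverted_transforms']
#
#   cfg = LamaConfig(config_dict, cfg_path=Path('project/config.toml'), no_validate=True)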
def lama_job_runner(config_path: Path,
                    root_directory: Path,
                    make_job_file: bool = False):
    """
    Parameters
    ----------
    config_path
        path to registration config file
    root_directory
        path to root directory. The folder names from job_file.dir will be appended to this path to resolve
        project directories
    make_job_file
        if true, just make the job_file that other instances can consume

    Notes
    -----
    This function uses a SoftFileLock for locking the job_file csv to prevent multiple instances of this code
    from processing the same line or specimen. A SoftFileLock works by creating a lock file, and the presence of
    this file prevents other instances from accessing it. We don't use FileLock (although it is more robust) as
    it's not supported on nfs file systems. The advantage of SoftFileLock is that you can create a lock file
    manually if you want to edit a job file while job_runner is running (make sure to delete it after editing).

    If this script terminates unexpectedly while it holds a lock on the file, the lock will not be released and
    the lock file remains. Therefore, before running this script, ensure no lock file from a previous run is
    hanging around.
    """
    if not config_path.is_file():
        raise FileNotFoundError(f"can't find config file {config_path}")

    root_directory = root_directory.resolve()

    job_file = root_directory / JOBFILE_NAME
    lock_file = job_file.with_suffix('.lock')
    lock = SoftFileLock(lock_file)

    HN = socket.gethostname()

    if make_job_file:

        # Delete any lock file and job_file that might be present from previous runs.
        if job_file.is_file():
            os.remove(job_file)

        if lock_file.is_file():
            os.remove(lock_file)

        try:
            with lock.acquire(timeout=1):
                logging.info('Making job list file')
                make_jobs_file(job_file, root_directory)
                logging.info('Job file created! You can now run job_runner from multiple machines')
                return

        except Timeout:
            print(f"Make sure the lock file {lock_file} is not present when running the first instance")
            sys.exit()

    config_name = config_path.name

    while True:

        try:
            with lock.acquire(timeout=60):
                # Hold the lock while reading the jobs and writing the status back, to ensure each job
                # is run once only.
                df_jobs = pd.read_csv(job_file, index_col=0)

                # Get an unfinished job
                jobs_to_do = df_jobs[df_jobs['status'] == 'to_run']

                if len(jobs_to_do) < 1:
                    logging.info("No more jobs left on jobs list")
                    break

                indx = jobs_to_do.index[0]

                vol = root_directory / (jobs_to_do.at[indx, 'job'])

                df_jobs.at[indx, 'status'] = 'running'
                df_jobs.at[indx, 'start_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                df_jobs.at[indx, 'host'] = socket.gethostname()

                df_jobs.to_csv(job_file)

                # Make a project directory for the specimen
                # vol.parent should be the line name
                # vol.stem is the specimen name minus the extension
                spec_root_dir = root_directory / 'output' / vol.parent.name / vol.stem
                spec_input_dir = spec_root_dir / 'inputs'
                spec_input_dir.mkdir(exist_ok=True, parents=True)
                spec_out_dir = spec_root_dir / 'output'
                spec_out_dir.mkdir(exist_ok=True, parents=True)
                shutil.copy(vol, spec_input_dir)

                # Copy the config into the project directory
                dest_config_path = spec_root_dir / config_name

                if dest_config_path.is_file():
                    os.remove(dest_config_path)

                shutil.copy(config_path, dest_config_path)

                # Rewrite the target_folder path now that we've moved the config
                c = cfg_load(dest_config_path)

                target_folder = config_path.parent / c.get('target_folder')
                # Can't seem to get this to work with pathlib
                target_folder_relpath = os.path.relpath(target_folder, str(dest_config_path.parent))
                c['target_folder'] = target_folder_relpath

                with open(dest_config_path, 'w') as fh:
                    fh.write(toml.dumps(c))

        except Timeout:
            sys.exit('Timed out ' + socket.gethostname())

        try:
            print(f'debug {HN}, {linenum()}')
            print(f'trying {vol.name}')
            run_lama.run(dest_config_path)

        except LamaConfigError as lce:
            status = 'config_error'
            logging.exception(f'There is a problem with the config\n{lce}')
            sys.exit()

        except Exception as e:
            if e.__class__.__name__ == 'KeyboardInterrupt':
                logging.info('terminating')
                sys.exit('Exiting')

            status = 'failed'
            logging.exception(e)

        else:
            status = 'complete'

        finally:
            with lock:
                df_jobs = pd.read_csv(job_file, index_col=0)
                df_jobs.at[indx, 'status'] = status
                df_jobs.at[indx, 'end_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                df_jobs.to_csv(job_file)

    print('Exiting job_runner')
    return True
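# Minimal sketch of the SoftFileLock pattern described in the docstring above:
# the lock is just a file on disk (which is why it works on nfs), and a stale
# lock file left by a killed run must be removed by hand.
from filelock import SoftFileLock, Timeout

def _locked_update_sketch(job_file: Path):
    lock = SoftFileLock(str(job_file) + '.lock')
    try:
        with lock.acquire(timeout=60):
            pass  # read, modify and rewrite the job csv here
    except Timeout:
        pass  # another instance holds the lock, or a stale lock file remains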
def batch_invert_transform_parameters(config: Union[Path, LamaConfig],
                                      clobber=True,
                                      new_log: bool = False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc.

    Parameters
    ----------
    config
        path to the original reg pipeline config file
    clobber
        if True, overwrite any inverted parameters present
    new_log
        whether to create a new log file. If called from another module, logging may happen there
    """
    common.test_installation('elastix')

    if isinstance(config, (Path, str)):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid).
    # Ignore images in non-relevant folders that may be present
    volume_names = [x.stem for x in common.get_file_paths(reg_dirs[0],
                                                          ignore_folders=[RESOLUTION_IMGS_DIR, IMG_PYRAMID_DIR])]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    for i, vol_id in enumerate(volume_names):

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir, ELX_TRANSFORM_NAME)
            parameter_file = common.getfile_startswith(reg_stage_dir, ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already present.
            if reg_stage_dir.name not in stages_to_invert['label_propagation_order']:
                stages_to_invert['label_propagation_order'].insert(0, reg_stage_dir.name)

            if clobber:
                common.mkdir_force(specimen_stage_inversion_dir)  # Overwrite any inversion files that exist for a single specimen

            # Each registration directory contains a metadata file, which contains the relative path to the fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir / common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir / reg_metadata['fixed_vol']).resolve()

            # Invert the transform parameters, with options for normal image inversion
            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': IMAGE_REPLACEMENTS,
                'label_replacements': LABEL_REPLACEMENTS,
                'image_transform_file': PROPAGATE_IMAGE_TRANSFORM,
                'label_transform_file': PROPAGATE_LABEL_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # By farming each inversion job out to a process pool we can speed things up a bit.
    # If we can get multithreaded inversion in elastix we can remove this python multiprocessing
    pool = Pool(8)
    try:
        pool.map(_invert_transform_parameters, jobs)

    except KeyboardInterrupt:
        print('terminating inversion')
        pool.terminate()
        pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(reg_dir)  # TODO: document why we need this

    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / PROPAGATE_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
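# The yaml written above is what the rest of the pipeline consumes. A sketch of how
# e.g. generate_organ_volumes() and secondary_segmentation() locate the final
# propagated labels from it:
def _final_propagation_stage_sketch(config: LamaConfig) -> str:
    c = cfg_load(config['inverted_transforms'] / PROPAGATE_CONFIG)
    # The order is the reverse of the registration order, so the last entry is the
    # first registration stage, which holds the fully propagated labels
    return c['label_propagation_order'][-1]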
def batch_invert_transform_parameters(config: Union[Path, LamaConfig],
                                      clobber=True,
                                      new_log: bool = False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc.

    Parameters
    ----------
    config
        path to the original reg pipeline config file
    clobber
        if True, overwrite any inverted parameters present
    new_log
        whether to create a new log file. If called from another module, logging may happen there
    """
    common.test_installation('elastix')

    if isinstance(config, Path):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid).
    # Ignore images in non-relevant folders that may be present
    volume_names = [x.stem for x in common.get_file_paths(reg_dirs[0], ignore_folder=IGNORE_FOLDER)]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    for i, vol_id in enumerate(volume_names):

        label_replacements = {
            'FinalBSplineInterpolationOrder': '0',
            'FixedInternalImagePixelType': 'short',
            'MovingInternalImagePixelType': 'short',
            'ResultImagePixelType': 'unsigned char',
            'WriteTransformParametersEachResolution': 'false',
            'WriteResultImageAfterEachResolution': 'false'
        }

        image_replacements = {
            'FinalBSplineInterpolationOrder': '3',
            'FixedInternalImagePixelType': 'float',
            'MovingInternalImagePixelType': 'float',
            'ResultImagePixelType': 'float',
            'WriteTransformParametersEachResolution': 'false',
            'WriteResultImageAfterEachResolution': 'false'
        }

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir, ELX_TRANSFORM_PREFIX)
            parameter_file = common.getfile_startswith(reg_stage_dir, ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already present.
            if reg_stage_dir.name not in stages_to_invert['inversion_order']:
                stages_to_invert['inversion_order'].insert(0, reg_stage_dir.name)

            if clobber:
                common.mkdir_force(specimen_stage_inversion_dir)  # Overwrite any inversion files that exist for a single specimen

            # Each registration directory contains a metadata file, which contains the relative path to the fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir / common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir / reg_metadata['fixed_vol']).resolve()

            # Invert the transform parameters, with options for normal image inversion
            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': image_replacements,
                'label_replacements': label_replacements,
                'image_transform_file': IMAGE_INVERTED_TRANSFORM,
                'label_transform_file': LABEL_INVERTED_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # Run the inversion jobs. Currently using only one thread, as it seems that elastix now uses
    # multiple threads on the inversions
    logging.info('inverting with {} threads: '.format(threads))
    pool = Pool(1)  # 17/09/18 If we can get multithreaded inversion in elastix 4.9 we can remove the python multithreading

    try:
        pool.map(_invert_transform_parameters, jobs)

    except KeyboardInterrupt:
        print('terminating inversion')
        pool.terminate()
        pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(reg_dir)  # TODO: document why we need this

    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / INVERT_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
def __init__(self, config_path: Path):
    """
    Parameters
    ----------
    config_path
        path to the lama config file

    Raises
    ------
    OSError or subclasses thereof if config file cannot be opened
    """
    self.config_path = config_path
    self.config = common.cfg_load(config_path)

    # The variable names mapped to the actual names of output directories.
    # If the value is a string, it will be created in the output_dir.
    # If the value is a tuple, [0] is the folder name and the rest are parent folders
    self.output_path_names = OrderedDict({
        # output_dir must always be 'output' as some other modules depend upon this.
        # TODO: add a way to enforce this, as it can currently be overridden in the config
        'output_dir': 'output',
        'target_folder': 'target',
        'qc_dir': 'qc',
        'input_image_histograms': ('input_image_histograms', 'qc'),
        'metric_charts_dir': ('metric_charts', 'qc'),
        'registered_midslice_dir': ('registered_midslices', 'qc'),
        'inverted_label_overlay_dir': ('inverted_label_overlay', 'qc'),
        'cyan_red_dir': ('cyan_red_overlay', 'qc'),
        'average_folder': 'averages',
        'deformations': 'deformations',
        'jacobians': 'jacobians',
        'log_jacobians': 'log_jacobians',
        'jacmat': 'jacobian_matrices',
        'glcm_dir': 'glcms',
        'root_reg_dir': 'registrations',
        'inverted_transforms': 'inverted_transforms',
        'inverted_labels': 'inverted_labels',
        'inverted_stats_masks': 'inverted_stats_masks',
        'organ_vol_result_csv': common.ORGAN_VOLUME_CSV_FILE
    })

    # Options in the config that map to files that can be present in the target folder
    self.target_names = (
        'fixed_mask',
        'stats_mask',
        'fixed_volume',
        'label_map',
        'label_names'
    )

    self.input_options = {
        # parameter: ([options...], default)
        # Options can be types or functions
        'global_elastix_params': ('dict', 'required'),
        'registration_stage_params': ('dict', 'required'),
        'no_qc': ('bool', False),
        'threads': ('int', 4),
        'filetype': ('func', self.validate_filetype),
        'voxel_size': ('float', 14.0),
        'generate_new_target_each_stage': ('bool', False),
        'skip_transform_inversion': ('bool', False),
        'pairwise_registration': ('bool', False),
        'generate_deformation_fields': ('dict', None),
        'skip_deformation_fields': ('bool', True),
        'staging': ('func', self.validate_staging),
        'data_type': (['uint8', 'int8', 'int16', 'uint16', 'float32'], 'uint8'),
        'glcm': ('bool', False),
        'config_version': ('float', 1.1)
    }

    # The paths to each stage output dir: stage_id: Path
    self.stage_dirs = OrderedDict()

    self.all_keys = list(self.output_path_names.keys()) + list(self.target_names) + list(self.input_options.keys())

    # options is where the final options (either default or from config) are stored.
    # Paths from config or default will have been resolved relative to the config directory
    self.options = {}

    self.config_dir = config_path.parent

    # Check if there are any unknown options in the config in order to spot typos
    self.check_for_unknown_options()

    self.convert_image_pyramid()

    self.pairwise_check()

    self.check_paths()

    self.check_options()

    # self.check_images()

    self.resolve_output_paths()

    self.check_stages()
def run_registration_schedule(config: LamaConfig, first_stage_only=False) -> Path:
    """
    Run the registrations specified in the config file

    Parameters
    ----------
    config: Parsed and validated lama config
    first_stage_only: If True, just do the initial rigid stage

    Returns
    -------
    The path to the final registered images
    """
    st = config['stage_targets']
    if st:
        with open(st, 'r') as stfh:
            stage_targets = cfg_load(stfh)['targets']
        if len(config['registration_stage_params']) != len(stage_targets):
            logging.error(f'Len stage targets: {len(stage_targets)}')
            logging.error(f'Len reg stages: {len(config["registration_stage_params"])}')
            raise LamaConfigError("len(stage_targets) != number of registration stages")

    # Create a folder to store mid-section coronal images to keep an eye on the registration process
    if not config['no_qc']:
        qc_metric_dir = config['metric_charts_dir']

    elastix_stage_parameters = generate_elx_parameters(config, do_pairwise=config['pairwise_registration'])

    regenerate_target = config['generate_new_target_each_stage']

    if regenerate_target and st:
        raise LamaConfigError('cannot have regenerate_target and stage_targets')

    if regenerate_target:
        logging.info('Creating new target each stage for population average creation')
    else:
        logging.info('Using same target for each stage')

    # Set the moving volume dir and the fixed image for the first stage
    moving_vols_dir = config['inputs']

    # Set the fixed volume up for the first stage. This will change each stage if doing a population average
    if st:
        fixed_vol = stage_targets[0]
    else:
        fixed_vol = config['fixed_volume']

    for i, reg_stage in enumerate(config['registration_stage_params']):

        tform_type = reg_stage['elastix_parameters']['Transform']
        euler_stage = True if tform_type == 'EulerTransform' else False
        # affine_similarity_stage = True if tform_type in ['AffineTransform', 'SimilarityTransform'] else False

        if config['pairwise_registration']:
            if not euler_stage:
                logging.info('doing pairwise registration')
                reg_method = PairwiseBasedRegistration
            else:
                reg_method = TargetBasedRegistration
                logging.info('using target-based registration for initial rigid stage of pairwise registrations')
        else:
            logging.info('using target-based registration')
            reg_method = TargetBasedRegistration

        # Make the stage output dir
        stage_id = reg_stage['stage_id']
        stage_dir = config.stage_dirs[stage_id]

        common.mkdir_force(stage_dir)

        logging.info("### Current registration step: {} ###".format(stage_id))

        # Make the elastix parameter file for this stage
        elxparam = elastix_stage_parameters[stage_id]
        elxparam_path = join(stage_dir, ELX_PARAM_PREFIX + stage_id + '.txt')

        with open(elxparam_path, 'w') as fh:
            if elxparam:  # Not sure why I put this here
                fh.write(elxparam)

        # if i < 2:  # TODO: shall we keep the fixed mask throughout? I think we should in the next release
        #     fixed_mask = config['fixed_mask']

        # If we are doing target-based phenotype detection, we can use the fixed mask for every stage
        if not config['generate_new_target_each_stage']:
            fixed_mask = config['fixed_mask']
        else:
            fixed_mask = None

        # Do the registrations
        registrator = reg_method(elxparam_path,
                                 moving_vols_dir,
                                 stage_dir,
                                 config['filetype'],
                                 config['threads'],
                                 fixed_mask)

        if (not config['pairwise_registration']) or (config['pairwise_registration'] and euler_stage):
            registrator.set_target(fixed_vol)

        if reg_stage['elastix_parameters']['Transform'] == 'BSplineTransform':
            if config['fix_folding']:
                logging.info(f'Folding correction for stage {stage_id} set')
            registrator.fix_folding = config['fix_folding']  # Currently only works for TargetBasedRegistration

        registrator.run()  # Do the registrations for a single stage

        # Make average from the stage outputs
        if regenerate_target:
            average_path = join(config['average_folder'], '{0}.{1}'.format(stage_id, config['filetype']))
            registrator.make_average(average_path)

        if not config['no_qc']:
            stage_metrics_dir = qc_metric_dir / stage_id
            common.mkdir_force(stage_metrics_dir)
            make_charts(stage_dir, stage_metrics_dir)

        # Set up the fixed and moving inputs for the next stage, if there is one
        if i + 1 < len(config['registration_stage_params']):
            if regenerate_target:
                fixed_vol = average_path  # The average from the previous step
            elif st:
                fixed_vol = stage_targets[i + 1]
            moving_vols_dir = stage_dir  # Set the output of the current stage to be the input of the next

        if first_stage_only:
            return stage_dir

    logging.info("### Registration finished ###")
    return stage_dir
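# Usage sketch: run the whole schedule, or only the initial rigid stage as a quick
# smoke test of the inputs (config here is a validated LamaConfig):
#
#   final_dir = run_registration_schedule(config)
#   rigid_dir = run_registration_schedule(config, first_stage_only=True)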
def secondary_segmentation(config: LamaConfig):
    """
    Use user-added scripts to segment/clean up organs

    Parameters
    ----------
    config
        The lama config
    """
    plugin_dir = config.config_dir / config['seg_plugin_dir']

    if not plugin_dir.is_dir():
        logging.error(f'Cannot find plugin directory: {plugin_dir}')
        return

    # Find the directories containing the segmentations.
    # Get the final propagation stage
    invert_config = config['inverted_transforms'] / PROPAGATE_CONFIG
    segmentation_dir = cfg_load(invert_config)['label_propagation_order'][-1]  # TODO: rename to segmentation stage
    inverted_label_dir = config['inverted_labels'] / segmentation_dir
    initial_segmentation_path = next(inverted_label_dir.glob('**/*.nrrd'))

    first_reg_dir = config['root_reg_dir'] / config['registration_stage_params'][0]['stage_id']  # usually rigid
    image_to_segment = next(first_reg_dir.glob('**/*.nrrd'))

    segmentations = []

    for plugin_src in [x for x in plugin_dir.iterdir()
                       if str(x).endswith('.py') and x.name != 'plugin_interface.py']:

        # Catch all exceptions as we don't want a plugin crashing the pipeline
        try:
            spec = importlib.util.spec_from_file_location(plugin_src.stem, str(plugin_src))
            plugin = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(plugin)
            new_segmentation = plugin.run(image_to_segment, initial_segmentation_path)

        except Exception as e:
            logging.error(f'Plugin {plugin_src} failed\n{e}')
        else:
            segmentations.append(new_segmentation)

    if not segmentations:
        logging.error(f'No segmentations returned by plugins in {plugin_dir}')
        return

    # Merge all the segmentations into a single label map. If there are any overlaps, the plugin
    # called last will have priority
    seg = None
    for s in segmentations:
        if seg is None:
            seg = s
            continue
        seg[s != 0] = s[s != 0]

    additional_seg_dir = config.mkdir('additional_seg_dir')
    write_array(seg, additional_seg_dir / f'{config.config_dir.name}_additional_seg.nrrd')  # TODO: include specimen name
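# Minimal sketch of a segmentation plugin as consumed above: any .py file in
# seg_plugin_dir (other than plugin_interface.py) must expose run(), taking the
# image to segment and the initial segmentation path, and returning a label array.
# The reader used here is an assumption; any nrrd loader would do.
def run(image_to_segment: Path, initial_segmentation: Path) -> np.ndarray:
    seg = common.LoadImage(initial_segmentation).array
    # ... refine `seg` using the image at `image_to_segment` here ...
    return seg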