def mktree(crumb: hansel.Crumb, values_map: CrumbArgsSequences) -> List[str]:
    """ Create the tree of folders described by `crumb`, once for each set of
    argument values in `values_map`.

    Every item of `values_map` is validated and turned into a filled crumb
    first; the folders are only touched after all the items have passed
    validation, so an invalid item does not leave a half-created tree.

    Parameters
    ----------
    crumb: Crumb
        The crumb path whose arguments will be replaced.

    values_map: Sequence[Sequence[2-Tuple[str, str]]] or Sequence[Dict[str, str]]
        One item per path to create, holding the values to substitute for
        the crumb arguments. Do not leave dependent arguments alone.
        If None, `crumb` itself is touched.

        Example: [[('subject_id', 'pat_0001'), ('session_id', 'session_1')],
                  [('subject_id', 'pat_0002'), ('session_id', 'session_1')],
                  ....
                 ]

        Example: [{'subject_id': 'pat_0001', 'session_id': 'session_1'},
                  {'subject_id': 'pat_0002', 'session_id': 'session_1'},
                  ....
                 ]

    Returns
    -------
    paths: list
        `[crumb.touch()]` when `values_map` is None. Otherwise the objects
        returned by `crumb.replace` for each item, after `touch` has been
        called on every one of them.

    Raises
    ------
    TypeError
        If `values_map` is not None and is neither a list nor a dict.

    ValueError
        If an item uses an argument name that is not in `crumb.all_args()`.

    KeyError
        If `crumb._args_open_parents` reports remaining arguments for the
        names given in an item.
    """
    if values_map is None:
        return [crumb.touch()]

    if not isinstance(values_map, (list, dict)):
        raise TypeError(
            "Expected keys in `values_map` to be a Sequence, got {}.".format(
                type(values_map)))

    # First pass: validate each item and build its crumb, touching nothing yet.
    filled = []
    for position, item in enumerate(values_map):
        arg_values = item if isinstance(item, Mapping) else dict(item)

        unknown_names = set(arg_values.keys()) - set(crumb.all_args())
        if unknown_names:
            raise ValueError(
                "Expected keys in `values_map` item to be a subset of {}, got {}."
                .format(crumb.all_args(), arg_values.keys()))

        missing_parents = crumb._args_open_parents(list(arg_values.keys()))
        if missing_parents:
            raise KeyError(
                "Expected `values_map` item to not leave crumbs alone,"
                " you forgot to add: {} in item {}".format(missing_parents,
                                                           position))

        filled.append(crumb.replace(**arg_values))

    # Second pass: every item is valid, so the folders can be created.
    for new_crumb in filled:
        new_crumb.touch()

    return filled
def pandas_fill_crumbs(
        df: 'pandas.DataFrame',
        crumb: hansel.Crumb,
        names_map: CrumbArgsSequences = None) -> Iterator[hansel.Crumb]:
    """ Lazily yield copies of `crumb` filled with values taken from `df`.

    The columns of `df` are matched to the arguments of `crumb` either by
    name or through the relation given in `names_map`. Nothing is computed
    until the returned generator is iterated.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb: hansel.Crumb

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary.
        Example: {'Subject ID': 'subject_id'}
        If None will make a dictionary from the open crumbs arguments, e.g.,
        {'subject_id': 'subject_id'}.

        The relation is handed to `_pandas_rename_cols` and its values are
        passed as `arg_names` to `df_to_valuesmap`.
        You may need to rename the columns of `df` before using this.

    Returns
    -------
    crumbs: generator of crumbs
        One `crumb.replace(...)` result per entry produced by
        `df_to_valuesmap`.
    """
    if names_map is None:
        # Default relation: each open argument maps to the column of the same name.
        column_to_arg = {name: name for name in crumb.open_args()}
    elif isinstance(names_map, dict):
        column_to_arg = names_map
    else:
        column_to_arg = dict(names_map)

    renamed_df = df.pipe(_pandas_rename_cols, column_to_arg)
    rows = renamed_df.pipe(
        df_to_valuesmap,
        list(crumb.all_args()),
        arg_names=list(column_to_arg.values()),
    )

    for arg_values in rows:
        yield crumb.replace(**dict(arg_values))
def dcm2nii(ctx, input_crumb_path, output_dir, regex='fnmatch', ncpus=3):
    """ Convert all DICOM series within `input_crumb_path` into NifTI files
    in `output_dir`.

    For every value of the second-to-last crumb argument (the folder that
    holds one DICOM series) one call to
    `boyle.dicom.convert.convert_dcm2nii` is made, either sequentially or
    through a pool of worker processes.

    NOTE(review): earlier documentation stated that only the NifTI files
    reoriented by MRICron's dcm2nii are kept and that recognized modalities
    are renamed following `config.ACQ_PATTERNS`. None of that logic is in
    this function; confirm against `convert_dcm2nii`.

    Parameters
    ----------
    ctx:
        Not used by this function.
        NOTE(review): presumably the task-runner context; confirm.

    input_crumb_path: str
        A crumb path str indicating the whole path until the DICOM files.
        Example: '/home/hansel/data/{group}/{subj_id}/{session_id}/{acquisition}/{dcm_file}'

        The crumb argument just before the last one will be used as folder
        container reference for the DICOM series, so the path needs at
        least two crumb arguments.

    output_dir: str
        The root folder path where to save the tree of nifti files.
        It is created if it does not exist.
        Example: '/home/hansel/nifti'

        The output tree mirrors the crumb arguments of `input_crumb_path`
        except the last two: the value of the series folder argument becomes
        the file name (plus '.nii.gz'). For the example above the structure
        would be:
        '/home/hansel/nifti/{group}/{subj_id}/{session_id}/{acquisition}.nii.gz'

    regex: str
        The regular expression syntax you may want to set in the Crumbs.
        See hansel.Crumb documentation for this.

    ncpus: int
        Number of processes launched in parallel for the conversion.
        With a value of 1 or less the conversions run sequentially in the
        current process.

    Raises
    ------
    ValueError
        If `input_crumb_path` has no crumb arguments, or fewer than two.
    """
    from boyle.dicom.convert import convert_dcm2nii

    input_path = os.path.expanduser(input_crumb_path)
    output_dir = os.path.expanduser(output_dir)

    if not os.path.exists(output_dir):
        log.info('Creating output folder {}.'.format(output_dir))
        os.makedirs(output_dir)
    else:
        log.info('Output folder {} already exists, this will overwrite/merge '
                 'whatever is inside.'.format(output_dir))

    input_dir = Crumb(input_path, regex=regex, ignore_list=['.*'])

    if not input_dir.has_crumbs():
        raise ValueError(
            'I am almost sure that this cannot work if you do not '
            'use crumb arguments in the input path, got {}.'.format(input_dir))

    in_arg_names = tuple(input_dir.all_args())
    if len(in_arg_names) < 2:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError(
            'Expected at least two crumb arguments in the input path (the '
            'DICOM series folder and the DICOM file), got {}.'.format(input_dir))

    # The second-to-last argument identifies the folder of each DICOM series.
    acq_folder_arg = in_arg_names[-2]

    # The output tree uses every input argument except the last one (the file).
    out_arg_names = ['{' + arg + '}' for arg in in_arg_names[:-1]]
    output_crumb = Crumb(os.path.join(output_dir, *out_arg_names),
                         regex=regex,
                         ignore_list=['.*'])

    # Collect the (source folder, destination folder, destination file) jobs.
    src_dst = []
    for acq in input_dir.ls(acq_folder_arg, make_crumbs=True):
        out_args = acq.arg_values.copy()
        acq_out = output_crumb.replace(**out_args)

        out_dir = os.path.dirname(acq_out.path)
        out_file = os.path.basename(acq_out.path) + '.nii.gz'
        os.makedirs(out_dir, exist_ok=True)

        src_dst.append((acq.split()[0], out_dir, out_file))

    if ncpus > 1:
        import multiprocessing as mp

        # The context manager terminates the workers on exit, which is safe
        # because every result is collected before leaving the block, and
        # it also cleans the pool up if one of the conversions raises.
        with mp.Pool(processes=ncpus) as pool:
            pending = [
                pool.apply_async(convert_dcm2nii, args=job) for job in src_dst
            ]
            for result in pending:
                result.get()
    else:
        for src_dir, dst_dir, dst_file in src_dst:
            convert_dcm2nii(src_dir, dst_dir, dst_file)
def intersection(crumb1: hansel.Crumb,
                 crumb2: hansel.Crumb,
                 on: Iterator[str] = None) -> List[str]:
    """ Return an 'inner join' of both given Crumbs, i.e., the entries of
    their joint value maps that are common to both crumbs.

    If `on` is None, will use all the common argument names of both crumbs.
    Otherwise will use only the elements of `on`. All its items must be in
    both crumbs.

    Parameters
    ----------
    crumb1: hansel.Crumb

    crumb2: hansel.Crumb

    on: str or list of str
        Crumb argument names common to both input crumbs.
        A single str is treated as a one-item list.

    Returns
    -------
    inner_join: list
        The sorted entries found in both
        `joint_value_map(crumb1, ...)` and `joint_value_map(crumb2, ...)`.
        NOTE(review): the shape of each entry is decided by
        `joint_value_map`, which is not visible here.

    Raises
    ------
    KeyError:
        If no matching argument names are found between both crumbs
        (limited by `on` when given).

    ValueError:
        NOTE(review): presumably raised by `_get_matching_items` when an
        element of `on` does not exist in either crumb; confirm there.

    Notes
    -----
    Use with care, ideally the argument matches should be in the same order
    in both crumbs.

    Both crumbs must have at least one matching identifier argument and one
    of those must be the one in `on`.
    """
    restrict_to = [on] if isinstance(on, str) else on

    shared_args = list(
        _get_matching_items(
            list(crumb1.all_args()),
            list(crumb2.all_args()),
            items=restrict_to,
        )
    )

    if not shared_args:
        message = "Could not find matching arguments between {} and {} limited by {}."
        raise KeyError(
            message.format(list(crumb1.all_args()),
                           list(crumb2.all_args()),
                           restrict_to))

    first_values = joint_value_map(crumb1, shared_args, check_exists=True)
    second_values = joint_value_map(crumb2, shared_args, check_exists=True)

    common = set(first_values).intersection(set(second_values))
    return sorted(common)