def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to add the PET meta data in the Excel file to the json-file

    :param session: The full-path name of the subject/session source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `ses-` folder
    :return:        Nothing
    """

    # Get started and see what data format we have
    plugin     = {'petxls2bids': bidsmap['Options']['plugins']['petxls2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    n = 0
    for file in sorted(session.iterdir()):
        if is_sourcefile(file):

            # Check if there is only one Excel file and one sidecar file (as expected in PET)
            n += 1
            if n > 1:
                LOGGER.error(f"Found ambiguous PET meta data file: {file}")
                return

            # Load the Excel data. TODO: Discuss this with Anthony and Cyril
            metadata = pd.read_excel(file)

            # Load the json sidecar data (there should be only one)
            jsonfile = sorted((bidsses/'pet').rglob('*.json'))
            if len(jsonfile) > 1:
                LOGGER.error(f"Found ambiguous PET sidecar files: {jsonfile}")
                return
            with jsonfile[0].open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add the meta-data. TODO: implement this once we know how `metadata` is organised
            for key in metadata:
                jsondata[key] = metadata[key]

            # Save the meta-data to the json sidecar file
            with jsonfile[0].open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
def scanpersonals(bidsmap: dict, session: Path, personals: dict) -> bool:
    """
    Extracts personals (e.g. Age, Sex) from the (first) source header of the session

    :param bidsmap:   The study bidsmap with the mapping heuristics
    :param session:   The full-path name of the subject/session source file/folder
    :param personals: The dictionary with the personal information
    :return:          True if successful
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    datasource = bids.get_datasource(session, bidsmap['Options']['plugins'])
    dataformat = datasource.dataformat
    if not datasource.dataformat:
        LOGGER.info(f"No supported datasources found in '{session}'")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat in ('DICOM', 'Twix'):
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes('PatientAge')       # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY

    elif dataformat == 'Pfile':
        sex = datasource.attributes('rhe_patsex')
        if   sex == '0': personals['sex'] = 'O'
        elif sex == '1': personals['sex'] = 'M'
        elif sex == '2': personals['sex'] = 'F'
        age = dateutil.parser.parse(datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'

    else:
        return False

    if   age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'): age = float(age.rstrip('Y'))
    if age:
        if bidsmap['Options']['plugins']['dcm2niix2bids'].get('anon', 'y') in ('y', 'yes'):
            age = int(float(age))
        personals['age'] = str(age)

    return True
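# Illustration only (a hedged sketch, not part of the plugin API; the helper name is hypothetical):
# the DICOM-style age parsing used above, and repeated in the bidscoiner plugins below, pulled into
# a standalone function to show the nnnD/nnnW/nnnM/nnnY conversion in isolation.
def _age_in_years(age: str) -> float:
    """Convert a DICOM PatientAge string (nnnD, nnnW, nnnM or nnnY) to (fractional) years."""
    if age.endswith('D'):
        return float(age.rstrip('D')) / 365.2524
    if age.endswith('W'):
        return float(age.rstrip('W')) / 52.1775
    if age.endswith('M'):
        return float(age.rstrip('M')) / 12
    if age.endswith('Y'):
        return float(age.rstrip('Y'))
    return 0.0

# e.g. _age_in_years('025Y') -> 25.0 and _age_in_years('003M') -> 0.25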
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin     = {'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(1):      # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks
        if not sourcefile.name or (not template[dataformat] and not bidsmap_old[dataformat]):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}")

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}")
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    :param session: The full-path name of the subject/session source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started and see what dataformat we have
    options    = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources      = []
    if dataformat == 'DICOM':
        sources      = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources      = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder/'derivatives'/manufacturer.replace(' ','')/subid/sesid/datasource.datatype
        else:
            outfolder = bidsses/datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder/bidsname).with_suffix('.json')]     # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder/bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval', '.bvec'):
                (outfolder/bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:                  # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder/bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command   = options['command'],
                args      = options.get('args', ''),
                filename  = bidsname,
                outfolder = outfolder,
                source    = source)
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")):
                    continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)")

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):      # e.g. *_Crop_1.nii.gz
                    ext         = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}")
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC', '_fieldmaphz')
            dcm2niixfiles     = sorted(set([dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")]))
            if not jsonfiles[0].is_file() and dcm2niixfiles:        # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext         = ''.join(dcm2niixfile.suffixes)
                postfixes   = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name                     # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:                           # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e', '')    # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr.lstrip('0'))  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(f"Unexpected postfix '{postfix}' found in {dcm2niixfile}")
                            newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)                # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in ('ph', 'real', 'imaginary'):                 # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:    # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):                              # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'")
                        newbidsname = newbidsname.replace('_magnitude1a',    '_magnitude2')     # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')         # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_e1',  '_magnitude1')     # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace('_magnitude1_e2',  '_magnitude2')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e1',  '_magnitude1')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e2',  '_magnitude2')     # Case 2
                        if len(dcm2niixfiles) in (2, 3):                                        # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace('_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace('_phasediff_e1',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_e2',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_ph',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_magnitude1_ph',  '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude2_ph',  '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase1_e1',      '_phase1')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_e2',      '_phase2')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e1',      '_phase1')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e2',      '_phase2')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_ph',      '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase2_ph',      '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude_e1',   '_magnitude')      # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace('_fieldmap_e1', '_magnitude')     # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_e1',    '_fieldmap')       # Case 3
                        newbidsname = newbidsname.replace('_magnitude_ph',   '_fieldmap')       # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_ph',    '_fieldmap')       # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scans.tsv file")

                # Save the nifti file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder/newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(f"Unexpected file conversion result: {oldjsonfile} not found")
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder/bidsname, options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data
            for metakey, metaval in metadata.items():
                if jsondata.get(metakey) == metaval:
                    LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))
                    except (ValueError, SyntaxError):
                        pass
                    LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):     jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'): jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [file for file in jsonfile.parent.glob(jsonfile.stem + '.*') if file.suffix in ('.nii', '.gz')]    # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.exception(f"No data-file found with {jsonfile} when updating {scans_tsv}")
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and not run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(year=1925, month=1, day=1)      # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:
                    LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes('PatientAge')       # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if   age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
        elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
        elif age.endswith('M'): age = float(age.rstrip('M')) / 12
        elif age.endswith('Y'): age = float(age.rstrip('Y'))
        if age:
            if options.get('anon', 'y') in ('y', 'yes'):
                age = int(float(age))
            personals['age'] = str(age)
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder/'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return      # Only take data from the first session -> BIDS specification
    for key in personals:       # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
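# Hedged illustration of the dcm2niix command assembled above (paths, filenames and option values
# are made up for this example, not taken from the repository): with e.g. options['command']='dcm2niix'
# and options['args']='-b y -z y', the format string expands to something like
#
#   dcm2niix -b y -z y -f "sub-001_ses-01_T1w" -o "/bids/sub-001/ses-01/anat" "/raw/sub-001/ses-01/003-t1w"
#
# i.e. the matched run determines the output filename (-f) while the source run folder is passed last.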
def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list = (), force: bool = False, participants: bool = False, bidsmapfile: str = 'bidsmap.yaml') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param subjects:     List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub-prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:        If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants: If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:  The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :return:             Nothing
    """

    # Input checking & defaults
    rawfolder   = Path(rawfolder).resolve()
    bidsfolder  = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bidscoin.setup_logging(bidsfolder/'code'/'bidscoin'/'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START BIDScoiner {localversion}: BIDS {bidscoin.bidsversion()} ------------")
    LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force} participants={participants} bidsmap={bidsmapfile}")

    # Create a code/bidscoin subfolder
    (bidsfolder/'code'/'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder/'dataset_description.json'
    generatedby  = [{"Name": "BIDScoin", "Version": localversion, "CodeURL": "https://github.com/Donders-Institute/bidscoin"}]
    if not dataset_file.is_file():
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        dataset_description = {
            "Name":                 "REQUIRED. Name of the dataset",
            "GeneratedBy":          generatedby,
            "BIDSVersion":          str(bidscoin.bidsversion()),
            "DatasetType":          "raw",
            "License":              "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors":              ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":     "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge":     "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":              ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals":      ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks":   ["OPTIONAL. List of references to publication that contain information on the dataset, or links", "https://github.com/Donders-Institute/bidscoin"],
            "DatasetDOI":           "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"}
    else:
        with dataset_file.open('r') as fid:
            dataset_description = json.load(fid)
        if 'BIDScoin' not in [generatedby_['Name'] for generatedby_ in dataset_description.get('GeneratedBy', [])]:
            LOGGER.info(f"Adding {generatedby} to {dataset_file}")
            dataset_description['GeneratedBy'] = dataset_description.get('GeneratedBy', []) + generatedby
    with dataset_file.open('w') as fid:
        json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder/'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        readme_file.write_text(f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
                               f"The raw BIDS data was created using BIDScoin {localversion}\n"
                               f"All provenance information and settings can be found in ./code/bidscoin\n"
                               f"For more information see: https://github.com/Donders-Institute/bidscoin\n")

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _  = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin')
    dataformats = [dataformat for dataformat in bidsmap if dataformat and dataformat not in ('Options', 'PlugIns')]     # Handle legacy bidsmaps (-> 'PlugIns')
    if not bidsmap:
        LOGGER.error(f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and/or use the correct bidsfolder")
        return

    # Load the data conversion plugins
    plugins = [bidscoin.import_plugin(plugin, ('bidscoiner_plugin',)) for plugin, options in bidsmap['Options']['plugins'].items()]
    plugins = [plugin for plugin in plugins if plugin]      # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidscoiner_plugin` function, nothing to do")
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Append options to the .bidsignore file
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    bidsignore_file  = bidsfolder/'.bidsignore'
    if bidsignore_items:
        LOGGER.info(f"Writing {bidsignore_items} entries to {bidsignore_file}")
        if bidsignore_file.is_file():
            bidsignore_items += bidsignore_file.read_text().splitlines()
        with bidsignore_file.open('w') as bidsignore:
            for item in set(bidsignore_items):
                bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder/'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'

    # Get the list of subjects
    subprefix = bidsmap['Options']['bidscoin']['subprefix'].replace('*', '')
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix'].replace('*', '')
    if not subjects:
        subjects = bidscoin.lsdirs(rawfolder, (subprefix if subprefix != '*' else '') + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [rawfolder/(subprefix + re.sub(f"^{subprefix}", '', subject)) for subject in subjects]   # Make sure there is a sub-prefix

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
            if participants and subject.name in list(participants_table.index):
                LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
                continue
            if not subject.is_dir():
                LOGGER.warning(f"The '{subject}' subject folder does not exist")
                continue

            sessions = bidscoin.lsdirs(subject, (sesprefix if sesprefix != '*' else '') + '*')
            if not sessions or (subject/'DICOMDIR').is_file():
                sessions = [subject]
            for session in sessions:

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Check if we should skip the session-folder
                    datasource = bids.get_datasource(sesfolder, bidsmap['Options']['plugins'])
                    if not datasource.dataformat:
                        LOGGER.info(f"No coinable datasources found in '{sesfolder}'")
                        continue
                    subid        = bidsmap[datasource.dataformat]['subject']
                    sesid        = bidsmap[datasource.dataformat]['session']
                    subid, sesid = datasource.subid_sesid(subid, sesid if sesid else '')
                    bidssession  = bidsfolder/subid/sesid       # TODO: Support DICOMDIR with multiple subjects (as in PYDICOMDIR)
                    if not force and bidssession.is_dir():
                        datatypes = []
                        for dataformat in dataformats:
                            for datatype in bidscoin.lsdirs(bidssession):                           # See what datatypes we already have in the bids session-folder
                                if datatype.iterdir() and bidsmap[dataformat].get(datatype.name):   # See if we are going to add data for this datatype
                                    datatypes.append(datatype.name)
                        if datatypes:
                            LOGGER.info(f"Skipping processed session: {bidssession} already has {datatypes} data (you can carefully use the -f option to overrule)")
                            continue

                    LOGGER.info(f"Coining datasources in: {sesfolder}")
                    if bidssession.is_dir():
                        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidssession} was cleaned-up from old data before (re)running the bidscoiner")
                    bidssession.mkdir(parents=True, exist_ok=True)

                    # Run the bidscoiner plugins
                    for module in plugins:
                        LOGGER.info(f"Executing plugin: {Path(module.__file__).name}")
                        module.bidscoiner_plugin(sesfolder, bidsmap, bidssession)

                    # Add the special fieldmap metadata (IntendedFor, B0FieldIdentifier, TE, etc)
                    addmetadata(bidssession, subid, sesid)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Re-read the participants_table and store the collected personals in the json sidecar-file
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    participants_json = participants_tsv.with_suffix('.json')
    participants_dict = {}
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    if not participants_dict.get('participant_id'):
        participants_dict['participant_id'] = {'Description': 'Unique participant identifier'}
    if not participants_dict.get('session_id') and 'session_id' in participants_table.columns:
        participants_dict['session_id'] = {'Description': 'Session identifier'}
    newkey = False
    for col in participants_table.columns:
        if col not in participants_dict:
            newkey = True
            participants_dict[col] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                          Description = 'Description of the column',
                                          Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                          Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')

    # Write the collected data to the participant files
    if newkey:
        LOGGER.info(f"Writing subject meta data to: {participants_json}")
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
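# Hedged usage sketch (the import path and folder names are illustrative assumptions, not taken from
# this file): the bidscoiner() entry point above can also be called directly from Python, mirroring
# the command-line arguments echoed in the ">>> bidscoiner ..." log message.
#
#   from bidscoin.bidscoiner import bidscoiner
#   bidscoiner(rawfolder='/project/sourcedata', bidsfolder='/project/bids',
#              subjects=['sub-001'], force=False, participants=False, bidsmapfile='bidsmap.yaml')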
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around spec2nii converts the MRS data in the session folder and saves it in the
    bidsfolder. Each saved datafile should be accompanied with a json sidecar file. The bidsmap options for
    this plugin can be found in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    :param session: The full-path name of the subject/session raw data source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started and see what dataformat we have
    options     = bidsmap['Options']['plugins']['spec2nii2bids']
    datasource  = bids.get_datasource(session, {'spec2nii2bids': options})
    dataformat  = datasource.dataformat
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Loop over all MRS source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'spec2nii2bids': options})
        run, index = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if index is None:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the MRS output data in {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses/datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfile = (outfolder/bidsname).with_suffix('.json')

        # Check if file already exists (-> e.g. when a static runindex is used)
        if jsonfile.is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', '.tsv.gz'):
                (outfolder/bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Run spec2nii to convert the source-files in the run folder to nifti's in the BIDS-folder
        arg  = ''
        args = options.get('args', OPTIONS['args'])
        if args is None:
            args = ''
        if dataformat == 'SPAR':
            dformat = 'philips'
            arg     = f'"{sourcefile.with_suffix(".SDAT")}"'
        elif dataformat == 'Twix':
            dformat = 'twix'
            arg     = '-e image'
        elif dataformat == 'Pfile':
            dformat = 'ge'
        else:
            LOGGER.exception(f"Unsupported dataformat: {dataformat}")
        command = options.get("command", "spec2nii")
        if not bidscoin.run_command(f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'):
            if not list(outfolder.glob(f"{bidsname}.nii*")):
                continue

        # Load and adapt the newly produced json sidecar-file (NB: assumes every nifti-file comes with a json-file)
        with jsonfile.open('r') as json_fid:
            jsondata = json.load(json_fid)

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder/bidsname, options.get('meta', []))
        for metakey, metaval in metadata.items():
            if jsondata.get(metakey) == metaval:
                LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
            jsondata[metakey] = metaval

        # Add all the meta data to the json-file
        for metakey, metaval in run['meta'].items():
            metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
            try:
                metaval = ast.literal_eval(str(metaval))
            except (ValueError, SyntaxError):
                pass
            LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Save the meta data to disk
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
        if datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and not run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            acq_time = ''
            if dataformat == 'SPAR':
                acq_time = datasource.attributes('scan_date')
            elif dataformat == 'Twix':
                acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
            elif dataformat == 'Pfile':
                acq_time = f"{datasource.attributes('rhr_rh_scan_date')}T{datasource.attributes('rhr_rh_scan_time')}"
            if not acq_time or acq_time == 'T':
                acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
            try:
                acq_time = dateutil.parser.parse(acq_time)
                if options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
                    acq_time = acq_time.replace(year=1925, month=1, day=1)      # Privacy protection (see BIDS specification)
                acq_time = acq_time.isoformat()
            except Exception as jsonerror:
                LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                acq_time = 'n/a'
            scans_table.loc[jsonfile.with_suffix('.nii.gz').relative_to(bidsses).as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    age = ''
    if dataformat == 'Twix':
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes('PatientAge')       # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    elif dataformat == 'Pfile':
        sex = datasource.attributes('rhe_patsex')
        if   sex == '0': personals['sex'] = 'O'
        elif sex == '1': personals['sex'] = 'M'
        elif sex == '2': personals['sex'] = 'F'
        age = dateutil.parser.parse(datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'
    if   age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'): age = float(age.rstrip('Y'))
    if age and options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
        age = int(float(age))
    personals['age'] = str(age)

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder/'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return      # Only take data from the first session -> BIDS specification
    for key in personals:       # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
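# Hedged illustration of the spec2nii call assembled above (file and folder names are made up, not
# taken from the repository): for a Philips SPAR/SDAT pair the f-string resolves to something like
#
#   spec2nii philips -j -f "sub-001_ses-01_svs" -o "/bids/sub-001/ses-01/mrs" "/raw/press.SDAT" "/raw/press.SPAR"
#
# where the extra SDAT argument comes from `arg` and the SPAR file is the matched sourcefile.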
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around phys2bids converts the physio data in the session folder and saves it in the
    bidsfolder. Each saved datafile should be accompanied with a json sidecar file. The bidsmap options for
    this plugin can be found in:

    bidsmap_new['Options']['plugins']['phys2bidscoin']

    See also the dcm2niix2bids plugin for reference implementation

    :param session: The full-path name of the subject/session raw data source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started
    plugin      = {'phys2bidscoin': bidsmap['Options']['plugins']['phys2bidscoin']}
    datasource  = bids.get_datasource(session, plugin)
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Loop over all source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the physiological output data in {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Get an ordered list of the func runs from the scans.tsv file (which should have a standardized datetime format)
        scans_tsv = bidsses/f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
        if scans_tsv.is_file():
            scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
            scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
        else:
            LOGGER.error(f"Could not read the TR's for phys2bids due to a missing '{scans_tsv}' file")
            continue
        funcscans = []
        for index, row in scans_table.iterrows():
            if index.startswith('func/'):
                funcscans.append(index)

        # Then read the TR's from the associated func sidecar files
        tr = []
        for funcscan in funcscans:
            with (bidsses/funcscan).with_suffix('.json').open('r') as json_fid:
                jsondata = json.load(json_fid)
            tr.append(jsondata['RepetitionTime'])

        # Create a heuristic function for phys2bids
        heur_str = ('def heur(physinfo, run=""):\n'
                    '    info = {}\n'
                    f'    if physinfo == "{sourcefile.name}":')
        for key, val in run['bids'].items():
            heur_str = (f'{heur_str}'
                        f'\n        info["{key}"] = "{val}"')
        heur_str = f'{heur_str}\n    return info'

        # Write heuristic function as file in temporary folder
        heur_file = Path(tempfile.mkdtemp())/f'heuristic_sub-{subid}_ses-{sesid}.py'
        heur_file.write_text(heur_str)

        # Run phys2bids
        physiofiles = phys2bids(filename                = str(sourcefile),
                                outdir                  = str(bidsfolder),
                                heur_file               = str(heur_file),
                                sub                     = subid,
                                ses                     = sesid,
                                chtrig                  = int(run['meta'].get('TriggerChannel', 0)),
                                num_timepoints_expected = run['meta'].get('ExpectedTimepoints', None),
                                tr                      = tr,
                                pad                     = run['meta'].get('Pad', 9),
                                ch_name                 = run['meta'].get('ChannelNames', []),
                                yml                     = '',
                                debug                   = True,
                                quiet                   = False)

        # Add user-specified meta-data to the newly produced json files (NB: assumes every physio-file comes with a json-file)
        for physiofile in physiofiles:
            jsonfile = Path(physiofile).with_suffix('.json')
            if not jsonfile.is_file():
                LOGGER.error(f"Could not find the expected json sidecar-file: '{jsonfile}'")
                continue
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)
            for metakey, metaval in run['meta'].items():
                metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                try:
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
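# For illustration (hedged; the physio file name and bids entities are hypothetical examples): the
# heuristic file generated from heur_str and written above would contain code along these lines:
#
#   def heur(physinfo, run=""):
#       info = {}
#       if physinfo == "physio_run-01.log":
#           info["task"] = "rest"
#           info["suffix"] = "physio"
#       return info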
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the logic to map the Nibabel header fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin     = {'nibabel2bids': bidsmap_new['Options']['plugins']['nibabel2bids']}
    datasource = bids.get_datasource(session, plugin, recurse=2)
    if not datasource.dataformat:
        return
    if not (template[datasource.dataformat] or bidsmap_old[datasource.dataformat]):
        LOGGER.error(f"No {datasource.dataformat} source information found in the bidsmap and template")
        return

    # Collect the different source files for all runs in the session
    for sourcefile in [file for file in session.rglob('*') if is_sourcefile(file)]:

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}")

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}")