def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid
    nifti-files in the corresponding bids session-folder and extract personals (e.g. Age, Sex)
    from the source header

    :param session: The full-path name of the subject/session source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started and see what dataformat we have
    options    = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources      = []
    if dataformat == 'DICOM':
        sources      = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources      = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder/'derivatives'/manufacturer.replace(' ','')/subid/sesid/datasource.datatype
        else:
            outfolder = bidsses/datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder/bidsname).with_suffix('.json')]     # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if the file already exists (-> e.g. when a static runindex is used)
        if (outfolder/bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval', '.bvec'):
                (outfolder/bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:      # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder/bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command   = options['command'],
                args      = options.get('args',''),
                filename  = bidsname,
                outfolder = outfolder,
                source    = source)
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")):
                    continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)")

            # Replace the uncropped output image with the cropped one
            if '-x y' in options.get('args',''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):      # e.g. *_Crop_1.nii.gz
                    ext         = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext,1)[0].rsplit('_Crop_',1)[0] + ext
                    LOGGER.info(f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}")
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC', '_fieldmaphz')
            dcm2niixfiles     = sorted(set([dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")]))
            if not jsonfiles[0].is_file() and dcm2niixfiles:    # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext         = ''.join(dcm2niixfile.suffixes)
                postfixes   = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name     # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:           # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e','')     # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr.lstrip('0'))   # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(f"Unexpected postfix '{postfix}' found in {dcm2niixfile}")
                            newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)     # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in ('ph', 'real', 'imaginary'):      # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:    # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):                              # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'")
                        newbidsname = newbidsname.replace('_magnitude1a',    '_magnitude2')     # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')         # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_e1',  '_magnitude1')     # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace('_magnitude1_e2',  '_magnitude2')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e1',  '_magnitude1')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e2',  '_magnitude2')     # Case 2
                        if len(dcm2niixfiles) in (2, 3):                                        # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace('_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace('_phasediff_e1',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_e2',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_ph',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_magnitude1_ph',  '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude2_ph',  '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase1_e1',      '_phase1')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_e2',      '_phase2')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e1',      '_phase1')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e2',      '_phase2')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_ph',      '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase2_ph',      '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude_e1',   '_magnitude')      # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace('_fieldmap_e1', '_magnitude')     # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_e1',    '_fieldmap')       # Case 3
                        newbidsname = newbidsname.replace('_magnitude_ph',   '_fieldmap')       # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_ph',    '_fieldmap')       # Case 3

                    # Append the dcm2niix info to the acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scans.tsv file")

                # Save the nifti file with the new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder/newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(f"Unexpected file conversion result: {oldjsonfile} not found")
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder/bidsname, options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data (and warn when overwriting an existing, different value)
            for metakey, metaval in metadata.items():
                if metakey in jsondata and jsondata[metakey] != metaval:
                    LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))
                    except (ValueError, SyntaxError):
                        pass
                    LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):     jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'): jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [file for file in jsonfile.parent.glob(jsonfile.stem + '.*') if file.suffix in ('.nii', '.gz')]    # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.exception(f"No data-file found with {jsonfile} when updating {scans_tsv}")
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and not run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon','y') in ('y','yes'):
                        acq_time = acq_time.replace(year=1925, month=1, day=1)      # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:
                    LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time','filename'], inplace=True)
    scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes('PatientAge')       # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if   age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
        elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
        elif age.endswith('M'): age = float(age.rstrip('M')) / 12
        elif age.endswith('Y'): age = float(age.rstrip('Y'))
        if age:
            if options.get('anon','y') in ('y','yes'):
                age = int(float(age))
            personals['age'] = str(age)
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder/'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return      # Only take data from the first session -> BIDS specification
    for key in personals:   # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('','n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
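# The dynamic '<<...>>' run-index convention used above resolves at runtime by bumping the
# run label until the name is free. The sketch below is a minimal, self-contained illustration
# of that idea only -- `_demo_increment_runindex` is a hypothetical helper, and the real
# bids.increment_runindex may behave differently (e.g. it also takes an extension argument).
import re
from pathlib import Path

def _demo_increment_runindex(outfolder: Path, bidsname: str) -> str:
    """Bump the '_run-<index>' label in `bidsname` until no file in `outfolder` starts with it"""
    while list(outfolder.glob(bidsname + '.*')):        # A file with this name already exists
        match = re.search(r'_run-(\d+)', bidsname)
        if not match:                                   # No run entity to increment -> give up
            break
        runindex = int(match.group(1)) + 1
        bidsname = re.sub(r'_run-\d+', f"_run-{runindex}", bidsname)
    return bidsname

# E.g. with 'sub-01_run-1_T1w.nii.gz' already on disk:
# _demo_increment_runindex(outfolder, 'sub-01_run-1_T1w')  ->  'sub-01_run-2_T1w'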
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin     = {'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(1):      # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks
        if not sourcefile.name or (not template[dataformat] and not bidsmap_old[dataformat]):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}")

            # Now work from the provenance store
            if store:
                targetfile = store['target']/sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}")
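# A minimal sketch of the provenance-store mechanism used in bidsmapper_plugin above: a matched
# source file is copied into store['target'] under the same relative path it has below
# store['source'], and the path of the copy becomes the run's provenance. The helper name
# `_demo_store_provenance` is hypothetical; it only mirrors the shutil.copy2 call above.
import shutil
from pathlib import Path

def _demo_store_provenance(sourcefile: Path, store: dict) -> str:
    """Copy `sourcefile` into the provenance store and return the stored path"""
    targetfile = store['target']/sourcefile.relative_to(store['source'])
    targetfile.parent.mkdir(parents=True, exist_ok=True)    # Recreate the source tree in the store
    return str(shutil.copy2(sourcefile, targetfile))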
def rawmapper(rawfolder, outfolder=None, rename=False, dicomfield=('PatientComments',), wildcard='*', dryrun=False):
    """
    :param str rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param bool rename:     Flag for renaming the sub-subid folders to sub-dicomfield
    :param list dicomfield: The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param str wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param bool dryrun:     Flag for dry-running renaming the sub-subid folders
    :return:                Nothing
    :rtype: NoneType
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create an output mapper-file
    if not dryrun:
        mapperfile = os.path.join(outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
        if rename:
            with open(mapperfile, 'a') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'seriesname', '\t'.join(dicomfield)))

    # Loop over all subjects and sessions in the rawfolder
    for subject in bids.lsdirs(rawfolder, 'sub-*'):

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Get the subject and session identifiers from the raw folder
            subid = 'sub-' + session.rsplit(os.sep + 'sub-', 1)[1].split(os.sep + 'ses-', 1)[0]
            sesid = 'ses-' + session.rsplit(os.sep + 'ses-')[1]

            # Parse the new subject and session identifiers from the dicomfield
            series = bids.lsdirs(session, wildcard)
            if not series:
                series = ''
                dcmval = ''
            else:
                series = series[0]      # TODO: loop over series?
                dcmval = ''
                for dcmfield in dicomfield:
                    dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))      # TODO: test how newlines from the console work out
                dcmval = dcmval[1:]

            # Rename the session subfolder in the rawfolder and print & save this info
            if rename:

                # Get the new subid and sesid
                if not dcmval or dcmval == 'None':
                    warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                    continue
                else:
                    if '/' in dcmval:       # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                        delim = '/'
                    elif '\\' in dcmval:
                        delim = '\\'
                    else:
                        delim = '\n'
                    newsubsesid = dcmval.split(delim)
                newsubid = 'sub-' + bids.cleanup_label(newsubsesid[0].replace('sub-', ''))
                if newsubid == 'sub-' or newsubid == 'sub-None':
                    newsubid = subid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
                if len(newsubsesid) == 1:
                    newsesid = sesid
                elif len(newsubsesid) == 2:
                    newsesid = 'ses-' + bids.cleanup_label(newsubsesid[1].replace('ses-', ''))
                else:
                    warnings.warn('Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'.format(session, dcmval))
                    continue
                if newsesid == 'ses-' or newsesid == 'ses-None':
                    newsesid = sesid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(sesid, session))

                # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
                newsession = os.path.join(rawfolder, newsubid, newsesid)
                print(session + ' -> ' + newsession)
                if newsession == session:
                    continue
                if os.path.isdir(newsession):
                    warnings.warn('{} already exists, skipping renaming of {}'.format(newsession, session))
                elif not dryrun:
                    with open(os.path.join(outfolder, mapperfile), 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, newsubid, newsesid))
                    os.renames(session, newsession)

            # Print & save the dicom values
            else:
                print('{}{}{}\t-> {}'.format(subid + os.sep, sesid + os.sep, os.path.basename(series), '\t'.join(dcmval.split('/'))))
                if not dryrun:
                    with open(os.path.join(outfolder, mapperfile), 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, os.path.basename(series), '\t'.join(dcmval.split('/'))))
def rawmapper(rawfolder: str, outfolder: str='', sessions: list=[], rename: bool=False, dicomfield: tuple=('PatientComments',), wildcard: str='*', subprefix: str='sub-', sesprefix: str='ses-', dryrun: bool=False) -> None:
    """
    :param rawfolder:  The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:  The name of the folder where the mapping-file is saved (default = rawfolder)
    :param sessions:   Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected
    :param rename:     Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield: The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:   The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:  The prefix common for all source subject-folders
    :param sesprefix:  The prefix common for all source session-folders
    :param dryrun:     Flag for dry-running renaming the sub-subid folders
    :return:           Nothing
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create or append the output to a mapper logfile
    if not dryrun:
        mapperfile = os.path.join(outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
        if rename:
            with open(mapperfile, 'a') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'seriesname', '\t'.join(dicomfield)))

    # Map the sessions in the rawfolder
    if not sessions:
        sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*{os.sep}{sesprefix}*'))
        if not sessions:
            sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*'))      # Try without session-subfolders
    else:
        sessions = [sessionitem for session in sessions for sessionitem in glob.glob(os.path.join(rawfolder, session), recursive=True)]

    # Loop over the selected sessions in the rawfolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder
        subid = subprefix + session.rsplit(os.sep+subprefix, 1)[1].split(os.sep+sesprefix, 1)[0]
        sesid = sesprefix + session.rsplit(os.sep+sesprefix)[1]     # TODO: Fix crashing on session-less datasets

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            series = ''
            dcmval = ''
        else:
            series = series[0]      # TODO: loop over series?
            dcmval = ''
            for dcmfield in dicomfield:
                dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))
            dcmval = dcmval[1:]

        # Rename the session subfolder in the rawfolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                continue
            else:
                if '/' in dcmval:       # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\r\n'
                newsubsesid = [val for val in dcmval.split(delim) if val]       # Skip empty lines / entries
            newsubid = subprefix + bids.cleanup_label(newsubsesid[0].lstrip(subprefix))
            if newsubid == subprefix or newsubid == subprefix + 'None':
                newsubid = subid
                warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
            if len(newsubsesid) == 1:
                newsesid = sesid
            elif len(newsubsesid) == 2:
                newsesid = sesprefix + bids.cleanup_label(newsubsesid[1].lstrip(sesprefix))
            else:
                warnings.warn('Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'.format(session, dcmval))
                continue
            if newsesid == sesprefix or newsesid == sesprefix + 'None':
                newsesid = sesid
                warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(sesid, session))

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            newsession = os.path.join(rawfolder, newsubid, newsesid)
            print(session + ' -> ' + newsession)
            if newsession == session:
                continue
            if os.path.isdir(newsession):
                warnings.warn('{} already exists, skipping renaming of {}'.format(newsession, session))
            elif not dryrun:
                with open(os.path.join(outfolder, mapperfile), 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, newsubid, newsesid))
                os.renames(session, newsession)

        # Print & save the dicom values
        else:
            print('{}{}{}\t-> {}'.format(subid+os.sep, sesid+os.sep, os.path.basename(series), '\t'.join(dcmval.split('/'))))
            if not dryrun:
                with open(os.path.join(outfolder, mapperfile), 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, os.path.basename(series), '\t'.join(dcmval.split('/'))))
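# Worked example (hypothetical data) of the delimiter logic above: a PatientComments value
# entered at the console as 'john/ses01' (or 'john\ses01', or on two lines) is split into a
# [subid, sesid] pair that rawmapper then prefixes. For self-containment this sketch skips
# the bids.cleanup_label sanitizing step that the real code applies.
def _demo_split_patientcomments(dcmval: str, subprefix: str='sub-', sesprefix: str='ses-') -> tuple:
    """Split a dicom-field value into (newsubid, newsesid) labels, as rawmapper does"""
    if '/' in dcmval:
        delim = '/'
    elif '\\' in dcmval:
        delim = '\\'
    else:
        delim = '\r\n'
    newsubsesid = [val for val in dcmval.split(delim) if val]       # Skip empty lines / entries
    newsubid    = subprefix + newsubsesid[0]
    newsesid    = sesprefix + newsubsesid[1] if len(newsubsesid) == 2 else ''
    return newsubid, newsesid

# _demo_split_patientcomments('john/ses01')  ->  ('sub-john', 'ses-ses01')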
def coin_dicom(session: str, bidsmap: dict, bidsfolder: str, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param session:    The full-path name of the subject/session source folder
    :param bidsmap:    The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder: The full-path name of the BIDS root-folder
    :param personals:  The dictionary with the personal information
    :param subprefix:  The prefix common for all source subject-folders
    :param sesprefix:  The prefix common for all source session-folders
    :return:           Nothing
    """

    if not bids.lsdirs(session):
        logger.warning('No series subfolder(s) found in: ' + session)
        return

    TE = [None, None]

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['participant_label'] and bidsmap['DICOM']['participant_label'].startswith('<<') and bidsmap['DICOM']['participant_label'].endswith('>>'):
        subid = bids.get_dicomfield(bidsmap['DICOM']['participant_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['participant_label']:
        subid = bidsmap['DICOM']['participant_label']
    else:
        subid = session.rsplit(os.sep + subprefix, 1)[1].split(os.sep + sesprefix, 1)[0]
    subid = 'sub-' + bids.cleanup_label(subid.lstrip(subprefix))
    if subid == subprefix:
        logger.error('No valid subject identifier found for: ' + session)
        return

    # Get a valid or empty BIDS session identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['session_label'] and bidsmap['DICOM']['session_label'].startswith('<<') and bidsmap['DICOM']['session_label'].endswith('>>'):
        sesid = bids.get_dicomfield(bidsmap['DICOM']['session_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['session_label']:
        sesid = bidsmap['DICOM']['session_label']
    elif os.sep + sesprefix in session:
        sesid = session.rsplit(os.sep + sesprefix)[1]
    else:
        sesid = ''
    if sesid:
        sesid = 'ses-' + bids.cleanup_label(sesid.lstrip(sesprefix))

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = os.path.join(bidsfolder, subid, sesid)    # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)
    scans_tsv = os.path.join(bidsses, f'{subid}{bids.add_prefix("_",sesid)}_scans.tsv')
    if os.path.exists(scans_tsv):
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom series subfolders
    for series in bids.lsdirs(session):

        if series.startswith('.'):
            logger.info('Ignoring hidden dicom-folder: ' + series)
            continue
        else:
            logger.info('Processing dicom-folder: ' + series)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        if not dicomfile:
            continue
        result   = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_  = result['series']
        modality = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex = series_['bids']['run_index']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = series)
        logger.info('$ ' + command)
        process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)     # TODO: investigate shell=False and capture_output=True
        logger.info(process.stdout.decode('utf-8'))
        if process.returncode != 0:
            logger.error(f'Failed to process {series} (errorcode {process.returncode})')
            continue

        # Replace the uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + '*_Crop_*'))):   # e.g. *_Crop_1.nii.gz
                basepath, ext1 = os.path.splitext(filename)
                basepath, ext2 = os.path.splitext(basepath)     # Account for .nii.gz files
                basepath       = basepath.rsplit('_Crop_', 1)[0]
                newfilename    = basepath + ext2 + ext1
                logger.info(f'Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}')
                os.replace(filename, newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []      # Collect the associated json-files (for updating them later) -- possibly > 1
        for suffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + suffix + '[0-9]*'))):
                basepath, ext1  = os.path.splitext(filename)
                basepath, ext2  = os.path.splitext(basepath)    # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)
                index = index.split('_')[0].zfill(2)            # Zero pad as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffixes (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                if suffix in ('_c', '_e') and int(index) == 2 and basepath.rsplit('_', 1)[1] != 'magnitude1':   # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = basepath + ext2 + ext1        # The file without the _c1/_e1 suffix
                    if suffix == '_e' and bids.set_bidslabel(basepath, 'echo'):
                        newbasepath_ce = bids.set_bidslabel(basepath, 'echo', '1')
                    else:
                        newbasepath_ce = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + '1'.zfill(len(index)))  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce + ext2 + ext1   # The file as it should have been
                    if os.path.isfile(filename_ce):
                        if filename_ce != newfilename_ce:
                            logger.info(f'Found no dcm2niix {suffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}')
                            os.rename(filename_ce, newfilename_ce)
                        if ext1 == '.json':
                            jsonfiles.append(newbasepath_ce + '.json')

                # Patch the basepath with the suffix info
                if suffix == '_e' and bids.set_bidslabel(basepath, 'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', str(int(index)))    # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in ('magnitude1', 'magnitude2') and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        logger.info(f"Reading EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:    # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in ['phase1', 'phase2'] and index:      # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_phase1_e[index] -> *_phase[index]
                    logger.warning('Untested dcm2niix "_ph"-filetype: ' + basepath)

                else:
                    basepath = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + index)    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, os.path.basename(basepath), ext2 + ext1)    # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality, newbidsname + ext2 + ext1)
                logger.info(f'Found dcm2niix {suffix} suffix, renaming\n{filename} ->\n{newfilename}')
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not os.path.isfile(jsonfile):
                logger.warning(f'Unexpected file conversion result: {jsonfile} not found')
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    logger.info('Adding dummy bvec file: ' + bvecfile)
                    with open(bvecfile, 'w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    logger.info('Adding dummy bval file: ' + bvalfile)
                    with open(bvalfile, 'w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    logger.info('Adding TaskName to: ' + jsonfile)
                    data['TaskName'] = series_['bids']['task_label']
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-series being saved after the magnitude series)
            elif modality == 'fmap':
                if series_['bids']['suffix'] == 'phasediff':
                    logger.info('Adding EchoTime1 and EchoTime2 to: ' + jsonfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    if TE[0] > TE[1]:
                        logger.warning('EchoTime1 > EchoTime2 in: ' + jsonfile)

            # Parse the acquisition time from the json file
            with open(jsonfile, 'r') as json_fid:
                data = json.load(json_fid)
            acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            niipath  = glob.glob(os.path.splitext(jsonfile)[0] + '.nii*')[0]    # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niipath  = niipath.replace(bidsses + os.sep, '')                    # Use a relative path. Somehow .strip(bidsses) instead of replace(bidsses,'') does not work properly
            scans_table.loc[niipath, 'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    logger.info('Writing acquisition time data to: ' + scans_tsv)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if 'IntendedFor' in fieldmap['bids'] and fieldmap['bids']['IntendedFor']:
                bidsname = bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1')
                acqlabel = bids.set_bidslabel(bidsname, 'acq')
                for jsonfile in glob.glob(os.path.join(bidsses, 'fmap', bidsname.replace('_run-1_', '_run-[0-9]*_').replace(acqlabel, acqlabel + '[CE][0-9]*') + '.json')):     # Account for multiple runs and dcm2niix suffixes inserted into the acquisition label

                    intendedfor = fieldmap['bids']['IntendedFor']
                    if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                        intendedfor = intendedfor[2:-2].split('><')
                    else:
                        intendedfor = [intendedfor]

                    niifiles = []
                    for selector in intendedfor:
                        niifiles.extend([niifile.split(os.sep + subid + os.sep, 1)[1].replace('\\', '/') for niifile in sorted(glob.glob(os.path.join(bidsses, f'**{os.sep}*{selector}*.nii*')))])   # Search in all series using a relative path
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['IntendedFor'] = niifiles
                    logger.info('Adding IntendedFor to: ' + jsonfile)
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                    # Catch magnitude2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same seriesfolder)
                    if jsonfile.endswith('magnitude1.json'):
                        jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                        if os.path.isfile(jsonfile2):
                            with open(jsonfile2, 'r') as json_fid:
                                data = json.load(json_fid)
                            if 'IntendedFor' not in data:
                                data['IntendedFor'] = niifiles
                                logger.info('Adding IntendedFor to: ' + jsonfile2)
                                with open(jsonfile2, 'w') as json_fid:
                                    json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid
    personals['age']    = bids.get_dicomfield('PatientAge',    dicomfile)
    personals['sex']    = bids.get_dicomfield('PatientSex',    dicomfile)
    personals['size']   = bids.get_dicomfield('PatientSize',   dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
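# Worked example (hypothetical filename) of the double-splitext pattern used above to take
# apart dcm2niix output such as 'sub-01_T1w_e2.nii.gz': one splitext for '.gz', one for
# '.nii', and an rsplit on the dcm2niix suffix to isolate the index. The helper name
# `_demo_split_dcm2niix_name` is illustrative only.
import os

def _demo_split_dcm2niix_name(filename: str, suffix: str='_e') -> tuple:
    """Return (basepath, index, extension) for a dcm2niix output filename"""
    basepath, ext1  = os.path.splitext(filename)
    basepath, ext2  = os.path.splitext(basepath)    # Account for .nii.gz files
    basepath, index = basepath.rsplit(suffix, 1)
    index = index.split('_')[0]                     # Strip trailing suffixes (fieldmaps produce *_e2_ph files)
    return basepath, index, ext2 + ext1

# _demo_split_dcm2niix_name('sub-01_T1w_e2.nii.gz')  ->  ('sub-01_T1w', '2', '.nii.gz')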
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None: """ Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and extracts personals (e.g. Age, Sex) from the source header :param dataformat: The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat) :param session: The full-path name of the subject/session source file/folder :param bidsmap: The full mapping heuristics from the bidsmap YAML-file :param bidsfolder: The full-path name of the BIDS root-folder :param personals: The dictionary with the personal information :param subprefix: The prefix common for all source subject-folders :param sesprefix: The prefix common for all source session-folders :return: Nothing """ # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file if dataformat == 'DICOM': sourcefile = Path() sources = bids.lsdirs(session) for source in sources: sourcefile = bids.get_dicomfile(source) manufacturer = bids.get_dicomfield('Manufacturer', sourcefile) if sourcefile.name: break elif dataformat == 'PAR': sources = bids.get_parfiles(session) manufacturer = 'Philips Medical Systems' if sources: sourcefile = sources[0] else: LOGGER.error( f"Unsupported data format: {dataformat}\nPlease report this bug") return if not sources: LOGGER.info(f"No data found for: {session}") return subid, sesid = bids.get_subid_sesid(sourcefile, bidsmap[dataformat]['subject'], bidsmap[dataformat]['session'], subprefix, sesprefix) if subid == subprefix: LOGGER.error(f"No valid subject identifier found for: {session}") return # Create the BIDS session-folder and a scans.tsv file bidsses = bidsfolder / subid / sesid if bidsses.is_dir(): LOGGER.warning( f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). 
Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner" ) bidsses.mkdir(parents=True, exist_ok=True) scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv" if scans_tsv.is_file(): scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename') else: scans_table = pd.DataFrame(columns=['acq_time'], dtype='str') scans_table.index.name = 'filename' # Process all the source files or run subfolders for source in sources: # Get a source-file if dataformat == 'DICOM': sourcefile = bids.get_dicomfile(source) elif dataformat == 'PAR': sourcefile = source if not sourcefile.name: continue # Get a matching run from the bidsmap run, datatype, index = bids.get_matching_run(sourcefile, bidsmap, dataformat) # Check if we should ignore this run if datatype == bids.ignoredatatype: LOGGER.info(f"Leaving out: {source}") continue # Check if we already know this run if index is None: LOGGER.error( f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning" ) continue LOGGER.info(f"Processing: {source}") # Create the BIDS session/datatype output folder if run['bids']['suffix'] in bids.get_derivatives(datatype): outfolder = bidsfolder / 'derivatives' / manufacturer.replace( ' ', '') / subid / sesid / datatype else: outfolder = bidsses / datatype outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run bidsname = bids.get_bidsname(subid, sesid, run) runindex = run['bids'].get('run', '') if runindex.startswith('<<') and runindex.endswith('>>'): bidsname = bids.increment_runindex(outfolder, bidsname) jsonfiles = [ (outfolder / bidsname).with_suffix('.json') ] # List -> Collect the associated json-files (for updating them later) -- possibly > 1 # Check if file already exists (-> e.g. when a static runindex is used) if (outfolder / bidsname).with_suffix('.json').is_file(): LOGGER.warning( f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!" ) for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', 'tsv.gz'): (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True) # Convert physiological log files (dcm2niix can't handle these) if run['bids']['suffix'] == 'physio': if bids.get_dicomfile(source, 2).name: LOGGER.warning( f"Found > 1 DICOM file in {source}, using: {sourcefile}") physiodata = physio.readphysio(sourcefile) physio.physio2tsv(physiodata, outfolder / bidsname) # Convert the source-files in the run folder to nifti's in the BIDS-folder else: command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format( path=bidsmap['Options']['dcm2niix']['path'], args=bidsmap['Options']['dcm2niix']['args'], filename=bidsname, outfolder=outfolder, source=source) if not bids.run_command(command): continue # Replace uncropped output image with the cropped one if '-x y' in bidsmap['Options']['dcm2niix']['args']: for dcm2niixfile in sorted( outfolder.glob(bidsname + '*_Crop_*')): # e.g. *_Crop_1.nii.gz ext = ''.join(dcm2niixfile.suffixes) newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit( '_Crop_', 1)[0] + ext LOGGER.info( f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}" ) dcm2niixfile.replace(newbidsfile) # Rename all files that got additional postfixes from dcm2niix. 
See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph') dcm2niixfiles = sorted( set([ dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes for dcm2niixfile in outfolder.glob( f"{bidsname}*{dcm2niixpostfix}*") ])) for dcm2niixfile in dcm2niixfiles: ext = ''.join(dcm2niixfile.suffixes) postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit( ext)[0].split('_')[1:] newbidsname = dcm2niixfile.name # Strip the additional postfixes and assign them to bids entities in the for-loop below for postfix in postfixes: # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data # Patch the echo entity in the newbidsname with the dcm2niix echo info # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder if postfix[0] == 'e' and bids.get_bidsvalue( newbidsname, 'echo' ): # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix echonr = f"_{postfix}" # E.g. echonr='_e1' or echonr='_pha' for dcm2niixpostfix in dcm2niixpostfixes: echonr = echonr.replace( dcm2niixpostfix, '' ) # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a'] if echonr.isalpha(): echonr = ord( echonr ) - 95 # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc elif not echonr: echonr = 1 newbidsname = bids.get_bidsvalue( newbidsname, 'echo', str(echonr) ) # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file) elif run['bids']['suffix'] in ('magnitude', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'phasediff', 'fieldmap'): if len(dcm2niixfiles) not in ( 0, 2, 4, 6, 8 ): # Phase / echo data may be stored in the same data source / run folder LOGGER.warning( f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'" ) newbidsname = newbidsname.replace( '_fieldmap_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude_e1', '_magnitude') newbidsname = newbidsname.replace( '_magnitude_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude1_e1', '_magnitude1') newbidsname = newbidsname.replace( '_magnitude2_e1', '_magnitude1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = newbidsname.replace( '_magnitude1_e2', '_magnitude2') if len(dcm2niixfiles) == 8: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phase1' ) # Two magnitude + 2 phase images in one folder / datasource else: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phasediff' ) # One or two magnitude + 1 phasediff image newbidsname = newbidsname.replace( '_magnitude1a', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude1_pha', '_phase2') newbidsname = newbidsname.replace( '_magnitude2_e2', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude2_ph', '_phase2') newbidsname = newbidsname.replace( '_phase1_e1', '_phase1') newbidsname = newbidsname.replace( '_phase2_e1', '_phase1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = 
                        newbidsname = newbidsname.replace('_phase1_ph', '_phase1')
                        newbidsname = newbidsname.replace('_phase1_e2', '_phase2')
                        newbidsname = newbidsname.replace('_phase2_e2', '_phase2')
                        newbidsname = newbidsname.replace('_phase2_ph', '_phase2')

                    # Append the dcm2niix info to the acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder/newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)
                if ext == '.json':
                    oldjsonfile = (outfolder/bidsname).with_suffix('.json')
                    if oldjsonfile in jsonfiles and not oldjsonfile.is_file():
                        jsonfiles.remove(oldjsonfile)
                    jsonfiles.append(newbidsfile)

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if datasource.datatype == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif datasource.datatype == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            niifile = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))   # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            if niifile and datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and run['bids']['suffix'] not in bids.get_derivatives(datasource.datatype):
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('AcquisitionTime', sourcefile)   # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('exam_date', sourcefile)         # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except Exception:
                    LOGGER.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}")
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = niifile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed until all datatypes have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder/subid) for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])   # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel  = bids.get_bidsvalue(bidsname, 'acq')
            patterns  = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace('_magnitude1', '_magnitude*').replace('_magnitude2', '_magnitude*').replace('_phase1', '_phase*').replace('_phase2', '_phase*'),
                         bidsname.replace('_run-1_', '_run-[0-9]*_').replace('_magnitude1', '_phase*').replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses/'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(list((bidsses/'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                    else:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")
                    data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]   # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE             = [None, None]
                    for n in (0, 1):
                        json_magnitude[n] = jsonfile.parent/jsonfile.name.replace('_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(f"Could not find expected magnitude{n+1} image associated with: {jsonfile}")
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.error(f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}")
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return   # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield('PatientAge', sourcefile)   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D'))/365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W'))/52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M'))/12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']    = bids.get_dicomfield('PatientSex',    sourcefile)
        personals['size']   = bids.get_dicomfield('PatientSize',   sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
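
# The DICOM PatientAge parsing above recurs in several functions in this module. Below is a
# minimal stand-alone sketch of that logic (a hypothetical helper, not part of the BIDScoin
# API) that could factor it out; the conversion constants are the ones used above:
def _age_in_years(age: str) -> str:
    """Convert a DICOM Age String (AS, e.g. '045Y', '006M', '012W', '100D') to whole years."""
    if not age:
        return ''
    units = {'D': 365.2524, 'W': 52.1775, 'M': 12, 'Y': 1}   # Divisors to go from the given unit to years
    if age[-1] in units:
        return str(int(float(age[:-1]) / units[age[-1]]))
    return age   # Fall back to the raw value for non-standard entries

# Example: _age_in_years('006M') -> '0', _age_in_years('045Y') -> '45'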
def coin_dicom(session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    if not bids.lsdirs(session):
        LOGGER.warning(f"No run subfolder(s) found in: {session}")
        return

    TE = [None, None]

    # Get valid BIDS subject/session identifiers from the (first) dicom-header or from the session source folder
    subid, sesid = bids.get_subid_sesid(bids.get_dicomfile(bids.lsdirs(session)[0]),
                                        bidsmap['DICOM']['subject'], bidsmap['DICOM']['session'],
                                        subprefix, sesprefix)
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder/subid/sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom run subfolders
    for runfolder in bids.lsdirs(session):

        # Get a dicom-file
        dicomfile = bids.get_dicomfile(runfolder)
        if not dicomfile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(dicomfile, bidsmap)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {runfolder}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.warning(f"Skipping unknown '{modality}': {dicomfile}\n-> re-run the bidsmapper and delete {session} to solve this warning")
            continue

        LOGGER.info(f"Processing: {runfolder}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses/modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (bidsmodality/bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{bidsmodality/bidsname}.* already exists -- check your results carefully!")

        # Convert the dicom-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = runfolder)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):   # e.g. *_Crop_1.nii.gz
                ext         = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                LOGGER.info(f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []   # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(bidsmodality.glob(bidsname + dcm2niisuffix + '*')):
                ext             = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext, 1)[0].rsplit(dcm2niisuffix, 1)   # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix      = basepath.rsplit('_', 1)[1]                                 # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index           = index.split('_')[0].zfill(2)                               # Zero-pad as specified in the BIDS-standard (assuming two digits is sufficient); strip trailing suffixes (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a *_e2/_c2 file
                if dcm2niisuffix in ('_c', '_e') and int(index) == 2 and basesuffix not in ['magnitude1', 'phase1']:   # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = Path(basepath + ext)   # The file without the _c1/_e1 suffix
                    if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + '1'.zfill(len(index))))   # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(ext)   # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.info(f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}")
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))   # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix == '_e' and basesuffix in ('magnitude1', 'magnitude2') and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))   # basepath: *_magnitude1_e[index] -> *_magnitude[index]

                    # Collect the echo times that need to be added to the json-file (see below)
                    if filename.suffix == '.json':
                        with filename.open('r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        LOGGER.info(f"Collected EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif dcm2niisuffix == '_e' and basesuffix == 'phasediff' and index:   # i.e. modality == 'fmap'
                    pass

                elif dcm2niisuffix == '_e' and basesuffix in ['phase1', 'phase2'] and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))   # basepath: *_phase1_e[index]_ph -> *_phase[index]

                else:
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + index)   # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)   # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality/newbidsname).with_suffix(ext)
                LOGGER.info(f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality/newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality/bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude runs have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-runs being saved after the magnitude runs)
            elif modality == 'fmap':
                if run['bids']['suffix'] == 'phasediff':
                    LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                    if TE[0] is None or TE[1] is None:
                        LOGGER.warning(f"Missing Echo-Time data for: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.warning(f"EchoTime1 > EchoTime2 for: {jsonfile}")
                    with jsonfile.open('r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the dicom header (NB: assuming the dicom file represents the first acquisition)
            with jsonfile.open('r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data:
                data['AcquisitionTime'] = bids.get_dicomfield('AcquisitionTime', dicomfile)
            acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            scanpath = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))[0].relative_to(bidsses)   # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder/subid) for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])   # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Save the IntendedFor data in the json-files (account for multiple runs and dcm2niix suffixes inserted into the acquisition label)
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            for jsonfile in list((bidsses/'fmap').glob(bidsname.replace('_run-1_', '_run-[0-9]*_') + '.json')) + \
                            list((bidsses/'fmap').glob(bidsname.replace('_run-1_', '_run-[0-9]*_').replace(acqlabel, acqlabel + '[CE][0-9]*') + '.json')):
                if niifiles:
                    LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                elif intendedfor:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                else:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]   # The path needs to use forward slashes instead of backward slashes
                with jsonfile.open('w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 and phase2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same runfolder)
                if jsonfile.name.endswith('magnitude1.json') or jsonfile.name.endswith('phase1.json'):
                    jsonfile2 = jsonfile.with_name(jsonfile.name.rsplit('1.json', 1)[0] + '2.json')
                    if jsonfile2.is_file():
                        with jsonfile2.open('r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            if niifiles:
                                LOGGER.info(f"Adding IntendedFor to: {jsonfile2}")
                            else:
                                LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile2}: the search for {intendedfor} gave no results")
                            data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]   # The path needs to use forward slashes instead of backward slashes
                            with jsonfile2.open('w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header: only from the first session (-> BIDS specification)
    if 'runfolder' in locals():
        dicomfile = bids.get_dicomfile(runfolder)
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return
        age = bids.get_dicomfield('PatientAge', dicomfile)   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D'))/365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W'))/52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M'))/12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']    = bids.get_dicomfield('PatientSex',    dicomfile)
        personals['size']   = bids.get_dicomfield('PatientSize',   dicomfile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
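
# The '<<...><...>>' IntendedFor syntax handled above is easy to get wrong. Below is a minimal
# sketch (a hypothetical helper, not part of the BIDScoin API) of how such a bidsmap value
# decomposes into the glob selectors that are searched for in the session folder:
def _split_intendedfor(intendedfor) -> list:
    """Normalize an IntendedFor bidsmap value to a list of search selectors."""
    if not intendedfor:
        return []
    if isinstance(intendedfor, str) and intendedfor.startswith('<<') and intendedfor.endswith('>>'):
        return intendedfor[2:-2].split('><')    # E.g. '<<task><rest>>' -> ['task', 'rest']
    if not isinstance(intendedfor, list):
        return [intendedfor]                    # A plain string is a single selector
    return intendedfor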
def bidsmapper(rawfolder, bidsfolder, bidsmapfile='bidsmap_sample.yaml', automatic=False):
    """
    Main function that processes all the subjects and sessions in the rawfolder and that generates a maximally
    filled-in bidsmap.yaml file in bidsfolder/code. Folders in rawfolder are assumed to contain a single dataset.

    :param str rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str bidsfolder:   The name of the BIDS root folder
    :param str bidsmapfile:  The name of the bidsmap YAML-file
    :param bool automatic:   If True, the user will not be asked for help if an unknown series is encountered
    :return:                 The name of the mapped bidsmap YAML-file
    :rtype: str
    """

    # Input checking
    rawfolder  = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))

    # Get the heuristics for creating the bidsmap
    heuristics = bids.get_heuristics(bidsmapfile, os.path.join(bidsfolder, 'code'))

    # Create a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    bidsmap = copy.deepcopy(heuristics)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality,):
            if bidsmap[logic] and modality in bidsmap[logic]:
                bidsmap[logic][modality] = None

    # Loop over all subjects and sessions and build up the bidsmap entries
    subjects = bids.lsdirs(rawfolder, 'sub-*')
    for subject in subjects:
        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]
        for session in sessions:
            print('Parsing: ' + session)
            for series in bids.lsdirs(session):

                # Update / append the dicom mapping
                if heuristics['DICOM']:
                    dicomfile = bids.get_dicomfile(series)
                    bidsmap   = built_dicommap(dicomfile, bidsmap, heuristics, automatic)

                # Update / append the PAR/REC mapping
                if heuristics['PAR']:
                    parfile = bids.get_parfile(series)
                    bidsmap = built_parmap(parfile, bidsmap, heuristics, automatic)

                # Update / append the P7 mapping
                if heuristics['P7']:
                    p7file  = bids.get_p7file(series)
                    bidsmap = built_p7map(p7file, bidsmap, heuristics, automatic)

                # Update / append the nifti mapping
                if heuristics['Nifti']:
                    niftifile = bids.get_niftifile(series)
                    bidsmap   = built_niftimap(niftifile, bidsmap, heuristics, automatic)

                # Update / append the file-system mapping
                if heuristics['FileSystem']:
                    bidsmap = built_filesystemmap(series, bidsmap, heuristics, automatic)

                # Update / append the plugin mapping
                if heuristics['PlugIn']:
                    bidsmap = built_pluginmap(series, bidsmap)

    # Create the bidsmap YAML-file in bidsfolder/code
    os.makedirs(os.path.join(bidsfolder, 'code'), exist_ok=True)
    bidsmapfile = os.path.join(bidsfolder, 'code', 'bidsmap.yaml')

    # Save the bidsmap to the bidsmap YAML-file
    print('Writing bidsmap to: ' + bidsmapfile)
    with open(bidsmapfile, 'w') as stream:
        yaml.dump(bidsmap, stream)

    return bidsmapfile
def coin_dicom(session, bidsmap, bidsfolder, personals):
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param str session:     The full-path name of the subject/session source folder
    :param dict bidsmap:    The full mapping heuristics from the bidsmap YAML-file
    :param str bidsfolder:  The full-path name of the BIDS root-folder
    :param dict personals:  The dictionary with the personal information
    :return:                Nothing
    :rtype: NoneType
    """

    global logfile
    TE = [None, None]

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['participant_label'] and bidsmap['DICOM']['participant_label'].startswith('<<') and bidsmap['DICOM']['participant_label'].endswith('>>'):
        subid = 'sub-' + bids.get_dicomfield(bidsmap['DICOM']['participant_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['participant_label']:
        subid = 'sub-' + bidsmap['DICOM']['participant_label']
    else:
        subid = 'sub-' + session.rsplit(os.sep + 'sub-', 1)[1].split(os.sep + 'ses-', 1)[0]
    if subid == 'sub-':
        bids.printlog('Error: No valid subject identifier found for: ' + session, logfile)
        return

    # Get a BIDS session identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['session_label'] and bidsmap['DICOM']['session_label'].startswith('<<') and bidsmap['DICOM']['session_label'].endswith('>>'):
        sesid = 'ses-' + bids.get_dicomfield(bidsmap['DICOM']['session_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['session_label']:
        sesid = 'ses-' + bidsmap['DICOM']['session_label']
    elif os.sep + 'ses-' in session:
        sesid = 'ses-' + session.rsplit(os.sep + 'ses-')[1]
    else:
        sesid = ''

    # Create the BIDS session-folder
    bidsses = os.path.join(bidsfolder, subid, sesid)   # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)

    # Process all the dicom series subfolders
    for series in bids.lsdirs(session):
        bids.printlog('Processing dicom-folder: ' + series, logfile)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        result    = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_   = result['series']
        modality  = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex = series_['run_index']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = series)
        bids.printlog('$ ' + command, logfile)
        process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)   # TODO: investigate shell=False and capture_output=True
        bids.printlog(process.stdout.decode('utf-8'), logfile)
        if process.returncode != 0:
            errormsg = 'Error: Failed to process {} (errorcode {})'.format(series, process.returncode)
            bids.printlog(errormsg, logfile)
            continue

        # Rename all files ending with _c%d, _e%d and _ph: These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []   # Collect the associated json-files (for updating them later)
        for suffix in ('_c', '_e', '_ph'):
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + suffix + '*'))):
                basepath, ext1  = os.path.splitext(filename)
                basepath, ext2  = os.path.splitext(basepath)   # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)

                if suffix == '_e' and bids.set_bidslabel(basepath, 'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', index)

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in ['magnitude1', 'magnitude2'] and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + index

                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        bids.printlog('Reading EchoTime{} = {} from: {}'.format(index, data['EchoTime'], filename), logfile)

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:   # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in ['phase1', 'phase2'] and index:   # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + index
                    bids.printlog('WARNING: Untested dcm2niix "_ph"-filetype: ' + basepath, logfile)

                else:
                    basepath = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + index)   # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, os.path.basename(basepath), ext2 + ext1)   # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality, newbidsname + ext2 + ext1)
                bids.printlog('Found dcm2niix {} suffix, renaming\n{} ->\n{}'.format(suffix, filename, newfilename), logfile)
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files (every nifti file comes with a json file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]
        for jsonfile in jsonfiles:

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    with open(bvecfile, 'w') as bvec_fid:
                        bids.printlog('Adding dummy bvec file: ' + bvecfile, logfile)
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    with open(bvalfile, 'w') as bval_fid:
                        bids.printlog('Adding dummy bval file: ' + bvalfile, logfile)
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    bids.printlog('Adding TaskName to: ' + jsonfile, logfile)
                    with open(jsonfile, 'w') as json_fid:
                        data['TaskName'] = series_['task_label']
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-series being saved after the magnitude series)
            elif modality == 'fmap':
                if series_['suffix'] == 'phasediff':
                    bids.printlog('Adding EchoTime1 and EchoTime2 to: ' + jsonfile, logfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    if None not in TE and TE[0] > TE[1]:   # NB: Guard against missing echo times (cf. the None-check in the newer coin_dicom above)
                        bids.printlog('WARNING: EchoTime1 > EchoTime2 in: ' + jsonfile, logfile)

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if 'IntendedFor' in fieldmap and fieldmap['IntendedFor']:
                jsonfile = os.path.join(bidsses, 'fmap', bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1') + '.json')   # TODO: Assumes that there is only 1 fieldmap acquired for each bidsmap entry / series
                if not os.path.isfile(jsonfile):
                    continue

                intendedfor = fieldmap['IntendedFor']
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                else:
                    intendedfor = [intendedfor]

                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                niifiles = [niifile.split(os.sep + subid + os.sep, 1)[1] for niifile in sorted(glob.glob(os.path.join(bidsses, '**' + os.sep + '*' + '*'.join(intendedfor) + '*.nii*')))]   # Use a relative path
                data['IntendedFor'] = niifiles
                bids.printlog('Adding IntendedFor to: ' + jsonfile, logfile)
                with open(jsonfile, 'w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 files produced by dcm2niix
                if jsonfile.endswith('magnitude1.json'):
                    jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                    if os.path.isfile(jsonfile2):
                        with open(jsonfile2, 'r') as json_fid:
                            data = json.load(json_fid)
                        data['IntendedFor'] = niifiles
                        bids.printlog('Adding IntendedFor to: ' + jsonfile2, logfile)
                        with open(jsonfile2, 'w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid   # TODO: Check if this can be in the participants.tsv file according to BIDS
    personals['age']    = bids.get_dicomfield('PatientAge',    dicomfile)
    personals['sex']    = bids.get_dicomfield('PatientSex',    dicomfile)
    personals['size']   = bids.get_dicomfield('PatientSize',   dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
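
# A worked example (with made-up names) of the fieldmap renaming performed above: dcm2niix
# writes the second echo of the magnitude image as *_magnitude1_e2, and after stripping the
# '_e' suffix the code patches the trailing index of the BIDS suffix:
def _example_magnitude_rename() -> None:
    basepath = 'sub-01_acq-fmap_magnitude1'     # Hypothetical name after rsplit('_e', 1) and extension stripping
    index    = '02'                             # Zero-padded dcm2niix echo index
    basepath = basepath[0:-1] + str(int(index)) # *_magnitude1 -> *_magnitude2
    assert basepath == 'sub-01_acq-fmap_magnitude2'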
def rawmapper(rawfolder, outfolder: Path=Path(), sessions: list=[], rename: bool=False, dicomfield: tuple=('PatientComments',), wildcard: str='*', subprefix: str='sub-', sesprefix: str='ses-', dryrun: bool=False) -> None:
    """
    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved (default = sourcefolder)
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder)
    if not outfolder:
        outfolder = rawfolder
        print(f"Outfolder: {outfolder}")
    outfolder = Path(outfolder)

    # Create or append the output to a mapper logfile
    mapperfile = outfolder/f"rawmapper_{'_'.join(dicomfield)}.tsv"
    if not dryrun:
        if rename:
            with mapperfile.open('a') as fid:
                fid.write('subid\tsesid\tnewsubid\tnewsesid\n')
        else:
            with mapperfile.open('x') as fid:
                fid.write('subid\tsesid\tseriesname\t{}\n'.format('\t'.join(dicomfield)))

    # Map the sessions in the sourcefolder
    if not sessions:
        sessions = list(rawfolder.glob(f"{subprefix}*/{sesprefix}*"))
        if not sessions:
            sessions = list(rawfolder.glob(f"{subprefix}*"))   # Try without session-subfolders
    else:
        sessions = [sessionitem for session in sessions for sessionitem in rawfolder.rglob(session)]

    # Loop over the selected sessions in the sourcefolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder
        subid, sesid = bids.get_subid_sesid(session)

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            series = Path()   # NB: An empty Path so that series.name below is '' (instead of raising on a plain string)
            dcmval = ''
        else:
            series = series[0]   # TODO: loop over series?
            dcmval = ''
            for dcmfield in dicomfield:
                dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))
            dcmval = dcmval[1:]

        # Rename the session subfolder in the sourcefolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn(f"Skipping renaming because the dicom-field was empty for: {session}")
                continue
            else:
                if '/' in dcmval:       # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\r\n'
                newsubsesid = [val for val in dcmval.split(delim) if val]   # Skip empty lines / entries
                newsubid    = subprefix + bids.cleanup_value(re.sub(f'^{subprefix}', '', newsubsesid[0]))
                if newsubid == subprefix or newsubid == subprefix + 'None':
                    newsubid = subid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
                if len(newsubsesid) == 1:
                    newsesid = sesid
                elif len(newsubsesid) == 2:
                    newsesid = sesprefix + bids.cleanup_value(re.sub(f'^{sesprefix}', '', newsubsesid[1]))
                else:
                    warnings.warn(f"Skipping renaming of {session} because the dicom-field '{dcmval}' could not be parsed into [subid, sesid]")
                    continue
                if newsesid == sesprefix or newsesid == sesprefix + 'None':   # NB: Compare against sesprefix (not subprefix) when checking the session-id
                    newsesid = sesid
                    warnings.warn(f"Could not rename {sesid} because the dicom-field was empty for: {session}")

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            newsession = rawfolder/newsubid/newsesid
            print(f"{session} -> {newsession}")
            if newsession == session:
                continue
            if newsession.is_dir():
                warnings.warn(f"{newsession} already exists, skipping renaming of {session}")
            elif not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write(f"{subid}\t{sesid}\t{newsubid}\t{newsesid}\n")
                session.rename(newsession)

        # Print & save the dicom values
        else:
            print('{}/{}/{}\t-> {}'.format(subid, sesid, series.name, '\t'.join(dcmval.split('/'))))
            if not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, series.name, '\t'.join(dcmval.split('/'))))
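
# The delimiter detection above determines how a PatientComments-style value is split into
# [subid, sesid]. A minimal sketch of that decision (a hypothetical helper, not part of the
# BIDScoin API):
def _guess_delimiter(dcmval: str) -> str:
    """Return the sub/ses delimiter used in a mapped dicom-field value."""
    if '/' in dcmval:
        return '/'          # E.g. 'John/Session1' as entered at the console
    if '\\' in dcmval:
        return '\\'
    return '\r\n'           # Fall back to the line-ending that scanners insert in PatientComments

# Example: [val for val in 'sub-007/ses-01'.split(_guess_delimiter('sub-007/ses-01')) if val]
# -> ['sub-007', 'ses-01']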
def build_dicommap(runfolder: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, gui: object) -> dict:
    """
    All the logic to map dicom-attributes (fields/tags) onto bids-labels goes into this function

    :param runfolder:   The full-path name of the series-folder containing source dicom-files
    :param bidsmap_new: The bidsmap that we are building
    :param bidsmap_old: Full BIDS heuristics data structure, with all options, BIDS labels and attributes, etc
    :param template:    The bidsmap template with the default heuristics
    :param gui:         If not None, the user will be asked for help if an unknown run is encountered
    :return:            The bidsmap with new entries in it
    """

    # Input checks
    dicomfile = bids.get_dicomfile(runfolder)
    if not dicomfile.name or (not template['DICOM'] and not bidsmap_old['DICOM']):
        LOGGER.info('No DICOM information found in the bidsmap and template')
        return bidsmap_new

    # See if we can find a matching run in the old bidsmap
    run, modality, index = bids.get_matching_run(dicomfile, bidsmap_old)

    # If not, see if we can find a matching run in the template
    if index is None:
        run, modality, _ = bids.get_matching_run(dicomfile, template)

    # See if we have collected the run in our new bidsmap
    if not bids.exist_run(bidsmap_new, 'DICOM', '', run):

        # Copy the filled-in run over to the new bidsmap
        bidsmap_new = bids.append_run(bidsmap_new, 'DICOM', modality, run)

        # Communicate with the user if the run was not present in bidsmap_old or in template
        LOGGER.info(f"New '{modality}' sample found: {dicomfile}")

        # Launch a GUI to ask the user for help if the new run comes from the template (i.e. was not yet in the old bidsmap)
        if gui and gui.interactive == 2 and index is None:

            # Open the interactive edit window to get the new mapping
            dialog_edit = bidseditor.EditDialog(dicomfile, modality, bidsmap_new, template, gui.subprefix, gui.sesprefix)
            dialog_edit.exec()

            # Get the result
            if dialog_edit.result() == 1:           # The user has finished the edit
                bidsmap_new = dialog_edit.target_bidsmap
            elif dialog_edit.result() in [0, 2]:    # The user has canceled / aborted the edit
                answer = QMessageBox.question(None, 'BIDSmapper', 'Do you want to abort and quit the bidsmapper?',
                                              QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if answer == QMessageBox.Yes:
                    LOGGER.info('User has quit the bidsmapper')
                    sys.exit()
            else:
                LOGGER.debug(f"Unexpected result {dialog_edit.result()} from the edit dialog")

    return bidsmap_new
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str='sub-', sesprefix: str='ses-', store: bool=False, interactive: bool=True) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and that generates
    a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are
    assumed to contain a single dataset.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param bidsmapfile:  The name of the bidsmap YAML-file
    :param templatefile: The name of the bidsmap template YAML-file
    :param subprefix:    The prefix common for all source subject-folders
    :param sesprefix:    The prefix common for all source session-folders
    :param store:        If True, the provenance samples will be stored
    :param interactive:  If True, the user will be asked for help if an unknown run is encountered
    :return:             Nothing
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder/'code'/'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder/'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f"template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _    = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality, bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix   = subprefix
        gui.sesprefix   = sesprefix
        if gui.interactive == 2:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                f"The bidsmapper will now scan {bidsfolder} and whenever "
                f"it detects a new type of scan it will ask you to identify it.\n\n"
                f"It is important that you choose the correct BIDS modality "
                f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                f"At the end you will be shown an overview of all the "
                f"different scan types and BIDScoin options (as in the "
                f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and build up the bidsmap entries
    dataformat = ''
    subjects   = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
        gui = None
    for n, subject in enumerate(subjects, 1):
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')
            if unpacked:
                store = dict(source=unpacked, target=bidscoinfolder/'provenance')
            elif store:
                store = dict(source=rawfolder, target=bidscoinfolder/'provenance')
            else:
                store = dict()

            # Loop over the different DICOM runs (series) and collect the source files
            sourcefiles = []
            dataformat  = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping: {session} (subject {n}/{len(subjects)})")
                continue

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            if dataformat == 'DICOM':
                for sourcedir in bids.lsdirs(session):
                    sourcefile = bids.get_dicomfile(sourcedir)
                    if sourcefile.name:
                        sourcefiles.append(sourcefile)

            if dataformat == 'PAR':
                sourcefiles = bids.get_parfiles(session)

            if dataformat == 'P7':
                sourcefiles = bids.get_p7file(session)

            # Update the bidsmap with the info from the source files
            for sourcefile in sourcefiles:
                bidsmap_new = build_bidsmap(dataformat, sourcefile, bidsmap_new, bidsmap_old, template, store, gui)

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                bidsmap_new = build_filesystemmap(session, bidsmap_new, bidsmap_old)

            # Update / append the plugin mapping
            if bidsmap_old['PlugIns']:
                bidsmap_new = build_pluginmap(session, bidsmap_new, bidsmap_old)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder/'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                'Could not determine the dataformat of the source data.\n'
                'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                f"The bidsmapper has finished scanning {rawfolder}\n\n"
                f"Please carefully check all the different BIDS output names "
                f"and BIDScoin options and (re)edit them to your needs.\n\n"
                f"You can always redo this step later by re-running the "
                f"bidsmapper or by just running the bidseditor tool")
            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin, bidsfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, dataformat, subprefix=subprefix, sesprefix=sesprefix)
            mainwin.show()
            app.exec()
    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
def scanparticipant(dataformat: str, session: Path, personals: dict, subid: str, sesid: str) -> bool:
    """
    Scans the session source-files for a header from which personals (e.g. Age, Sex) can be
    extracted and stores them in the personals dictionary

    :param dataformat: The format of the source data (e.g. 'DICOM' or 'PAR')
    :param session:    The full-path name of the subject/session source file/folder
    :param personals:  The dictionary with the personal information
    :param subid:      The subject-id from the bids-folder
    :param sesid:      The session-id from the bids-folder
    :return:           True if successful
    """

    # Find the (first) DICOM- or PAR/XML source file in the session folder
    sourcefile = Path()
    if dataformat == 'DICOM':
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break
    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]
    else:
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return False
    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return False   # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield('PatientAge', sourcefile)   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D'))/365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W'))/52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M'))/12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']    = bids.get_dicomfield('PatientSex',    sourcefile)
        personals['size']   = bids.get_dicomfield('PatientSize',   sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)

    return True
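
# A minimal usage sketch for scanparticipant (the session path below is hypothetical); the
# collected personals would typically end up in the participants.tsv file:
#
#   personals = {}
#   if scanparticipant('DICOM', Path('raw/sub-001/ses-01'), personals, 'sub-001', 'ses-01'):
#       print(personals)   # E.g. {'participant_id': 'sub-001', 'session_id': 'ses-01', 'age': '45', ...}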