def test_kcni_converted_to_datman_and_back_is_unmodified():
    """A KCNI ID should survive a round trip through the datman format."""
    original = 'SPN01_CMH_0001_01_SE01_MR'
    as_datman = scanid.parse(str(scanid.parse(original)))
    assert isinstance(as_datman, scanid.DatmanIdentifier)
    assert scanid.get_kcni_identifier(as_datman).orig_id == original
def test_datman_converted_to_kcni_and_back_is_unmodified():
    """A datman ID should survive a round trip through the KCNI format."""
    original = 'SPN01_CMH_0001_01_01'
    as_kcni = scanid.get_kcni_identifier(scanid.parse(original))
    assert isinstance(as_kcni, scanid.KCNIIdentifier)
    assert str(scanid.parse(str(as_kcni))) == original
def test_user_settings_id_type_respected():
    """The 'IdType' setting restricts which naming conventions parse."""
    # A datman-style ID must be rejected when only KCNI IDs are allowed
    with pytest.raises(scanid.ParseException):
        scanid.parse("DTI01_CMH_H001_01_02", settings={'IdType': 'KCNI'})
    # A KCNI-style ID must be rejected when only datman IDs are allowed
    with pytest.raises(scanid.ParseException):
        scanid.parse("DTI01_CMH_H001_01_SE02_MR",
                     settings={'IdType': 'DATMAN'})
def test_kcni_get_xnat_experiment_id_not_affected_by_field_translations():
    """STUDY field translations must not leak into the XNAT experiment ID."""
    settings = {"STUDY": {"ABC01": "ABCD"}}
    # Check a phantom ID first, then a regular subject ID
    for raw_id in ("ABC01_CMH_LEGPHA_0001_MR",
                   "ABC01_CMH_12345678_01_SE02_MR"):
        ident = scanid.parse(raw_id, settings)
        assert ident.get_xnat_experiment_id() == raw_id
def _parse_checklist(checklist, subject=None):
    """
    Support function for read_checklist(). Gets a list of existing / signed
    off sessions from a checklist.csv file. The 'checklist' argument is
    expected to be a handler for an already opened file.

    Returns: A dictionary of subject IDs (minus session/repeat num) mapped
    to their QC comments (or an empty string if it's a new entry). Or a
    single comment string if the 'subject' option was used
    """
    # In single-subject mode no dict is accumulated; if the subject is never
    # found the function falls through and returns None.
    if subject:
        entries = None
    else:
        entries = {}

    for line in checklist.readlines():
        fields = line.split()
        if not fields:
            # Ignore blank lines
            continue
        try:
            # Entries look like 'qc_<subid>.html <comment...>'; strip the
            # prefix and extension to recover the subject ID.
            subid = os.path.splitext(fields[0].replace("qc_", ""))[0]
        except (IndexError, TypeError):
            raise MetadataException(f"Found malformed checklist entry: {line}")
        try:
            # Parse only to validate; the result itself is unused.
            scanid.parse(subid)
        except scanid.ParseException:
            logger.error(
                f"Found malformed subject ID {subid} in checklist. Ignoring."
            )
            continue
        # An empty dict is falsy, so the very first entry skips this check
        # harmlessly; in single-subject mode entries is None and the check
        # is always skipped.
        if entries and subid in entries:
            logger.info(
                f"Found duplicate checklist entries for {subid}. Ignoring "
                "all except the first entry found."
            )
            continue
        comment = " ".join(fields[1:]).strip()
        if subject:
            if subid != subject:
                continue
            # First matching entry wins in single-subject mode.
            return comment
        else:
            entries[subid] = comment
    return entries
def test_kcni_subid_field_is_modified_when_settings_given():
    """Subject translation regexes should only rewrite matching subjects."""
    settings = {
        'Subject': {
            '^(100001|100002)->H\\1': '^H([0-9]+)->\\1'
        }
    }
    # A subject matching the pattern gets rewritten with the 'H' prefix
    matched = scanid.parse('PAC01_CMH_100001_01_SE01_MR', settings=settings)
    assert matched.subject == 'H100001'
    # A non-matching subject passes through untouched
    unmatched = scanid.parse('PAC01_CMH_100004_01_SE03_MR', settings=settings)
    assert unmatched.subject == '100004'
def test_kcni_study_field_is_modified_when_settings_given():
    """STUDY translations apply to both the field and the string form."""
    translated = scanid.parse("DTI01_CMH_H001_01_SE02_MR",
                              settings={'STUDY': {'DTI01': 'DTI'}})
    assert translated.study == 'DTI'
    assert str(translated) == "DTI_CMH_H001_01_02"
def _list_outputs(self):
    """Validate found file counts against the study's export info.

    Extends the parent's output listing: when a datman subject ID was
    supplied, the expected count for each file tag is looked up in the
    datman config and an IOError is raised if more files than expected
    were matched. Tags without a configured count are skipped.

    Returns:
        The outputs dict produced by the parent class.

    Raises:
        IOError: If more files than expected were found for a tag.
    """
    outputs = super(dmSelectFiles, self)._list_outputs()

    # use the datman config file to get expected counts for each type of file
    if 'dm_subject_id' in self._infields:
        try:
            ident = dm_scanid.parse(self.inputs.dm_subject_id)
            export_info = self.dm_config.get_export_info_object(
                ident.site, study=ident.study)
        except Exception:
            # Was a bare 'except:' (which also traps KeyboardInterrupt /
            # SystemExit). Either the ID failed to parse or the config
            # lookup failed; skip validation rather than abort the workflow.
            logger.warning('Invalid datman scanid:{}'.format(
                self.inputs.dm_subject_id))
            return outputs

        for file_type in outputs:
            try:
                expected_count = export_info.get_tag_info(file_type)['Count']
            except KeyError:
                # No configured count for this tag; nothing to validate.
                logger.debug(
                    'Count value for tag:{} not found.'.format(file_type))
                continue

            # A bare string is a single match; anything else is a list
            if isinstance(outputs[file_type], str):
                found_count = 1
            else:
                found_count = len(outputs[file_type])

            if found_count > expected_count:
                msg = ('Found {} {} files, expected {}.'.format(
                    found_count, file_type, expected_count))
                raise IOError(msg)

    return outputs
def find_sessions(search_str):
    """
    Used by the dashboard's search bar and so must work around fuzzy user
    input.

    Args:
        search_str (str): A session name, a parseable subject ID, or an
            arbitrary substring to fuzzy-match against session names.

    Returns:
        list: All matching Session records.
    """
    search_str = search_str.strip().upper()

    try:
        ident = scanid.parse(search_str)
    except scanid.ParseException:
        # Was a bare 'except:', which would also have hidden programming
        # errors. Not a proper ID, try fuzzy search for name match.
        query = Session.query.filter(
            func.upper(Session.name).contains(search_str))
    else:
        if ident.session:
            query = Session.query.filter(
                and_(
                    func.upper(Session.name) ==
                    ident.get_full_subjectid_with_timepoint(),
                    Session.num == ident.session))
            if not query.count():
                # No exact session hit; fall back to matching without it
                ident.session = None
        if not ident.session:
            query = Session.query.filter((func.upper(
                Session.name) == ident.get_full_subjectid_with_timepoint()))

    return query.all()
def __init__(self, subject_id, config):
    """Collect on-disk paths and series (nifti + dicom) for one subject.

    Args:
        subject_id: A datman-style subject/session ID string.
        config: A datman config object used to resolve study paths.

    Raises:
        datman.scanid.ParseException: If subject_id does not follow the
            datman naming convention.
        Exception: If the project cannot be determined from the config.
    """
    # Phantom subjects are flagged by the '_PHA_' marker in the ID
    self.is_phantom = True if '_PHA_' in subject_id else False
    # NOTE(review): __check_session presumably normalizes the session
    # portion of the ID — its definition is not visible here; confirm.
    subject_id = self.__check_session(subject_id)
    try:
        ident = scanid.parse(subject_id)
    except datman.scanid.ParseException:
        # Re-raise with a clearer, subject-specific message
        message = "{} does not match datman convention".format(subject_id)
        raise datman.scanid.ParseException(message)
    try:
        self.project = config.map_xnat_archive_to_project(subject_id)
    except Exception as e:
        message = 'Failed getting project from config: {}'.format(str(e))
        raise Exception(message)
    DatmanNamed.__init__(self, ident)
    # Per-format output folders for this subject
    self.nii_path = self.__get_path('nii', config)
    self.dcm_path = self.__get_path('dcm', config)
    self.nrrd_path = self.__get_path('nrrd', config)
    self.mnc_path = self.__get_path('mnc', config)
    self.qc_path = self.__get_path('qc', config)
    # Resources are stored per-session rather than per-subject
    self.resource_path = self.__get_path('resources', config, session=True)
    # Discover existing nifti and dicom series on disk
    self.niftis = self.__get_series(self.nii_path, ['.nii', '.nii.gz'])
    self.dicoms = self.__get_series(self.dcm_path, ['.dcm'])
    # Tag -> series lookup tables built from the found files
    self.__nii_dict = self.__make_dict(self.niftis)
    self.__dcm_dict = self.__make_dict(self.dicoms)
    self.nii_tags = self.__nii_dict.keys()
    self.dcm_tags = self.__dcm_dict.keys()
def test_parse_good_scanid():
    """Each field of a well-formed datman ID is parsed correctly."""
    parsed = scanid.parse("DTI_CMH_H001_01_02")
    for field, expected in (("study", "DTI"), ("site", "CMH"),
                            ("subject", "H001"), ("timepoint", "01"),
                            ("session", "02")):
        eq_(getattr(parsed, field), expected)
def test_parse_good_kcni_scanid():
    """Each field of a well-formed KCNI ID is parsed correctly."""
    parsed = scanid.parse("ABC01_CMH_12345678_01_SE02_MR")
    expected = {'study': 'ABC01', 'site': 'CMH', 'subject': '12345678',
                'timepoint': '01', 'session': '02'}
    for field, value in expected.items():
        assert getattr(parsed, field) == value
def test_parse_good_date_based_datman_pha_scanid():
    """Date-based phantom IDs parse with an empty timepoint."""
    pha_id = "OPT01_UTO_PHA_FBN190603"
    parsed = scanid.parse(pha_id)
    assert parsed.study == "OPT01"
    assert parsed.site == "UTO"
    assert parsed.subject == "PHA_FBN190603"
    assert parsed.timepoint == ""
    assert str(parsed) == pha_id
def __init__(self, subject_id, config):
    """Collect on-disk paths and nifti series for one subject.

    Args:
        subject_id: A datman-style subject/session ID string.
        config: A datman config object used to resolve study paths.

    Raises:
        datman.scanid.ParseException: If subject_id does not follow the
            datman naming convention.
        Exception: If the project cannot be determined from the config.
    """
    # Phantom subjects are flagged by the '_PHA_' marker in the ID
    self.is_phantom = True if "_PHA_" in subject_id else False
    # NOTE(review): __check_session presumably normalizes the session
    # portion of the ID — its definition is not visible here; confirm.
    subject_id = self.__check_session(subject_id)
    try:
        ident = scanid.parse(subject_id)
    except datman.scanid.ParseException:
        # Re-raise with a clearer, subject-specific message
        message = f"{subject_id} does not match datman convention"
        raise datman.scanid.ParseException(message)
    try:
        self.project = config.map_xnat_archive_to_project(subject_id)
    except Exception as e:
        message = f"Failed getting project from config: {str(e)}"
        raise Exception(message)
    DatmanNamed.__init__(self, ident)
    # Per-format output folders for this subject
    self.nii_path = self.__get_path("nii", config)
    self.nrrd_path = self.__get_path("nrrd", config)
    self.mnc_path = self.__get_path("mnc", config)
    self.qc_path = self.__get_path("qc", config)
    # Resources are stored per-session rather than per-subject
    self.resource_path = self.__get_path("resources", config, session=True)
    # Discover existing nifti series on disk and index them by tag
    self.niftis = self.__get_series(self.nii_path, [".nii", ".nii.gz"])
    self.__nii_dict = self.__make_dict(self.niftis)
    self.nii_tags = list(self.__nii_dict.keys())
def test_kcni_site_field_is_modified_when_settings_given():
    """SITE translations apply to both the site field and the string form."""
    parsed = scanid.parse('ABC01_UTO_12345678_01_SE02_MR',
                          settings={'SITE': {'UTO': 'UT2'}})
    assert parsed.site == 'UT2'
    assert str(parsed) == 'ABC01_UT2_12345678_01_02'
def test_parse_good_datman_scanid():
    """Each field of a well-formed datman ID is parsed correctly."""
    parsed = scanid.parse("DTI_CMH_H001_01_02")
    assert (parsed.study, parsed.site, parsed.subject, parsed.timepoint,
            parsed.session) == ("DTI", "CMH", "H001", "01", "02")
def test_parse_good_datman_PHA_scanid():
    """Phantom IDs parse with empty timepoint and session fields."""
    pha_id = "DTI_CMH_PHA_ADN0001"
    parsed = scanid.parse(pha_id)
    assert (parsed.study, parsed.site, parsed.subject) == \
        ("DTI", "CMH", "PHA_ADN0001")
    assert parsed.timepoint == ""
    assert parsed.session == ""
    assert str(parsed) == pha_id
def test_parse_PHA_scanid():
    """Phantom IDs parse with empty timepoint/session fields."""
    parsed = scanid.parse("DTI_CMH_PHA_ADN0001")
    for got, want in ((parsed.study, "DTI"), (parsed.site, "CMH"),
                      (parsed.subject, "PHA_ADN0001"),
                      (parsed.timepoint, ""), (parsed.session, ""),
                      (str(parsed), "DTI_CMH_PHA_ADN0001")):
        eq_(got, want)
def test_parse_PHA_scanid():
    """Phantom IDs round-trip and leave timepoint/session empty."""
    pha_id = "DTI_CMH_PHA_ADN0001"
    parsed = scanid.parse(pha_id)
    eq_(parsed.study, "DTI")
    eq_(parsed.site, "CMH")
    eq_(parsed.subject, "PHA_ADN0001")
    eq_(parsed.timepoint, "")
    eq_(parsed.session, "")
    eq_(str(parsed), pha_id)
def get_sub_ident(subject):
    '''
    Convenience function for wrapping try/catch around parsing subject
    identifier

    Arguments:
        subject             Datman style subject ID
    '''
    # First try the ID as given
    try:
        return scan_ident.parse(subject)
    except scan_ident.ParseException:
        pass
    # Fall back to assuming a missing session number
    try:
        return scan_ident.parse(subject + '_01')
    except scan_ident.ParseException:
        logger.error('{s} and {s}_01, is invalid!'.format(s=subject))
        raise
def get_bids_name(subject):
    '''
    Helper function to convert datman to BIDS name

    Arguments:
        subject             Datman style subject ID

    Raises:
        scan_ident.ParseException: If neither subject nor subject + '_01'
            is a valid datman ID.
    '''
    # Delegate to get_sub_ident so the "retry with _01 appended" fallback
    # logic lives in exactly one place instead of being duplicated here.
    return get_sub_ident(subject).get_bids_name()
def test_get_kcni_identifier_handles_already_kcni():
    """get_kcni_identifier accepts both raw strings and parsed KCNI IDs."""
    raw = "ABC01_UTO_12345678_01_SE02_MR"
    for source in (raw, scanid.parse(raw)):
        converted = scanid.get_kcni_identifier(source)
        assert isinstance(converted, scanid.KCNIIdentifier)
        assert converted.orig_id == raw
def test_id_field_changes_correct_for_repeat_conversions():
    """Field translations must remain stable across repeated conversions."""
    settings = {'STUDY': {'AND01': 'ANDT'}, 'SITE': {'UTO': 'CMH'}}
    kcni = "AND01_UTO_0001_01_SE01_MR"
    datman = "ANDT_CMH_0001_01_01"

    # KCNI -> datman -> KCNI
    dm_ident = scanid.parse(str(scanid.parse(kcni, settings)), settings)
    assert str(dm_ident) == datman
    assert scanid.get_kcni_identifier(dm_ident, settings).orig_id == kcni

    # Datman -> KCNI -> datman
    kcni_ident = scanid.get_kcni_identifier(scanid.parse(datman, settings),
                                            settings)
    assert kcni_ident.orig_id == kcni
    assert str(scanid.parse(str(kcni_ident), settings)) == datman
def main():
    """Interactively report the latest upload per site for an XNAT study.

    NOTE(review): This is legacy Python 2 code (print statements,
    raw_input). Several names (uploaddate, date, uploaddiff,
    datetimeformat, dttostr, printdict, CON, Interface) are presumably
    defined at module level outside this view — confirm before editing.
    """
    quit = "n"
    # XNAT credentials are taken from the environment
    username = os.environ["XNAT_USER"]
    password = os.environ["XNAT_PASS"]
    central = Interface(server="https://xnat.imaging-genetics.camh.ca",
                        user=username,
                        password=password)
    # Keep prompting for studies until the user quits
    while (quit != "y"):
        study = raw_input("Which study do you want to track scans for? ")
        con = CON.config()
        try:
            projects = set(con.get_xnat_projects(study))
        except ValueError:
            print "Study does not exist"
            return 0
        tracking_table = dict()
        for project in projects:
            constraints = [('xnat:mrSessionData/PROJECT', '=', project)]
            table = central.select('xnat:mrSessionData', [
                'xnat:mrSessionData/SUBJECT_LABEL',
                'xnat:mrSessionData/DATE',
                'xnat:mrSessionData/INSERT_DATE'
            ]).where(constraints)
            # Sort sessions by insert date (column index 2)
            sort = sorted(table.items(), key=operator.itemgetter(2))
            for item in sort:
                #print(item)
                site_name = scanid.parse(item[0]).site
                # Phantoms are tracked separately per phantom type.
                # NOTE(review): nesting of the FBN/ADN checks under the
                # phantom branch is reconstructed — confirm against the
                # original script.
                if scanid.is_phantom(item[0]):
                    site_name += "_PHA"
                    if "FBN" in item[0]:
                        site_name += "_FBN"
                    elif "ADN" in item[0]:
                        site_name += "_ADN"
                site_dict = tracking_table.setdefault(site_name, dict())
                last_update = site_dict.setdefault(uploaddate, datetime.min)
                current_update = datetime.strptime(item[2], datetimeformat)
                # Keep only the most recent upload per site, remembering
                # the gap since the previous one
                if last_update < current_update:
                    site_dict[date] = item[1]
                    site_dict[uploaddate] = current_update
                    if last_update == datetime.min:
                        site_dict[uploaddiff] = "No Other Uploads"
                    else:
                        site_dict[uploaddiff] = dttostr(
                            current_update - last_update)
                #break
        printdict(tracking_table)
        quit = raw_input("Quit? y/n ")
def find_scans(search_str):
    """
    Used by the dashboard's search bar and so must work around fuzzy user
    input.

    Args:
        search_str (str): A scan filename, a subject ID, or an arbitrary
            substring to fuzzy-match against scan fields.

    Returns:
        list: All matching Scan records.
    """
    search_str = search_str.strip().upper()

    try:
        ident, tag, series, _ = scanid.parse_filename(search_str)
    except scanid.ParseException:
        # Was a bare 'except:'; narrowed so programming errors surface.
        try:
            ident = scanid.parse(search_str)
        except scanid.ParseException:
            # Doesnt match a file name or a subject ID so fuzzy search
            # for...
            # matching scan name
            query = Scan.query.filter(
                func.upper(Scan.name).contains(search_str))
            if query.count() == 0:
                # or matching subid
                query = Scan.query.filter(
                    func.upper(Scan.timepoint).contains(search_str))
            if query.count() == 0:
                # or matching tags
                query = Scan.query.filter(
                    func.upper(Scan.tag).contains(search_str))
            if query.count() == 0:
                # or matching series description
                query = Scan.query.filter(
                    func.upper(Scan.description).contains(search_str))
        else:
            if ident.session:
                query = Scan.query.filter(
                    and_(
                        func.upper(Scan.timepoint) ==
                        ident.get_full_subjectid_with_timepoint(),
                        Scan.repeat == int(ident.session)))
                if not query.count():
                    # No hit with the session number; retry without it
                    ident.session = None
            if not ident.session:
                query = Scan.query.filter(
                    (func.upper(Scan.timepoint) ==
                     ident.get_full_subjectid_with_timepoint()))
    else:
        # Full filename given; match on the reconstructed scan name
        name = "_".join(
            [ident.get_full_subjectid_with_timepoint_session(), tag, series])
        query = Scan.query.filter(func.upper(Scan.name).contains(name))

    return query.all()
def get_freesurfer_folders(freesurfer_dir, qc_subjects):
    """Map each site to the non-empty freesurfer folders of its subjects."""
    by_site = {}
    for subject in qc_subjects:
        try:
            ident = sid.parse(subject)
        except sid.ParseException:
            logger.error("Subject {} from checklist does not match datman "
                         "convention. Skipping".format(subject))
            continue
        subject_dir = os.path.join(freesurfer_dir, subject)
        # Only keep folders that exist and actually contain output
        if os.path.exists(subject_dir) and os.listdir(subject_dir):
            by_site.setdefault(ident.site, []).append(subject_dir)
    return by_site
def validate_subject_id(subject_id, config):
    """Ensures subject ID correctness based on configuration settings.

    This checks that a given ID:
        1. Matches a supported naming convention
        2. Matches a study tag that's defined in the configuration file
           for the current study
        3. Matches a site that is defined for the study tag

    Args:
        subject_id (:obj:`str`): A subject ID to check.
        config (:obj:`datman.config.config`): A datman config instance
            initialized to the study the subject ID should belong to.

    Raises:
        ParseException: When an ID is given that does not match any
            supported convention or that contains incorrect fields for
            the current study.

    Returns:
        :obj:`datman.scanid.Identifier`: A parsed datman identifier
            matching subject_id
    """
    try:
        id_map = config.get_key("ID_MAP")
    except datman.config.UndefinedSetting:
        id_map = None

    ident = scanid.parse(subject_id, id_map)
    tags = config.get_study_tags()

    if ident.study not in tags:
        raise ParseException(
            f"Subject id {ident} has undefined "
            f"study code {ident.study}"
        )

    if ident.site not in tags[ident.study]:
        raise ParseException(
            f"Subject id {ident} has undefined "
            f"site {ident.site} for study "
            f"{ident.study}"
        )

    return ident
def update_checklist(entries, study=None, config=None, path=None):
    """
    Handles QC checklist updates. Will preferentially update the dashboard
    (ignoring any 'checklist.csv' files) unless the dashboard is not
    installed or a specific path is given to a file.

    <entries> should be a dictionary with subject IDs (minus session/repeat)
    as the keys and qc entries as the value (with an empty string for
    new/blank QC entries)

    This will raise a MetadataException if any part of the update fails for
    any entry.
    """
    if not isinstance(entries, dict):
        raise MetadataException(
            "Checklist entries must be in dictionary "
            "format with subject ID as the key and "
            "comment as the value (empty string for new, "
            "unreviewed subjects)"
        )

    if dashboard.dash_found and not path:
        _update_qc_reviewers(entries)
        return

    # No dashboard, or path was given, so update file system.
    checklist_path = locate_metadata(
        "checklist.csv", study=study, config=config, path=path
    )
    old_entries = read_checklist(path=checklist_path)

    # Merge with existing list
    for subject in entries:
        try:
            ident = scanid.parse(subject)
        except scanid.ParseException:
            raise MetadataException(
                f"Attempt to add invalid subject ID {subject} to QC checklist"
            )
        # Fix: grab the comment BEFORE normalizing the key. The old code
        # rebound 'subject' to the stripped ID first and then looked up
        # entries[subject], which raised KeyError whenever the caller's ID
        # included a session/repeat number.
        comment = entries[subject]
        subject = ident.get_full_subjectid_with_timepoint()
        old_entries[subject] = comment

    # Reformat to expected checklist line format
    lines = [f"qc_{sub}.html {old_entries[sub]}\n" for sub in old_entries]
    write_metadata(sorted(lines), checklist_path)
def get_nuiter_settings(subject_id):
    """
    Returns the site specific nu_iter settings for the pipeline

    Note as this is run as a function node I'm not sure how to handle logging

    >>> get_nuiter_settings('SPN01_CMH_0001_01')
    4
    >>> get_nuiter_settings('SPN01_MRC_0001_01')
    8
    """
    # Imports are local because nipype function nodes execute the body in
    # isolation.
    import datman.config as cfg
    import datman.scanid as scanid

    default_value = '-nuiterations 4'

    config = cfg.config()
    ident = scanid.parse(subject_id)
    site = ident.site

    try:
        study = config.map_xnat_archive_to_project(ident.study)
        config.set_study(study)
    except ValueError:
        # Study not defined in config; fall back to the default
        return default_value

    try:
        settings = config.get_key('freesurfer')
        nu_iter_settings = settings['nu_iter']
    except KeyError:
        # Freesurfer setting not found; fall back to the default
        return default_value

    try:
        if site in nu_iter_settings:
            iter_count = nu_iter_settings[site]
        elif 'DEFAULT' in nu_iter_settings:
            iter_count = nu_iter_settings['DEFAULT']
        else:
            # Fix: previously a dict containing neither this site nor a
            # 'DEFAULT' key left iter_count unbound, raising NameError at
            # the return below.
            return default_value
    except TypeError:
        # in case nu_iter isn't defined as a dict
        iter_count = nu_iter_settings

    return '-nuiterations {}'.format(iter_count)
def check_checklist(session_name, study=None):
    """Reads the checklist identified from the session_name

    If there is an entry returns the comment, otherwise returns None
    """
    # Validate the session ID before doing any file work
    try:
        scanid.parse(session_name)
    except scanid.ParseException:
        logger.warning('Invalid session id:{}'.format(session_name))
        return

    if study:
        cfg = datman.config.config(study=study)
    else:
        cfg = datman.config.config(study=session_name)

    try:
        checklist_path = os.path.join(cfg.get_path('meta'), 'checklist.csv')
    except KeyError:
        logger.warning('Unable to identify meta path for study:{}'.format(
            cfg.study_name))
        return

    try:
        with open(checklist_path, 'r') as checklist:
            lines = checklist.readlines()
    except IOError:
        logger.warning('Unable to open checklist file:{} for reading'.format(
            checklist_path))
        return

    wanted = 'qc_{}'.format(session_name)
    for line in lines:
        parts = line.split(None, 1)
        if not parts:
            # fix for empty lines
            continue
        if os.path.splitext(parts[0])[0] == wanted:
            # Entry found; a missing comment counts as an empty string
            return parts[1].strip() if len(parts) > 1 else ''
    return None
def test_parse_garbage():
    # NOTE(review): no assertion here — presumably this test relies on a
    # decorator (e.g. nose's @raises(ParseException)) applied outside this
    # view. Confirm that a parse failure on garbage input is actually
    # checked somewhere.
    scanid.parse("lkjlksjdf")
def test_PHA_timepoint():
    """For phantoms the subject+timepoint string is the whole ID."""
    pha = scanid.parse("DTI_CMH_PHA_ADN0001")
    eq_(pha.get_full_subjectid_with_timepoint(), 'DTI_CMH_PHA_ADN0001')
def test_subject_id_with_timepoint():
    """The subject+timepoint string drops the session number."""
    parsed = scanid.parse("DTI_CMH_H001_01_02")
    eq_(parsed.get_full_subjectid_with_timepoint(), 'DTI_CMH_H001_01')
def test_get_full_subjectid():
    """The full subject ID drops both timepoint and session."""
    parsed = scanid.parse("DTI_CMH_H001_01_02")
    eq_(parsed.get_full_subjectid(), "DTI_CMH_H001")
def test_parse_empty():
    # NOTE(review): no assertion here — presumably this test relies on a
    # decorator (e.g. nose's @raises(ParseException)) applied outside this
    # view. Confirm the empty-string failure mode is actually checked.
    scanid.parse("")
def test_parse_None():
    # NOTE(review): no assertion here — presumably this test relies on a
    # decorator (e.g. nose's @raises(ParseException)) applied outside this
    # view. Confirm the None-input failure mode is actually checked.
    scanid.parse(None)