def get_config(study):
    """
    Retrieves the configuration information for this site and checks that
    the expected paths are all defined.

    Will raise KeyError if an expected path has not been defined.
    """
    logger.info('Loading config')

    try:
        config = datman.config.config(study=study)
    except Exception:
        logger.error("Cannot find configuration info for study {}"
                     "".format(study))
        sys.exit(1)

    required_paths = ['dcm', 'nii', 'qc', 'std', 'meta']
    for path in required_paths:
        try:
            config.get_path(path)
        except KeyError:
            logger.error('Path {} not found for project: {}'.format(
                path, study))
            sys.exit(1)

    return config

def get_xnat_config(config, site):
    try:
        cred_file = config.get_key('XNAT_source_credentials', site=site)
        server = config.get_key('XNAT_source', site=site)
        archive = config.get_key('XNAT_source_archive', site=site)
    except KeyError:
        raise KeyError("Missing configuration. Please ensure study or site "
                       "configuration defines all needed values: "
                       "XNAT_source, XNAT_source_credentials, "
                       "XNAT_source_archive. See help string for more "
                       "details.")
    destination = config.get_path('zips')

    # User may provide full path or name of a file in metadata folder
    if os.path.exists(cred_file):
        credentials_path = cred_file
    else:
        credentials_path = os.path.join(config.get_path('meta'), cred_file)

    if not os.path.exists(credentials_path):
        logger.critical("Can't find credentials file at {} or {}. Please "
                        "check that 'XNAT_source_credentials' is set "
                        "correctly.".format(cred_file, credentials_path))
        sys.exit(1)

    return credentials_path, server, archive, destination

def get_config(study):
    """
    Retrieves the configuration information for this site and checks that
    the expected paths are all defined.

    Will raise KeyError if an expected path has not been defined for this
    study.
    """
    logger.info('Loading config')

    try:
        config = datman.config.config(study=study)
    except Exception:
        logger.error("Cannot find configuration info for study "
                     "{}".format(study))
        sys.exit(1)

    required_paths = ['dcm', 'nii', 'qc', 'std', 'meta']
    for path in required_paths:
        try:
            config.get_path(path)
        except KeyError:
            logger.error('Path {} not found for project: {}'
                         .format(path, study))
            sys.exit(1)

    return config

def get_new_subjects(config):
    qc_dir = config.get_path('qc')
    nii_dir = config.get_path('nii')

    subject_nii_dirs = glob.glob(os.path.join(nii_dir, '*'))
    all_subs = [os.path.basename(path) for path in subject_nii_dirs]

    if REWRITE:
        return all_subs

    # Finished subjects are those that have an html file in their qc output
    # dir
    html_pages = glob.glob(os.path.join(qc_dir, '*/*.html'))
    subject_qc_dirs = [os.path.dirname(qc_path) for qc_path in html_pages]
    finished_subs = [os.path.basename(path) for path in subject_qc_dirs]

    # Finished phantoms are those that have a non-empty folder
    # (so if qc outputs are missing, delete the folder to get it to re-run!)
    qced_phantoms_paths = [
        subj for subj in glob.glob(os.path.join(qc_dir, '*_PHA_*'))
        if len(os.listdir(subj)) > 0
    ]
    finished_phantoms = [
        os.path.basename(path) for path in qced_phantoms_paths
    ]

    new_subs = filter(lambda sub: sub not in finished_subs, all_subs)
    # also filter out the finished phantoms
    new_subs = filter(lambda sub: sub not in finished_phantoms, new_subs)

    return new_subs

def main():
    arguments = docopt(__doc__)
    url = arguments['--URL']
    study = arguments['<study>']
    output_path = arguments['--output']

    config = datman.config.config(study=study)
    meta_path = config.get_path('meta')
    token_file = 'ahrc_token'
    token_path = os.path.join(meta_path, token_file)
    output_path = os.path.join(config.get_path('data'), output_path)

    token = get_token(token_path)
    payload = get_payload(token)

    REDCap_variables = ['record_id', 'redcap_event_name', 'demo_sex_birth',
                        'demo_age_study_entry', 'demo_highest_grade_self',
                        'term_premature_yn']
    data = make_rest(url, payload, REDCap_variables)

    column_headers = ['record_id', 'group', 'sex', 'age', 'education',
                      'terminated']
    make_csv(output_path, data, column_headers)

def get_xnat_config(config, site):
    try:
        cred_file = config.get_key('XNAT_source_credentials', site=site)
        server = config.get_key('XNAT_source', site=site)
        archive = config.get_key('XNAT_source_archive', site=site)
    except datman.config.UndefinedSetting:
        raise KeyError("Missing configuration. Please ensure study or site "
                       "configuration defines all needed values: "
                       "XNAT_source, XNAT_source_credentials, "
                       "XNAT_source_archive. See help string for more "
                       "details.")
    destination = config.get_path('zips')

    # User may provide full path or name of a file in metadata folder
    if os.path.exists(cred_file):
        credentials_path = cred_file
    else:
        credentials_path = os.path.join(config.get_path('meta'), cred_file)

    if not os.path.exists(credentials_path):
        logger.critical("Can't find credentials file at {} or {}. Please "
                        "check that 'XNAT_source_credentials' is set "
                        "correctly.".format(cred_file, credentials_path))
        sys.exit(1)

    return credentials_path, server, archive, destination

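# Usage sketch (not part of the original module): shows how get_config() and
# get_xnat_config() might be chained for a single site. 'STUDY' and 'CMH' are
# hypothetical study/site codes, and a working datman configuration is
# assumed to exist on the system.
def _example_fetch_xnat_settings():
    config = get_config('STUDY')
    credentials_path, server, archive, destination = get_xnat_config(
        config, 'CMH')
    logger.debug("Would pull archive {} from {} into {}".format(
        archive, server, destination))
    return credentials_path
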
def get_new_subjects(config):
    qc_dir = config.get_path('qc')
    nii_dir = config.get_path('nii')

    # Finished subjects are those that have an html file in their qc output
    # dir
    html_pages = glob.glob(os.path.join(qc_dir, '*/*.html'))
    subject_qc_dirs = [os.path.dirname(qc_path) for qc_path in html_pages]
    finished_subs = [os.path.basename(path) for path in subject_qc_dirs]

    # Finished phantoms are those that have a non-empty folder
    # (so if qc outputs are missing, delete the folder to get it to re-run!)
    qced_phantoms_paths = [
        subj for subj in glob.glob(os.path.join(qc_dir, '*_PHA_*'))
        if len(os.listdir(subj)) > 0
    ]
    finished_phantoms = [
        os.path.basename(path) for path in qced_phantoms_paths
    ]

    subject_nii_dirs = glob.glob(os.path.join(nii_dir, '*'))
    all_subs = [os.path.basename(path) for path in subject_nii_dirs]

    new_subs = filter(lambda sub: sub not in finished_subs, all_subs)
    # also filter out the finished phantoms
    new_subs = filter(lambda sub: sub not in finished_phantoms, new_subs)

    return new_subs

def main():
    arguments = docopt(__doc__)
    study = arguments["<study>"]
    experiment = arguments["--experiment"]
    length_file = arguments["--lengths"]
    timing_file = arguments["--timings"]
    task_regex = arguments["--regex"]
    debug = arguments["--debug"]

    if debug:
        logger.setLevel(logging.DEBUG)

    if not length_file:
        length_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "../assets/EA-vid-lengths.csv",
        )

    if not timing_file:
        timing_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "../assets/EA-timing.csv",
        )

    config = datman.config.config(study=study)
    task_path = config.get_path("task")
    nii_path = config.get_path("nii")

    if not experiment:
        experiments = os.listdir(task_path)
    else:
        experiments = [experiment]
        logger.info(f"Running EA parsing for {experiment}")

    for experiment in experiments:
        logger.info(f"Parsing {experiment}...")

        try:
            ident = datman.scanid.parse(experiment)
        except datman.scanid.ParseException:
            logger.error(
                f"Skipping task folder with malformed ID {experiment}")
            continue

        exp_task_dir = os.path.join(task_path, experiment)
        sub_nii = os.path.join(nii_path,
                               ident.get_full_subjectid_with_timepoint())

        if not os.path.isdir(exp_task_dir):
            logger.warning(f"{experiment} has no task directory "
                           f"{exp_task_dir}, skipping")
            continue

        for task_file in glob.glob(os.path.join(exp_task_dir, task_regex)):
            parse_task(ident, task_file, sub_nii, length_file, timing_file)

def delete(config, key, folder=None, files=None):
    if files and not isinstance(files, list):
        files = [files]

    try:
        path = config.get_path(key)
    except Exception:
        return

    if folder:
        path = os.path.join(path, folder)

    if not os.path.exists(path):
        return

    if files is None:
        shutil.rmtree(path)
        return

    for item in files:
        matches = glob.glob(os.path.join(path, item + "*"))
        for match in matches:
            os.remove(match)

    if not os.listdir(path):
        os.rmdir(path)

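# Usage sketch (assumption, not original code): delete() can clear an entire
# folder under a configured path, or only remove specific named files within
# it. 'nii' and 'qc' are path keys used elsewhere in this listing; the
# subject ID and series names below are hypothetical.
def _example_cleanup(config):
    # Remove two specific series from a subject's nifti folder; the folder
    # itself is removed only if it ends up empty.
    delete(config, 'nii', folder='STUDY_CMH_0001_01',
           files=['SERIES_T1', 'SERIES_T2'])
    # With no file list, the whole folder is removed.
    delete(config, 'qc', folder='STUDY_CMH_0001_01')
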
def run_all_subjects(config, arguments):
    t1_tag = arguments['--t1-tag']
    t2_tag = arguments['--t2-tag']
    blacklist_file = arguments['--blacklist']
    walltime = arguments['--walltime']

    subjects = utils.get_subject_metadata(config)
    if blacklist_file:
        subjects = add_pipeline_blacklist(subjects, blacklist_file)

    hcp_fs_path = config.get_path('hcp_fs')
    logs = make_log_dir(hcp_fs_path)

    update_aggregate_log(hcp_fs_path, subjects)  # Update FS log ?

    commands = []
    for subject in subjects:
        if is_completed(subject, hcp_fs_path):
            continue
        if is_started(subject, hcp_fs_path):
            logger.debug("{} has partial outputs and may still be running. "
                         "Skipping".format(subject))
            continue

        scan = datman.scan.Scan(subject, config)
        blacklisted_files = subjects[subject]
        try:
            t1 = get_anatomical_file(scan, t1_tag, blacklisted_files)
            t2 = get_anatomical_file(scan, t2_tag, blacklisted_files)
        except ValueError as e:
            logger.error("Skipping subject. Reason: {}".format(e))
            continue

        cmd = create_command(config.study_name, subject, t1, t2, arguments)
        submit_job(cmd, subject, logs, walltime=walltime)

def main():
    args = docopt(__doc__)
    study = args["<study>"]

    config = datman.config.config(study=study)
    resources = get_resources_dirs(config)
    out_dir = config.get_path("task")
    regex = get_regex(config)

    try:
        os.mkdir(out_dir)
    except OSError:
        pass

    for resource_folder in resources:
        task_files = get_task_files(regex, resource_folder)

        if not task_files:
            continue

        session = os.path.basename(resource_folder)
        dest_folder = os.path.join(out_dir, session)

        try:
            os.mkdir(dest_folder)
        except OSError:
            pass

        renamed_files = resolve_duplicate_names(task_files)

        for fname in renamed_files:
            dest_path = os.path.join(dest_folder, fname)
            link_task_file(renamed_files[fname], dest_path)
            add_to_dashboard(session, dest_path)

def run_all_subjects(config, arguments):
    t1_tag = arguments['--t1-tag']
    t2_tag = arguments['--t2-tag']
    blacklist_file = arguments['--blacklist']
    walltime = arguments['--walltime']

    subjects = config.get_subject_metadata()
    if blacklist_file:
        subjects = add_pipeline_blacklist(subjects, blacklist_file)

    hcp_fs_path = config.get_path('hcp_fs')
    logs = make_log_dir(hcp_fs_path)

    update_aggregate_log(hcp_fs_path, subjects)  # Update FS log ?

    commands = []
    for subject in subjects:
        if is_completed(subject, hcp_fs_path):
            continue
        if is_started(subject, hcp_fs_path):
            logger.debug("{} has partial outputs and may still be running. "
                         "Skipping".format(subject))
            continue

        scan = datman.scan.Scan(subject, config)
        blacklisted_files = subjects[subject]
        try:
            t1 = get_anatomical_file(scan, t1_tag, blacklisted_files)
            t2 = get_anatomical_file(scan, t2_tag, blacklisted_files)
        except ValueError as e:
            logger.error("Skipping subject. Reason: {}".format(e))
            continue

        cmd = create_command(config.study_name, subject, t1, t2, arguments)
        submit_job(cmd, subject, logs, walltime=walltime)

def qc_all_scans(config):
    """
    Creates a dm-qc-report.py command for each scan and submits all jobs to
    the queue. Phantom jobs are submitted in chained mode, which means they
    will run one at a time. This is currently needed because some of the
    phantom pipelines use expensive and limited software licenses
    (e.g., MATLAB).
    """
    human_commands = []
    phantom_commands = []

    nii_dir = config.get_path('nii')

    for path in os.listdir(nii_dir):
        subject = os.path.basename(path)
        command = make_qc_command(subject, config.study_name)

        if '_PHA_' in subject:
            phantom_commands.append(command)
        else:
            human_commands.append(command)

    if human_commands:
        logger.debug('submitting human qc jobs\n{}'.format(human_commands))
        submit_qc_jobs(human_commands)

    if phantom_commands:
        logger.debug('running phantom qc jobs\n{}'.format(phantom_commands))
        submit_qc_jobs(phantom_commands, chained=True)

def get_redcap_token(config, redcap_cred):
    if not redcap_cred:
        redcap_cred = os.path.join(config.get_path('meta'), 'redcap-token')

    try:
        token = datman.utils.read_credentials(redcap_cred)[0]
    except IndexError:
        logger.error("REDCap credential file {} is empty.".format(
            redcap_cred))
        sys.exit(1)

    return token

def set_output_name(output_loc, config):
    if not output_loc:
        output_loc = config.get_path('meta')

    output_name = os.path.join(
        output_loc,
        '{}-{}-overview.csv'.format(config.study_name, datetime.date.today()))
    logger.info("Output location set to: {}".format(output_name))

    return output_name

def get_nifti_dir(config):
    try:
        nii_dir = config.get_path('nii')
    except KeyError:
        logger.error("'nii' path is not defined for study {}"
                     "".format(config.study_name))
        sys.exit(1)

    if not os.path.exists(nii_dir):
        logger.error("{} does not exist. No data to process.".format(nii_dir))
        sys.exit(1)

    return nii_dir

def main():
    args = docopt(__doc__)
    study = args['<study>']

    config = datman.config.config(study=study)
    subjects = datman.dashboard.get_study_subjects(study)
    resources_dir = config.get_path('resources')
    out_dir = config.get_path('task')
    regex = get_regex(config)

    try:
        os.mkdir(out_dir)
    except OSError:
        pass

    for subject in subjects:
        sessions = glob.glob(os.path.join(resources_dir, subject + '_*'))

        if not sessions:
            continue

        for resource_folder in sessions:
            task_files = get_task_files(regex, resource_folder)

            if not task_files:
                continue

            session = os.path.basename(resource_folder)
            dest_folder = os.path.join(out_dir, session)

            try:
                os.mkdir(dest_folder)
            except OSError:
                pass

            renamed_files = resolve_duplicate_names(task_files)

            for fname in renamed_files:
                dest_path = os.path.join(dest_folder, fname)
                link_task_file(renamed_files[fname], dest_path)
                add_to_dashboard(session, dest_path)

def run_pipeline(config, subject, t1, t2):
    if not input_exists(t1) or not input_exists(t2):
        sys.exit(1)

    base_dir = utils.define_folder(config.get_path('hcp_fs'))
    dest_dir = utils.define_folder(os.path.join(base_dir, subject))

    with utils.cd(dest_dir):
        hcp_pipeline = "hcp-freesurfer.sh {} {} {} {}".format(
            base_dir, subject, t1, t2)
        rtn, out = utils.run(hcp_pipeline, dryrun=DRYRUN)
        if rtn:
            logger.error("hcp-freesurfer.sh exited with non-zero status "
                         "code. Output: {}".format(out))

def qc_subject(subject, config):
    """
    subject:    The created Scan object for the subject_id of this run
    config:     The settings obtained from project_settings.yml

    Returns the path to the qc_<subject_id>.html file
    """
    report_name = os.path.join(subject.qc_path,
                               'qc_{}.html'.format(subject.full_id))

    if os.path.isfile(report_name):
        if not REWRITE:
            logger.debug("{} exists, skipping.".format(report_name))
            return
        os.remove(report_name)

    # header diff
    header_diffs = os.path.join(subject.qc_path, 'header-diff.log')
    if not os.path.isfile(header_diffs):
        run_header_qc(subject, config.get_path('std'), header_diffs)

    expected_files = find_expected_files(subject, config)

    try:
        # Update checklist even if report generation fails
        checklist_path = os.path.join(config.get_path('meta'),
                                      'checklist.csv')
        add_report_to_checklist(report_name, checklist_path)
    except Exception:
        logger.error("Error adding {} to checklist.".format(subject.full_id))

    try:
        generate_qc_report(report_name, subject, expected_files,
                           header_diffs, config)
    except Exception:
        logger.error("Exception raised during qc-report generation for {}. "
                     "Removing .html page.".format(subject.full_id),
                     exc_info=True)
        if os.path.exists(report_name):
            os.remove(report_name)

    return report_name

def collect_blacklisted_items(blacklist, config, ignored_paths):
    search_paths = get_search_paths(config, ignored_paths)

    file_list = []
    for path in search_paths:
        full_path = config.get_path(path)
        if not os.path.exists(full_path):
            continue
        for item in blacklist:
            found_files = find_files(full_path, item)
            if found_files:
                file_list.extend(found_files)

    return file_list

def get_xnat_credentials(config, xnat_cred):
    if not xnat_cred:
        xnat_cred = os.path.join(config.get_path('meta'), 'xnat-credentials')

    logger.debug("Retrieving xnat credentials from {}".format(xnat_cred))
    try:
        credentials = read_credentials(xnat_cred)
        user_name = credentials[0]
        password = credentials[1]
    except IndexError:
        logger.error("XNAT credential file {} is missing the user name or "
                     "password.".format(xnat_cred))
        sys.exit(1)

    return user_name, password

def get_manifests(timepoint):
    """Collects and organizes all QC manifest files for a timepoint.

    Args:
        timepoint (:obj:`dashboard.models.Timepoint`): A timepoint from
            the database.

    Returns:
        A dictionary mapping session numbers to a dictionary of input nifti
        files and their manifest contents. For example:
        {
            1: {nifti_1: nifti_1_manifest, nifti_2: nifti_2_manifest},
            2: {nifti_3: nifti_3_manifest}
        }
    """
    study = timepoint.get_study().id
    config = datman.config.config(study=study)

    try:
        qc_dir = config.get_path("qc")
    except UndefinedSetting:
        logger.error("No QC path defined for study {}".format(study))
        return {}

    qc_path = os.path.join(qc_dir, str(timepoint))

    found = {}
    for num in timepoint.sessions:
        session = timepoint.sessions[num]
        found[num] = {}

        manifests = glob.glob(
            os.path.join(qc_path, f"{session}_*_manifest.json"))

        for manifest in manifests:
            contents = read_json(manifest)
            _, _, _, description = datman.scanid.parse_filename(manifest)
            scan_name = os.path.basename(manifest).replace(
                f"{description}.json", "").strip("_")
            # Needed to ensure ordering respected
            ordered_contents = OrderedDict(
                sorted(contents.items(),
                       key=lambda x: x[1].get("order", 999)))
            found[num][scan_name] = ordered_contents

    return found

def get_xnat_credentials(config, xnat_cred):
    if not xnat_cred:
        xnat_cred = os.path.join(config.get_path("meta"), "xnat-credentials")

    logger.debug(f"Retrieving xnat credentials from {xnat_cred}")
    try:
        credentials = read_credentials(xnat_cred)
        user_name = credentials[0]
        password = credentials[1]
    except IndexError:
        logger.error(
            f"XNAT credential file {xnat_cred} is missing the user name "
            "or password.")
        sys.exit(1)

    return user_name, password

def read_token(config):
    """Read the REDCap token from a file defined by the Datman config.

    Args:
        config (:obj:`datman.config.config`): A datman config object for
            a specific study.
    """
    metadata = config.get_path("meta")
    token_file = config.get_key("RedcapToken")
    token_path = os.path.join(metadata, token_file)

    try:
        with open(token_path, "r") as fh:
            return fh.readline().strip()
    except Exception as e:
        logger.error(
            f"Failed to read RedCap token at {token_path}. Reason - {e}")

def locate_metadata(filename, study=None, subject=None, config=None,
                    path=None):
    if not (path or study or config or subject):
        raise MetadataException(
            f"Can't locate metadata file {filename} without either "
            "1) a full path to the file 2) a study or "
            "subject ID or 3) a datman.config "
            "object"
        )

    if path:
        file_path = path
    else:
        if not config:
            given_study = subject or study
            config = datman.config.config(study=given_study)
        file_path = os.path.join(config.get_path("meta"), filename)

    return file_path

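# Usage sketch (assumption, not original code): locate_metadata() resolves a
# metadata file either by building a datman config from a study/subject ID
# or by taking an explicit path as-is. 'STUDY' and the path below are
# hypothetical.
def _example_locate_metadata():
    # Resolve via a study ID (a datman.config.config is built internally and
    # the filename is joined onto the study's metadata path).
    checklist = locate_metadata('checklist.csv', study='STUDY')
    # Or bypass the config lookup by giving a full path, which is returned
    # unchanged.
    blacklist = locate_metadata(
        'blacklist.csv', path='/archive/data/STUDY/metadata/blacklist.csv')
    return checklist, blacklist
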
def qc_subject(subject, config):
    """
    subject:    The created Scan object for the subject_id of this run
    config:     The settings obtained from project_settings.yml

    Returns the path to the qc_<subject_id>.html file
    """
    report_name = os.path.join(subject.qc_path,
                               'qc_{}.html'.format(subject.full_id))
    # header diff
    header_diffs = os.path.join(subject.qc_path, 'header-diff.log')

    if os.path.isfile(report_name):
        if not REWRITE:
            logger.debug("{} exists, skipping.".format(report_name))
            return
        os.remove(report_name)
        # This probably exists if you're rewriting, and needs to be removed
        # to regenerate
        try:
            os.remove(header_diffs)
        except OSError:
            pass

    if not os.path.isfile(header_diffs):
        run_header_qc(subject, config.get_path('std'), header_diffs, config)

    expected_files = find_expected_files(subject, config)

    new_entry = {str(subject): ''}
    try:
        # Update checklist even if report generation fails
        datman.utils.update_checklist(new_entry, config=config)
    except Exception:
        logger.error("Error adding {} to checklist.".format(subject.full_id))

    try:
        generate_qc_report(report_name, subject, expected_files,
                           header_diffs, config)
    except Exception:
        logger.error("Exception raised during qc-report generation for {}. "
                     "Removing .html page.".format(subject.full_id),
                     exc_info=True)
        if os.path.exists(report_name):
            os.remove(report_name)

    return report_name

def update_site(study, site_id, config, skip_delete=False, delete_all=False):
    """Update the settings in the database for a study's scan site.

    Args:
        study (:obj:`dashboard.models.Study`): A study from the database.
        site_id (:obj:`str`): The name of a site that should be associated
            with this study or a site from the study that should have its
            settings updated.
        config (:obj:`datman.config.config`): A datman config instance for
            the study.
        skip_delete (bool, optional): Don't prompt the user and skip
            deletion of any site records no longer in the config files.
        delete_all (bool, optional): Don't prompt the user and delete any
            site records no longer in the config files.
    """
    settings = collect_settings(
        config,
        {
            "code": "StudyTag",
            "redcap": "UsesRedcap",
            "notes": "UsesTechNotes",
            "xnat_archive": "XnatArchive",
            "xnat_convention": "XnatConvention"
        },
        site=site_id)

    try:
        xnat_fname = config.get_key("XnatCredentials", site=site_id)
        settings["xnat_credentials"] = os.path.join(config.get_path("meta"),
                                                    xnat_fname)
    except UndefinedSetting:
        pass

    try:
        settings["xnat_url"] = get_server(config)
    except UndefinedSetting:
        pass

    try:
        study.update_site(site_id, create=True, **settings)
    except Exception as e:
        logger.error(f"Failed updating settings for study {study} and site "
                     f"{site_id}. Reason - {e}")

    update_expected_scans(study, site_id, config, skip_delete, delete_all)

def get_resources_dirs(config):
    resources_dir = config.get_path("resources")

    if dashboard.dash_found:
        subjects = datman.dashboard.get_study_subjects(config.study_name)
        sessions = []
        for subject in subjects:
            found_sessions = glob.glob(
                os.path.join(resources_dir, subject + "_*")
            )
            if found_sessions:
                sessions.extend(found_sessions)
    else:
        # Grabbing everything in resources comes with a small risk of
        # non-session folders being explored.
        sessions = [
            item for item in glob.glob(os.path.join(resources_dir, "*"))
            if os.path.isdir(item) and "_PHA_" not in os.path.basename(item)
        ]

    return sessions

def get_blacklist(blacklist_file, series, config):
    if series:
        return [series]

    if blacklist_file is None:
        blacklist_file = os.path.join(config.get_path('meta'),
                                      'blacklist.csv')

    if not os.path.exists(blacklist_file):
        logger.error("Blacklist {} does not exist. "
                     "Exiting.".format(blacklist_file))
        sys.exit(1)

    logger.debug("Reading blacklist file {}".format(blacklist_file))

    blacklist = []
    with open(blacklist_file, 'r') as blacklist_data:
        for line in blacklist_data:
            series = get_series(line)
            if not series:
                continue
            blacklist.append(series)

    return blacklist

def delete_bids(config, subject, session, scan=None):
    try:
        bids = config.get_path('bids')
    except Exception:
        return

    subject_folder = os.path.join(bids, 'sub-{}'.format(subject))
    session_folder = os.path.join(subject_folder, 'ses-{}'.format(session))

    if not scan:
        if os.path.exists(session_folder):
            shutil.rmtree(session_folder)
            if not os.listdir(subject_folder):
                os.rmdir(subject_folder)
        return

    if not scan.bids_name:
        return

    bids_file = datman.scanid.parse_bids_filename(scan.bids_name)

    sub_dirs = []
    sub_dirs.append(subject_folder)
    sub_dirs.append(session_folder)
    for path, dirs, files in os.walk(session_folder):
        for sub_dir in dirs:
            sub_dirs.append(os.path.join(path, sub_dir))

        for item in files:
            if bids_file == item:
                full_path = os.path.join(path, item)
                os.remove(full_path)

    # Clean up any folders that may now be empty
    for sub_dir in reversed(sub_dirs):
        try:
            os.rmdir(sub_dir)
        except OSError:
            pass

def get_all_subjects(config):
    nii_dir = config.get_path('nii')
    subject_nii_dirs = glob.glob(os.path.join(nii_dir, '*'))
    all_subs = [os.path.basename(path) for path in subject_nii_dirs]
    return all_subs

def run_header_qc(subject, config):
    """
    For each nifti, finds its json file and compares it to the matching gold
    standard. Differences are returned in a dictionary with one entry per
    scan.
    """
    try:
        ignored_headers = config.get_key('IgnoreHeaderFields',
                                         site=subject.site)
    except datman.config.UndefinedSetting:
        ignored_headers = []

    try:
        header_tolerances = config.get_key('HeaderFieldTolerance',
                                           site=subject.site)
    except datman.config.UndefinedSetting:
        header_tolerances = {}

    tag_settings = config.get_tags(site=subject.site)

    header_diffs = {}

    if datman.dashboard.dash_found:
        db_timepoint = datman.dashboard.get_subject(subject._ident)
        if not db_timepoint:
            logger.error("Can't find {} in dashboard database".format(
                subject))
            return

        for sess_num in db_timepoint.sessions:
            db_session = db_timepoint.sessions[sess_num]
            for series in db_session.scans:
                if not series.active_gold_standard:
                    header_diffs[series.name] = {
                        'error': 'Gold standard not found'
                    }
                    continue

                if not series.json_contents:
                    logger.debug("No JSON found for {}".format(series))
                    header_diffs[series.name] = {'error': 'JSON not found'}
                    continue

                check_bvals = needs_bval_check(tag_settings, series)
                db_diffs = series.update_header_diffs(
                    ignore=ignored_headers,
                    tolerance=header_tolerances,
                    bvals=check_bvals)
                header_diffs[series.name] = db_diffs.diffs

        return header_diffs

    standard_dir = config.get_path('std')
    standards_dict = get_standards(standard_dir, subject.site)

    for series in subject.niftis:
        scan_name = get_scan_name(series)

        try:
            standard_json = standards_dict[series.tag]
        except KeyError:
            logger.debug('No standard with tag {} found in {}'.format(
                series.tag, standard_dir))
            header_diffs[scan_name] = {'error': 'Gold standard not found'}
            continue

        try:
            series_json = find_json(series)
        except IOError:
            logger.debug('No JSON found for {}'.format(series))
            header_diffs[scan_name] = {'error': 'JSON not found'}
            continue

        check_bvals = needs_bval_check(tag_settings, series)
        diffs = header_checks.construct_diffs(series_json,
                                              standard_json,
                                              ignored_fields=ignored_headers,
                                              tolerances=header_tolerances,
                                              dti=check_bvals)
        header_diffs[scan_name] = diffs

    return header_diffs

def main():
    # Parse arguments
    arguments = docopt(__doc__)

    study = arguments['<study>']
    out = arguments['<out>']
    bids_json = arguments['<json>']
    subjects = arguments['--subject']
    exclude = arguments['--exclude']
    quiet = arguments['--quiet']
    verbose = arguments['--verbose']
    debug = arguments['--debug']
    rewrite = arguments['--rewrite']
    tmp_dir = arguments['--tmp-dir'] or '/tmp/'
    bids_dir = arguments['--bids-dir'] or tmp_dir
    log_dir = arguments['--log']
    DRYRUN = arguments['--DRYRUN']
    walltime = arguments['--walltime']

    # Strategy pattern dictionary for running different applications
    strat_dict = {
        'FMRIPREP': fmriprep_fork,
        'MRIQC': mriqc_fork,
        'FMRIPREP_CIFTIFY': ciftify_fork
    }

    # Thread dictionary for specifying thread arguments unique to application
    thread_dict = {
        'FMRIPREP': '--nthreads',
        'MRIQC': '--n_procs',
        'FMRIPREP_CIFTIFY': '--n_cpus'
    }

    # Configuration
    config = get_datman_config(study)
    configure_logger(quiet, verbose, debug)
    try:
        queue = config.get_key('QUEUE')
    except Exception as e:
        logger.error("Couldn't retrieve queue type from config. Reason: "
                     "{}".format(e))
        raise e

    # JSON parsing, formatting, and validating
    jargs = get_json_args(bids_json)
    jargs = validate_json_args(jargs, strat_dict)
    try:
        jargs.update({'keeprecon': config.get_key('KeepRecon')})
    except datman.config.UndefinedSetting:
        jargs.update({'keeprecon': True})
    n_thread = get_requested_threads(jargs, thread_dict)

    # Handle partition argument if using slurm
    partition = None
    try:
        partition = jargs['partition']
    except KeyError:
        pass

    # Get redirect command string and exclusion list
    log_dir = log_dir or os.path.join(out, 'bids_logs')
    log_dir = os.path.join(log_dir, jargs['app'].lower())
    log_cmd = partial(gen_log_redirect, log_dir=log_dir)
    exclude_cmd_list = [''] if not exclude else get_exclusion_cmd(exclude)

    # Get subjects and filter if not rewrite and group if longitudinal
    # Need better way to manage...
    subjects = subjects or [
        s for s in os.listdir(config.get_path('nii')) if 'PHA' not in s
    ]
    subjects = subjects if rewrite else filter_subjects(
        subjects, out, jargs['app'], log_dir)
    logger.info('Running {}'.format(subjects))

    subjects = group_subjects(subjects)

    # Process subject groups
    for s in subjects.keys():
        # Get subject directory and log tag
        log_tag = log_cmd(subject=s, app_name=jargs['app'])

        # Get commands
        init_cmd_list = get_init_cmd(study, s, bids_dir, tmp_dir, out,
                                     jargs['img'], log_tag)
        n2b_cmd = get_nii_to_bids_cmd(study, subjects[s], log_tag)
        bids_cmd_list = strat_dict[jargs['app']](jargs, log_tag, out, s)

        # Write commands to executable and submit
        master_cmd = (init_cmd_list + [n2b_cmd] + exclude_cmd_list +
                      bids_cmd_list + ['\n cleanup \n'])
        fd, job_file = tempfile.mkstemp(suffix='datman_BIDS_job', dir=tmp_dir)
        os.close(fd)
        write_executable(job_file, master_cmd)

        if not DRYRUN:
            submit_jobfile(job_file, s, queue, walltime, n_thread, partition)

def main():
    # Parse arguments
    arguments = docopt(__doc__)

    study = arguments['<study>']
    out = arguments['<out>']
    bids_json = arguments['<json>']
    subjects = arguments['--subject']
    exclude = arguments['--exclude']
    quiet = arguments['--quiet']
    verbose = arguments['--verbose']
    debug = arguments['--debug']
    rewrite = arguments['--rewrite']
    tmp_dir = arguments['--tmp-dir'] or '/tmp/'
    bids_dir = arguments['--bids-dir'] or tmp_dir
    log_dir = arguments['--log']
    DRYRUN = arguments['--DRYRUN']
    walltime = arguments['--walltime']

    # Strategy pattern dictionary
    strat_dict = {
        'FMRIPREP': fmriprep_fork,
        'MRIQC': mriqc_fork,
        'FMRIPREP_CIFTIFY': ciftify_fork
    }

    thread_dict = {
        'FMRIPREP': '--nthreads',
        'MRIQC': '--n_procs',
        'FMRIPREP_CIFTIFY': '--n_cpus'
    }

    # Configuration
    config = get_datman_config(study)
    configure_logger(quiet, verbose, debug)
    try:
        queue = config.site_config['SystemSettings'][
            os.environ['DM_SYSTEM']]['QUEUE']
    except KeyError as e:
        logger.error('Config exception, key not found: {}'.format(e))
        sys.exit(1)

    # JSON parsing, formatting, and validating
    jargs = get_json_args(bids_json)
    jargs = validate_json_args(jargs, strat_dict)
    try:
        jargs.update({'keeprecon': config.get_key('KeepRecon')})
    except KeyError:
        jargs.update({'keeprecon': True})
    n_thread = get_requested_threads(jargs, thread_dict)

    # Get redirect command string and exclusion list
    log_cmd = partial(gen_log_redirect, log_dir=log_dir, out_dir=out)
    exclude_cmd_list = [''] if not exclude else get_exclusion_cmd(exclude)

    # Get subjects and filter if not rewrite and group if longitudinal
    subjects = subjects or [
        s for s in os.listdir(config.get_path('nii')) if 'PHA' not in s
    ]
    subjects = subjects if rewrite else filter_subjects(subjects, out,
                                                        jargs['app'])
    logger.info('Running {}'.format(subjects))

    subjects = group_subjects(subjects)

    # Process subject groups
    for s in subjects.keys():
        # Get subject directory and log tag
        log_tag = log_cmd(subject=s, app_name=jargs['app'])

        # Get commands
        init_cmd_list = get_init_cmd(study, s, bids_dir, tmp_dir, out,
                                     jargs['img'], log_tag)
        n2b_cmd = get_nii_to_bids_cmd(study, subjects[s], log_tag)
        bids_cmd_list = strat_dict[jargs['app']](jargs, log_tag, out, s)

        # Write commands to executable and submit
        master_cmd = (init_cmd_list + [n2b_cmd] + exclude_cmd_list +
                      bids_cmd_list + ['\n cleanup \n'])
        fd, job_file = tempfile.mkstemp(suffix='datman_BIDS_job', dir=tmp_dir)
        os.close(fd)
        write_executable(job_file, master_cmd)

        if not DRYRUN:
            submit_jobfile(job_file, s, queue, walltime, n_thread)

def main():
    arguments = docopt(__doc__)

    out_dir = arguments['--output-dir']
    sessions = arguments['<session>']
    study = arguments['<study>']
    verbose = arguments["--verbose"]
    debug = arguments["--debug"]
    quiet = arguments["--quiet"]
    dryrun = arguments["--dry-run"]

    if verbose:
        logger.setLevel(logging.INFO)
    if debug:
        logger.setLevel(logging.DEBUG)
    if quiet:
        logger.setLevel(logging.ERROR)
    if dryrun:
        logger.info("Dry run - will not write any output")

    config = datman.config.config(study=study)
    task_path = config.get_path("task")
    nii_path = config.get_path("nii")

    if not sessions:
        sessions = os.listdir(task_path)
    elif not isinstance(sessions, list):
        sessions = [sessions]

    logger.debug(f"Running FACES parser for {len(sessions)} session(s):")
    logger.debug(f"Out dir: {out_dir}")

    for ses in sessions:
        logger.info(f"Parsing {ses}...")
        try:
            ident = datman.scanid.parse(ses)
        except datman.scanid.ParseException:
            logger.error(f"Skipping task folder with malformed ID {ses}")
            continue

        ses_path = os.path.join(task_path, ses)
        task_files = glob.glob(ses_path + '/*.txt')

        if not task_files:
            logger.warning(f"No .txt files found for {ses}, skipping.")
            continue

        for eprimefile in task_files:
            logger.info(f"Found file: {eprimefile}")
            try:
                eprime = read_eprime(eprimefile)
            except UnicodeError as e:
                logger.error(f"Cannot parse {eprimefile}: {e}")
                continue

            init_tag = find_all_data(eprime, "Procedure: InitialTR\r\n")
            init_start = np.empty([len(init_tag)], dtype=int)
            init_end = np.empty([len(init_tag)], dtype=int)
            for i, ind_init in enumerate(init_tag):
                if i < len(init_tag) - 1:
                    init_end[i] = init_tag[i + 1][0]
                elif i == len(init_tag) - 1:
                    init_end[i] = len(eprime) - 1
                init_start[i] = ind_init[0]

            init_blocks = [eprime[s:e] for s, e in zip(init_start, init_end)]

            syncOT = float('nan')
            for b in init_blocks:
                new_syncOT = get_event_value(b, 'SyncSlide.OnsetTime:')
                stim = get_event_value(b, 'Stimulus:')
                if not np.isnan(int(new_syncOT)) and stim == '4':
                    syncOT = new_syncOT

            # tag the trials to obtain the data for each trial
            taglist = find_all_data(eprime, "Procedure: TrialsPROC\r\n")
            if not taglist:
                logger.error(f"No trials found for {ses} - skipping")
                continue

            trial_start = np.empty([len(taglist)], dtype=int)
            trial_end = np.empty([len(taglist)], dtype=int)
            for i, ind_trial_proc in enumerate(taglist):
                if i < len(taglist) - 1:
                    trial_end[i] = taglist[i + 1][0]
                elif i == len(taglist) - 1:
                    trial_end[i] = len(eprime) - 1
                trial_start[i] = ind_trial_proc[0]

            trial_blocks = [
                eprime[s:e] for s, e in zip(trial_start, trial_end)
            ]

            entries = []
            for b in trial_blocks:
                stimOT = get_event_value(b, 'StimSlide.OnsetTime:')

                # Convert from ms to seconds
                rel_stimOT = (float(stimOT) - float(syncOT)) / 1000
                duration = float(
                    get_event_value(b, 'StimSlide.OnsetToOnsetTime:')) / 1000
                response_time = float(
                    get_event_value(b, 'StimSlide.RT:')) / 1000

                entries.append({
                    'onset': rel_stimOT,
                    'duration': duration,
                    'trial_type': 'Shapes' if 'Shape' in str(b) else 'Faces',
                    'response_time': response_time,
                    'accuracy': get_event_value(b, 'StimSlide.ACC:'),
                    'correct_response':
                        map_response(get_event_value(b, 'CorrectResponse:')),
                    'participant_response':
                        map_response(get_event_value(b, 'StimSlide.RESP:'))
                })

            data = pd.DataFrame.from_dict(entries).astype({
                "onset": float,
                "duration": float,
                "response_time": float,
                "accuracy": float,
                "correct_response": float,
                "participant_response": float
            }).astype({
                "correct_response": "Int64",
                "participant_response": "Int64",
                "accuracy": "Int64"
            })

            log_head, log_tail = os.path.split(eprimefile)

            if not out_dir:
                out_path = os.path.join(
                    nii_path, ident.get_full_subjectid_with_timepoint())
            else:
                out_path = out_dir

            file_name = os.path.join(out_path, f"{ses}_FACES.tsv")

            if not dryrun:
                logger.info(f"Saving output to {file_name}")
                os.makedirs(os.path.dirname(file_name), exist_ok=True)
                data.to_csv(file_name, sep='\t', index=False)
            else:
                logger.info(f"Dry run - would save to {file_name}")

def main():
    global dryrun

    arguments = docopt(__doc__)
    study = arguments['<study>']
    config = arguments['--config']
    system = arguments['--system']
    QC_file = arguments['--QC-transfer']
    FA_tag = arguments['--FA-tag']
    subject_filter = arguments['--subject-filter']
    FA_filter = arguments['--FA-filter']
    CALC_MD = arguments['--calc-MD']
    CALC_ALL = arguments['--calc-all']
    walltime = arguments['--walltime']
    walltime_post = arguments['--walltime-post']
    POST_ONLY = arguments['--post-only']
    NO_POST = arguments['--no-post']
    quiet = arguments['--quiet']
    verbose = arguments['--verbose']
    debug = arguments['--debug']
    DRYRUN = arguments['--dry-run']

    if quiet:
        logger.setLevel(logging.ERROR)
    if verbose:
        logger.setLevel(logging.INFO)
    if debug:
        logger.setLevel(logging.DEBUG)

    config = datman.config.config(filename=config, system=system,
                                  study=study)

    ## make the output directory if it doesn't exist
    input_dir = config.get_path('dtifit')
    output_dir = config.get_path('enigmaDTI')
    log_dir = os.path.join(output_dir, 'logs')
    run_dir = os.path.join(output_dir, 'bin')
    dm.utils.makedirs(log_dir)
    dm.utils.makedirs(run_dir)

    logger.debug(arguments)

    if FA_tag is None:
        FA_tag = '_FA.nii.gz'

    subjects = dm.proc.get_subject_list(input_dir, subject_filter, QC_file)

    # check if we have any work to do, exit if not
    if len(subjects) == 0:
        logger.info('No outstanding scans to process.')
        sys.exit(1)

    # grab the prefix from the subid if not given
    prefix = config.get_key('STUDY_TAG')

    ## write and check the run scripts
    script_names = ['run_engimadti.sh', 'concatresults.sh']
    write_run_scripts(script_names, run_dir, output_dir, CALC_MD, CALC_ALL,
                      debug)

    checklist_file = os.path.normpath(output_dir +
                                      '/ENIGMA-DTI-checklist.csv')
    checklist_cols = [
        'id', 'FA_nii', 'date_ran', 'qc_rator', 'qc_rating', 'notes'
    ]
    checklist = dm.proc.load_checklist(checklist_file, checklist_cols)
    checklist = dm.proc.add_new_subjects_to_checklist(subjects, checklist,
                                                      checklist_cols)

    # Update checklist with new FA files to process listed under FA_nii column
    checklist = dm.proc.find_images(checklist, 'FA_nii', input_dir, FA_tag,
                                    subject_filter=subject_filter,
                                    image_filter=FA_filter)

    job_name_prefix = "edti{}_{}".format(
        prefix, datetime.datetime.today().strftime("%Y%m%d-%H%M%S"))
    submit_edti = False

    ## Change dir so it can be submitted without the full path
    os.chdir(run_dir)

    if not POST_ONLY:
        with make_temp_directory() as temp_dir:
            cmds_file = os.path.join(temp_dir, 'commands.txt')

            with open(cmds_file, 'w') as cmdlist:
                for i in range(0, len(checklist)):
                    subid = checklist['id'][i]

                    # make sure that second filter is being applied to the
                    # qsub bit
                    if subject_filter and subject_filter not in subid:
                        continue

                    ## make sure that a T1 has been selected for this subject
                    if pd.isnull(checklist['FA_nii'][i]):
                        continue

                    ## format contents of T1 column into recon-all command
                    ## input
                    smap = checklist['FA_nii'][i]

                    if subject_previously_completed(output_dir, subid, smap):
                        continue

                    # If POSTFS_ONLY == False, the run script will be the
                    # first or only name in the list
                    cmdlist.write(
                        "bash -l {rundir}/{script} {output} {inputFA}\n"
                        .format(rundir=run_dir,
                                script=script_names[0],
                                output=os.path.join(output_dir, subid),
                                inputFA=os.path.join(input_dir, subid, smap)))

                    ## add today's date to the checklist
                    checklist['date_ran'][i] = datetime.date.today()

                    submit_edti = True

            if submit_edti:
                qbatch_run_cmd = dm.proc.make_file_qbatch_command(
                    cmds_file, job_name_prefix, log_dir, walltime)
                os.chdir(run_dir)
                dm.utils.run(qbatch_run_cmd, DRYRUN)

    ## if any subjects have been submitted, submit a final job that will
    ## consolidate the results after they are finished
    os.chdir(run_dir)
    post_edit_cmd = 'echo bash -l {rundir}/{script}'.format(
        rundir=run_dir, script=script_names[1])
    if submit_edti:
        qbatch_post_cmd = dm.proc.make_piped_qbatch_command(
            post_edit_cmd, '{}_post'.format(job_name_prefix), log_dir,
            walltime_post, afterok=job_name_prefix)
        dm.utils.run(qbatch_post_cmd, DRYRUN)

    if not DRYRUN:
        ## write the checklist out to a file
        checklist.to_csv(checklist_file, sep=',', index=False)