def createmap(recording: Modules.baseModule,
              bidsmap, template, bidsmap_unk) -> None:
    if plugins.RunPlugin("SequenceEP", recording) < 0:
        logger.warning("Sequence {} discarded by {}"
                       .format(recording.recIdentity(False),
                               "SequenceEP"))
        return

    logger.info("Processing: sub '{}', ses '{}', {} ({} files)"
                .format(recording.subId(),
                        recording.sesId(),
                        recording.recIdentity(),
                        len(recording.files)))

    recording.index = -1
    while recording.loadNextFile():
        if plugins.RunPlugin("RecordingEP", recording) < 0:
            logger.warning("Recording {} discarded by {}"
                           .format(recording.recIdentity(),
                                   "RecordingEP"))
            continue

        # checking in the current map
        modality, r_index, run = bidsmap.match_run(recording)
        if not modality:
            logger.warning("{}/{}: No run found in bidsmap. "
                           "Looking into template"
                           .format(recording.Module(),
                                   recording.recIdentity()))
            # checking in the template map
            modality, r_index, run = template.match_run(recording, fix=True)
            if not modality:
                logger.error("{}/{}: No compatible run found"
                             .format(recording.Module(),
                                     recording.recIdentity()))
                bidsmap_unk.add_run(run,
                                    recording.Module(),
                                    recording.Type())
                continue
            run.template = True
            modality, r_index, run = bidsmap.add_run(run,
                                                     recording.Module(),
                                                     recording.Type())
        if not run.checked:
            if not run.entity:
                run.genEntities(recording.bidsmodalities.get(modality, []))
            recording.fillMissingJSON(run)

    plugins.RunPlugin("SequenceEndEP", None, recording)
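
# Resolution order used by createmap for every loaded file (a summary
# of the logic above):
#   1. bidsmap.match_run(recording)             -- the working map
#   2. template.match_run(recording, fix=True)  -- template fallback;
#      the matched run is copied into the working map and flagged
#      with run.template = True
#   3. otherwise the run is stored in bidsmap_unk (unknown.yaml)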

def sortsession(outfolder: str,
                session: BidsSession,
                recording: object,
                dry_run: bool) -> None:
    recording.setBidsSession(session)

    if plugins.RunPlugin("SequenceEP", recording) < 0:
        logger.warning("Sequence {} discarded by {}"
                       .format(recording.recIdentity(False),
                               "SequenceEP"))
        return

    logger.info("Processing: sub '{}', ses '{}' ({} files)"
                .format(recording.subId(),
                        recording.sesId(),
                        len(recording.files)))

    recording.index = -1
    while recording.loadNextFile():
        if session.subject is None:
            recording.getBidsSession().unlock_subject()
            recording.getBidsSession().subject = None
        if session.session is None:
            recording.getBidsSession().unlock_session()
            recording.getBidsSession().session = None

        if plugins.RunPlugin("RecordingEP", recording) < 0:
            logger.warning("Recording {} discarded by {}"
                           .format(recording.recIdentity(),
                                   "RecordingEP"))
            continue

        if session.subject is None:
            recording.setSubId()
        if session.session is None:
            recording.setSesId()

        recording.getBidsSession().registerFields(True)

        serie = os.path.join(
                outfolder,
                recording.getBidsSession().getPath(True),
                recording.Module(),
                recording.recIdentity(index=False))
        if not dry_run:
            os.makedirs(serie, exist_ok=True)
            outfile = recording.copyRawFile(serie)
            plugins.RunPlugin("FileEP", outfile, recording)

    plugins.RunPlugin("SequenceEndEP", outfolder, recording)
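
# sortsession lays the copied files out as (a sketch; names are
# hypothetical, derived from the path built above):
#   <outfolder>/sub-<subId>/ses-<sesId>/<Module>/<recIdentity>/<file>
# e.g. prepared/sub-001/ses-01/MRI/001-T1w/...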

def coin(destination: str,
         recording: Modules.baseModule,
         bidsmap: Bidsmap,
         dry_run: bool) -> None:
    """
    Bidsifies one recording: matches it against the bidsmap, fills
    BIDS labels and metadata, and places the result in the
    corresponding modality folder of the destination dataset

    :param destination: The full path of the destination dataset
    :param recording:   The recording to bidsify
    :param bidsmap:     The full mapping heuristics from the
                        bidsmap YAML-file
    :param dry_run:     If True, no disk writing operations
                        are performed
    :return:            Nothing
    """
    if plugins.RunPlugin("SequenceEP", recording) < 0:
        logger.warning("Sequence {} discarded by {}".format(
            recording.recIdentity(False), "SequenceEP"))
        return

    logger.info("Processing: sub '{}', ses '{}', {} ({} files)".format(
        recording.subId(),
        recording.sesId(),
        recording.recIdentity(),
        len(recording.files)))

    recording.sub_BIDSvalues["participant_id"] = recording.subId()

    recording.index = -1
    while recording.loadNextFile():
        if plugins.RunPlugin("RecordingEP", recording) < 0:
            logger.warning("Recording {} discarded by {}".format(
                recording.recIdentity(), "RecordingEP"))
            continue
        recording.getBidsSession().registerFields(True)
        out_path = os.path.join(destination,
                                recording.getBidsPrefix("/"))
        # checking in the current map
        modality, r_index, r_obj = bidsmap.match_run(recording)
        if not modality:
            e = "{}: No compatible run found"\
                .format(recording.recIdentity())
            logger.error(e)
            raise ValueError(e)
        if modality == Modules.ignoremodality:
            logger.info('{}: ignored modality'
                        .format(recording.recIdentity()))
            continue
        recording.setLabels(r_obj)
        recording.generateMeta()

        bidsname = recording.getBidsname()
        bidsmodality = os.path.join(out_path, recording.Modality())

        # Check if file already exists
        if os.path.isfile(os.path.join(bidsmodality,
                                       bidsname + '.json')):
            e = "{}/{}.json exists at destination"\
                .format(bidsmodality, bidsname)
            logger.error(e)
            raise FileExistsError(e)
        if not dry_run:
            plugins.RunPlugin("FileEP",
                              recording.getBidsSession().in_path,
                              recording)
        else:
            plugins.RunPlugin("FileEP", None, recording)

    if not dry_run:
        plugins.RunPlugin("SequenceEndEP",
                          recording.getBidsSession().in_path,
                          recording)
    else:
        plugins.RunPlugin("SequenceEndEP", None, recording)
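
# How a single recording reaches coin (a minimal sketch; the paths and
# the "MRI" module are hypothetical -- process() below performs these
# steps for every run folder it discovers):
#
#   cls = Modules.select("prepared/sub-001/ses-01/MRI/001-T1w", "MRI")
#   recording = cls(rec_path="prepared/sub-001/ses-01/MRI/001-T1w")
#   recording.setBidsSession(scan)  # scan: a locked BidsSession
#   coin("bids", recording, Bidsmap("bidsmap.yaml"), dry_run=True)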

def process(source: str, destination: str,
            plugin_file: str = "",
            plugin_opt: dict = {},
            sub_list: list = [],
            sub_skip_tsv: bool = False,
            sub_skip_dir: bool = False,
            ses_skip_dir: bool = False,
            part_template: str = "",
            bidsmapfile: str = "bidsmap.yaml",
            dry_run: bool = False) -> None:
    """
    Process a prepared dataset before the bidsification.
    Can be used to produce derivatives, conversion or anonymisation,
    with the advantage of recording identification by bidsmap.yaml.
    Essentially it is identical to bidsification, but without
    the bidsification itself.

    Only subjects in source/participants.tsv are treated; this list
    can be narrowed using the sub_list, sub_skip_tsv and sub_skip_dir
    options.

    Parameters
    ----------
    source: str
        folder containing the source dataset
    destination: str
        folder for the processed dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to the plugin
    sub_list: list
        list of subjects to process. Subjects are checked after
        the plugin and must start with 'sub-', as in the
        destination folder
    sub_skip_tsv: bool
        if set to True, subjects found in destination/participants.tsv
        will be ignored
    sub_skip_dir: bool
        if set to True, subjects with already created directories
        will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already created directories
        will be ignored.
        Can conflict with ses_no_dir
    part_template: str
        path to the template json file from which participants.tsv
        will be modelled. If unset, the default
        "source/participants.json" is used.
        Setting this variable may break the workflow
    bidsmapfile: str
        the name of the bidsmap file, searched for in the
        destination/code/bidsme directory, unless the path is absolute
    dry_run: bool
        if set to True, no disk writing operations will be performed
    """
    logger.info("-------------- Processing data -------------")
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    # source = os.path.abspath(source)
    if not os.path.isdir(source):
        logger.critical("Source directory {} does not exist"
                        .format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical("Destination directory {} does not exist"
                        .format(destination))
        raise NotADirectoryError(destination)

    # Input checking & defaults
    bidscodefolder = os.path.join(destination, 'code', 'bidsme')

    # Create a code/bidsme subfolder
    os.makedirs(bidscodefolder, exist_ok=True)

    # Check for dataset description file
    dataset_file = os.path.join(destination, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        logger.warning("Dataset description file "
                       "'dataset_description.json' "
                       "not found in '{}'".format(destination))

    # Check for README file
    readme_file = os.path.join(destination, 'README')
    if not os.path.isfile(readme_file):
        logger.warning("Dataset readme file 'README' "
                       "not found in '{}'".format(destination))

    # Get the bidsmap heuristics from the bidsmap YAML-file
    fname = paths.findFile(bidsmapfile,
                           bidscodefolder,
                           paths.local,
                           paths.config)
    if not fname:
        logger.critical('Bidsmap file {} not found.'.format(bidsmapfile))
        raise FileNotFoundError(bidsmapfile)
    else:
        bidsmapfile = fname
    logger.info("loading bidsmap {}".format(bidsmapfile))
    bidsmap = Bidsmap(bidsmapfile)

    ntotal, ntemplate, nunchecked = bidsmap.countRuns()
    logger.debug("Map contains {} runs".format(ntotal))
    if ntemplate != 0:
        logger.warning("Map contains {} template runs"
                       .format(ntemplate))
    if nunchecked != 0:
        logger.critical("Map contains {} unchecked runs"
                        .format(nunchecked))
        raise Exception("Unchecked runs present")

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=dry_run,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    if not part_template:
        part_template = os.path.join(source, "participants.json")
    else:
        logger.warning("Loading exterior participant template {}"
                       .format(part_template))
    BidsSession.loadSubjectFields(part_template)

    new_sub_file = os.path.join(source, "participants.tsv")
    df_sub = pandas.read_csv(new_sub_file,
                             sep="\t", header=0,
                             na_values="n/a").drop_duplicates()
    df_dupl = df_sub.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several "
                        "duplicated entries: {}"
                        .format(", ".join(
                            df_sub[df_dupl]["participant_id"])))
        raise Exception("Duplicated subjects")

    dupl_file = os.path.join(source, "__duplicated.tsv")
    if os.path.isfile(dupl_file):
        logger.critical("Found unmerged file with duplicated subjects")
        raise FileExistsError(dupl_file)

    new_sub_json = os.path.join(source, "participants.json")
    if not tools.checkTsvDefinitions(df_sub, new_sub_json):
        raise Exception("Incompatible sidecar json")

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file,
                                  sep="\t", header=0,
                                  na_values="n/a")
        if not old_sub.columns.equals(df_sub.columns):
            logger.warning("Source participant.tsv has different columns "
                           "from destination dataset")
        old_sub = old_sub["participant_id"]

    ##############################
    # Subjects loop
    ##############################
    n_subjects = len(df_sub["participant_id"])
    for index, sub_row in df_sub.iterrows():
        sub_no = index + 1
        sub_id = sub_row["participant_id"]
        sub_dir = os.path.join(source, sub_id)
        if not os.path.isdir(sub_dir):
            logger.error("{}: Not found in {}".format(sub_id, source))
            continue

        scan = BidsSession()
        scan.in_path = sub_dir
        scan.subject = sub_id

        #################################################
        # Cloning df_sub row values in scans sub_values
        #################################################
        for column in df_sub.columns:
            scan.sub_values[column] = sub_row[column]

        # locking subjects here forbids renaming in process
        # as it will be unclear how to manage folders with data
        scan.lock_subject()

        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}"
                           .format(scan.subject, "SubjectEP"))
            continue
        if not scan.isSubValid():
            logger.error("{}: Subject id '{}' is not valid"
                         .format(sub_id, scan.subject))
            continue
        if tools.skipEntity(scan.subject, sub_list,
                            old_sub if sub_skip_tsv else None,
                            destination if sub_skip_dir else ""):
            logger.info("Skipping subject '{}'".format(scan.subject))
            continue

        ses_dirs = tools.lsdirs(sub_dir, 'ses-*')
        if not ses_dirs:
            logger.error("{}: No sessions found in: {}"
                         .format(scan.subject, sub_dir))
            continue

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("{} ({}/{}): Scanning folder {}"
                        .format(scan.subject,
                                sub_no, n_subjects,
                                ses_dir))
            scan.unlock_session()
            scan.session = os.path.basename(ses_dir)
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}"
                               .format(scan.session, "SessionEP"))
                continue
            scan.lock()

            if ses_skip_dir and tools.skipEntity(
                    scan.session, [], None,
                    os.path.join(destination, scan.subject)):
                logger.info("Skipping session '{}'".format(scan.session))
                continue

            for module in Modules.selector.types_list:
                mod_dir = os.path.join(ses_dir, module)
                if not os.path.isdir(mod_dir):
                    logger.debug("Module {} not found in {}"
                                 .format(module, ses_dir))
                    continue
                for run in tools.lsdirs(mod_dir):
                    scan.in_path = run
                    cls = Modules.select(run, module)
                    if cls is None:
                        logger.error("Failed to identify data in {}"
                                     .format(run))
                        continue
                    recording = cls(rec_path=run)
                    if not recording or len(recording.files) == 0:
                        logger.error("unable to load data in folder {}"
                                     .format(run))
                        continue
                    recording.setBidsSession(scan)
                    coin(destination, recording, bidsmap, dry_run)
            plugins.RunPlugin("SessionEndEP", scan)

        scan.in_path = sub_dir
        plugins.RunPlugin("SubjectEndEP", scan)

    ##################################
    # Merging the participants table
    ##################################
    df_processed = BidsSession.exportAsDataFrame()
    col_mismatch = False
    if not df_processed.columns.equals(df_sub.columns):
        col_mismatch = True
        logger.warning("Modified participant table does not match "
                       "original table. This is discouraged and can "
                       "break future preparation and process steps")
        for col in df_processed.columns.difference(df_sub.columns):
            df_sub[col] = None
        df_sub = df_sub[BidsSession.getSubjectColumns()]
    df_sub.drop_duplicates(inplace=True)

    df_res = pandas.concat([df_sub, df_processed],
                           join="inner",
                           keys=("original", "processed"),
                           names=("stage", "ID"))
    df_res = df_res.drop_duplicates()
    df_dupl = df_res.duplicated("participant_id", keep=False)
    if df_dupl.any():
        logger.info("Updating participants values")
        df_dupl = df_dupl.drop(["processed"])
        df_res.drop(df_dupl[df_dupl].index, inplace=True)

    df_dupl = df_res.duplicated("participant_id")
    if df_dupl.any():
        logger.error("Participant list contains one or several duplicated "
                     "entries: {}"
                     .format(", ".join(
                         df_res[df_dupl]["participant_id"])))

    ##################################
    # Saving the participants table
    ##################################
    if not dry_run:
        df_res[~df_dupl].to_csv(new_sub_file,
                                sep='\t', na_rep="n/a",
                                index=False, header=True)
        if df_dupl.any():
            logger.info("Saving the list to be merged manually to {}"
                        .format(dupl_file))
            df_res[df_dupl].to_csv(dupl_file,
                                   sep='\t', na_rep="n/a",
                                   index=False, header=True)

        json_file = tools.change_ext(new_sub_file, "json")
        if col_mismatch or not os.path.isfile(json_file):
            BidsSession.exportDefinitions(json_file)

    plugins.RunPlugin("FinaliseEP")
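
# Example invocation of process (a sketch with hypothetical paths,
# plugin file and plugin options; the bidsmap is searched for in
# destination/code/bidsme unless the path is absolute):
#
#   process(source="prepared", destination="bids",
#           plugin_file="plugins/process_plugin.py",
#           plugin_opt={"verbose": True},
#           bidsmapfile="bidsmap.yaml",
#           dry_run=True)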

def mapper(source: str, destination: str,
           plugin_file: str = "",
           plugin_opt: dict = {},
           sub_list: list = [],
           sub_skip_tsv: bool = False,
           sub_skip_dir: bool = False,
           ses_skip_dir: bool = False,
           bidsmapfile: str = "bidsmap.yaml",
           map_template: str = "bidsmap_template.yaml",
           dry_run: bool = False
           ) -> None:
    """
    Generates bidsmap.yaml from a prepared dataset and a map template.

    Only subjects in source/participants.tsv are treated; this list
    can be narrowed using the sub_list, sub_skip_tsv and sub_skip_dir
    options.

    Parameters
    ----------
    source: str
        folder containing the prepared dataset
    destination: str
        folder for the bidsified dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to the plugin
    sub_list: list
        list of subjects to process. Subjects are checked after
        the plugin and must start with 'sub-', as in the
        destination folder
    sub_skip_tsv: bool
        if set to True, subjects found in destination/participants.tsv
        will be ignored
    sub_skip_dir: bool
        if set to True, subjects with already created directories
        will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already created directories
        will be ignored.
        Can conflict with ses_no_dir
    bidsmapfile: str
        the name of the bidsmap file, searched for in the
        destination/code/bidsme directory, unless the path is absolute
    map_template: str
        the name of the template map. The file is searched for in
        the heuristics folder
    dry_run: bool
        if set to True, no disk writing operations will be performed
    """
    logger.info("------------ Generating bidsmap ------------")
    logger.info("Current directory: {}".format(os.getcwd()))
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    if not os.path.isdir(source):
        logger.critical("Source directory {} does not exist"
                        .format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical("Destination directory {} does not exist"
                        .format(destination))
        raise NotADirectoryError(destination)

    bidscodefolder = os.path.join(destination, 'code', 'bidsme')
    os.makedirs(bidscodefolder, exist_ok=True)

    # Get the heuristics for filling the new bidsmap
    logger.info("loading template bidsmap {}".format(map_template))
    fname = paths.findFile(map_template,
                           paths.local,
                           paths.config,
                           paths.heuristics)
    if not fname:
        logger.warning("Unable to find template map {}"
                       .format(map_template))
    template = bidsmap.Bidsmap(fname)

    fname = paths.findFile(bidsmapfile,
                           bidscodefolder,
                           paths.local,
                           paths.config)
    if not fname:
        bidsmapfile = os.path.join(bidscodefolder, bidsmapfile)
    else:
        bidsmapfile = fname
    logger.info("loading working bidsmap {}".format(bidsmapfile))
    bidsmap_new = bidsmap.Bidsmap(bidsmapfile)

    logger.debug("Creating bidsmap for unknown modalities")
    # removing old unknown files
    bidsunknown = os.path.join(bidscodefolder, 'unknown.yaml')
    if os.path.isfile(bidsunknown):
        os.remove(bidsunknown)
    bidsmap_unk = bidsmap.Bidsmap(bidsunknown)

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=True,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    new_sub_file = os.path.join(source, "participants.tsv")
    df_sub = pandas.read_csv(new_sub_file,
                             sep="\t", header=0,
                             na_values="n/a")
    df_dupl = df_sub.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several "
                        "duplicated entries: {}"
                        .format(", ".join(
                            df_sub[df_dupl]["participant_id"])))
        raise Exception("Duplicated subjects")

    new_sub_json = os.path.join(source, "participants.json")
    if not tools.checkTsvDefinitions(df_sub, new_sub_json):
        raise Exception("Incompatible sidecar json")
    BidsSession.loadSubjectFields(new_sub_json)

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file,
                                  sep="\t", header=0,
                                  na_values="n/a")
    df_res = df_sub
    if old_sub is not None:
        if not old_sub.columns.equals(df_sub.columns):
            logger.critical("Participant.tsv has different columns "
                            "from destination dataset")
            raise Exception("Participants column mismatch")
        # DataFrame.append was removed in pandas 2.0; concat is equivalent
        df_res = pandas.concat([old_sub, df_sub],
                               ignore_index=True).drop_duplicates()
        df_dupl = df_res.duplicated("participant_id")
        if df_dupl.any():
            logger.critical("Joined participant list contains one or "
                            "several duplicated entries: {}"
                            .format(", ".join(
                                df_res[df_dupl]["participant_id"])))
            raise Exception("Duplicated subjects")
        old_sub = old_sub["participant_id"]

    ##############################
    # Subjects loop
    ##############################
    n_subjects = len(df_sub["participant_id"])
    for sub_no, sub_id in enumerate(df_sub["participant_id"], 1):
        sub_dir = os.path.join(source, sub_id)
        if not os.path.isdir(sub_dir):
            logger.error("{}: Not found in {}"
                         .format(sub_id, source))
            continue

        scan = BidsSession()
        scan.in_path = sub_dir
        scan.subject = sub_id

        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}"
                           .format(scan.subject, "SubjectEP"))
            continue
        scan.lock_subject()

        if not scan.isSubValid():
            logger.error("{}: Subject id '{}' is not valid"
                         .format(sub_id, scan.subject))
            continue

        if tools.skipEntity(scan.subject, sub_list,
                            old_sub if sub_skip_tsv else None,
                            destination if sub_skip_dir else ""):
            logger.info("Skipping subject '{}'"
                        .format(scan.subject))
            continue

        ses_dirs = tools.lsdirs(sub_dir, 'ses-*')
        if not ses_dirs:
            logger.error("{}: No sessions found in: {}"
                         .format(scan.subject, sub_dir))
            continue

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("{} ({}/{}): Scanning folder {}"
                        .format(scan.subject,
                                sub_no, n_subjects,
                                ses_dir))
            scan.unlock_session()
            scan.session = os.path.basename(ses_dir)
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}"
                               .format(scan.session, "SessionEP"))
                continue
            scan.lock()

            if ses_skip_dir and tools.skipEntity(
                    scan.session, [], None,
                    os.path.join(destination, scan.subject)):
                logger.info("Skipping session '{}'"
                            .format(scan.session))
                continue

            for module in Modules.selector.types_list:
                mod_dir = os.path.join(ses_dir, module)
                if not os.path.isdir(mod_dir):
                    logger.debug("Module {} not found in {}"
                                 .format(module, ses_dir))
                    continue
                for run in tools.lsdirs(mod_dir):
                    cls = Modules.selector.select(run, module)
                    if cls is None:
                        logger.error("Failed to identify data in {}"
                                     .format(run))
                        continue
                    recording = cls(rec_path=run)
                    if not recording or len(recording.files) == 0:
                        logger.error("unable to load data in folder {}"
                                     .format(run))
                        continue
                    recording.setBidsSession(scan)
                    createmap(recording, bidsmap_new,
                              template, bidsmap_unk)

    if not dry_run:
        # Save the bidsmap to the bidsmap YAML-file
        bidsmap_new.save(bidsmapfile, empty_attributes=False)

    ntotal, ntemplate, nunchecked = bidsmap_new.countRuns()
    logger.info("Map contains {} runs".format(ntotal))
    if ntemplate != 0:
        logger.warning("Map contains {} template runs"
                       .format(ntemplate))
    if nunchecked != 0:
        logger.warning("Map contains {} unchecked runs"
                       .format(nunchecked))

    # Scanning unknown recordings and exporting them to the yaml file
    unkn_recordings = bidsmap_unk.countRuns()[0]
    if unkn_recordings > 0:
        logger.error("Was unable to identify {} recordings. "
                     "See {} for details"
                     .format(unkn_recordings, bidsunknown))
        if not dry_run:
            bidsmap_unk.save(bidsunknown)
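
# Typical mapping workflow (a sketch, paths hypothetical): run mapper
# once to create destination/code/bidsme/bidsmap.yaml, check and edit
# the generated runs, then re-run until no unchecked runs remain:
#
#   mapper(source="prepared", destination="bids",
#          bidsmapfile="bidsmap.yaml",
#          map_template="bidsmap_template.yaml",
#          dry_run=False)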

def prepare(source: str, destination: str,
            plugin_file: str = "",
            plugin_opt: dict = {},
            sub_list: list = [],
            sub_skip_tsv: bool = False,
            sub_skip_dir: bool = False,
            ses_skip_dir: bool = False,
            part_template: str = "",
            sub_prefix: str = "",
            ses_prefix: str = "",
            sub_no_dir: bool = False,
            ses_no_dir: bool = False,
            data_dirs: dict = {},
            dry_run: bool = False
            ) -> None:
    """
    Prepare data from the source folder and place it in the
    destination folder.

    The source folder is expected to have the structure
    source/[subId/][sesId/][data/]file. The absence of the subId and
    sesId levels must be communicated via the sub_no_dir and
    ses_no_dir options. The list of data folders must be given
    in data_dirs.

    Prepared data will have the structure
    destination/sub-<subId>/ses-<sesId>/<type>/<sequence>/file.

    A list of treated subjects will be created/updated in the
    destination/participants.tsv file.

    Parameters
    ----------
    source: str
        folder containing the source dataset
    destination: str
        folder for the prepared dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to the plugin
    sub_list: list
        list of subjects to process. Subjects are checked after
        the plugin and must start with 'sub-', as in the
        destination folder
    sub_skip_tsv: bool
        if set to True, subjects found in destination/participants.tsv
        will be ignored
    sub_skip_dir: bool
        if set to True, subjects with already created directories
        will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already created directories
        will be ignored.
        Can conflict with ses_no_dir
    part_template: str
        path to the template json file from which participants.tsv
        will be modelled. Must be formatted as a usual BIDS sidecar
        json file for tsv files
    sub_prefix: str
        prefix for subject folders in the source dataset. If set,
        subject folders without the prefix will be ignored, and the
        prefix will be stripped from subject Ids:
        sub001 -> 001 if sub_prefix == sub.
        Option has no effect if sub_no_dir == True
    ses_prefix: str
        prefix for session folders in the source dataset. If set,
        session folders without the prefix will be ignored, and the
        prefix will be stripped from session Ids:
        sesTest -> Test if ses_prefix == ses.
        Option has no effect if ses_no_dir == True
    sub_no_dir: bool
        if set to True, the source dataset will not be expected
        to have subject folders
    ses_no_dir: bool
        if set to True, the source dataset will not be expected
        to have session folders
    data_dirs: dict
        dictionary with folders containing recording data as keys
        and data types as values. If a folder contains several types
        of data, the value must be set to an empty string
    dry_run: bool
        if set to True, no disk writing operations will be performed
    """
    logger.info("-------------- Preparing data --------------")
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    # source = os.path.abspath(source)
    if not os.path.isdir(source):
        logger.critical("Source directory {} does not exist"
                        .format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical("Destination directory {} does not exist"
                        .format(destination))
        raise NotADirectoryError(destination)

    if sub_no_dir and sub_skip_dir:
        logger.warning("Both sub_no_dir and sub_skip_dir are set. "
                       "Subjects will not be skipped "
                       "unless subId is defined in the plugin")
    if ses_no_dir and ses_skip_dir:
        logger.warning("Both ses_no_dir and ses_skip_dir are set. "
                       "Sessions will not be skipped "
                       "unless sesId is defined in the plugin")

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=dry_run,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    new_sub_json = os.path.join(destination, "participants.json")
    if not part_template:
        if os.path.isfile(new_sub_json):
            part_template = new_sub_json
    BidsSession.loadSubjectFields(part_template)

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file,
                                  sep="\t", header=0,
                                  na_values="n/a")
        if not BidsSession.checkDefinitions(old_sub):
            raise Exception("Destination participant.tsv incompatible "
                            "with given columns definitions")
    dupl_file = os.path.join(destination, "__duplicated.tsv")
    if os.path.isfile(dupl_file):
        logger.critical("Found unmerged file with duplicated subjects")
        raise FileExistsError(dupl_file)

    ###############
    # Subject loop
    ###############
    sub_prefix_dir, sub_prefix = os.path.split(sub_prefix)
    ses_prefix_dir, ses_prefix = os.path.split(ses_prefix)

    if not sub_no_dir:
        sub_dirs = tools.lsdirs(
                os.path.join(source, sub_prefix_dir),
                sub_prefix + '*')
    else:
        sub_dirs = [source]
    if not sub_dirs:
        logger.warning("No subject folders found")

    if not data_dirs:
        data_dirs = {}

    for sub_dir in sub_dirs:
        scan = BidsSession()
        scan.in_path = sub_dir
        # get name of subject from folder name
        if not sub_no_dir:
            scan.subject = os.path.basename(sub_dir)
            scan.subject = scan.subject[len(sub_prefix):]
        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}"
                           .format(scan.subject, "SubjectEP"))
            continue
        scan.lock_subject()

        if scan.subject is not None:
            if tools.skipEntity(scan.subject, sub_list,
                                old_sub if sub_skip_tsv else None,
                                destination if sub_skip_dir else ""):
                logger.info("Skipping subject '{}'"
                            .format(scan.subject))
                continue

        if not ses_no_dir:
            ses_dirs = tools.lsdirs(
                    os.path.join(sub_dir, ses_prefix_dir),
                    ses_prefix + '*')
        else:
            ses_dirs = [sub_dir]
        if not ses_dirs:
            logger.warning("No session folders found")

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("Scanning folder {}".format(ses_dir))
            if not ses_no_dir:
                scan.unlock_session()
                scan.session = os.path.basename(ses_dir)
                scan.session = scan.session[len(ses_prefix):]
            else:
                scan.unlock_session()
                scan.session = ""
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}"
                               .format(scan.session, "SessionEP"))
                continue
            scan.lock()

            if scan.session is not None:
                skip = False
                if ses_skip_dir:
                    if os.path.isdir(os.path.join(destination,
                                                  scan.session)):
                        logger.debug("{} dir exists"
                                     .format(scan.session))
                        skip = True
                if skip:
                    logger.info("Skipping session '{}'"
                                .format(scan.session))
                    continue

            if not data_dirs:
                data_dirs[""] = ""
            for rec_dirs, rec_type in data_dirs.items():
                rec_dirs = tools.lsdirs(ses_dir, rec_dirs)
                for rec_dir in rec_dirs:
                    if not os.path.isdir(rec_dir):
                        logger.warning("Sub: '{}', Ses: '{}': "
                                       "'{}' does not exist "
                                       "or is not a folder"
                                       .format(scan.subject,
                                               scan.session,
                                               rec_dir))
                        continue
                    cls = Modules.select(rec_dir, rec_type)
                    if cls is None:
                        logger.warning("Unable to identify data "
                                       "in folder {}"
                                       .format(rec_dir))
                        continue
                    recording = cls(rec_path=rec_dir)
                    if not recording or len(recording.files) == 0:
                        logger.warning("unable to load data in folder {}"
                                       .format(rec_dir))
                        continue
                    sortsession(destination, scan, recording, dry_run)
            plugins.RunPlugin("SessionEndEP", scan)

        scan.in_path = sub_dir
        plugins.RunPlugin("SubjectEndEP", scan)

    df_processed = BidsSession.exportAsDataFrame()
    if old_sub is not None:
        df_res = pandas.concat([old_sub, df_processed],
                               sort=False,
                               ignore_index=True)
    else:
        df_res = df_processed
    df_res = df_res[BidsSession.getSubjectColumns()].drop_duplicates()

    df_dupl = df_res.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several "
                        "duplicated entries: {}"
                        .format(", ".join(
                            df_res[df_dupl]["participant_id"])))

    if not dry_run:
        df_res[~df_dupl].to_csv(old_sub_file,
                                sep='\t', na_rep="n/a",
                                index=False, header=True)
        if df_dupl.any():
            logger.info("Saving the list to be merged manually to {}"
                        .format(dupl_file))
            df_res[df_dupl].to_csv(dupl_file,
                                   sep='\t', na_rep="n/a",
                                   index=False, header=True)

        new_sub_json = os.path.join(destination, "participants.json")
        if not os.path.isfile(new_sub_json):
            BidsSession.exportDefinitions(new_sub_json)

    plugins.RunPlugin("FinaliseEP")
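
# Example invocation of prepare (a sketch; folder names and the
# data_dirs mapping are hypothetical), turning
# source/<sub>/<ses>/nii/... into destination/sub-*/ses-*/MRI/...:
#
#   prepare(source="raw", destination="prepared",
#           sub_prefix="sub", ses_prefix="ses",
#           data_dirs={"nii": "MRI"},
#           dry_run=False)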