Example #1
def bidscoiner(rawfolder: str,
               bidsfolder: str,
               subjects: list = (),
               force: bool = False,
               participants: bool = False,
               bidsmapfile: str = 'bidsmap.yaml',
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), even when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bids.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(
        f"-------------- START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------"
    )
    LOGGER.info(
        f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}"
        f" participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}"
    )

    # Create a code/bidscoin subfolder
    (bidsfolder / 'code' / 'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder / 'dataset_description.json'
    if not dataset_file.is_file():
        dataset_description = {
            "Name":                 "REQUIRED. Name of the dataset",
            "BIDSVersion":          str(bids.bidsversion()),
            "DatasetType":          "raw",
            "License":              "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors":              ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":     "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge":     "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":              ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals":      ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks":   ["OPTIONAL. List of references to publication that contain information on the dataset, or links",
                                     "https://github.com/Donders-Institute/bidscoin"],
            "DatasetDOI":           "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"
        }
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder / 'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        with open(readme_file, 'w') as fid:
            fid.write(
                f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
                f"The raw BIDS data was created using BIDScoin {bids.version()}\n"
                f"All provenance information and settings can be found in ./code/bidscoin\n"
                f"For more information see: https://github.com/Donders-Institute/bidscoin"
            )

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile,
                                   bidsfolder / 'code' / 'bidscoin')
    if not bidsmap:
        LOGGER.error(
            f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and / or use the correct bidsfolder"
        )
        return

    # Save options to the .bidsignore file
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    LOGGER.info(f"Writing {bidsignore_items} entries to {bidsfolder}/.bidsignore")
    with (bidsfolder / '.bidsignore').open('w') as bidsignore:
        for item in bidsignore_items:
            bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {
            'participant_id': {
                'Description': 'Unique participant identifier'
            }
        }

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [subprefix + re.sub(f"^{subprefix}", '', subject) for subject in subjects]       # Make sure there is a "sub-" prefix
        subjects = [rawfolder / subject for subject in subjects if (rawfolder / subject).is_dir()]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(
            f"------------------- Subject {n}/{len(subjects)} -------------------"
        )
        if participants and subject.name in list(participants_table.index):
            LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
            continue

        personals = dict()
        sessions = bids.lsdirs(subject, sesprefix + '*')
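        # If there are no ses- subfolders, treat the subject folder itself as the (single) session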
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # See what dataformat we have
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping unknown session: {session}")
                continue

            # Check if we should skip the session-folder
            if not force:
                subid, sesid = bids.get_subid_sesid(session / 'dum.my',
                                                    subprefix=subprefix,
                                                    sesprefix=sesprefix)
                bidssession = bidsfolder / subid / sesid
                if not bidsmap[dataformat]['session']:
                    bidssession = bidssession.parent
                datatypes = []
                for datatype in bids.lsdirs(bidssession):                                   # See what datatypes we already have in the bids session-folder
                    if any(datatype.glob('*')) and bidsmap[dataformat].get(datatype.name):  # See if the datatype folder contains data and whether we are going to add data for this datatype
                        datatypes.append(datatype.name)
                if datatypes:
                    LOGGER.info(
                        f"Skipping processed session: {bidssession} already has {datatypes} data (use the -f option to overrule)"
                    )
                    continue

            LOGGER.info(f"Coining session: {session}")

            # Update / append the source data mapping
            if dataformat in ('DICOM', 'PAR'):
                coin_data2bids(dataformat, session, bidsmap, bidsfolder,
                               personals, subprefix, sesprefix)

            # Update / append the P7 mapping
            if dataformat == 'P7':
                LOGGER.error(
                    f"{dataformat} not (yet) supported, skipping session: {session}"
                )
                continue

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIns']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

        # Store the collected personals in the participant_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file

            if key not in participants_dict:
                participants_dict[key] = dict(LongName='Long (unabbreviated) name of the column',
                                              Description='Description of the column',
                                              Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                              Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                                              TermURL='URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    with participants_json.open('w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
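
A minimal usage sketch, not part of the original listing: assuming the function is importable as bidscoin.bidscoiner.bidscoiner and that a bidsmap.yaml has already been produced by the bidsmapper, a conversion run could look roughly like this. The folder paths are hypothetical placeholders.

from bidscoin.bidscoiner import bidscoiner   # assumed import path

# Convert every sub-* folder under the (hypothetical) source tree into a BIDS tree,
# using the bidsmap.yaml stored in /project/bids/code/bidscoin
bidscoiner(rawfolder='/project/sourcedata',
           bidsfolder='/project/bids',
           subjects=(),            # empty -> process all subjects found in rawfolder
           force=False,            # skip subjects that already have data in the bidsfolder
           participants=False)     # do not skip subjects already listed in participants.tsv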
Example #2
def bidsmapper(rawfolder: str,
               bidsfolder: str,
               bidsmapfile: str,
               templatefile: str,
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-',
               store: bool = False,
               interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder
    and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin.
    Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
    :return:                Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)
    templatefile = Path(templatefile)
    bidscoinfolder = bidsfolder / 'code' / 'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(
        f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile}"
        f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}"
    )

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _ = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with emptied modality entries
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality,
                                               bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
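    # Note: 'gui' starts out as the interactive flag; below it is replaced by the Ui_MainWindow instance (or set to None if no subjects are found)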
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper will now scan {bidsfolder} and whenever "
                f"it detects a new type of scan it will ask you to identify it.\n\n"
                f"It is important that you choose the correct BIDS modality "
                f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                f"At the end you will be shown an overview of all the "
                f"different scan types and BIDScoin options (as in the "
                f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and build up the bidsmap entries
    dataformat = ''
    subjects = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')
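            # Note: the boolean 'store' argument is replaced here by a dict with the provenance source/target folders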
            if unpacked:
                store = dict(source=unpacked,
                             target=bidscoinfolder / 'provenance')
            elif store:
                store = dict(source=rawfolder,
                             target=bidscoinfolder / 'provenance')
            else:
                store = dict()

            # Loop over the different DICOM runs (series) and collect source files
            sourcefiles = []
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(
                    f"Skipping: {session} (subject {n}/{len(subjects)})")
                continue

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            if dataformat == 'DICOM':
                for sourcedir in bids.lsdirs(session):
                    sourcefile = bids.get_dicomfile(sourcedir)
                    if sourcefile.name:
                        sourcefiles.append(sourcefile)

            if dataformat == 'PAR':
                sourcefiles = bids.get_parfiles(session)

            if dataformat == 'P7':
                sourcefiles = bids.get_p7file(session)

            # Update the bidsmap with the info from the source files
            for sourcefile in sourcefiles:
                bidsmap_new = build_bidsmap(dataformat, sourcefile,
                                            bidsmap_new, bidsmap_old, template,
                                            store, gui)

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                bidsmap_new = build_filesystemmap(session, bidsmap_new,
                                                  bidsmap_old)

            # Update / append the plugin mapping
            if bidsmap_old['PlugIns']:
                bidsmap_new = build_pluginmap(session, bidsmap_new,
                                              bidsmap_old)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder / 'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                'Could not determine the dataformat of the source data.\n'
                'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper has finished scanning {rawfolder}\n\n"
                f"Please carefully check all the different BIDS output names "
                f"and BIDScoin options and (re)edit them to your needs.\n\n"
                f"You can always redo this step later by re-running the "
                f"bidsmapper or by just running the bidseditor tool")

            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin,
                        bidsfolder,
                        bidsmapfile,
                        bidsmap_new,
                        copy.deepcopy(bidsmap_new),
                        template,
                        dataformat,
                        subprefix=subprefix,
                        sesprefix=sesprefix)
            mainwin.show()
            app.exec()
    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
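
A matching usage sketch for the mapper, again an assumption rather than part of the original listing: the import path, folder paths, and template filename below are hypothetical. Running it non-interactively builds or refreshes the study bidsmap before bidscoiner is called.

from bidscoin.bidsmapper import bidsmapper   # assumed import path

# Scan the (hypothetical) source tree and write a maximally filled-in bidsmap.yaml
# to /project/bids/code/bidscoin, without launching the Qt bidseditor
bidsmapper(rawfolder='/project/sourcedata',
           bidsfolder='/project/bids',
           bidsmapfile='bidsmap.yaml',
           templatefile='bidsmap_template.yaml',   # hypothetical template filename
           interactive=False)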