Example #1
def process(source: str,
            destination: str,
            plugin_file: str = "",
            plugin_opt: dict = {},
            sub_list: list = [],
            sub_skip_tsv: bool = False,
            sub_skip_dir: bool = False,
            ses_skip_dir: bool = False,
            part_template: str = "",
            bidsmapfile: str = "bidsmap.yaml",
            dry_run: bool = False) -> None:
    """
    Process a prepared dataset before bidsification.
    Can be used to produce derivatives, conversion or
    anonymisation, taking advantage of the recording
    identification by bidsmap.yaml.

    Essentially it is identical to bidsification, but
    without the bidsification step itself.

    Only subjects listed in source/participants.tsv are
    treated; this list can be narrowed using the sub_list,
    sub_skip_tsv and sub_skip_dir options.

    Parameters
    ----------
    source: str
        folder containing source dataset
    destination: str
        folder for the prepared dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to plugin
    sub_list: list
        list of subjects to process. Subjects
        are checked after the plugin and must
        start with 'sub-', as in the destination
        folder
    sub_skip_tsv: bool
        if set to True, subjects found in
        destination/participants.tsv will be
        ignored
    sub_skip_dir: bool
        if set to True, subjects with already
        created directories will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already
        created directories will be ignored.
        Can conflict with ses_no_dir
    part_template: str
        path to the template json file, from which
        participants.tsv will be modelled. If unset,
        the default "source/participants.json"
        is used. Setting this variable may break
        the workflow
    bidsmapfile: str
        The name of the bidsmap file; it will be searched
        for in the destination/code/bidsme directory, unless
        the path is absolute
    dry_run: bool
        if set to True, no disk writing operations
        will be performed
    """

    logger.info("-------------- Processing data -------------")
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    # source = os.path.abspath(source)
    if not os.path.isdir(source):
        logger.critical("Source directory {} doesn't exist".format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical(
            "Destination directory {} doesn't exist".format(destination))
        raise NotADirectoryError(destination)

    # Input checking & defaults
    bidscodefolder = os.path.join(destination, 'code', 'bidsme')

    # Create a code/bidsme subfolder
    os.makedirs(bidscodefolder, exist_ok=True)

    # Check for dataset description file
    dataset_file = os.path.join(destination, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        logger.warning("Dataset description file 'dataset_description.json' "
                       "not found in '{}'".format(destination))

    # Check for README file
    readme_file = os.path.join(destination, 'README')
    if not os.path.isfile(readme_file):
        logger.warning("Dataset readme file 'README' "
                       "not found in '{}'".format(destination))

    # Get the bidsmap heuristics from the bidsmap YAML-file
    fname = paths.findFile(bidsmapfile, bidscodefolder, paths.local,
                           paths.config)
    if not fname:
        logger.critical('Bidsmap file {} not found.'.format(bidsmapfile))
        raise FileNotFoundError(bidsmapfile)
    else:
        bidsmapfile = fname
    logger.info("loading bidsmap {}".format(bidsmapfile))
    bidsmap = Bidsmap(bidsmapfile)

    ntotal, ntemplate, nunchecked = bidsmap.countRuns()
    logger.debug("Map contains {} runs".format(ntotal))
    if ntemplate != 0:
        logger.warning("Map contains {} template runs".format(ntemplate))
    if nunchecked != 0:
        logger.critical("Map contains {} unchecked runs".format(nunchecked))
        raise Exception("Unchecked runs present")

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=dry_run,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    if not part_template:
        part_template = os.path.join(source, "participants.json")
    else:
        logger.warning(
            "Loading external participant template {}".format(part_template))
    BidsSession.loadSubjectFields(part_template)

    new_sub_file = os.path.join(source, "participants.tsv")
    df_sub = pandas.read_csv(new_sub_file, sep="\t", header=0,
                             na_values="n/a").drop_duplicates()
    df_dupl = df_sub.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several duplicated "
                        "entries: {}".format(", ".join(
                            df_sub[df_dupl]["participant_id"])))
        raise Exception("Duplicated subjects")

    dupl_file = os.path.join(source, "__duplicated.tsv")
    if os.path.isfile(dupl_file):
        logger.critical("Found unmerged file with duplicated subjects")
        raise FileExistsError(dupl_file)

    new_sub_json = os.path.join(source, "participants.json")
    if not tools.checkTsvDefinitions(df_sub, new_sub_json):
        raise Exception("Incompatible sidecar json")

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file,
                                  sep="\t",
                                  header=0,
                                  na_values="n/a")
        if not old_sub.columns.equals(df_sub.columns):
            logger.warning("Source participant.tsv has different columns "
                           "from destination dataset")
        old_sub = old_sub["participant_id"]

    ##############################
    # Subjects loop
    ##############################
    n_subjects = len(df_sub["participant_id"])
    for index, sub_row in df_sub.iterrows():
        sub_no = index + 1
        sub_id = sub_row["participant_id"]
        sub_dir = os.path.join(source, sub_id)
        if not os.path.isdir(sub_dir):
            logger.error("{}: Not found in {}".format(sub_id, source))
            continue

        scan = BidsSession()
        scan.in_path = sub_dir
        scan.subject = sub_id

        #################################################
        # Cloning df_sub row values into the scan's sub_values
        #################################################
        for column in df_sub.columns:
            scan.sub_values[column] = sub_row[column]

        # locking the subject here forbids renaming it during
        # processing, as it would be unclear how to manage data folders
        scan.lock_subject()
        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}".format(
                scan.subject, "SubjectEP"))
            continue

        if not scan.isSubValid():
            logger.error("{}: Subject id '{}' is not valid".format(
                sub_id, scan.subject))
            continue

        if tools.skipEntity(scan.subject, sub_list,
                            old_sub if sub_skip_tsv else None,
                            destination if sub_skip_dir else ""):
            logger.info("Skipping subject '{}'".format(scan.subject))
            continue

        ses_dirs = tools.lsdirs(sub_dir, 'ses-*')
        if not ses_dirs:
            logger.error("{}: No sessions found in: {}".format(
                scan.subject, sub_dir))
            continue

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("{} ({}/{}): Scanning folder {}".format(
                scan.subject, sub_no, n_subjects, ses_dir))
            scan.unlock_session()
            scan.session = os.path.basename(ses_dir)
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}".format(
                    scan.session, "SessionEP"))
                continue

            scan.lock()

            if ses_skip_dir and tools.skipEntity(
                    scan.session, [], None,
                    os.path.join(destination, scan.subject)):
                logger.info("Skipping session '{}'".format(scan.session))
                continue

            for module in Modules.selector.types_list:
                mod_dir = os.path.join(ses_dir, module)
                if not os.path.isdir(mod_dir):
                    logger.debug("Module {} not found in {}".format(
                        module, ses_dir))
                    continue
                for run in tools.lsdirs(mod_dir):
                    scan.in_path = run
                    cls = Modules.select(run, module)
                    if cls is None:
                        logger.error(
                            "Failed to identify data in {}".format(run))
                        continue
                    recording = cls(rec_path=run)
                    if not recording or len(recording.files) == 0:
                        logger.error(
                            "unable to load data in folder {}".format(run))
                        continue
                    recording.setBidsSession(scan)
                    coin(destination, recording, bidsmap, dry_run)
            plugins.RunPlugin("SessionEndEP", scan)

        scan.in_path = sub_dir
        plugins.RunPlugin("SubjectEndEP", scan)

    ##################################
    # Merging the participants table
    ##################################
    df_processed = BidsSession.exportAsDataFrame()

    col_mismatch = False
    if not df_processed.columns.equals(df_sub.columns):
        col_mismatch = True
        logger.warning("Modified participant table do not match "
                       "original table. This is discouraged and can "
                       "break future preparation and process steps")
        for col in df_processed.columns.difference(df_sub.columns):
            df_sub[col] = None
        df_sub = df_sub[BidsSession.getSubjectColumns()]
        df_sub.drop_duplicates(inplace=True)

    df_res = pandas.concat([df_sub, df_processed],
                           join="inner",
                           keys=("original", "processed"),
                           names=("stage", "ID"))
    df_res = df_res.drop_duplicates()

    df_dupl = df_res.duplicated("participant_id", keep=False)

    if df_dupl.any():
        logger.info("Updating participants values")
        df_dupl = df_dupl.drop(["processed"])
        df_res.drop(df_dupl[df_dupl].index, inplace=True)

    df_dupl = df_res.duplicated("participant_id")
    if df_dupl.any():
        logger.error("Participant list contains one or several duplicated "
                     "entries: {}".format(", ".join(
                         df_res[df_dupl]["participant_id"])))

    ##################################
    # Saving the participants table
    ##################################
    if not dry_run:
        df_res[~df_dupl].to_csv(new_sub_file,
                                sep='\t',
                                na_rep="n/a",
                                index=False,
                                header=True)
        if df_dupl.any():
            logger.info("Saving the list to be merged manually to {}".format(
                dupl_file))
            df_res[df_dupl].to_csv(dupl_file,
                                   sep='\t',
                                   na_rep="n/a",
                                   index=False,
                                   header=True)
        json_file = tools.change_ext(new_sub_file, "json")
        if col_mismatch or not os.path.isfile(json_file):
            BidsSession.exportDefinitions(json_file)

    plugins.RunPlugin("FinaliseEP")
Example #2
def mapper(source: str, destination: str,
           plugin_file: str = "",
           plugin_opt: dict = {},
           sub_list: list = [],
           sub_skip_tsv: bool = False,
           sub_skip_dir: bool = False,
           ses_skip_dir: bool = False,
           bidsmapfile: str = "bidsmap.yaml",
           map_template: str = "bidsmap_template.yaml",
           dry_run: bool = False
           ) -> None:
    """
    Generates bidsmap.yaml from a prepared dataset and
    a map template.

    Only subjects listed in source/participants.tsv are
    treated; this list can be narrowed using the sub_list,
    sub_skip_tsv and sub_skip_dir options.

    Parameters
    ----------
    source: str
        folder containing source dataset
    destination: str
        folder for the prepared dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to plugin
    sub_list: list
        list of subjects to process. Subjects
        are checked after the plugin and must
        start with 'sub-', as in the destination
        folder
    sub_skip_tsv: bool
        if set to True, subjects found in
        destination/participants.tsv will be
        ignored
    sub_skip_dir: bool
        if set to True, subjects with already
        created directories will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already
        created directories will be ignored.
        Can conflict with ses_no_dir
    bidsmapfile: str
        The name of the bidsmap file; it will be searched
        for in the destination/code/bidsme directory, unless
        the path is absolute
    map_template: str
        The name of the template map. The file is searched
        for in the heuristics folder
    dry_run: bool
        if set to True, no disk writing operations
        will be performed
    """

    logger.info("------------ Generating bidsmap ------------")
    logger.info("Current directory: {}".format(os.getcwd()))
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    if not os.path.isdir(source):
        logger.critical("Source directory {} doesn't exist"
                        .format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical("Destination directory {} doesn't exist"
                        .format(destination))
        raise NotADirectoryError(destination)

    bidscodefolder = os.path.join(destination, 'code', 'bidsme')
    os.makedirs(bidscodefolder, exist_ok=True)

    # Get the heuristics for filling the new bidsmap
    logger.info("loading template bidsmap {}".format(map_template))
    fname = paths.findFile(map_template,
                           paths.local,
                           paths.config,
                           paths.heuristics)
    if not fname:
        logger.warning("Unable to find template map {}"
                       .format(map_template))
    template = bidsmap.Bidsmap(fname)

    fname = paths.findFile(bidsmapfile,
                           bidscodefolder,
                           paths.local,
                           paths.config
                           )
    if not fname:
        bidsmapfile = os.path.join(bidscodefolder, bidsmapfile)
    else:
        bidsmapfile = fname
    logger.info("loading working bidsmap {}".format(bidsmapfile))
    bidsmap_new = bidsmap.Bidsmap(bidsmapfile)

    logger.debug("Creating bidsmap for unknown modalities")
    # removing old unknown files
    bidsunknown = os.path.join(bidscodefolder, 'unknown.yaml')
    if os.path.isfile(bidsunknown):
        os.remove(bidsunknown)
    bidsmap_unk = bidsmap.Bidsmap(bidsunknown)

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=True,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    new_sub_file = os.path.join(source, "participants.tsv")
    df_sub = pandas.read_csv(new_sub_file,
                             sep="\t", header=0,
                             na_values="n/a")
    df_dupl = df_sub.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several duplicated "
                        "entries: {}"
                        .format(", ".join(df_sub[df_dupl]["participant_id"]))
                        )
        raise Exception("Duplicated subjects")

    new_sub_json = os.path.join(source, "participants.json")
    if not tools.checkTsvDefinitions(df_sub, new_sub_json):
        raise Exception("Incompatible sidecar json")

    BidsSession.loadSubjectFields(new_sub_json)
    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file, sep="\t", header=0,
                                  na_values="n/a")

    df_res = df_sub
    if old_sub is not None:
        if not old_sub.columns.equals(df_sub.columns):
            logger.critical("Participant.tsv has differenrt columns "
                            "from destination dataset")
            raise Exception("Participants column mismatch")
        df_res = pandas.concat([old_sub, df_sub],
                               ignore_index=True).drop_duplicates()
        df_dupl = df_res.duplicated("participant_id")
        if df_dupl.any():
            logger.critical("Joined participant list contains one or "
                            "several duplicated entries: {}"
                            .format(", ".join(
                                    df_sub[df_dupl]["participant_id"])
                                    )
                            )
            raise Exception("Duplicated subjects")
        old_sub = old_sub["participant_id"]

    ##############################
    # Subjects loop
    ##############################
    n_subjects = len(df_sub["participant_id"])
    for sub_no, sub_id in enumerate(df_sub["participant_id"], 1):
        sub_dir = os.path.join(source, sub_id)
        if not os.path.isdir(sub_dir):
            logger.error("{}: Not found in {}"
                         .format(sub_id, source))
            continue

        scan = BidsSession()
        scan.in_path = sub_dir
        scan.subject = sub_id
        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}"
                           .format(scan.subject, "SubjectEP"))
            continue
        scan.lock_subject()
        if not scan.isSubValid():
            logger.error("{}: Subject id '{}' is not valid"
                         .format(sub_id, scan.subject))
            continue

        if tools.skipEntity(scan.subject, sub_list,
                            old_sub if sub_skip_tsv else None,
                            destination if sub_skip_dir else ""):
            logger.info("Skipping subject '{}'"
                        .format(scan.subject))
            continue

        ses_dirs = tools.lsdirs(sub_dir, 'ses-*')
        if not ses_dirs:
            logger.error("{}: No sessions found in: {}"
                         .format(scan.subject, sub_dir))
            continue

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("{} ({}/{}): Scanning folder {}"
                        .format(scan.subject,
                                sub_no,
                                n_subjects,
                                ses_dir))
            scan.unlock_session()
            scan.session = os.path.basename(ses_dir)
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}"
                               .format(scan.session, "SessionEP"))
                continue
            scan.lock()

            if ses_skip_dir and tools.skipEntity(scan.session,
                                                 [], None,
                                                 os.path.join(destination,
                                                              scan.subject)):
                logger.info("Skipping session '{}'"
                            .format(scan.session))
                continue

            for module in Modules.selector.types_list:
                mod_dir = os.path.join(ses_dir, module)
                if not os.path.isdir(mod_dir):
                    logger.debug("Module {} not found in {}"
                                 .format(module, ses_dir))
                    continue
                for run in tools.lsdirs(mod_dir):
                    cls = Modules.selector.select(run, module)
                    if cls is None:
                        logger.error("Failed to identify data in {}"
                                     .format(run))
                        continue
                    recording = cls(rec_path=run)
                    if not recording or len(recording.files) == 0:
                        logger.error("Unable to load data in folder {}"
                                     .format(run))
                        continue
                    recording.setBidsSession(scan)
                    createmap(recording, bidsmap_new, template, bidsmap_unk)

    if not dry_run:
        # Save the bidsmap to the bidsmap YAML-file
        bidsmap_new.save(bidsmapfile, empty_attributes=False)

    ntotal, ntemplate, nunchecked = bidsmap_new.countRuns()
    logger.info("Map contains {} runs".format(ntotal))
    if ntemplate != 0:
        logger.warning("Map contains {} template runs"
                       .format(ntemplate))
    if nunchecked != 0:
        logger.warning("Map contains {} unchecked runs"
                       .format(nunchecked))

    # Scanning unknown recordings and exporting them to the yaml file
    unkn_recordings = bidsmap_unk.countRuns()[0]
    if unkn_recordings > 0:
        logger.error("Was unable to identify {} recordings. "
                     "See {} for details"
                     .format(unkn_recordings, bidsunknown))
        if not dry_run:
            bidsmap_unk.save(bidsunknown)
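
A usage sketch for mapper() under the same assumptions; the paths below are placeholders:

# A hypothetical call to mapper(); paths are placeholders.
# With dry_run=True the map is generated and its runs counted,
# but neither bidsmap.yaml nor unknown.yaml is written to disk.
mapper(source="./prepared",
       destination="./bids",
       bidsmapfile="bidsmap.yaml",            # searched in destination/code/bidsme
       map_template="bidsmap_template.yaml",  # searched in the heuristics folder
       dry_run=True)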
Example #3
def SubjectEP(session: BidsSession) -> int:
    """
    Subject determination and initialisation

    1. Checks that the subject is not in the black list
    2. Loads demographics from the subject table
    3. Creates the session parsing dictionary

    Parameters
    ----------
    session: BidsSession

    Returns
    -------
    int:
        if 0, the plugin succeeded
        if > 0, the plugin failed and an exception will be raised
        if < 0, the plugin failed and the subject will be skipped
    """

    #################################
    # Skipping if in the black list #
    #################################
    if session.subject in sub_black_list:
        logger.info("Subject '{}' is in black_list"
                    .format(session.subject))
        return -1

    #################################
    # Retrieving subject from table #
    #################################
    try:
        # the folder name in the source dataset may
        # not be convertible to an integer
        sub_id = int(session.subject)
    except ValueError as e:
        logger.error("Subject {}: Can't determine subject Id for: {}"
                     .format(session.subject, e))
        return -1

    # storing bidsified subject id into session object
    # optional, but useful as reference
    session.sub_values["participant_id"] = "sub-" + session.subject
    # looking for subject in dataframe
    prefix = "pat"
    index = df_subjects.loc[df_subjects[prefix] == sub_id].index
    # storing participant group in session
    session.sub_values["group"] = "patient"

    if len(index) == 0:
        # Subject not in patient list, looking in control
        prefix = "cnt"
        index = df_subjects.loc[df_subjects[prefix] == sub_id].index
        session.sub_values["group"] = "control"
    if len(index) == 0:
        raise KeyError("Subject {} not found in table"
                       .format(sub_id))
    if len(index) > 1:
        logger.warning("Subject {}: several column entries present"
                       .format(sub_id))
    index = index[0]

    # retrieving demographics
    sex = df_subjects.loc[index, prefix + "_sex"]
    age = df_subjects.loc[index, prefix + "_age"]
    education = df_subjects.loc[index, prefix + "_edu"]

    # session values are initialised to null;
    # fill them only if they were retrieved from the table
    if pandas.notna(sex):
        session.sub_values["sex"] = sex
    if pandas.notna(age):
        session.sub_values["age"] = float(age)
    if pandas.notna(education):
        session.sub_values["education"] = float(education)

    # looking for pairing
    paired = df_subjects.loc[index, sub_prefix[prefix == "cnt"]]
    if pandas.notna(paired):
        session.sub_values["paired"] = "sub-{:03}".format(int(paired))

    #################################
    # determining order of sessions #
    #################################
    scans_map.clear()
    scans_order = sorted([os.path.basename(s) for s in
                          tools.lsdirs(os.path.join(rawfolder,
                                                    session.subject),
                                       "s*")
                          ])
    # looping over sessions defined in columns
    for ind, s in enumerate(("_1", "_2", "_3")):
        v = "ses-" + str(df_subjects.loc[index, prefix + s]).strip()
        ses = "ses" + s
        if v == "ses-nan":
            # Session not defined in table, but existing
            # in source dataset
            session.sub_values[ses] = ""
            logger.warning("Subject {}({}): missing {} value"
                           .format(session.sub_values["participant_id"],
                                   session.sub_values["group"],
                                   ses)
                           )
        elif v == "ses-OUT":
            # participant left study
            logger.warning("Subject {}({}): seems to be abandoned study"
                           .format(session.sub_values["participant_id"],
                                   session.sub_values["group"]
                                   )
                           )
            return -1
        elif v not in Series:
            # invalid session name
            logger.critical("Subject {}({}): Invalid {}: {}"
                            .format(session.sub_values["participant_id"],
                                    session.sub_values["group"],
                                    ses,
                                    v)
                            )
            raise KeyError("Invalid {}: {}"
                           .format(ses, v))
        else:
            # session retrieved, storing values
            session.sub_values[ses] = v
            scans_map[scans_order[ind]] = v

    # checking that all scans are identifiable;
    # if not, additional scans will be stored
    # with their original names
    for scan in scans_order:
        if scan not in scans_map:
            logger.error("Subject {}({}): Can't identify session {}"
                         .format(session.sub_values["participant_id"],
                                 session.sub_values["group"],
                                 scan))
            scans_map[scan] = scan

    # optional, the 'sub-' prefix is added
    # automatically if not present
    session.subject = "sub-" + session.subject
Example #4
def prepare(source: str, destination: str,
            plugin_file: str = "",
            plugin_opt: dict = {},
            sub_list: list = [],
            sub_skip_tsv: bool = False,
            sub_skip_dir: bool = False,
            ses_skip_dir: bool = False,
            part_template: str = "",
            sub_prefix: str = "",
            ses_prefix: str = "",
            sub_no_dir: bool = False,
            ses_no_dir: bool = False,
            data_dirs: dict = {},
            dry_run: bool = False
            ) -> None:
    """
    Prepare data from the source folder and place it in
    the destination folder.

    The source folder is expected to have the structure
    source/[subId/][sesId/][data/]file.
    The absence of subId and sesId levels must be communicated
    via the sub_no_dir and ses_no_dir options. The list of data
    folders must be given in data_dirs.

    Prepared data will have the structure
    destination/sub-<subId>/ses-<sesId>/<type>/<sequence>/file

    A list of treated subjects will be created/updated
    in the destination/participants.tsv file

    Parameters
    ----------
    source: str
        folder containing source dataset
    destination: str
        folder for the prepared dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to plugin
    sub_list: list
        list of subjects to process. Subjects
        are checked after the plugin and must
        start with 'sub-', as in the destination
        folder
    sub_skip_tsv: bool
        if set to True, subjects found in
        destination/participants.tsv will be
        ignored
    sub_skip_dir: bool
        if set to True, subjects with already
        created directories will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already
        created directories will be ignored.
        Can conflict with ses_no_dir
    part_template: str
        path to the template json file, from which
        participants.tsv will be modelled. Must be
        formatted as a usual BIDS sidecar json file
        for tsv files
    sub_prefix: str
        prefix for subject folders in the source dataset.
        If set, subject folders without the prefix will
        be ignored, and the prefix will be stripped from
        subject Ids: sub001 -> 001 if sub_prefix == sub.
        Option has no effect if sub_no_dir == True
    ses_prefix: str
        prefix for session folders in the source dataset.
        If set, session folders without the prefix will
        be ignored, and the prefix will be stripped from
        session Ids: sesTest -> Test if ses_prefix == ses.
        Option has no effect if ses_no_dir == True
    sub_no_dir: bool
        if set to True, source dataset will not be
        expected to have subject folders.
    ses_no_dir: bool
        if set to True, source dataset will not be
        expected to have session folders.
    data_dirs: dict
        dictionary mapping folders containing recording
        data to their data type.
        If a folder contains several types of data,
        the value must be set to an empty string
    dry_run: bool
        if set to True, no disk writing operations
        will be performed
    """

    logger.info("-------------- Prepearing data -------------")
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    # source = os.path.abspath(source)
    if not os.path.isdir(source):
        logger.critical("Source directory {} doesn't exist"
                        .format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical("Destination directory {} doesn't exist"
                        .format(destination))
        raise NotADirectoryError(destination)

    if sub_no_dir and sub_skip_dir:
        logger.warning("Both sub_no_dir and sub_skip_dir are set. "
                       "Subjects will not be skipped "
                       "unless subId defined in plugin")
    if ses_no_dir and ses_skip_dir:
        logger.warning("Both ses_no_dir and ses_skip_dir are set. "
                       "Sessions will not be skipped "
                       "unless sesId defined in plugin")

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=dry_run,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    new_sub_json = os.path.join(destination, "participants.json")
    if not part_template:
        if os.path.isfile(new_sub_json):
            part_template = new_sub_json
    BidsSession.loadSubjectFields(part_template)

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file, sep="\t", header=0,
                                  na_values="n/a")
        if not BidsSession.checkDefinitions(old_sub):
            raise Exception("Destination participant.tsv incompatible "
                            "with given columns definitions")
    dupl_file = os.path.join(destination, "__duplicated.tsv")
    if os.path.isfile(dupl_file):
        logger.critical("Found unmerged file with duplicated subjects")
        raise FileExistsError(dupl_file)

    ###############
    # Subject loop
    ###############
    sub_prefix_dir, sub_prefix = os.path.split(sub_prefix)
    ses_prefix_dir, ses_prefix = os.path.split(ses_prefix)

    if not sub_no_dir:
        sub_dirs = tools.lsdirs(
                os.path.join(source, sub_prefix_dir),
                sub_prefix + '*')
    else:
        sub_dirs = [source]
    if not sub_dirs:
        logger.warning("No subject folders found")

    if not data_dirs:
        data_dirs = {}

    for sub_dir in sub_dirs:
        scan = BidsSession()
        scan.in_path = sub_dir
        # get name of subject from folder name
        if not sub_no_dir:
            scan.subject = os.path.basename(sub_dir)
            scan.subject = scan.subject[len(sub_prefix):]
        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}"
                           .format(scan.subject, "SubjectEP"))
            continue
        scan.lock_subject()

        if scan.subject is not None:
            if tools.skipEntity(scan.subject, sub_list,
                                old_sub if sub_skip_tsv else None,
                                destination if sub_skip_dir else ""):
                logger.info("Skipping subject '{}'"
                            .format(scan.subject))
                continue

        if not ses_no_dir:
            ses_dirs = tools.lsdirs(
                    os.path.join(sub_dir, ses_prefix_dir),
                    ses_prefix + '*')
        else:
            ses_dirs = [sub_dir]
        if not ses_dirs:
            logger.warning("No session folders found")

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("Scanning folder {}".format(ses_dir))
            if not ses_no_dir:
                scan.unlock_session()
                scan.session = os.path.basename(ses_dir)
                scan.session = scan.session[len(ses_prefix):]
            else:
                scan.unlock_session()
                scan.session = ""
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}"
                               .format(scan.session, "SessionEP"))
                continue

            scan.lock()

            if scan.session is not None:
                skip = False
                if ses_skip_dir:
                    if os.path.isdir(os.path.join(destination,
                                                  scan.subject,
                                                  scan.session)):
                        logger.debug("{} dir exists".format(scan.session))
                        skip = True
                if skip:
                    logger.info("Skipping session '{}'"
                                .format(scan.session))
                    continue

            if not data_dirs:
                data_dirs[""] = ""
            for rec_dirs, rec_type in data_dirs.items():
                rec_dirs = tools.lsdirs(ses_dir, rec_dirs)
                for rec_dir in rec_dirs:
                    if not os.path.isdir(rec_dir):
                        logger.warning("Sub: '{}', Ses: '{}': "
                                       "'{}' don't exists "
                                       "or not a folder"
                                       .format(scan.subject,
                                               scan.session,
                                               rec_dir))
                        continue
                    cls = Modules.select(rec_dir, rec_type)
                    if cls is None:
                        logger.warning("Unable to identify data in folder {}"
                                       .format(rec_dir))
                        continue
                    recording = cls(rec_path=rec_dir)
                    if not recording or len(recording.files) == 0:
                        logger.warning("Unable to load data in folder {}"
                                       .format(rec_dir))
                        continue
                    sortsession(destination, scan, recording, dry_run)
            plugins.RunPlugin("SessionEndEP", scan)

        scan.in_path = sub_dir
        plugins.RunPlugin("SubjectEndEP", scan)

    df_processed = BidsSession.exportAsDataFrame()

    if old_sub is not None:
        df_res = pandas.concat([old_sub, df_processed],
                               sort=False,
                               ignore_index=True)
    else:
        df_res = df_processed
    df_res = df_res[BidsSession.getSubjectColumns()].drop_duplicates()

    df_dupl = df_res.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several duplicated "
                        "entries: {}"
                        .format(", ".join(df_res[df_dupl]["participant_id"]))
                        )

    if not dry_run:
        df_res[~df_dupl].to_csv(old_sub_file,
                                sep='\t', na_rep="n/a",
                                index=False, header=True)
        if df_dupl.any():
            logger.info("Saving the list to be merged manually to {}"
                        .format(dupl_file))
            df_res[df_dupl].to_csv(dupl_file,
                                   sep='\t', na_rep="n/a",
                                   index=False, header=True)

        new_sub_json = os.path.join(destination, "participants.json")
        if not os.path.isfile(new_sub_json):
            BidsSession.exportDefinitions(new_sub_json)

    plugins.RunPlugin("FinaliseEP")