Exemplo n.º 1
0
def interpret(
    maps_dir,
    data_group,
    name,
    caps_directory,
    tsv_path,
    selection_metrics,
    multi_cohort,
    target_node,
    save_individual,
    batch_size,
    nproc,
    use_cpu,
    verbose=0,
):
    """
    Load a MAPS with `MapsManager` and call its `interpret` method.

    All arguments except `maps_dir` and `verbose` are forwarded unchanged to
    `MapsManager.interpret`; their exact semantics are defined there.

    Args:
        maps_dir: location handed to `MapsManager` to load the MAPS.
        data_group: forwarded as `data_group`.
        name: forwarded as `name`.
        caps_directory: forwarded as `caps_directory`.
        tsv_path: forwarded as `tsv_path`.
        selection_metrics: forwarded as `selection_metrics`.
        multi_cohort: forwarded as `multi_cohort`.
        target_node: forwarded as `target_node`.
        save_individual: forwarded as `save_individual`.
        batch_size: forwarded as `batch_size`.
        nproc: forwarded as `num_workers`.
        use_cpu: forwarded as `use_cpu`.
        verbose: level of verbosity (0: warning, 1: info, 2: debug).
            Values above 2 are treated as "debug".
    """
    verbose_list = ["warning", "info", "debug"]
    # Clamp the index so that a verbosity above 2 falls back to "debug"
    # instead of raising IndexError.
    verbose_str = verbose_list[min(verbose, len(verbose_list) - 1)]

    maps_manager = MapsManager(maps_dir, verbose=verbose_str)

    maps_manager.interpret(
        data_group=data_group,
        name=name,
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        target_node=target_node,
        save_individual=save_individual,
        batch_size=batch_size,
        num_workers=nproc,
        use_cpu=use_cpu,
    )
Exemplo n.º 2
0
def train(
    maps_dir: str,
    train_dict: Dict[str, Any],
    split_list: List[int],
    erase_existing: bool = True,
):
    """
    Build a `MapsManager` from training parameters and launch training.

    Args:
        maps_dir: first positional argument given to `MapsManager`.
        train_dict: second positional argument given to `MapsManager`.
        split_list: forwarded to `MapsManager.train` as `split_list`.
        erase_existing: forwarded to `MapsManager.train` as `overwrite`.
    """
    # The manager is always created with the "info" verbosity level.
    MapsManager(maps_dir, train_dict, verbose="info").train(
        split_list=split_list,
        overwrite=erase_existing,
    )
Exemplo n.º 3
0
def test_interpret(cli_commands):
    """
    Train a model through the `clinicadl` command line, then interpret it.

    Args:
        cli_commands: sequence of command-line tokens appended after
            `clinicadl` (joined with spaces).
    """
    cnn_input = cli_commands
    # Start from a clean output directory.
    if os.path.exists("results"):
        shutil.rmtree("results")

    # A zero status from os.system means the command succeeded.
    train_succeeded = not os.system("clinicadl " + " ".join(cnn_input))
    # Fail fast: without a successful training the "results" MAPS cannot be
    # used, and the calls below would fail with an unrelated error.
    assert train_succeeded

    maps_manager = MapsManager("results", verbose="debug")
    maps_manager.interpret("train", "test")
    # The returned map is not inspected; the call is kept because it
    # presumably fails if the interpretation was not written.
    maps_manager.get_interpretation("train", "test")
    shutil.rmtree("results")
Exemplo n.º 4
0
def predict(
    maps_dir,
    data_group,
    caps_directory,
    tsv_path,
    labels=True,
    gpu=True,
    num_workers=0,
    batch_size=1,
    prepare_dl=True,
    selection_metrics=None,
    diagnoses=None,
    multi_cohort=False,
    overwrite=False,
):
    """
    Load a MAPS and predict the global metrics and individual values
    for all the models selected using a metric in selection_metrics.

    Args:
        maps_dir (str): path to the MAPS.
        data_group: prefix of all classification outputs.
        caps_directory (str): path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path (str): path to a TSV file containing the list of participants and sessions.
        labels: True by default. If False no metrics tsv files will be written.
            Forwarded as `use_labels`.
        gpu: if True, the gpu is used. Forwarded inverted as `use_cpu`.
        num_workers: num_workers used in DataLoader.
        batch_size: batch size of the DataLoader.
        prepare_dl: if True, uses extracted patches/slices, otherwise extracts them
            on-the-fly.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort (bool): If True caps_directory is the path to a TSV file linking cohort names and paths.
        overwrite (bool): If True former definition of data group is erased.
    """
    # Verbosity is not configurable here: the manager always uses "warning".
    manager = MapsManager(maps_dir, verbose="warning")

    prediction_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        use_labels=labels,
        prepare_dl=prepare_dl,
        batch_size=batch_size,
        num_workers=num_workers,
        use_cpu=not gpu,
        overwrite=overwrite,
    )
    manager.predict(data_group, **prediction_options)
Exemplo n.º 5
0
def test_resume(input_directory):
    """
    Resume a training through the command line and check its outputs.

    For every split yielded by the split manager of the MAPS, the folder
    `split-<split>/best-loss/train` must exist once the command has run.
    The MAPS directory is removed at the end of the test.

    Args:
        input_directory: path to the MAPS to resume.
    """
    # A zero status from `system` means the command succeeded.
    resume_succeeded = not system(f"clinicadl -vv train resume {input_directory}")
    assert resume_succeeded

    manager = MapsManager(input_directory)
    for split in manager._init_split_manager().split_iterator():
        train_performance_dir = pathlib.Path(
            input_directory, f"split-{split}", "best-loss", "train"
        )
        assert train_performance_dir.exists()

    shutil.rmtree(input_directory)
Exemplo n.º 6
0
def predict(
    maps_dir: str,
    data_group: str,
    caps_directory: str,
    tsv_path: str,
    use_labels: bool = True,
    label: str = None,
    gpu: bool = True,
    n_proc: int = 0,
    batch_size: int = 1,
    selection_metrics: List[str] = None,
    diagnoses: List[str] = None,
    multi_cohort: bool = False,
    overwrite: bool = False,
):
    """
    Load a MAPS and predict the global metrics and individual values
    for all the models selected using a metric in selection_metrics.

    Args:
        maps_dir: path to the MAPS.
        data_group: name of the data group tested.
        caps_directory: path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path: path to a TSV file containing the list of participants and sessions.
        use_labels: True by default. If False no metrics tsv files will be written.
        label: name of the target value, if different from training.
        gpu: if True, the gpu is used.
        n_proc: num_workers used in DataLoader.
        batch_size: batch size of the DataLoader.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort: If True caps_directory is the path to a TSV file linking cohort names and paths.
        overwrite: If True former definition of data group is erased.
    """
    # Verbosity is not configurable here: the manager always uses "warning".
    manager = MapsManager(maps_dir, verbose="warning")

    prediction_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        label=label,
        use_labels=use_labels,
        batch_size=batch_size,
        n_proc=n_proc,
        gpu=gpu,
        overwrite=overwrite,
    )
    manager.predict(data_group, **prediction_options)
Exemplo n.º 7
0
def meta_maps_analysis(launch_dir, evaluation_metric="loss"):
    """
    This function summarizes the validation performance according to `evaluation_metric`
    of several MAPS stored in the folder `launch_dir`.
    The output TSV files are written in `launch_dir`, one per selection metric,
    with one row per MAPS and one `fold-<fold>` column per fold.

    A MAPS that lacks one of the folds or selection metrics found in the other
    MAPS gets a NaN in the corresponding cell.

    Args:
        launch_dir (str): Path to the directory containing several MAPS.
        evaluation_metric (str): Name of the metric used for validation evaluation.

    Raises:
        ValueError: if `evaluation_metric` is absent from the validation metrics
            of one MAPS / fold / selection metric combination.
    """

    # A sub-directory is considered a MAPS when it contains a "maps.json" file.
    jobs_list = [
        job
        for job in os.listdir(launch_dir)
        if path.exists(path.join(launch_dir, job, "maps.json"))
    ]

    selection_set = set()  # Set of all selection metrics seen
    folds_set = set()  # Set of all folds seen

    # performances_dict[job][fold][selection metric] -> value of evaluation_metric
    performances_dict = dict()
    for job in jobs_list:
        performances_dict[job] = dict()
        maps_manager = MapsManager(path.join(launch_dir, job))
        folds = maps_manager._find_folds()
        folds_set.update(folds)
        for fold in folds:
            performances_dict[job][fold] = dict()
            selection_metrics = maps_manager._find_selection_metrics(fold)
            selection_set.update(selection_metrics)
            for metric in selection_metrics:
                validation_metrics = maps_manager.get_metrics(
                    "validation", fold, metric
                )
                if evaluation_metric not in validation_metrics:
                    raise ValueError(
                        f"Evaluation metric {evaluation_metric} not found in "
                        f"MAPS {job}, for fold {fold} and selection {metric}."
                    )
                performances_dict[job][fold][metric] = validation_metrics[
                    evaluation_metric
                ]

    # Produce one analysis for each selection metric
    missing_value = float("nan")
    for metric in selection_set:
        df = pd.DataFrame()
        filename = f"analysis_metric-{evaluation_metric}_selection-{metric}.tsv"
        for job in jobs_list:
            job_performances = performances_dict[job]
            for fold in folds_set:
                # folds_set / selection_set are unions over all MAPS, so a given
                # MAPS may lack this fold or metric: write NaN instead of
                # failing with KeyError.
                df.loc[job, f"fold-{fold}"] = job_performances.get(fold, {}).get(
                    metric, missing_value
                )
        df.to_csv(path.join(launch_dir, filename), sep="\t")
Exemplo n.º 8
0
def test_predict(predict_commands):
    """
    Run a prediction on the "test-RANDOM" data group and read back its outputs.

    Args:
        predict_commands: tuple `(model_folder, use_labels, modes)`.
    """
    model_folder, use_labels, modes = predict_commands
    data_group = "test-RANDOM"

    # Remove the outputs of a previous run, if any.
    previous_outputs = join(model_folder, "split-0/best-loss/test-RANDOM")
    if exists(previous_outputs):
        shutil.rmtree(previous_outputs)

    # Correction of JSON file for ROI
    if "roi" in modes:
        maps_json = join(model_folder, "maps.json")
        with open(maps_json, "r") as json_file:
            maps_parameters = json.load(json_file)
        maps_parameters["roi_list"] = ["leftHippocampusBox", "rightHippocampusBox"]
        # Serialize before reopening the file so a failure cannot truncate it.
        serialized = json.dumps(maps_parameters, skipkeys=True, indent=4)
        with open(maps_json, "w") as json_file:
            json_file.write(serialized)

    manager = MapsManager(model_folder, verbose="debug")
    manager.predict(
        data_group=data_group,
        caps_directory="data/dataset/OasisCaps_example",
        tsv_path="data/dataset/OasisCaps_example/data.tsv",
        gpu=False,
        use_labels=use_labels,
        overwrite=True,
    )

    # Metrics are only read back when labels were used for the prediction.
    for mode in modes:
        manager.get_prediction(data_group=data_group, mode=mode)
        if not use_labels:
            continue
        manager.get_metrics(data_group=data_group, mode=mode)
Exemplo n.º 9
0
def save_tensor(
    maps_dir,
    data_group,
    caps_directory,
    tsv_path,
    gpu=True,
    selection_metrics=None,
    diagnoses=None,
    multi_cohort=False,
    nifti=False,
    overwrite=False,
):
    """
    Load a MAPS, compute reconstruction outputs and save them in the MAPS
    for all the models selected.

    Args:
        maps_dir (str): path to the MAPS.
        data_group: prefix of all classification outputs.
        caps_directory (str): path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path (str): path to a TSV file containing the list of participants and sessions.
        gpu: if True, the gpu is used.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort (bool): If True caps_directory is the path to a TSV file linking cohort names and paths.
        nifti (bool): If True will save the outputs as nifti files instead of Pytorch tensors.
        overwrite (bool): If True former definition of data group is erased.
    """
    # Verbosity is not configurable here: the manager always uses "warning".
    manager = MapsManager(maps_dir, verbose="warning")

    tensor_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        gpu=gpu,
        nifti=nifti,
        overwrite=overwrite,
    )
    manager.save_tensors(data_group, **tensor_options)
Exemplo n.º 10
0
def automatic_resume(model_path, user_split_list=None, verbose=0):
    """
    Resume the interrupted splits of a MAPS and train the ones never started.

    Existing splits are classified from the content of their folder
    (`<split_name>-<split>` inside `model_path`):
      - stopped: the folder contains an entry named "tmp";
      - finished: not stopped, and at least one entry contains "best-".
    Splits yielded by the split manager that are neither finished nor stopped
    are considered absent.

    Args:
        model_path: path to the MAPS.
        user_split_list: forwarded to `_init_split_manager` as `split_list`.
        verbose: index into ["warning", "info", "debug"].
    """
    logger = getLogger("clinicadl")

    verbosity_levels = ["warning", "info", "debug"]
    maps_manager = MapsManager(model_path, verbose=verbosity_levels[verbose])

    stopped_splits = []
    finished_splits = []
    for split in maps_manager._find_splits():
        split_content = os.listdir(
            path.join(model_path, f"{maps_manager.split_name}-{split}")
        )
        if "tmp" in split_content:
            stopped_splits.append(split)
        elif any("best-" in entry for entry in split_content):
            finished_splits.append(split)

    split_manager = maps_manager._init_split_manager(split_list=user_split_list)
    already_started = finished_splits + stopped_splits
    absent_splits = [
        split
        for split in split_manager.split_iterator()
        if split not in already_started
    ]

    # To ensure retro-compatibility with random search
    logger.info(
        f"Finished splits {finished_splits}\n"
        f"Stopped splits {stopped_splits}\n"
        f"Absent splits {absent_splits}"
    )
    if stopped_splits:
        maps_manager.resume(stopped_splits)
    if absent_splits:
        maps_manager.train(absent_splits, overwrite=True)
Exemplo n.º 11
0
def test_predict(predict_commands):
    """
    Run a prediction on the "test-RANDOM" data group and read back its outputs.

    Args:
        predict_commands: tuple `(model_folder, use_labels, modes)`.
    """
    model_folder, use_labels, modes = predict_commands
    data_group = "test-RANDOM"

    # Remove the outputs of a previous run, if any.
    previous_outputs = join(model_folder, "fold-0/best-loss/test-RANDOM")
    if os.path.exists(previous_outputs):
        shutil.rmtree(previous_outputs)

    manager = MapsManager(model_folder, verbose="debug")
    manager.predict(
        data_group=data_group,
        caps_directory="data/dataset/OasisCaps_example",
        tsv_path="data/dataset/OasisCaps_example/data.tsv",
        use_cpu=True,
        use_labels=use_labels,
        overwrite=True,
    )

    # Metrics are only read back when labels were used for the prediction.
    for mode in modes:
        manager.get_prediction(data_group=data_group, mode=mode)
        if not use_labels:
            continue
        manager.get_metrics(data_group=data_group, mode=mode)
Exemplo n.º 12
0
def automatic_resume(model_path, verbose=0):
    """
    Resume the interrupted splits of a MAPS and train the ones never started.

    Splits are found from the `split-<index>` entries of `model_path`:
      - stopped: the split folder contains an entry named "tmp";
      - finished: every other existing split.
    Splits yielded by the split manager that are neither finished nor stopped
    are considered absent and are trained.

    Args:
        model_path: path to the MAPS.
        verbose: index into ["warning", "info", "debug"].

    Raises:
        ValueError: if `model_path` contains entries matching `fold-*`
            (former MAPS convention).
    """
    logger = getLogger("clinicadl")

    verbose_list = ["warning", "info", "debug"]
    maps_manager = MapsManager(model_path, verbose=verbose_list[verbose])
    if len(glob(os.path.join(model_path, "fold-*"))) > 0:
        raise ValueError(
            "This MAPS cannot be resumed with the current version of ClinicaDL. "
            "Please use the same version as for training or rename manually the folders "
            "'fold-*' in 'split-*' to respect the new MAPS convention.")

    # The previous test `split[:4:] == "split"` compared a 4-character slice
    # with a 5-character string and never matched, so no split was ever found.
    split_list = sorted(
        int(entry.split("-")[1])
        for entry in os.listdir(model_path)
        if entry.startswith("split-")
    )
    stopped_splits = [
        split for split in split_list
        if "tmp" in os.listdir(path.join(model_path, f"split-{split}"))
    ]
    finished_splits = [
        split for split in split_list if split not in stopped_splits
    ]

    split_manager = maps_manager._init_split_manager()
    split_iterator = split_manager.split_iterator()

    absent_splits = [
        split for split in split_iterator
        if split not in finished_splits and split not in stopped_splits
    ]

    # To ensure retro-compatibility with random search
    # NOTE(review): original comment kept as is; what it refers to is not
    # visible from this function.
    logger.info(f"Finished splits {finished_splits}\n"
                f"Stopped splits {stopped_splits}\n"
                f"Absent splits {absent_splits}")
    # Only call the manager when there is something to do.
    if stopped_splits:
        maps_manager.resume(stopped_splits)
    if absent_splits:
        maps_manager.train(absent_splits)
Exemplo n.º 13
0
def train(maps_dir, train_dict, folds, erase_existing=True):
    """
    Build a `MapsManager` from training parameters and launch training.

    Args:
        maps_dir: first positional argument given to `MapsManager`.
        train_dict: second positional argument given to `MapsManager`.
        folds: forwarded to `MapsManager.train` as `folds`.
        erase_existing: forwarded to `MapsManager.train` as `overwrite`.
    """
    # The manager is always created with the "info" verbosity level.
    MapsManager(maps_dir, train_dict, verbose="info").train(
        folds=folds,
        overwrite=erase_existing,
    )
Exemplo n.º 14
0
def interpret(
    maps_dir: str,
    data_group: str,
    name: str,
    caps_directory: str,
    tsv_path: str,
    selection_metrics: List[str],
    diagnoses: List[str],
    multi_cohort: bool,
    target_node: int,
    save_individual: bool,
    batch_size: int,
    n_proc: int,
    gpu: bool,
    verbose=0,
    overwrite: bool = False,
    overwrite_name: bool = False,
):
    """
    Load a MAPS and interpret all the models selected using a metric in selection_metrics.

    Args:
        maps_dir: path to the MAPS.
        data_group: name of the data group interpreted.
        name: forwarded to `MapsManager.interpret` as `name`
            (presumably the name of the interpretability map, see `overwrite_name`).
        caps_directory: path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path: path to a TSV file containing the list of participants and sessions to interpret.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort: If True caps_directory is the path to a TSV file linking cohort names and paths.
        target_node: Node from which the interpretation is computed.
        save_individual: If True saves the individual map of each participant / session couple.
        batch_size: batch size of the DataLoader.
        n_proc: num_workers used in DataLoader.
        gpu: if True, the gpu is used.
        verbose: level of verbosity (0: warning, 1: info, 2: debug).
            Values above 2 are treated as "debug".
        overwrite: If True former definition of data group is erased.
        overwrite_name: If True former interpretability map with the same name is erased.
    """
    verbosity_levels = ["warning", "info", "debug"]
    # Any level above 2 maps to the last entry, "debug".
    manager = MapsManager(maps_dir, verbose=verbosity_levels[min(verbose, 2)])

    interpretation_options = dict(
        data_group=data_group,
        name=name,
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        diagnoses=diagnoses,
        multi_cohort=multi_cohort,
        target_node=target_node,
        save_individual=save_individual,
        batch_size=batch_size,
        n_proc=n_proc,
        gpu=gpu,
        overwrite=overwrite,
        overwrite_name=overwrite_name,
    )
    manager.interpret(**interpretation_options)