Example #1
import os
import shutil
from os import path

# merged_tsv, reference_path and diagnoses are module-level test fixtures;
# load_data, load_data_test and run_test_suite are helpers defined in the
# original clinicadl test module.
def test_split():
    """Checks that:
     -  split and kfold are working
     -  the loading functions can find the output
     -  no data leakage is introduced in split and kfold.
     """
    n_splits = 5
    flag_split = not os.system("clinicadl tsvtool split %s %s --age_name age" %
                               (merged_tsv, reference_path))
    flag_kfold = not os.system("clinicadl tsvtool kfold %s --n_splits %i" %
                               (path.join(reference_path, "train"), n_splits))
    assert flag_split
    assert flag_kfold
    flag_load = True
    try:
        _ = load_data_test(path.join(reference_path, "test"),
                           diagnoses.split(" "))
        for fold in range(n_splits):
            _, _ = load_data(path.join(reference_path, "train"),
                             diagnoses.split(" "),
                             fold,
                             n_splits=n_splits)
    except FileNotFoundError:
        flag_load = False
    assert flag_load

    run_test_suite(reference_path, 0, "test")
    run_test_suite(path.join(reference_path, "train"), n_splits, "validation")

    shutil.rmtree(path.join(reference_path, "train"))
    shutil.rmtree(path.join(reference_path, "test"))
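A side note on the shell calls above: os.system only exposes an exit status. The same clinicadl CLI invocation written with subprocess.run (a sketch, not part of the original test) avoids string formatting and gives direct access to the return code:

import subprocess

# Sketch: the split command from the test above, passed as an argument list.
result = subprocess.run(
    ["clinicadl", "tsvtool", "split", merged_tsv, reference_path,
     "--age_name", "age"]
)
flag_split = result.returncode == 0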
Example #2
import argparse
import os
from os import path

import torch
from torch.utils.data import DataLoader

# read_json, compute_num_cnn, load_data_test, get_transforms, return_dataset,
# test_cnn and soft_voting_to_tsvs are clinicadl helpers (exact import paths
# depend on the clinicadl version).
def main(options):

    # Read json
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(options.input_dir,
                              options.tsv_path,
                              model_options,
                              data="test")

    # Load test data
    if options.diagnoses is None:
        options.diagnoses = model_options.diagnoses

    test_df = load_data_test(options.tsv_path, options.diagnoses)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = torch.nn.CrossEntropyLoss()

    # Loop over all trained folds
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        # Note: this recovers only single-digit fold numbers ("fold_0" ... "fold_9").
        split = int(fold_dir[-1])
        print("Fold %i" % split)

        for cnn_index in range(num_cnn):
            dataset = return_dataset(model_options.mode,
                                     options.input_dir,
                                     test_df,
                                     options.preprocessing,
                                     transformations,
                                     options,
                                     cnn_index=cnn_index)

            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)

            test_cnn(options.model_path, test_loader, options.dataset, split,
                     criterion, cnn_index, model_options, options.gpu)

        for selection in ['best_acc', 'best_loss']:
            soft_voting_to_tsvs(
                options.model_path,
                split,
                selection,
                mode=options.mode,
                dataset=options.dataset,
                num_cnn=num_cnn,
                selection_threshold=model_options.selection_threshold)
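The fold index in this example is read from the last character of the directory name, which only works for single-digit folds. A more robust parse (a sketch, assuming directories named fold_<i>):

import re

# Sketch: extract the trailing integer from a fold directory name such as "fold_12".
match = re.search(r"(\d+)$", fold_dir)
if match is None:
    raise ValueError("Unexpected fold directory name: %s" % fold_dir)
split = int(match.group(1))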
Example #3
import os

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# create_model, MinMaxNormalization, load_data, load_data_test, MRIDataset_slice,
# load_model, test, slice_level_to_tsvs and soft_voting_to_tsvs are helpers from
# the original clinicadl codebase.
def main(options):

    # Initialize the model
    print('Performing transfer learning with a model pretrained on ImageNet.')

    model = create_model(options.network, options.gpu)
    trg_size = (224, 224)  # most ImageNet-pretrained models expect this input size

    # All pretrained models expect input images normalized in the same way: mini-batches of
    # 3-channel RGB images of shape (3 x H x W), where H and W are at least 224, loaded into
    # the range [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and
    # std = [0.229, 0.224, 0.225].
    transformations = transforms.Compose([
        MinMaxNormalization(),
        transforms.ToPILImage(),
        transforms.Resize(trg_size),
        transforms.ToTensor()
    ])
    # Define loss and optimizer
    loss = torch.nn.CrossEntropyLoss()

    if options.split is None:
        fold_iterator = range(options.n_splits)
    else:
        fold_iterator = [options.split]

    # Loop on folds
    for fi in fold_iterator:
        print("Fold %i" % fi)

        if options.dataset == 'validation':
            _, test_df = load_data(options.diagnosis_tsv_path,
                                   options.diagnoses,
                                   fi,
                                   n_splits=options.n_splits,
                                   baseline=True)
        else:
            test_df = load_data_test(options.diagnosis_tsv_path,
                                     options.diagnoses)

        data_test = MRIDataset_slice(options.caps_directory,
                                     test_df,
                                     transformations=transformations,
                                     mri_plane=options.mri_plane,
                                     prepare_dl=options.prepare_dl)

        test_loader = DataLoader(data_test,
                                 batch_size=options.batch_size,
                                 shuffle=False,
                                 num_workers=options.num_workers,
                                 pin_memory=True)

        # load the best model saved during training
        model, best_epoch = load_model(model,
                                       os.path.join(options.output_dir,
                                                    'best_model_dir',
                                                    "fold_%i" % fi, 'CNN',
                                                    str(options.selection)),
                                       gpu=options.gpu,
                                       filename='model_best.pth.tar')

        results_df, metrics = test(model, test_loader, options.gpu, loss)
        print("Slice level balanced accuracy is %f" %
              (metrics['balanced_accuracy']))

        slice_level_to_tsvs(options.output_dir,
                            results_df,
                            metrics,
                            fi,
                            options.selection,
                            dataset=options.dataset)

        # Soft voting
        soft_voting_to_tsvs(options.output_dir,
                            fi,
                            selection=options.selection,
                            dataset=options.dataset,
                            selection_threshold=options.selection_threshold)
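The comment block in this example quotes the ImageNet mean/std normalization, but the Compose pipeline stops at ToTensor(). If that normalization is actually wanted, the pipeline could be extended as follows (a sketch; it assumes the single-channel slices are replicated to three channels before this point):

transformations = transforms.Compose([
    MinMaxNormalization(),
    transforms.ToPILImage(),
    transforms.Resize(trg_size),
    transforms.ToTensor(),
    # ImageNet statistics, as quoted in the comment above.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])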
Example #4
import argparse
import os
from os import path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
from torch.utils.data import DataLoader

# return_logger, translate_parameters, read_json, get_criterion, load_data_test,
# get_transforms, return_dataset, create_model, load_model, commandline_to_json,
# sort_predicted and VanillaBackProp are clinicadl helpers.
def group_backprop(options):

    main_logger = return_logger(options.verbose, "main process")
    options = translate_parameters(options)

    fold_list = [
        fold for fold in os.listdir(options.model_path)
        if fold.startswith("fold-")
    ]
    if len(fold_list) == 0:
        raise ValueError("No folds were found at path %s" % options.model_path)

    for fold in fold_list:
        main_logger.info(fold)
        for selection in options.selection:
            results_path = path.join(options.model_path, fold, 'gradients',
                                     selection, options.name)

            model_options = argparse.Namespace()
            model_options = read_json(
                model_options, path.join(options.model_path,
                                         'commandline.json'))
            model_options = translate_parameters(model_options)
            model_options.gpu = options.gpu

            if options.tsv_path is None:
                options.tsv_path = model_options.tsv_path
            if options.input_dir is None:
                options.input_dir = model_options.input_dir
            if options.target_diagnosis is None:
                options.target_diagnosis = options.diagnosis

            criterion = get_criterion(model_options.loss)

            # Data management (remove data not well predicted by the CNN)
            training_df = load_data_test(options.tsv_path, [options.diagnosis],
                                         baseline=options.baseline)
            training_df.reset_index(drop=True, inplace=True)

            # Model creation
            _, all_transforms = get_transforms(
                model_options.mode,
                minmaxnormalization=model_options.minmaxnormalization)
            data_example = return_dataset(model_options.mode,
                                          options.input_dir,
                                          training_df,
                                          model_options.preprocessing,
                                          train_transformations=None,
                                          all_transformations=all_transforms,
                                          params=options)

            model = create_model(model_options, data_example.size)
            model_dir = os.path.join(options.model_path, fold, 'models',
                                     selection)
            model, best_epoch = load_model(model,
                                           model_dir,
                                           gpu=options.gpu,
                                           filename='model_best.pth.tar')
            options.output_dir = results_path
            commandline_to_json(options, logger=main_logger)

            # Keep only subjects who were correctly / wrongly predicted by the network
            training_df = sort_predicted(model,
                                         training_df,
                                         options.input_dir,
                                         model_options,
                                         criterion,
                                         options.keep_true,
                                         batch_size=options.batch_size,
                                         num_workers=options.num_workers,
                                         gpu=options.gpu)

            if len(training_df) > 0:

                # Save the tsv file used for the saliency maps
                training_df.to_csv(path.join(results_path, 'data.tsv'),
                                   sep='\t',
                                   index=False)

                data_train = return_dataset(model_options.mode,
                                            options.input_dir,
                                            training_df,
                                            model_options.preprocessing,
                                            train_transformations=None,
                                            all_transformations=all_transforms,
                                            params=options)

                train_loader = DataLoader(data_train,
                                          batch_size=options.batch_size,
                                          shuffle=True,
                                          num_workers=options.num_workers,
                                          pin_memory=True)

                interpreter = VanillaBackProp(model, gpu=options.gpu)

                cum_map = 0
                for data in train_loader:
                    if options.gpu:
                        input_batch = data['image'].cuda()
                    else:
                        input_batch = data['image']

                    maps = interpreter.generate_gradients(
                        input_batch,
                        data_train.diagnosis_code[options.target_diagnosis])
                    cum_map += maps.sum(axis=0)

                mean_map = cum_map / len(data_train)

                if len(data_train.size) == 4:
                    if options.nifti_template_path is not None:
                        image_nii = nib.load(options.nifti_template_path)
                        affine = image_nii.affine
                    else:
                        affine = np.eye(4)

                    mean_map_nii = nib.Nifti1Image(mean_map[0], affine)
                    nib.save(mean_map_nii, path.join(results_path,
                                                     "map.nii.gz"))
                    np.save(path.join(results_path, "map.npy"), mean_map[0])
                else:
                    jpg_path = path.join(results_path, "map.jpg")
                    plt.imshow(mean_map[0],
                               cmap="coolwarm",
                               vmin=-options.vmax,
                               vmax=options.vmax)
                    plt.colorbar()
                    plt.savefig(jpg_path)
                    plt.close()
                    numpy_path = path.join(results_path, "map.npy")
                    np.save(numpy_path, mean_map[0])
            else:
                main_logger.warning("There are no subjects for the given options")
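The 4-D branch above borrows the affine of a template image so the saved saliency volume stays aligned with subject space, and falls back to an identity affine otherwise. The same pattern in isolation (a sketch using nibabel):

import nibabel as nib
import numpy as np

def save_map_as_nifti(volume, out_path, template_path=None):
    # Borrow the affine from a template image if one is given.
    affine = nib.load(template_path).affine if template_path else np.eye(4)
    nib.save(nib.Nifti1Image(volume, affine), out_path)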
Example #5
def inference_from_model_generic(caps_dir,
                                 tsv_path,
                                 model_path,
                                 model_options,
                                 prefix,
                                 output_dir,
                                 fold,
                                 selection,
                                 labels=True,
                                 num_cnn=None,
                                 logger=None):
    from os.path import join
    import logging

    from torch.utils.data import DataLoader

    # get_transforms, load_data_test, get_criterion, return_dataset, create_model,
    # load_model, test and mode_level_to_tsvs are clinicadl helpers imported at
    # module level in the original script.
    if logger is None:
        logger = logging

    gpu = not model_options.use_cpu

    _, all_transforms = get_transforms(model_options.mode,
                                       model_options.minmaxnormalization)

    test_df = load_data_test(tsv_path, model_options.diagnoses)

    # Define loss and optimizer
    criterion = get_criterion(model_options.loss)

    if model_options.mode_task == 'multicnn':

        for n in range(num_cnn):

            test_dataset = return_dataset(model_options.mode,
                                          caps_dir,
                                          test_df,
                                          model_options.preprocessing,
                                          train_transformations=None,
                                          all_transformations=all_transforms,
                                          params=model_options,
                                          cnn_index=n,
                                          labels=labels)

            test_loader = DataLoader(test_dataset,
                                     batch_size=model_options.batch_size,
                                     shuffle=False,
                                     num_workers=model_options.nproc,
                                     pin_memory=True)

            # load the best model saved during training
            model = create_model(model_options, test_dataset.size)
            model, best_epoch = load_model(model,
                                           join(model_path, 'cnn-%i' % n,
                                                selection),
                                           gpu,
                                           filename='model_best.pth.tar')

            cnn_df, cnn_metrics = test(model,
                                       test_loader,
                                       gpu,
                                       criterion,
                                       mode=model_options.mode,
                                       use_labels=labels)

            if labels:
                logger.info(
                    "%s balanced accuracy is %f for %s %i and model selected on %s"
                    % (prefix, cnn_metrics["balanced_accuracy"],
                       model_options.mode, n, selection))

            mode_level_to_tsvs(output_dir,
                               cnn_df,
                               cnn_metrics,
                               fold,
                               selection,
                               model_options.mode,
                               dataset=prefix,
                               cnn_index=n)

    else:

        # Read/localize the data
        test_dataset = return_dataset(model_options.mode,
                                      caps_dir,
                                      test_df,
                                      model_options.preprocessing,
                                      train_transformations=None,
                                      all_transformations=all_transforms,
                                      params=model_options,
                                      labels=labels)

        # Load the data
        test_loader = DataLoader(test_dataset,
                                 batch_size=model_options.batch_size,
                                 shuffle=False,
                                 num_workers=model_options.nproc,
                                 pin_memory=True)

        # Load model from path
        model = create_model(model_options, test_dataset.size)
        best_model, best_epoch = load_model(model,
                                            join(model_path, selection),
                                            gpu,
                                            filename='model_best.pth.tar')

        # Run the model on the data
        predictions_df, metrics = test(best_model,
                                       test_loader,
                                       gpu,
                                       criterion,
                                       mode=model_options.mode,
                                       use_labels=labels)

        if labels:
            logger.info(
                "%s level %s balanced accuracy is %f for model selected on %s"
                % (model_options.mode, prefix, metrics["balanced_accuracy"],
                   selection))

        mode_level_to_tsvs(output_dir,
                           predictions_df,
                           metrics,
                           fold,
                           selection,
                           model_options.mode,
                           dataset=prefix)
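Across these examples, load_data_test is called with a TSV path and a list of diagnosis labels, optionally with baseline and multi_cohort keyword arguments (see Examples 4 and 8), and returns a single DataFrame. The usage pattern, inferred from the calls shown on this page (the exact signature may differ between clinicadl versions):

# Inferred usage, not an authoritative signature; "AD"/"CN" are illustrative labels.
test_df = load_data_test(tsv_path,
                         ["AD", "CN"],
                         baseline=True,
                         multi_cohort=False)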
Example #6
import os

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# create_model, MinMaxNormalization, load_data, load_data_test, MRIDataset_patch,
# load_model, test, patch_level_to_tsvs and soft_voting_to_tsvs are helpers from
# the original clinicadl codebase.
def main(options):
    # Initialize the model
    model = create_model(options.network, options.gpu)
    transformations = transforms.Compose([MinMaxNormalization()])

    # Define loss and optimizer
    loss = torch.nn.CrossEntropyLoss()

    if options.split is None:
        fold_iterator = range(options.n_splits)
    else:
        fold_iterator = [options.split]

    # Loop on folds
    for fi in fold_iterator:
        print("Fold %i" % fi)

        if options.dataset == 'validation':
            _, test_df = load_data(options.diagnosis_tsv_path,
                                   options.diagnoses,
                                   fi,
                                   n_splits=options.n_splits,
                                   baseline=True)
        else:
            test_df = load_data_test(options.diagnosis_tsv_path,
                                     options.diagnoses)

        for n in range(options.num_cnn):

            dataset = MRIDataset_patch(options.caps_directory,
                                       test_df,
                                       options.patch_size,
                                       options.patch_stride,
                                       transformations=transformations,
                                       patch_index=n,
                                       prepare_dl=options.prepare_dl)

            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)

            # load the best model saved during training
            model, best_epoch = load_model(
                model,
                os.path.join(options.output_dir, 'best_model_dir',
                             "fold_%i" % fi, 'cnn-%i' % n, options.selection),
                options.gpu,
                filename='model_best.pth.tar')

            results_df, metrics = test(model, test_loader, options.gpu, loss)
            print("Patch level balanced accuracy is %f" %
                  metrics['balanced_accuracy'])

            # write the test results into the tsv files
            patch_level_to_tsvs(options.output_dir,
                                results_df,
                                metrics,
                                fi,
                                options.selection,
                                dataset=options.dataset,
                                cnn_index=n)

        print("Selection threshold: ", options.selection_threshold)
        soft_voting_to_tsvs(options.output_dir,
                            fi,
                            options.selection,
                            dataset=options.dataset,
                            num_cnn=options.num_cnn,
                            selection_threshold=options.selection_threshold)
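soft_voting_to_tsvs aggregates the per-patch CNN outputs into a subject-level decision. Conceptually, soft voting averages class probabilities across patches and takes the argmax of the mean (an illustrative sketch, not clinicadl's implementation):

import numpy as np

def soft_vote(patch_probabilities, weights=None):
    # Average class probabilities over patches; the argmax of the mean is the subject label.
    mean_proba = np.average(np.asarray(patch_probabilities), axis=0, weights=weights)
    return int(mean_proba.argmax())

# Two classes, three patches: soft_vote([[0.8, 0.2], [0.4, 0.6], [0.7, 0.3]]) -> 0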
Example #7
        # (Fragment: this snippet begins inside a loop over fold directories;
        # best_model_dir, fold_dir and model_options are defined earlier in the
        # original script.)
        model = create_model(model_options.network, options.gpu)

        criterion = nn.CrossEntropyLoss()

        model_dir = os.path.join(best_model_dir, fold_dir, 'CNN',
                                 options.selection)
        best_model, best_epoch = load_model(model,
                                            model_dir,
                                            options.gpu,
                                            filename='model_best.pth.tar')

        # Load test data
        if options.diagnoses is None:
            options.diagnoses = model_options.diagnoses

        test_tsv = load_data_test(options.tsv_path, options.diagnoses)

        if model_options.minmaxnormalization:
            transformations = MinMaxNormalization()
        else:
            transformations = None

        data_test = MRIDataset(options.caps_dir,
                               test_tsv,
                               model_options.preprocessing,
                               transform=transformations)

        test_loader = DataLoader(data_test,
                                 batch_size=options.batch_size,
                                 shuffle=False,
                                 num_workers=options.num_workers,
                                 pin_memory=True)
Example #8
import argparse
import os
import warnings
from os import path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
from torch.utils.data import DataLoader

# The clinicadl helpers used here match Example 4 (return_logger,
# translate_parameters, read_json, get_criterion, load_data_test, get_transforms,
# return_dataset, create_model, load_model, commandline_to_json, sort_predicted,
# VanillaBackProp).
def individual_backprop(options):

    main_logger = return_logger(options.verbose, "main process")
    options = translate_parameters(options)

    fold_list = [
        fold for fold in os.listdir(options.model_path)
        if fold.startswith("fold-")
    ]
    if len(fold_list) == 0:
        raise ValueError("No folds were found at path %s" % options.model_path)

    model_options = argparse.Namespace()
    model_options = read_json(
        model_options, path.join(options.model_path, 'commandline.json'))
    model_options = translate_parameters(model_options)
    model_options.gpu = options.gpu

    if model_options.network_type == "multicnn":
        raise NotImplementedError(
            "The interpretation of multi-CNN is not implemented.")

    if options.tsv_path is None and options.input_dir is None:
        options.multi_cohort = model_options.multi_cohort
    if options.tsv_path is None:
        options.tsv_path = model_options.tsv_path
    if options.input_dir is None:
        options.input_dir = model_options.input_dir
    if options.target_diagnosis is None:
        options.target_diagnosis = options.diagnosis

    for fold in fold_list:
        main_logger.info(fold)
        for selection in options.selection:
            results_path = path.join(options.model_path, fold, 'gradients',
                                     selection, options.name)

            criterion = get_criterion(model_options.loss)

            # Data management (remove data not well predicted by the CNN)
            training_df = load_data_test(options.tsv_path, [options.diagnosis],
                                         baseline=options.baseline,
                                         multi_cohort=options.multi_cohort)
            training_df.reset_index(drop=True, inplace=True)

            # Model creation
            _, all_transforms = get_transforms(
                model_options.mode,
                minmaxnormalization=model_options.minmaxnormalization)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                data_example = return_dataset(
                    model_options.mode,
                    options.input_dir,
                    training_df,
                    model_options.preprocessing,
                    train_transformations=None,
                    all_transformations=all_transforms,
                    prepare_dl=options.prepare_dl,
                    multi_cohort=options.multi_cohort,
                    params=model_options)

            model = create_model(model_options, data_example.size)
            model_dir = os.path.join(options.model_path, fold, 'models',
                                     selection)
            model, best_epoch = load_model(model,
                                           model_dir,
                                           gpu=options.gpu,
                                           filename='model_best.pth.tar')
            options.output_dir = results_path
            commandline_to_json(options, logger=main_logger)

            # Keep only subjects who were correctly / wrongly predicted by the network
            training_df = sort_predicted(model,
                                         training_df,
                                         options.input_dir,
                                         model_options,
                                         criterion,
                                         options.keep_true,
                                         batch_size=options.batch_size,
                                         num_workers=options.num_workers,
                                         gpu=options.gpu)

            if len(training_df) > 0:

                # Save the tsv file used for the saliency maps
                training_df.to_csv(path.join(results_path, 'data.tsv'),
                                   sep='\t',
                                   index=False)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    data_train = return_dataset(
                        model_options.mode,
                        options.input_dir,
                        training_df,
                        model_options.preprocessing,
                        train_transformations=None,
                        all_transformations=all_transforms,
                        prepare_dl=options.prepare_dl,
                        multi_cohort=options.multi_cohort,
                        params=model_options)

                train_loader = DataLoader(data_train,
                                          batch_size=options.batch_size,
                                          shuffle=True,
                                          num_workers=options.num_workers,
                                          pin_memory=True)

                interpreter = VanillaBackProp(model, gpu=options.gpu)

                for data in train_loader:
                    if options.gpu:
                        input_batch = data['image'].cuda()
                    else:
                        input_batch = data['image']

                    map_np = interpreter.generate_gradients(
                        input_batch,
                        data_train.diagnosis_code[options.target_diagnosis])
                    # Iterate over the actual batch size (the last batch may be smaller).
                    for i in range(input_batch.size(0)):
                        single_path = path.join(results_path,
                                                data['participant_id'][i],
                                                data['session_id'][i])
                        os.makedirs(single_path, exist_ok=True)

                        if len(data_train.size) == 4:
                            if options.nifti_template_path is not None:
                                image_nii = nib.load(
                                    options.nifti_template_path)
                                affine = image_nii.affine
                            else:
                                affine = np.eye(4)

                            map_nii = nib.Nifti1Image(map_np[i, 0, :, :, :],
                                                      affine)
                            nib.save(map_nii,
                                     path.join(single_path, "map.nii.gz"))
                        else:
                            jpg_path = path.join(single_path, "map.jpg")
                            plt.imshow(map_np[i, 0, :, :],
                                       cmap="coolwarm",
                                       vmin=-options.vmax,
                                       vmax=options.vmax)
                            plt.colorbar()
                            plt.savefig(jpg_path)
                            plt.close()
                        np.save(path.join(single_path, "map.npy"), map_np[i])
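VanillaBackProp in Examples 4 and 8 computes the gradient of the target-class score with respect to the input image, the classic vanilla-backpropagation saliency map. A minimal framework-level sketch of the same idea (illustrative, not clinicadl's class):

import torch

def vanilla_gradients(model, input_batch, target_class):
    # The gradient of the target-class score w.r.t. the input gives one saliency map per sample.
    input_batch = input_batch.clone().detach().requires_grad_(True)
    score = model(input_batch)[:, target_class].sum()
    score.backward()
    return input_batch.grad.detach().cpu().numpy()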