示例#1
0
    def __init__(self, loader_params, derivatives, path_output):
        """Collect loader settings, build the BIDS dataframe and save it as csv.

        Args:
            loader_params (dict): Loader parameters. The keys read here are
                'path_data', 'target_suffix', 'roi_params' (its 'suffix'
                entry), 'extensions', 'contrast_params' (its 'contrast_lst'
                entry) and, optionally, 'bids_config'.
            derivatives: Stored unchanged as `self.derivatives`.
            path_output (str): Folder in which "bids_dataframe.csv" is written.
        """
        # paths_data from loader parameters
        # TODO: when integrating in pipeline, remove format_path_data here (done before in main)
        self.paths_data = imed_utils.format_path_data(
            loader_params['path_data'])

        # bids_config from loader parameters (None when the key is absent)
        self.bids_config = loader_params.get('bids_config')

        # target_suffix from loader parameters, flattened by one level.
        # Entries may be plain strings or lists of strings; plain strings must
        # be kept whole (chaining over them would split them into characters).
        self.target_suffix = []
        for suffix in copy.deepcopy(loader_params['target_suffix']):
            if isinstance(suffix, list):
                self.target_suffix.extend(suffix)
            else:
                self.target_suffix.append(suffix)

        # If `roi_suffix` is not None, add to target_suffix
        self.roi_suffix = loader_params['roi_params']['suffix']
        if self.roi_suffix is not None:
            self.target_suffix.append(self.roi_suffix)

        # extensions from loader parameters
        self.extensions = loader_params['extensions']

        # contrast_lst from loader parameters
        self.contrast_lst = loader_params["contrast_params"]["contrast_lst"]

        # derivatives
        self.derivatives = derivatives

        # Create dataframe
        self.df = pd.DataFrame()
        self.create_bids_dataframe()

        # Save dataframe as csv file
        self.save(os.path.join(path_output, "bids_dataframe.csv"))
示例#2
0
文件: main.py 项目: cakester/ivadomed
def create_dataset_and_ivadomed_version_log(context):
    """Write "version_info.log" in the output folder.

    The log contains the ivadomed version string, the commit of each dataset
    listed in ``context['loader_parameters']['path_data']``, basic system
    information (OS, CPU count) and a dump of every entry of ``context``.

    Args:
        context (dict): Configuration dictionary. Reads
            ``context['loader_parameters']['path_data']`` (str or list of str)
            and ``context['path_output']``; all items are dumped to the log.

    Raises:
        Exception: If the log file cannot be opened for writing (the original
            ``OSError`` is chained as the cause).
    """
    path_data = context['loader_parameters']['path_data']

    ivadomed_version = imed_utils._version_string()
    datasets_version = []

    if isinstance(path_data, str):
        datasets_version = [
            imed_utils.__get_commit(path_to_git_folder=path_data)
        ]
    elif isinstance(path_data, list):
        datasets_version = [
            imed_utils.__get_commit(path_to_git_folder=dataset)
            for dataset in path_data
        ]

    log_file = os.path.join(context['path_output'], 'version_info.log')

    try:
        f = open(log_file, "w")
    except OSError as err:
        print("OS error: {0}".format(err))
        raise Exception(
            "Have you selected a log folder, and do you have write permissions for that folder?"
        ) from err

    # The context manager guarantees the handle is closed even if a write fails.
    with f:
        # IVADOMED
        f.write('IVADOMED TOOLBOX\n----------------\n(' + ivadomed_version + ')')

        # DATASETS
        path_data = imed_utils.format_path_data(path_data)
        f.write('\n\n\nDATASET VERSION\n---------------\n')

        f.write('The following BIDS dataset(s) were used for training.\n')

        for i_dataset, dataset_path in enumerate(path_data):
            version = datasets_version[i_dataset]
            if version not in ['', '?!?']:
                f.write(
                    str(i_dataset + 1) + '. ' + dataset_path +
                    ' - Dataset Annex version: ' + version + '\n')
            else:
                f.write(
                    str(i_dataset + 1) + '. ' + dataset_path +
                    ' - Dataset is not Annexed.\n')

        # SYSTEM INFO
        f.write('\n\nSYSTEM INFO\n-------------\n')
        platform_running = sys.platform
        # Substring tests are used instead of str.find(): find() returns -1
        # (truthy) when the substring is absent, which made every unknown
        # platform look like Windows.
        if 'darwin' in platform_running:
            os_running = 'osx'
        elif 'linux' in platform_running:
            os_running = 'linux'
        elif 'win32' in platform_running or 'win64' in platform_running:
            os_running = 'windows'
        else:
            os_running = 'NA'

        f.write('OS: ' + os_running + ' (' + platform.platform() + ')\n')

        # Display number of CPU cores
        f.write('CPU cores: Available: {}\n\n\n\n\n'.format(
            multiprocessing.cpu_count()))

        # USER INPUTS
        f.write('CONFIG INPUTS\n-------------\n')
        for k, v in context.items():
            # Making sure all numbers are converted to strings
            f.write(str(k) + ': ' + str(v) + '\n')
示例#3
0
文件: main.py 项目: cakester/ivadomed
def run_command(context, n_gif=0, thr_increment=None, resume_training=False):
    """Run main command.

    This function is central in the ivadomed project as training / testing / evaluation commands
    are run via this function. All the process parameters are defined in the config.

    Args:
        context (dict): Dictionary containing all parameters that are needed for a given process. See
            :doc:`configuration_file` for more details.
        n_gif (int): Generates a GIF during training if larger than zero, one frame per epoch for a given slice. The
            parameter indicates the number of 2D slices used to generate GIFs, one GIF per slice. A GIF shows
            predictions of a given slice from the validation sub-dataset. They are saved within the output path.
        thr_increment (float): A threshold analysis is performed at the end of the training using the trained model and
            the training + validation sub-dataset to find the optimal binarization threshold. The specified value
            indicates the increment between 0 and 1 used during the ROC analysis (e.g. 0.1). A falsy value
            (default) skips the analysis.
        resume_training (bool): Load a saved model ("checkpoint.pth.tar" in the output directory specified with flag
            "--path-output" or via the config file "output_path" argument) to resume training. This training state
            is saved everytime a new best model is saved in the log directory.

    Returns:
        tuple or None:
            * If "train" command: Returns a tuple of four values: best training dice, best training loss,
              best validation dice and best validation loss.
            * If "test" command: Returns a tuple ``(df_results, pred_metrics)``: the evaluation results
              returned by ``imed_evaluation.evaluate`` and the prediction metrics returned by
              ``imed_testing.test`` before evaluation.
            * If "segment" command: No return value.

    """
    command = copy.deepcopy(context["command"])
    path_output = set_output_path(context)

    # Create a log with the version of the Ivadomed software and the version of the Annexed dataset (if present)
    create_dataset_and_ivadomed_version_log(context)

    cuda_available, device = imed_utils.define_device(context['gpu_ids'][0])

    # BACKWARDS COMPATIBILITY: If bids_path is string, assign to list - Do this here so it propagates to all functions
    context['loader_parameters']['path_data'] = imed_utils.format_path_data(
        context['loader_parameters']['path_data'])

    # Loader params
    loader_params = set_loader_params(context, command == "train")

    # Get transforms for each subdataset
    transform_train_params, transform_valid_params, transform_test_params = \
        imed_transforms.get_subdatasets_transforms(context["transformation"])

    # MODEL PARAMETERS
    model_params, loader_params = set_model_params(context, loader_params)

    # "segment" is handled entirely by its own function; nothing below applies to it
    if command == 'segment':
        run_segment_command(context, model_params)
        return

    # Get subject lists. "segment" command uses all participants of data path, hence no need to split
    train_lst, valid_lst, test_lst = imed_loader_utils.get_subdatasets_subjects_list(
        context["split_dataset"], context['loader_parameters']['path_data'],
        path_output, context["loader_parameters"]['subject_selection'])
    # TESTING PARAMS
    # Aleatoric uncertainty: when enabled (with n_it > 0) the training transforms are used at test time
    if context['uncertainty'][
            'aleatoric'] and context['uncertainty']['n_it'] > 0:
        transformation_dict = transform_train_params
    else:
        transformation_dict = transform_test_params
    undo_transforms = imed_transforms.UndoCompose(
        imed_transforms.Compose(transformation_dict, requires_undo=True))
    # Testing parameters start as a copy of the training parameters, extended below
    testing_params = copy.deepcopy(context["training_parameters"])
    testing_params.update({'uncertainty': context["uncertainty"]})
    testing_params.update({
        'target_suffix': loader_params["target_suffix"],
        'undo_transforms': undo_transforms,
        'slice_axis': loader_params['slice_axis']
    })

    # Display the transforms selected for the current command
    if command == "train":
        imed_utils.display_selected_transfoms(transform_train_params,
                                              dataset_type=["training"])
        imed_utils.display_selected_transfoms(transform_valid_params,
                                              dataset_type=["validation"])
    elif command == "test":
        imed_utils.display_selected_transfoms(transformation_dict,
                                              dataset_type=["testing"])

    # Check if multiple raters
    check_multiple_raters(command != "train", loader_params)

    if command == 'train':
        # Get Validation dataset
        ds_valid = get_dataset(loader_params, valid_lst,
                               transform_valid_params, cuda_available, device,
                               'validation')

        # Get Training dataset
        ds_train = get_dataset(loader_params, train_lst,
                               transform_train_params, cuda_available, device,
                               'training')
        metric_fns = imed_metrics.get_metric_fns(ds_train.task)

        # If FiLM, normalize data
        if 'film_layers' in model_params and any(model_params['film_layers']):
            model_params, ds_train, ds_valid, train_onehotencoder = \
                film_normalize_data(context, model_params, ds_train, ds_valid, path_output)
        else:
            train_onehotencoder = None

        # Model directory
        create_path_model(context, model_params, ds_train, path_output,
                          train_onehotencoder)

        save_config_file(context, path_output)

        # RUN TRAINING
        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = imed_training.train(
            model_params=model_params,
            dataset_train=ds_train,
            dataset_val=ds_valid,
            training_params=context["training_parameters"],
            path_output=path_output,
            device=device,
            cuda_available=cuda_available,
            metric_fns=metric_fns,
            n_gif=n_gif,
            resume_training=resume_training,
            debugging=context["debugging"])

    # Optional threshold analysis on the training + validation sub-datasets
    if thr_increment:
        # LOAD DATASET
        if command != 'train':  # If command == train, then ds_valid already load
            # Get Validation dataset
            ds_valid = get_dataset(loader_params, valid_lst,
                                   transform_valid_params, cuda_available,
                                   device, 'validation')
        # Get Training dataset with no Data Augmentation
        # (the validation transforms are applied to the training subjects)
        ds_train = get_dataset(loader_params, train_lst,
                               transform_valid_params, cuda_available, device,
                               'training')

        # Choice of optimisation metric
        metric = "recall_specificity" if model_params[
            "name"] in imed_utils.CLASSIFIER_LIST else "dice"
        # Model path
        model_path = os.path.join(path_output, "best_model.pt")
        # Run analysis
        thr = imed_testing.threshold_analysis(model_path=model_path,
                                              ds_lst=[ds_train, ds_valid],
                                              model_params=model_params,
                                              testing_params=testing_params,
                                              metric=metric,
                                              increment=thr_increment,
                                              fname_out=os.path.join(
                                                  path_output, "roc.png"),
                                              cuda_available=cuda_available)

        # Update threshold in config file
        context["postprocessing"]["binarize_prediction"] = {"thr": thr}
        save_config_file(context, path_output)

    if command == 'train':
        return best_training_dice, best_training_loss, best_validation_dice, best_validation_loss

    if command == 'test':
        # LOAD DATASET
        ds_test = imed_loader.load_dataset(**{
            **loader_params,
            **{
                'data_list': test_lst,
                'transforms_params': transformation_dict,
                'dataset_type': 'testing',
                'requires_undo': True
            }
        },
                                           device=device,
                                           cuda_available=cuda_available)

        metric_fns = imed_metrics.get_metric_fns(ds_test.task)

        # If FiLM, update the test dataset and model parameters accordingly
        if 'film_layers' in model_params and any(model_params['film_layers']):
            ds_test, model_params = update_film_model_params(
                context, ds_test, model_params, path_output)

        # RUN INFERENCE
        pred_metrics = imed_testing.test(
            model_params=model_params,
            dataset_test=ds_test,
            testing_params=testing_params,
            path_output=path_output,
            device=device,
            cuda_available=cuda_available,
            metric_fns=metric_fns,
            postprocessing=context['postprocessing'])

        # RUN EVALUATION
        df_results = imed_evaluation.evaluate(
            path_data=loader_params['path_data'],
            path_output=path_output,
            target_suffix=loader_params["target_suffix"],
            eval_params=context["evaluation_parameters"])
        return df_results, pred_metrics
示例#4
0
文件: main.py 项目: cakester/ivadomed
def run_segment_command(context, model_params):
    """Segment every selected image of the configured BIDS dataset(s).

    The postprocessing section of ``context`` is first written into the
    packaged model's json configuration. Each image (or, in multichannel
    mode, each complete group of contrasts of one subject) is then passed to
    ``imed_inference.segment_volume`` and the predictions are saved as
    ``<subject_id>_<modality><target>_pred.nii.gz`` under
    ``<path_output>/pred_masks``.

    Args:
        context (dict): Configuration dictionary. Reads 'loader_parameters'
            ('path_data', 'multichannel', 'contrast_params'), 'path_output',
            'model_name', 'postprocessing' and 'gpu_ids'.
        model_params (dict): Model parameters; 'film_layers' and 'metadata'
            are read to decide whether metadata is forwarded to the model.
    """
    path_data = imed_utils.format_path_data(
        context["loader_parameters"]["path_data"])
    bids_ds = [bids.BIDS(bids_folder) for bids_folder in path_data]

    # Get the merged df from all dataset paths
    df = imed_loader_utils.merge_bids_datasets(path_data)
    subj_lst = df['participant_id'].tolist()

    # Append subjects from all BIDSdatasets into a list
    bids_subjects = []
    for dataset in bids_ds:
        bids_subjects += [
            s for s in dataset.get_subjects()
            if s.record["subject_id"] in subj_lst
        ]

    # Add postprocessing to packaged model
    path_model = os.path.join(context['path_output'], context['model_name'])
    path_model_config = os.path.join(path_model,
                                     context['model_name'] + ".json")
    model_config = imed_config_manager.load_json(path_model_config)
    model_config['postprocessing'] = context['postprocessing']
    with open(path_model_config, 'w') as fp:
        json.dump(model_config, fp, indent=4)

    pred_path = os.path.join(context['path_output'], "pred_masks")

    # Indices of `bids_subjects` entries already gathered into a multichannel
    # group. Entries are marked rather than removed from the list: removing
    # items from a list that is being iterated shifts the iterator and makes
    # later subjects silently disappear.
    consumed = set()

    options = None
    for i_subject, subject in enumerate(bids_subjects):
        if context['loader_parameters']['multichannel']:
            if i_subject in consumed:
                # Already segmented as part of a previous multichannel group
                continue
            fname_img = []
            provided_contrasts = []
            contrasts = context['loader_parameters']['contrast_params'][
                'testing']
            # Keep contrast order
            for c in contrasts:
                for i_other, s in enumerate(bids_subjects):
                    if i_other in consumed:
                        continue
                    if subject.record['subject_id'] == s.record[
                            'subject_id'] and s.record['modality'] == c:
                        provided_contrasts.append(c)
                        fname_img.append(s.record['absolute_path'])
                        consumed.add(i_other)
            if len(fname_img) != len(contrasts):
                logger.warning(
                    "Missing contrast for subject {}. {} were provided but {} are required. Skipping "
                    "subject.".format(subject.record['subject_id'],
                                      provided_contrasts, contrasts))
                continue
        else:
            fname_img = [subject.record['absolute_path']]

        if 'film_layers' in model_params and any(
                model_params['film_layers']) and model_params['metadata']:
            subj_id = subject.record['subject_id']
            metadata = df[df['participant_id'] == subj_id][
                model_params['metadata']].values[0]
            options = {'metadata': metadata}
        pred_list, target_list = imed_inference.segment_volume(
            path_model,
            fname_images=fname_img,
            gpu_id=context['gpu_ids'][0],
            options=options)
        os.makedirs(pred_path, exist_ok=True)

        for pred, target in zip(pred_list, target_list):
            filename = subject.record['subject_id'] + "_" + subject.record['modality'] + target + "_pred" + \
                        ".nii.gz"
            nib.save(pred, os.path.join(pred_path, filename))
示例#5
0
    def __init__(self,
                 bids_df,
                 path_data,
                 subject_lst,
                 target_suffix,
                 contrast_lst,
                 path_hdf5,
                 contrast_balance=None,
                 slice_axis=2,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 transform=None,
                 object_detection_params=None,
                 soft_gt=False):
        """Select the files to convert and write them to an HDF5 file.

        Builds `self.filename_pairs` (one tuple of subject id, image path,
        target paths, ROI path and metadata per retained file), writes the
        file-level attributes of the HDF5 file at `path_hdf5`, then calls
        `self._load_filenames()`.

        Args:
            bids_df: Object exposing a `df` dataframe (columns 'filename',
                'participant_id', 'suffix' and 'path' are read here) and the
                methods `get_subject_fnames`, `get_deriv_fnames` and
                `get_derivatives`.
            path_data (str or list): Dataset path(s); only normalised through
                `imed_utils.format_path_data` here.
            subject_lst (list): Filenames (or, for backward compatibility,
                participant ids) of the files to consider.
            target_suffix (list): Suffixes identifying the target derivatives.
            contrast_lst (list): Contrasts forwarded to the bounding box loader.
            path_hdf5 (str): Path of the HDF5 file to create.
            contrast_balance (dict): Maps a contrast to the maximal fraction of
                its files to keep. None (default) means no balancing.
            slice_axis (int): Stored on the instance and in the HDF5 attributes.
            metadata_choice (str or bool): 'mri_params' enables the MRI
                parameter check; the value is also stored in the HDF5 attributes.
            slice_filter_fn: Stored on the instance.
            roi_params (dict): Its 'suffix' entry identifies the ROI
                derivative. None (default) means no ROI.
            transform: Pair unpacked into `self.prepro_transforms` and
                `self.transform`.
            object_detection_params: Forwarded to
                `imed_obj_detect.load_bounding_boxes`.
            soft_gt (bool): Stored on the instance.
        """
        print("Starting conversion")

        # Normalised for parity with the other loaders; the value is not used
        # further in this constructor.
        path_data = imed_utils.format_path_data(path_data)

        # The declared defaults are None: treat them as "no balancing" / "no ROI"
        # instead of failing on `.keys()` / `["suffix"]`.
        if contrast_balance is None:
            contrast_balance = {}
        if roi_params is None:
            roi_params = {"suffix": None, "slice_filter_roi": None}

        # Sort subject_lst and create a sub-dataframe from bids_df containing only subjects from subject_lst
        subject_lst = sorted(subject_lst)
        df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_lst)]
        # Backward compatibility for subject_lst containing participant_ids instead of filenames
        if df_subjects.empty:
            df_subjects = bids_df.df[bids_df.df['participant_id'].isin(
                subject_lst)]
            subject_lst = sorted(df_subjects['filename'].to_list())

        self.soft_gt = soft_gt
        self.dt = h5py.special_dtype(vlen=str)
        # opening an hdf5 file with write access and writing metadata
        # self.hdf5_file = h5py.File(hdf5_name, "w")
        self.path_hdf5 = path_hdf5
        list_patients = []

        self.filename_pairs = []

        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        self.prepro_transforms, self.transform = transform

        # Create a dictionary with the number of subjects for each contrast of contrast_balance.
        # Summing the boolean mask yields 0 for a contrast absent from the selection,
        # where `value_counts()[True]` would raise a KeyError.
        tot = {
            contrast: df_subjects['suffix'].str.fullmatch(contrast).sum()
            for contrast in contrast_balance
        }

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_balance}

        # Get all subjects path from bids_df for bounding box
        get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
            '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

        # Load bounding box from list of path
        self.has_bounding_box = True
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, get_all_subj_path, slice_axis,
            contrast_lst)

        # Get all derivatives filenames from bids_df
        all_deriv = bids_df.get_deriv_fnames()

        roi_suffix = roi_params["suffix"]

        for subject in tqdm(subject_lst, desc="Loading dataset"):

            df_sub = df_subjects.loc[df_subjects['filename'] == subject]

            # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
            contrast = df_sub['suffix'].values[0]
            if contrast in contrast_balance:
                c[contrast] = c[contrast] + 1
                if c[contrast] / tot[contrast] > contrast_balance[contrast]:
                    continue

            target_filename, roi_filename = [None] * len(target_suffix), None

            derivatives = bids_df.df[bids_df.df['filename'].str.contains(
                '|'.join(bids_df.get_derivatives(
                    subject, all_deriv)))]['path'].to_list()

            for deriv in derivatives:
                for idx, suffix in enumerate(target_suffix):
                    if suffix in deriv:
                        target_filename[idx] = deriv
                if roi_suffix is not None and roi_suffix in deriv:
                    roi_filename = [deriv]

            # Skip files without any target, or without the requested ROI
            if (not any(target_filename)) or (roi_suffix is not None and
                                              roi_filename is None):
                continue

            metadata = df_sub.to_dict(orient='records')[0]
            metadata['contrast'] = contrast

            if len(bounding_box_dict):
                # Take only one bounding box for cropping
                metadata['bounding_box'] = bounding_box_dict[str(
                    df_sub['path'].values[0])][0]

            if metadata_choice == 'mri_params':
                # A list (not a generator) is used on purpose so that the check
                # is called for every key, as in the original implementation.
                if not all([
                        imed_film.check_isMRIparam(m, metadata, subject,
                                                   self.metadata)
                        for m in self.metadata.keys()
                ]):
                    continue

            # Get subj_id (prefix filename without modality suffix and extension)
            subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '',
                             subject)

            self.filename_pairs.append(
                (subj_id, [df_sub['path'].values[0]], target_filename,
                 roi_filename, [metadata]))
            list_patients.append(subj_id)

        self.slice_axis = slice_axis
        self.slice_filter_fn = slice_filter_fn

        # Update HDF5 metadata
        with h5py.File(self.path_hdf5, "w") as hdf5_file:
            hdf5_file.attrs.create('patients_id',
                                   list(set(list_patients)),
                                   dtype=self.dt)
            hdf5_file.attrs['slice_axis'] = slice_axis

            hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True),
                                                  ('filter_empty_mask', False)]
            hdf5_file.attrs['metadata_choice'] = metadata_choice

        # Save images into HDF5 file
        self._load_filenames()
        print("Files loaded.")
示例#6
0
    def __init__(self, bids_df, path_data, subject_lst, target_suffix, contrast_params, slice_axis=2,
                 cache=True, transform=None, metadata_choice=False, slice_filter_fn=None, roi_params=None,
                 multichannel=False, object_detection_params=None, task="segmentation", soft_gt=False):
        """Build `self.filename_pairs` from a BIDS dataframe and initialise the parent dataset.

        Each retained entry is a tuple (image paths, target paths, ROI path,
        metadata list). In multichannel mode one entry is created per subject
        id, and only when a path was found for every contrast of
        `contrast_params["contrast_lst"]`.

        Args:
            bids_df: Object exposing a `df` dataframe (columns 'filename',
                'participant_id', 'suffix' and 'path' are read here) and the
                methods `get_subject_fnames`, `get_deriv_fnames` and
                `get_derivatives`.
            path_data (str or list): Dataset path(s); only normalised through
                `imed_utils.format_path_data` here.
            subject_lst (list): Filenames (or, for backward compatibility,
                participant ids) of the files to consider.
            target_suffix (list): Target suffixes; each item is either a string
                (single rater) or a list of strings (multiple raters).
            contrast_params (dict): Reads "balance" (contrast -> maximal
                fraction of files to keep) and "contrast_lst".
            slice_axis, cache, transform, slice_filter_fn, task: Forwarded to
                the parent constructor.
            metadata_choice (str or bool): 'mri_params', 'contrasts', or the
                name of a dataframe column to add to the metadata.
            roi_params (dict): Its "suffix" entry identifies the ROI
                derivative. None disables the ROI.
            multichannel (bool): Group the contrasts of a subject into one entry.
            object_detection_params: Forwarded to
                `imed_obj_detect.load_bounding_boxes`.
            soft_gt (bool): Stored on the instance and forwarded to the parent.

        Raises:
            ValueError: If `metadata_choice` names a column missing from the dataframe.
            Exception: If no file is retained.
        """
        path_data = imed_utils.format_path_data(path_data)
        self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}
        self.soft_gt = soft_gt
        self.filename_pairs = []
        if metadata_choice == 'mri_params':
            self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                             "EchoTime": [], "Manufacturer": []}

        # Sort subject_lst and create a sub-dataframe from bids_df containing only subjects from subject_lst
        subject_lst = sorted(subject_lst)
        df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_lst)]
        # Backward compatibility for subject_lst containing participant_ids instead of filenames
        if df_subjects.empty:
            df_subjects = bids_df.df[bids_df.df['participant_id'].isin(subject_lst)]
            subject_lst = sorted(df_subjects['filename'].to_list())

        contrast_balance = contrast_params["balance"]

        # Create a dictionary with the number of subjects for each contrast of contrast_balance.
        # Summing the boolean mask yields 0 for a contrast absent from the selection,
        # where `value_counts()[True]` would raise a KeyError.
        tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast).sum()
               for contrast in contrast_balance}

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_balance}

        # Get a list of subject_ids for multichannel_subjects (prefix filename without modality suffix and extension)
        subject_ids = []
        for subject in subject_lst:
            suffix = df_subjects.loc[df_subjects['filename'] == subject]['suffix'].values[0]
            subject_ids.append(re.sub(r'_' + suffix + '.*', '', subject))
        subject_ids = list(set(subject_ids))

        # Create multichannel_subjects dictionary for each subject_id
        multichannel_subjects = {}
        if multichannel:
            num_contrast = len(contrast_params["contrast_lst"])
            # Position of each contrast in the multichannel input
            idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_params["contrast_lst"])}
            multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                               "deriv_path": None,
                                               "roi_filename": None,
                                               "metadata": [None] * num_contrast} for subject in subject_ids}

        # Get all subjects path from bids_df for bounding box
        get_all_subj_path = bids_df.df[bids_df.df['filename']
                                .str.contains('|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

        # Load bounding box from list of path
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                                get_all_subj_path,
                                                                slice_axis,
                                                                contrast_params["contrast_lst"])

        # Get all derivatives filenames from bids_df
        all_deriv = bids_df.get_deriv_fnames()

        roi_suffix = self.roi_params["suffix"]

        # Create filename_pairs
        for subject in tqdm(subject_lst, desc="Loading dataset"):

            df_sub = df_subjects.loc[df_subjects['filename'] == subject]

            # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
            contrast = df_sub['suffix'].values[0]
            if contrast in contrast_balance:
                c[contrast] = c[contrast] + 1
                if c[contrast] / tot[contrast] > contrast_balance[contrast]:
                    continue
            if isinstance(target_suffix[0], str):
                target_filename, roi_filename = [None] * len(target_suffix), None
            else:
                target_filename, roi_filename = [[] for _ in range(len(target_suffix))], None

            derivatives = bids_df.df[bids_df.df['filename']
                          .str.contains('|'.join(bids_df.get_derivatives(subject, all_deriv)))]['path'].to_list()

            for deriv in derivatives:
                for idx, suffix_list in enumerate(target_suffix):
                    # If suffix_list is a string, then only one rater annotation per class is available.
                    # Otherwise, multiple raters segmented the same class.
                    if isinstance(suffix_list, list):
                        for suffix in suffix_list:
                            if suffix in deriv:
                                target_filename[idx].append(deriv)
                    elif suffix_list in deriv:
                        target_filename[idx] = deriv
                if roi_suffix is not None and roi_suffix in deriv:
                    roi_filename = [deriv]

            # Skip files without any target, or without the requested ROI
            if (not any(target_filename)) or (roi_suffix is not None and roi_filename is None):
                continue

            metadata = df_sub.to_dict(orient='records')[0]
            metadata['contrast'] = contrast

            if len(bounding_box_dict):
                # Take only one bounding box for cropping
                metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]

            if metadata_choice == 'mri_params':
                # A list (not a generator) is used on purpose so that the check
                # is called for every key, as in the original implementation.
                if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata) for m in
                            self.metadata.keys()]):
                    continue

            elif metadata_choice and metadata_choice != 'contrasts':
                # add custom data to metadata
                if metadata_choice not in df_sub.columns:
                    raise ValueError("The following metadata cannot be found: {}. "
                                     "Invalid metadata choice.".format(metadata_choice))
                metadata[metadata_choice] = df_sub[metadata_choice].values[0]
                # Create metadata dict for OHE
                data_lst = sorted(set(bids_df.df[metadata_choice].dropna().values))
                metadata['metadata_dict'] = {data: idx for idx, data in enumerate(data_lst)}

            # Fill multichannel dictionary
            # subj_id is the filename without modality suffix and extension
            if multichannel:
                idx = idx_dict[df_sub['suffix'].values[0]]
                subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '', subject)
                multichannel_subjects[subj_id]["absolute_paths"][idx] = df_sub['path'].values[0]
                multichannel_subjects[subj_id]["deriv_path"] = target_filename
                multichannel_subjects[subj_id]["metadata"][idx] = metadata
                if roi_filename:
                    multichannel_subjects[subj_id]["roi_filename"] = roi_filename
            else:
                self.filename_pairs.append(([df_sub['path'].values[0]],
                                            target_filename, roi_filename, [metadata]))

        if multichannel:
            # Keep only the subjects for which every contrast was found
            for subject in multichannel_subjects.values():
                if None not in subject["absolute_paths"]:
                    self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
                                                subject["roi_filename"], subject["metadata"]))

        if not self.filename_pairs:
            raise Exception('No subjects were selected - check selection of parameters on config.json (e.g. center selected + target_suffix)')

        super().__init__(self.filename_pairs, slice_axis, cache, transform, slice_filter_fn, task, self.roi_params,
                         self.soft_gt)
示例#7
0
    def __init__(self,
                 path_data,
                 subject_lst,
                 target_suffix,
                 contrast_lst,
                 path_hdf5,
                 contrast_balance=None,
                 slice_axis=2,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 transform=None,
                 object_detection_params=None,
                 soft_gt=False):
        """Select subjects from one or several BIDS folders and convert them to an HDF5 file.

        Args:
            path_data (list) or (str): BIDS folders paths, normalised with
                `imed_utils.format_path_data`.
            subject_lst (list): Ids of the subjects to keep (compared with the
                "subject_id" field of each subject record).
            target_suffix (list): Suffixes of the target files. A derivative is selected
                when its name ends with `<modality><suffix>.nii.gz`.
            contrast_lst (list): Modalities to include; other subjects are ignored.
            path_hdf5 (str): Path of the HDF5 file to write (opened in "w" mode).
            contrast_balance (dict): Maps a modality to the maximum fraction of the
                selected subjects of that modality that is kept. None means no balancing.
            slice_axis (int): Stored on the instance, written to the HDF5 attributes and
                forwarded to the bounding box loader.
            metadata_choice (str) or (bool): When equal to 'mri_params', a subject is kept
                only if `imed_film.check_isMRIparam` accepts every tracked MRI parameter.
            slice_filter_fn: Stored on the instance. Note that the 'slice_filter_fn' HDF5
                attribute is written from a hard-coded value, not from this argument.
            roi_params (dict): Must contain the key "suffix" (ROI file suffix, or None).
                None is treated as "no ROI".
            transform: Pair unpacked into `(prepro_transforms, transform)`. The None
                default cannot be unpacked, so a pair has to be provided.
            object_detection_params: Forwarded to `imed_obj_detect.load_bounding_boxes`.
            soft_gt (bool): Stored on the instance.

        Raises:
            IndexError: If no dataset folder is provided.
        """
        print("Starting conversion")

        # The signature advertises None for these two arguments: normalise them so the
        # defaults do not crash on `.keys()` / `["suffix"]` further down.
        if contrast_balance is None:
            contrast_balance = {}
        roi_suffix = None if roi_params is None else roi_params["suffix"]

        path_data = imed_utils.format_path_data(path_data)
        if not path_data:
            # Same exception type as the former out-of-range access on an empty
            # dataset list, with an explicit message.
            raise IndexError("No dataset folder selected")
        self.bids_ds = [bids.BIDS(bids_folder) for bids_folder in path_data]

        # Requested subjects from all BIDS datasets, in dataset order
        bids_subjects = [
            s for bids_dataset in self.bids_ds
            for s in bids_dataset.get_subjects()
            if s.record["subject_id"] in subject_lst
        ]

        self.soft_gt = soft_gt
        self.dt = h5py.special_dtype(vlen=str)
        self.path_hdf5 = path_hdf5
        list_patients = []

        self.filename_pairs = []

        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        self.prepro_transforms, self.transform = transform

        # Number of selected subjects for each contrast of contrast_balance
        subjects_per_contrast = {
            contrast: sum(1 for s in bids_subjects
                          if s.record["modality"] == contrast)
            for contrast in contrast_balance
        }

        # Counter that helps to balance the contrasts
        seen_per_contrast = dict.fromkeys(contrast_balance, 0)

        # Every subject of every dataset, collected in a new list so that the list
        # returned by the first dataset is not extended in place.
        get_subjects_all = [
            s for bids_dataset in self.bids_ds
            for s in bids_dataset.get_subjects()
        ]

        self.has_bounding_box = True
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, get_subjects_all, slice_axis,
            contrast_lst)

        for subject in tqdm(bids_subjects, desc="Loading dataset"):
            modality = subject.record["modality"]
            if modality not in contrast_lst:
                continue

            # Training & Validation: do not consider the contrasts over the threshold
            # contained in contrast_balance
            if modality in contrast_balance:
                seen_per_contrast[modality] += 1
                ratio = seen_per_contrast[modality] / subjects_per_contrast[modality]
                if ratio > contrast_balance[modality]:
                    continue

            if not subject.has_derivative("labels"):
                print("Subject without derivative, skipping.")
                continue
            derivatives = subject.get_derivatives("labels")

            # One slot per target suffix; a slot stays None when no file matches
            target_filename = [None] * len(target_suffix)
            roi_filename = None

            for deriv in derivatives:
                for idx, suffix in enumerate(target_suffix):
                    if deriv.endswith(modality + suffix + ".nii.gz"):
                        target_filename[idx] = deriv

                if roi_suffix is not None and \
                        deriv.endswith(modality + roi_suffix + ".nii.gz"):
                    roi_filename = [deriv]

            # Skip subjects with no target at all, or with a missing ROI when one is required
            if not any(target_filename):
                continue
            if roi_suffix is not None and roi_filename is None:
                continue

            if not subject.has_metadata():
                print("Subject without metadata.")
                metadata = {}
            else:
                metadata = subject.metadata()
            # add contrast to metadata
            metadata['contrast'] = modality

            if metadata_choice == 'mri_params':
                # The list is built on purpose: every parameter is checked, without
                # short-circuiting on the first failure (the helper is not visible here
                # and may do more than return a boolean).
                if not all([
                        imed_film.check_isMRIparam(m, metadata)
                        for m in self.metadata.keys()
                ]):
                    continue

            if len(bounding_box_dict):
                # Take only one bounding box for cropping
                metadata['bounding_box'] = bounding_box_dict[str(
                    subject.record["absolute_path"])][0]

            self.filename_pairs.append(
                (subject.record["subject_id"],
                 [subject.record.absolute_path],
                 target_filename, roi_filename, [metadata]))

            list_patients.append(subject.record["subject_id"])

        self.slice_axis = slice_axis
        self.slice_filter_fn = slice_filter_fn

        # Update HDF5 metadata
        with h5py.File(self.path_hdf5, "w") as hdf5_file:
            hdf5_file.attrs.create('patients_id',
                                   list(set(list_patients)),
                                   dtype=self.dt)
            hdf5_file.attrs['slice_axis'] = slice_axis

            hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True),
                                                  ('filter_empty_mask', False)]
            hdf5_file.attrs['metadata_choice'] = metadata_choice

        # Save images into HDF5 file
        self._load_filenames()
        print("Files loaded.")
示例#8
0
def merge_bids_datasets(path_data):
    """Read the participants.tsv from several BIDS folders and merge them into a single dataframe.

    Args:
        path_data (list) or (str): BIDS folders paths

    Returns:
        df: dataframe with merged subjects and columns. It holds one row per
            "participant_id" (first occurrence kept), missing values are replaced by "-",
            and the last column, "path_output", is the folder each subject was read from.

    Raises:
        Exception: If no dataset folder is provided.
    """
    path_data = imed_utils.format_path_data(path_data)

    if not path_data:
        raise Exception("No dataset folder selected")

    if len(path_data) == 1:
        # read participants.tsv as pandas dataframe
        df = bids.BIDS(path_data[0]).participants.content
        # Append a new column to show which dataset the Subjects belong to (this will be used later for loading)
        df['path_output'] = path_data[0]
    else:
        # Merge multiple .tsv files into the same dataframe
        df = None
        for folder in path_data:
            df_next = pd.read_table(os.path.join(folder, 'participants.tsv'),
                                    encoding="ISO-8859-1")
            # Convert to string to get rid of potential TypeError during merging within the same column
            df_next = df_next.astype(str)
            # Add the Bids_path to the dataframe
            df_next['path_output'] = folder
            if df is None:
                df = df_next
            else:
                # Merge the .tsv files (This keeps also non-overlapping fields)
                df = pd.merge(left=df, right=df_next, how='outer')

    # Get rid of duplicate entries based on the field "participant_id" (the same subject could in theory be
    # included in both datasets). The assumption here is that if the two datasets contain the same subject,
    # identical sessions of the subject are contained within the two folders, so only the files within the
    # first folder will be kept. drop_duplicates works on row order, so it does not depend on the index labels.
    df = df.drop_duplicates(subset="participant_id", keep="first")

    # Rearrange the bids paths to be last column of the dataframe
    cols = [col for col in df.columns if col != "path_output"]
    cols.append("path_output")
    df = df[cols]

    # Substitute NaNs with string: "-". This helps with metadata selection.
    # Only real NaNs are replaced: values already cast to str above are left as they are.
    df = df.fillna("-")

    return df