def create_filename_pair(self, multichannel_subjects, subject, c, tot, multichannel, df_subjects, contrast_params,
                         target_suffix, all_deriv, bids_df, bounding_box_dict, idx_dict, metadata_choice):
    """Gather the target/ROI file names and the metadata associated with one subject file.

    Args:
        multichannel_subjects: Not used by this method; kept for interface compatibility.
        subject (str): Value looked up in the ``filename`` column of ``df_subjects``.
        c (dict): Running per-contrast counter; incremented in place for balanced contrasts.
        tot (dict): Per-contrast totals used as the denominator of the balance ratio.
        multichannel: Not used by this method; kept for interface compatibility.
        df_subjects (pd.DataFrame): Table with at least ``filename``, ``suffix`` and ``path`` columns.
        contrast_params (dict): Read through ``ContrastParamsKW.BALANCE`` (contrast -> maximum ratio).
        target_suffix (list): Target suffixes; ``str`` entries mean one file per class, otherwise each
            class collects a list of files.
        all_deriv: Derivative file names forwarded to ``bids_df.get_derivatives``.
        bids_df: Object exposing a ``df`` DataFrame and a ``get_derivatives`` method.
        bounding_box_dict (dict): Bounding boxes indexed by the string form of the subject path.
        idx_dict: Not used by this method; kept for interface compatibility.
        metadata_choice: Metadata mode (``MetadataKW.MRI_PARAMS``, ``MetadataKW.CONTRASTS``, a custom
            key, or a falsy value).

    Returns:
        tuple or None: ``(df_sub, roi_filename, target_filename, metadata)``, or ``None`` when the subject
        is skipped (contrast over its balance threshold, no target/ROI found, or MRI parameters missing).
    """
    df_sub = df_subjects.loc[df_subjects['filename'] == subject]

    # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
    contrast = df_sub['suffix'].values[0]
    balance = contrast_params[ContrastParamsKW.BALANCE]
    if contrast in balance:
        c[contrast] = c[contrast] + 1
        if c[contrast] / tot[contrast] > balance[contrast]:
            return None

    if isinstance(target_suffix[0], str):
        target_filename = [None] * len(target_suffix)
    else:
        target_filename = [[] for _ in range(len(target_suffix))]
    roi_filename = None

    deriv_fnames = list(bids_df.get_derivatives(subject, all_deriv))
    if not deriv_fnames:
        # Joining an empty list yields the pattern '' which `str.contains` matches against EVERY row;
        # the subject would then be paired with files belonging to other subjects.
        return None
    derivatives = bids_df.df[bids_df.df['filename'].str.contains('|'.join(deriv_fnames))]['path'].to_list()

    roi_suffix = self.roi_params[ROIParamsKW.SUFFIX]
    for derivative in derivatives:
        self.get_target_filename(target_suffix, target_filename, derivative)
        if roi_suffix is not None and roi_suffix in derivative:
            roi_filename = [derivative]

    # A subject is unusable without any target, or without a ROI when one is requested.
    if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
        return None

    metadata = df_sub.to_dict(orient='records')[0]
    metadata[MetadataKW.CONTRAST] = contrast

    if len(bounding_box_dict):
        # Take only one bounding box for cropping
        metadata[MetadataKW.BOUNDING_BOX] = bounding_box_dict[str(df_sub['path'].values[0])][0]

    if metadata_choice == MetadataKW.MRI_PARAMS:
        # Keep the list (not a generator): every key must be checked, `all` must not short-circuit the calls.
        if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                    for m in self.metadata.keys()]):
            return None
    elif metadata_choice and metadata_choice != MetadataKW.CONTRASTS:
        self.create_metadata_dict(metadata, metadata_choice, df_sub, bids_df)

    return df_sub, roi_filename, target_filename, metadata
def process_subject(self, bids_df, subject, df_subjects, c, tot, contrast_balance, target_suffix, all_deriv,
                    roi_params, bounding_box_dict, metadata_choice, list_patients):
    """Register one subject file in ``self.filename_pairs`` if it passes all selection checks.

    Args:
        bids_df: Forwarded unchanged to ``self.get_filenames``.
        subject (str): Value looked up in the ``filename`` column of ``df_subjects``.
        df_subjects (pd.DataFrame): Table with at least ``filename``, ``suffix`` and ``path`` columns.
        c (dict): Per-contrast counter, forwarded to ``self.is_contrast_over_threshold``.
        tot (dict): Per-contrast totals, forwarded to ``self.is_contrast_over_threshold``.
        contrast_balance (dict): Balance thresholds, forwarded to ``self.is_contrast_over_threshold``.
        target_suffix (list): Target suffixes, forwarded to ``self.get_filenames``.
        all_deriv: Derivative file names, forwarded to ``self.get_filenames``.
        roi_params (dict): Must contain the key ``"suffix"`` (``None`` when no ROI is requested).
        bounding_box_dict (dict): Bounding boxes indexed by the string form of the subject path.
        metadata_choice: Metadata mode; only ``'mri_params'`` triggers the MRI parameter check.
        list_patients (list): Receives the subject id of every accepted subject (mutated in place).

    Returns:
        None. Accepted subjects are appended to ``self.filename_pairs`` and ``list_patients``.
    """
    df_sub = df_subjects.loc[df_subjects['filename'] == subject]

    # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
    contrast = df_sub['suffix'].values[0]
    if self.is_contrast_over_threshold(c, tot, contrast, contrast_balance):
        return

    target_filename, roi_filename = self.get_filenames(bids_df, subject, all_deriv, target_suffix, roi_params)

    # A subject is unusable without any target, or without a ROI when one is requested.
    if not any(target_filename) or (roi_params["suffix"] is not None and roi_filename is None):
        return

    metadata = df_sub.to_dict(orient='records')[0]
    metadata['contrast'] = contrast

    if len(bounding_box_dict):
        # Take only one bounding box for cropping
        metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]

    # Only inspect `self.metadata` in 'mri_params' mode: evaluating it unconditionally raises
    # AttributeError whenever the attribute was never created for another metadata choice.
    if metadata_choice == 'mri_params':
        # Keep the list (not a generator) so that every key is checked.
        are_mri_params = all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                              for m in self.metadata.keys()])
        if not are_mri_params:
            return

    # Get subj_id (prefix filename without modality suffix and extension)
    subj_id = subject.split('.')[0].split('_')[0]

    self.filename_pairs.append((subj_id, [df_sub['path'].values[0]], target_filename, roi_filename, [metadata]))
    list_patients.append(subj_id)
def __init__(self, root_dir, subject_lst, target_suffix, contrast_params, slice_axis=2, cache=True,
             transform=None, metadata_choice=False, slice_filter_fn=None, roi_params=None, multichannel=False,
             object_detection_params=None, task="segmentation", soft_gt=False):
    """Build the list of (input, target, ROI, metadata) file tuples from a BIDS directory.

    Args:
        root_dir: Path handed to ``bids.BIDS``.
        subject_lst (list): Subject ids to keep (compared with ``record["subject_id"]``).
        target_suffix (list): Suffixes of the target derivatives, one per class.
        contrast_params (dict): Must contain ``"contrast_lst"`` (contrasts to load) and ``"balance"``
            (contrast -> maximum ratio of subjects to keep).
        slice_axis (int): Forwarded to the bounding-box loader and to the parent constructor.
        cache (bool): Forwarded to the parent constructor.
        transform: Forwarded to the parent constructor.
        metadata_choice: ``'mri_params'``, ``'contrasts'``, a column name of participants.tsv, or a falsy
            value.
        slice_filter_fn: Forwarded to the parent constructor.
        roi_params (dict): ROI settings; defaults to no ROI when ``None``.
        multichannel (bool): If True, group the contrasts of each subject into a single entry.
        object_detection_params: Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        task (str): Forwarded to the parent constructor.
        soft_gt (bool): Forwarded to the parent constructor.

    Raises:
        ValueError: If ``metadata_choice`` is a custom key absent from the participants table.
    """
    self.bids_ds = bids.BIDS(root_dir)
    self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}
    self.soft_gt = soft_gt
    self.filename_pairs = []

    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [], "EchoTime": [], "Manufacturer": []}

    bids_subjects = [s for s in self.bids_ds.get_subjects() if s.record["subject_id"] in subject_lst]

    balance = contrast_params["balance"]
    contrast_lst = contrast_params["contrast_lst"]

    # Create a dictionary with the number of subjects for each contrast of contrast_balance
    tot = {contrast: len([s for s in bids_subjects if s.record["modality"] == contrast])
           for contrast in balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in balance}

    multichannel_subjects = {}
    if multichannel:
        num_contrast = len(contrast_lst)
        idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_lst)}
        multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                           "deriv_path": None,
                                           "roi_filename": None,
                                           "metadata": [None] * num_contrast}
                                 for subject in subject_lst}

    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            self.bids_ds.get_subjects(),
                                                            slice_axis,
                                                            contrast_lst)

    roi_suffix = self.roi_params["suffix"]
    # Participants table, read at most once (and only if a custom metadata key is requested) instead of
    # re-creating a BIDS object for every subject.
    participants_df = None

    for subject in tqdm(bids_subjects, desc="Loading dataset"):
        modality = subject.record["modality"]
        if modality not in contrast_lst:
            continue

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        if modality in balance:
            c[modality] = c[modality] + 1
            if c[modality] / tot[modality] > balance[modality]:
                continue

        if not subject.has_derivative("labels"):
            print("Subject without derivative, skipping.")
            continue

        derivatives = subject.get_derivatives("labels")
        target_filename, roi_filename = [None] * len(target_suffix), None

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if deriv.endswith(modality + suffix + ".nii.gz"):
                    target_filename[idx] = deriv

            if roi_suffix is not None and deriv.endswith(modality + roi_suffix + ".nii.gz"):
                roi_filename = [deriv]

        # A subject is unusable without any target, or without a ROI when one is requested.
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        metadata = subject.metadata() if subject.has_metadata() else {}

        # add contrast to metadata
        metadata['contrast'] = modality

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(subject.record["absolute_path"])][0]

        if metadata_choice == 'mri_params':
            # Keep the list (not a generator) so that every key is checked.
            if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                        for m in self.metadata.keys()]):
                continue

        elif metadata_choice and metadata_choice != 'contrasts':
            # add custom data to metadata
            subject_id = subject.record["subject_id"]
            if participants_df is None:
                participants_df = self.bids_ds.participants.content
            df = participants_df
            if metadata_choice not in df.columns:
                raise ValueError("The following metadata cannot be found in participants.tsv file: {}. "
                                 "Invalid metadata choice.".format(metadata_choice))

            metadata[metadata_choice] = df[df['participant_id'] == subject_id][metadata_choice].values[0]

            # Create metadata dict for OHE
            data_lst = sorted(set(df[metadata_choice].values))
            metadata['metadata_dict'] = {data: idx for idx, data in enumerate(data_lst)}

        # Fill multichannel dictionary
        if multichannel:
            idx = idx_dict[modality]
            subj_id = subject.record["subject_id"]
            entry = multichannel_subjects[subj_id]
            entry["absolute_paths"][idx] = subject.record.absolute_path
            entry["deriv_path"] = target_filename
            entry["metadata"][idx] = metadata
            if roi_filename:
                entry["roi_filename"] = roi_filename
        else:
            self.filename_pairs.append(([subject.record.absolute_path],
                                        target_filename, roi_filename, [metadata]))

    if multichannel:
        # Only keep the subjects for which every requested contrast was found.
        for subject in multichannel_subjects.values():
            if None not in subject["absolute_paths"]:
                self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
                                            subject["roi_filename"], subject["metadata"]))

    super().__init__(self.filename_pairs, slice_axis, cache, transform, slice_filter_fn, task, self.roi_params,
                     self.soft_gt)
def __init__(self, root_dir, subject_lst, target_suffix, contrast_lst, hdf5_name, contrast_balance=None,
             slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None, transform=None,
             object_detection_params=None, soft_gt=False):
    """Select the files of a BIDS directory and start their conversion into an HDF5 file.

    Args:
        root_dir: Path handed to ``bids.BIDS``.
        subject_lst (list): Subject ids to keep (compared with ``record["subject_id"]``).
        target_suffix (list): Suffixes of the target derivatives, one per class.
        contrast_lst (list): Contrasts to load.
        hdf5_name: Path of the HDF5 file, opened in write mode.
        contrast_balance (dict): Contrast -> maximum ratio of subjects to keep; ``None`` disables balancing.
        slice_axis (int): Stored on the instance and in the HDF5 attributes.
        metadata_choice: ``'mri_params'`` enables the MRI parameter check; stored in the HDF5 attributes.
        slice_filter_fn: Stored on the instance.
        roi_params (dict): ROI settings with a ``"suffix"`` key; ``None`` means no ROI.
        transform: Pair unpacked into ``self.prepro_transforms`` and ``self.transform``.
        object_detection_params: Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        soft_gt (bool): Stored on the instance.
    """
    print("Starting conversion")

    # The declared defaults are None: normalise them so `.keys()` / `["suffix"]` below do not crash.
    contrast_balance = {} if contrast_balance is None else contrast_balance
    roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}

    # Getting all patients id
    self.bids_ds = bids.BIDS(root_dir)
    bids_subjects = [s for s in self.bids_ds.get_subjects() if s.record["subject_id"] in subject_lst]

    self.soft_gt = soft_gt
    self.dt = h5py.special_dtype(vlen=str)
    # opening an hdf5 file with write access and writing metadata
    self.hdf5_file = h5py.File(hdf5_name, "w")

    list_patients = []

    self.filename_pairs = []

    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [], "EchoTime": [], "Manufacturer": []}

    # NOTE(review): the default `transform=None` cannot be unpacked; callers apparently always pass a pair.
    self.prepro_transforms, self.transform = transform

    # Create a dictionary with the number of subjects for each contrast of contrast_balance
    tot = {contrast: len([s for s in bids_subjects if s.record["modality"] == contrast])
           for contrast in contrast_balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in contrast_balance}

    self.has_bounding_box = True
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            self.bids_ds.get_subjects(),
                                                            slice_axis,
                                                            contrast_lst)

    roi_suffix = roi_params["suffix"]

    for subject in tqdm(bids_subjects, desc="Loading dataset"):
        modality = subject.record["modality"]
        if modality not in contrast_lst:
            continue

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        if modality in contrast_balance:
            c[modality] = c[modality] + 1
            if c[modality] / tot[modality] > contrast_balance[modality]:
                continue

        if not subject.has_derivative("labels"):
            print("Subject without derivative, skipping.")
            continue

        derivatives = subject.get_derivatives("labels")
        target_filename, roi_filename = [None] * len(target_suffix), None

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if deriv.endswith(modality + suffix + ".nii.gz"):
                    target_filename[idx] = deriv

            if roi_suffix is not None and deriv.endswith(modality + roi_suffix + ".nii.gz"):
                roi_filename = [deriv]

        # A subject is unusable without any target, or without a ROI when one is requested.
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        if not subject.has_metadata():
            print("Subject without metadata.")
            metadata = {}
        else:
            metadata = subject.metadata()

        # add contrast to metadata
        metadata['contrast'] = modality

        if metadata_choice == 'mri_params':
            # NOTE(review): two-argument call kept as in the original; confirm it matches the current
            # signature of `imed_film.check_isMRIparam`.
            if not all([imed_film.check_isMRIparam(m, metadata) for m in self.metadata.keys()]):
                continue

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(subject.record["absolute_path"])][0]

        self.filename_pairs.append((subject.record["subject_id"], [subject.record.absolute_path],
                                    target_filename, roi_filename, [metadata]))

        list_patients.append(subject.record["subject_id"])

    self.slice_axis = slice_axis
    self.slice_filter_fn = slice_filter_fn

    # Update HDF5 metadata
    self.hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt)
    self.hdf5_file.attrs['slice_axis'] = slice_axis

    self.hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)]
    self.hdf5_file.attrs['metadata_choice'] = metadata_choice

    # Save images into HDF5 file
    self._load_filenames()
    print("Files loaded.")
def __init__(self, bids_df, path_data, subject_lst, target_suffix, contrast_lst, path_hdf5,
             contrast_balance=None, slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None,
             transform=None, object_detection_params=None, soft_gt=False):
    """Select files from an indexed BIDS dataframe and start their conversion into an HDF5 file.

    Args:
        bids_df: Object exposing a ``df`` DataFrame plus ``get_subject_fnames``, ``get_deriv_fnames`` and
            ``get_derivatives``.
        path_data: Dataset path(s), normalised through ``imed_utils.format_path_data``.
        subject_lst (list): File names to keep, or participant ids (backward compatibility).
        target_suffix (list): Suffixes of the target derivatives, one per class.
        contrast_lst (list): Contrasts forwarded to the bounding-box loader.
        path_hdf5: Path of the HDF5 file, (re)created in write mode.
        contrast_balance (dict): Contrast -> maximum ratio of subjects to keep; ``None`` disables balancing.
        slice_axis (int): Stored on the instance and in the HDF5 attributes.
        metadata_choice: ``'mri_params'`` enables the MRI parameter check; stored in the HDF5 attributes.
        slice_filter_fn: Stored on the instance.
        roi_params (dict): ROI settings with a ``"suffix"`` key; ``None`` means no ROI.
        transform: Pair unpacked into ``self.prepro_transforms`` and ``self.transform``.
        object_detection_params: Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        soft_gt (bool): Stored on the instance.
    """
    print("Starting conversion")

    # The declared defaults are None: normalise them so `.keys()` / `["suffix"]` below do not crash.
    contrast_balance = {} if contrast_balance is None else contrast_balance
    roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}

    path_data = imed_utils.format_path_data(path_data)

    # Sort subject_lst and create a sub-dataframe from bids_df containing only subjects from subject_lst
    subject_lst = sorted(subject_lst)
    df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_lst)]
    # Backward compatibility for subject_lst containing participant_ids instead of filenames
    if df_subjects.empty:
        df_subjects = bids_df.df[bids_df.df['participant_id'].isin(subject_lst)]
        subject_lst = sorted(df_subjects['filename'].to_list())

    self.soft_gt = soft_gt
    self.dt = h5py.special_dtype(vlen=str)
    # The HDF5 file is only opened further down, inside a context manager.
    self.path_hdf5 = path_hdf5
    list_patients = []

    self.filename_pairs = []

    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [], "EchoTime": [], "Manufacturer": []}

    # NOTE(review): the default `transform=None` cannot be unpacked; callers apparently always pass a pair.
    self.prepro_transforms, self.transform = transform

    # Create a dictionary with the number of subjects for each contrast of contrast_balance.
    # Summing the boolean mask yields 0 for an absent contrast, where `value_counts()[True]` raised KeyError.
    tot = {contrast: int(df_subjects['suffix'].str.fullmatch(contrast, na=False).sum())
           for contrast in contrast_balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in contrast_balance}

    # Get all subjects path from bids_df for bounding box
    get_all_subj_path = bids_df.df[bids_df.df['filename']
                                   .str.contains('|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

    # Load bounding box from list of path
    self.has_bounding_box = True
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            get_all_subj_path,
                                                            slice_axis,
                                                            contrast_lst)

    # Get all derivatives filenames from bids_df
    all_deriv = bids_df.get_deriv_fnames()

    roi_suffix = roi_params["suffix"]

    for subject in tqdm(subject_lst, desc="Loading dataset"):

        df_sub = df_subjects.loc[df_subjects['filename'] == subject]

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        contrast = df_sub['suffix'].values[0]
        if contrast in contrast_balance:
            c[contrast] = c[contrast] + 1
            if c[contrast] / tot[contrast] > contrast_balance[contrast]:
                continue

        target_filename, roi_filename = [None] * len(target_suffix), None

        deriv_fnames = list(bids_df.get_derivatives(subject, all_deriv))
        if not deriv_fnames:
            # Joining an empty list yields the pattern '' which `str.contains` matches against EVERY row;
            # the subject would then be paired with files belonging to other subjects.
            continue
        derivatives = bids_df.df[bids_df.df['filename']
                                 .str.contains('|'.join(deriv_fnames))]['path'].to_list()

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if suffix in deriv:
                    target_filename[idx] = deriv
            if roi_suffix is not None and roi_suffix in deriv:
                roi_filename = [deriv]

        # A subject is unusable without any target, or without a ROI when one is requested.
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        metadata = df_sub.to_dict(orient='records')[0]
        metadata['contrast'] = contrast

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]

        if metadata_choice == 'mri_params':
            # Keep the list (not a generator) so that every key is checked.
            if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                        for m in self.metadata.keys()]):
                continue

        # Get subj_id (prefix filename without modality suffix and extension)
        subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '', subject)

        self.filename_pairs.append((subj_id, [df_sub['path'].values[0]],
                                    target_filename, roi_filename, [metadata]))
        list_patients.append(subj_id)

    self.slice_axis = slice_axis
    self.slice_filter_fn = slice_filter_fn

    # Update HDF5 metadata
    with h5py.File(self.path_hdf5, "w") as hdf5_file:
        hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt)
        hdf5_file.attrs['slice_axis'] = slice_axis

        hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)]
        hdf5_file.attrs['metadata_choice'] = metadata_choice

    # Save images into HDF5 file
    self._load_filenames()
    print("Files loaded.")
def __init__(self, bids_df, path_data, subject_lst, target_suffix, contrast_params, slice_axis=2,
             cache=True, transform=None, metadata_choice=False, slice_filter_fn=None, roi_params=None,
             multichannel=False, object_detection_params=None, task="segmentation", soft_gt=False):
    """Build the list of (input, target, ROI, metadata) file tuples from an indexed BIDS dataframe.

    Args:
        bids_df: Object exposing a ``df`` DataFrame plus ``get_subject_fnames``, ``get_deriv_fnames`` and
            ``get_derivatives``.
        path_data: Dataset path(s), normalised through ``imed_utils.format_path_data``.
        subject_lst (list): File names to keep, or participant ids (backward compatibility).
        target_suffix (list): One entry per class: a ``str`` (single annotation) or a list of suffixes
            (several raters for the same class).
        contrast_params (dict): Must contain ``"contrast_lst"`` and ``"balance"`` (contrast -> maximum
            ratio of subjects to keep).
        slice_axis (int): Forwarded to the bounding-box loader and to the parent constructor.
        cache (bool): Forwarded to the parent constructor.
        transform: Forwarded to the parent constructor.
        metadata_choice: ``'mri_params'``, ``'contrasts'``, a column name of ``bids_df.df``, or a falsy
            value.
        slice_filter_fn: Forwarded to the parent constructor.
        roi_params (dict): ROI settings; defaults to no ROI when ``None``.
        multichannel (bool): If True, group the contrasts of each subject into a single entry.
        object_detection_params: Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        task (str): Forwarded to the parent constructor.
        soft_gt (bool): Forwarded to the parent constructor.

    Raises:
        ValueError: If ``metadata_choice`` is a custom key that is not a column of the dataframe.
        Exception: If no subject passes the selection.
    """
    path_data = imed_utils.format_path_data(path_data)
    self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}
    self.soft_gt = soft_gt
    self.filename_pairs = []
    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [], "EchoTime": [], "Manufacturer": []}

    # Sort subject_lst and create a sub-dataframe from bids_df containing only subjects from subject_lst
    subject_lst = sorted(subject_lst)
    df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_lst)]
    # Backward compatibility for subject_lst containing participant_ids instead of filenames
    if df_subjects.empty:
        df_subjects = bids_df.df[bids_df.df['participant_id'].isin(subject_lst)]
        subject_lst = sorted(df_subjects['filename'].to_list())

    balance = contrast_params["balance"]
    contrast_lst = contrast_params["contrast_lst"]

    # Create a dictionary with the number of subjects for each contrast of contrast_balance.
    # Summing the boolean mask yields 0 for an absent contrast, where `value_counts()[True]` raised KeyError.
    tot = {contrast: int(df_subjects['suffix'].str.fullmatch(contrast, na=False).sum())
           for contrast in balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in balance}

    # Get a list of subject_ids for multichannel_subjects (prefix filename without modality suffix and extension)
    subject_ids = []
    for subject in subject_lst:
        suffix = df_subjects.loc[df_subjects['filename'] == subject]['suffix'].values[0]
        subject_ids.append(re.sub(r'_' + suffix + '.*', '', subject))
    subject_ids = list(set(subject_ids))

    # Create multichannel_subjects dictionary for each subject_id
    multichannel_subjects = {}
    if multichannel:
        num_contrast = len(contrast_lst)
        idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_lst)}
        multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                           "deriv_path": None,
                                           "roi_filename": None,
                                           "metadata": [None] * num_contrast}
                                 for subject in subject_ids}

    # Get all subjects path from bids_df for bounding box
    get_all_subj_path = bids_df.df[bids_df.df['filename']
                                   .str.contains('|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

    # Load bounding box from list of path
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            get_all_subj_path,
                                                            slice_axis,
                                                            contrast_lst)

    # Get all derivatives filenames from bids_df
    all_deriv = bids_df.get_deriv_fnames()

    roi_suffix = self.roi_params["suffix"]
    # Sorted distinct values of the custom metadata column, computed once on first use.
    ohe_values = None

    # Create filename_pairs
    for subject in tqdm(subject_lst, desc="Loading dataset"):

        df_sub = df_subjects.loc[df_subjects['filename'] == subject]

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        contrast = df_sub['suffix'].values[0]
        if contrast in balance:
            c[contrast] = c[contrast] + 1
            if c[contrast] / tot[contrast] > balance[contrast]:
                continue

        if isinstance(target_suffix[0], str):
            target_filename = [None] * len(target_suffix)
        else:
            target_filename = [[] for _ in range(len(target_suffix))]
        roi_filename = None

        deriv_fnames = list(bids_df.get_derivatives(subject, all_deriv))
        if not deriv_fnames:
            # Joining an empty list yields the pattern '' which `str.contains` matches against EVERY row;
            # the subject would then be paired with files belonging to other subjects.
            continue
        derivatives = bids_df.df[bids_df.df['filename']
                                 .str.contains('|'.join(deriv_fnames))]['path'].to_list()

        for deriv in derivatives:
            for idx, suffix_list in enumerate(target_suffix):
                # If suffix_list is a string, then only one rater annotation per class is available.
                # Otherwise, multiple raters segmented the same class.
                if isinstance(suffix_list, list):
                    for suffix in suffix_list:
                        if suffix in deriv:
                            target_filename[idx].append(deriv)
                elif suffix_list in deriv:
                    target_filename[idx] = deriv
            if roi_suffix is not None and roi_suffix in deriv:
                roi_filename = [deriv]

        # A subject is unusable without any target, or without a ROI when one is requested.
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        metadata = df_sub.to_dict(orient='records')[0]
        metadata['contrast'] = contrast

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]

        if metadata_choice == 'mri_params':
            # Keep the list (not a generator) so that every key is checked.
            if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                        for m in self.metadata.keys()]):
                continue

        elif metadata_choice and metadata_choice != 'contrasts':
            # add custom data to metadata
            if metadata_choice not in df_sub.columns:
                raise ValueError("The following metadata cannot be found: {}. "
                                 "Invalid metadata choice.".format(metadata_choice))
            metadata[metadata_choice] = df_sub[metadata_choice].values[0]
            # Create metadata dict for OHE (a fresh dict per subject, built from the cached value list)
            if ohe_values is None:
                ohe_values = sorted(set(bids_df.df[metadata_choice].dropna().values))
            metadata['metadata_dict'] = {data: idx for idx, data in enumerate(ohe_values)}

        # Fill multichannel dictionary
        # subj_id is the filename without modality suffix and extension
        if multichannel:
            idx = idx_dict[df_sub['suffix'].values[0]]
            subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '', subject)
            entry = multichannel_subjects[subj_id]
            entry["absolute_paths"][idx] = df_sub['path'].values[0]
            entry["deriv_path"] = target_filename
            entry["metadata"][idx] = metadata
            if roi_filename:
                entry["roi_filename"] = roi_filename
        else:
            self.filename_pairs.append(([df_sub['path'].values[0]],
                                        target_filename, roi_filename, [metadata]))

    if multichannel:
        # Only keep the subjects for which every requested contrast was found.
        for subject in multichannel_subjects.values():
            if None not in subject["absolute_paths"]:
                self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
                                            subject["roi_filename"], subject["metadata"]))

    if not self.filename_pairs:
        raise Exception('No subjects were selected - check selection of parameters on config.json (e.g. center selected + target_suffix)')

    super().__init__(self.filename_pairs, slice_axis, cache, transform, slice_filter_fn, task, self.roi_params,
                     self.soft_gt)