def compute_max_patient_shape(self):
    """
    Computes shape statistics (min, max, and mean) over every volume in the
    dataset and returns ONLY the maximum patient shape.

    Returns:
        max_patient_shape: tuple representing the element-wise maximum
            patient shape; the last axis is dropped when the stacked shape
            has more dimensions than ``self.ndim`` (presumably the channels
            axis — the original code treated it as such).
    """
    print("Computing shape statistics...")
    # iterating through entire dataset, collecting each volume's shape
    shape_list = []
    for fname in self.list_IDs:  # renamed from `id` to avoid shadowing the builtin
        x_train = load_data(os.path.join(self.data_dirs[0], fname))
        shape_list.append(np.asarray(x_train.shape))
    shapes = np.stack(shape_list)
    # computing stats
    max_patient_shape = tuple(np.max(shapes, axis=0))
    mean_patient_shape = tuple(np.mean(shapes, axis=0))
    min_patient_shape = tuple(np.min(shapes, axis=0))
    print("Max Patient Shape: ", max_patient_shape,
          "\nMean Patient Shape: ", mean_patient_shape,
          "\nMin Patient Shape: ", min_patient_shape)
    # Explicit check instead of assert/except AssertionError: `assert` is
    # stripped under `python -O`, which would silently skip this fix-up.
    if len(max_patient_shape) != self.ndim:
        print(
            "Excluding the channels dimension (axis = -1) for the maximum patient shape."
        )
        max_patient_shape = max_patient_shape[:-1]
    return max_patient_shape
def data_gen(self, list_IDs_temp, pos_sample):
    """
    Generates a batch of 2D slices extracted from padded 3D volumes.

    Args:
        list_IDs_temp: batched list of filename IDs; usually produced by
            __getitem__
        pos_sample: boolean; when True, sample a slice containing a
            positive (foreground) label, otherwise a random slice
    Returns:
        tuple of two numpy arrays: (x, y), stacked along the batch axis
    """
    images_x = []
    images_y = []
    for fname in list_IDs_temp:  # renamed from `id` to avoid shadowing the builtin
        # loads data as a numpy arr and then changes the type to float32
        x_train = load_data(os.path.join(self.data_dirs[0], fname))
        y_train = load_data(os.path.join(self.data_dirs[1], fname))
        if x_train.shape[-1] != self.n_channels:
            # Adds channel in case there is no channel dimension
            x_train = add_channel(x_train)
        if y_train.shape[-1] != self.n_channels:
            # Adds channel in case there is no channel dimension
            y_train = add_channel(y_train)
        if self.n_classes > 1:
            # no point to run this when binary (foreground/background)
            y_train = get_multi_class_labels(y_train,
                                             n_labels=self.n_classes,
                                             remove_background=True)
        # Padding to the max patient shape (so the arrays can be stacked)
        if self.dynamic_padding_z:
            # don't pad the slice dimension (it usually varies per image)
            pad_shape = (x_train.shape[0], ) + self.max_patient_shape
        else:  # was `elif not self.dynamic_padding_z` — the two are exhaustive
            pad_shape = self.max_patient_shape
        x_train = reshape(x_train, x_train.min(),
                          pad_shape + (self.n_channels, ))
        y_train = reshape(y_train, 0, pad_shape + (self.n_classes, ))
        assert sanity_checks(x_train, y_train)
        # extracting slice:
        if pos_sample:
            slice_idx = get_positive_idx(y_train)[0]
        else:  # was `elif not pos_sample` — redundant double-negation
            slice_idx = get_random_slice_idx(x_train)
        images_x.append(x_train[slice_idx])
        images_y.append(y_train[slice_idx])
    input_data, seg_masks = np.stack(images_x), np.stack(images_y)
    return (input_data, seg_masks)
def test_load_data_nii_gz(self):
    """
    Tests that load_data can actually load all its supported data formats
    * 'nii': data is a .nii or .nii.gz file
    """
    fnames_niigz = os.listdir(self.train_path)
    load_niigz = load_data(os.path.join(self.train_path, fnames_niigz[0]))
    # Assert on the result instead of the vacuous assertTrue(True):
    # the test still primarily checks that load_data does not raise,
    # but now also verifies it returned something.
    self.assertIsNotNone(load_niigz)
def test_load_data_npy(self):
    """
    Tests that load_data can actually load all its supported data formats
    * 'npy': data is a .npy file
    """
    fnames_npy = os.listdir(self.train_npy_path)
    load_npy = load_data(os.path.join(self.train_npy_path, fnames_npy[0]))
    # Assert on the result instead of the vacuous assertTrue(True):
    # the test still primarily checks that load_data does not raise,
    # but now also verifies it returned something.
    self.assertIsNotNone(load_npy)
def data_gen(self, list_IDs_temp):
    """
    Generates a batch of full (padded) volumes.

    Args:
        list_IDs_temp: batched list of filename IDs; usually produced by
            __getitem__
    Returns:
        tuple of two numpy arrays: (x, y), stacked along the batch axis
    """
    images_x = []
    images_y = []
    for fname in list_IDs_temp:  # renamed from `id` to avoid shadowing the builtin
        # loads data as a numpy arr and then changes the type to float32
        x_train = load_data(os.path.join(self.data_dirs[0], fname))
        y_train = load_data(os.path.join(self.data_dirs[1], fname))
        if x_train.shape[-1] != self.n_channels:
            # Adds channel in case there is no channel dimension
            x_train = add_channel(x_train)
            assert len(x_train.shape) == self.ndim + 1, "Input shape must be the \
            shape (x,y, n_channels) or (x, y, z, n_channels)"
        if y_train.shape[-1] != self.n_channels:
            # Adds channel in case there is no channel dimension
            y_train = add_channel(y_train)
            assert len(y_train.shape) == self.ndim + 1, "Input labels must be the \
            shape (x,y, n_channels) or (x, y, z, n_channels)"
        if self.n_classes > 1:
            # no point to run this when binary (foreground/background)
            y_train = get_multi_class_labels(y_train,
                                             n_labels=self.n_classes,
                                             remove_background=True)
        # Padding to the max patient shape (so the arrays can be stacked)
        x_train = reshape(x_train, x_train.min(),
                          self.max_patient_shape + (self.n_channels, ))
        y_train = reshape(y_train, 0,
                          self.max_patient_shape + (self.n_classes, ))
        assert sanity_checks(x_train, y_train)
        images_x.append(x_train)
        images_y.append(y_train)
    input_data, seg_masks = np.stack(images_x), np.stack(images_y)
    return (input_data, seg_masks)
def compute_pad_value(input_dir, list_IDs):
    """
    Computes the minimum pixel intensity across the entire dataset, for use
    as the padding value (when it is not 0).

    Args:
        input_dir: directory to input images
        list_IDs: list of filenames
    Returns:
        The global minimum intensity over all listed images.
    """
    print("Computing min/pad value...")
    # single pass over the dataset, keeping only each volume's minimum
    per_image_mins = [
        load_data(os.path.join(input_dir, fname)).min() for fname in list_IDs
    ]
    return np.asarray(per_image_mins).min()