def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None: index_expr = params['index_expr'] # type: expr.IndexExpression # Make sure all indexing is done with slices (Example: (16,) will be changed to (slice(16, 17, None),) which # is equivalent), otherwise the following steps will be wrong; . if any(isinstance(s, int) for s in index_expr.expression): index_expr.set_indexing([slice(s, s + 1) if isinstance(s, int) else s for s in index_expr.expression]) padded_indexing = np.asarray(index_expr.get_indexing()) + self.index_diffs padded_shape = tuple((padded_indexing[:, 1] - padded_indexing[:, 0]).tolist()) sub_indexing = padded_indexing.copy() sub_indexing[padded_indexing > 0] = 0 sub_indexing = -sub_indexing padded_indexing[padded_indexing < 0] = 0 # cannot slice outside the boundary padded_index_expr = expr.IndexExpression(padded_indexing.tolist()) padded_params = params.copy() padded_params['index_expr'] = padded_index_expr self.extractor.extract(reader, padded_params, extracted) categories = self.extractor.categories if hasattr(self.extractor, 'categories') else [self.extractor.category] for category in categories: data = extracted[category] full_pad_shape = padded_shape + data.shape[len(padded_shape):] pad_data = np.zeros(full_pad_shape, dtype=data.dtype) sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:sub_indexing.shape[0]] sub_index_expr = expr.IndexExpression(sub_indexing.tolist()) pad_data[sub_index_expr.expression] = data extracted[category] = pad_data
def on_subject(self, params: dict):
    """Writes the image meta-information (shape, origin, direction, spacing) of one subject."""
    subject_index = params['subject_index']
    properties = params['{}_properties'.format(self.category)]  # type: conv.ImageProperties

    # one fill per stored property, each addressed by the subject index
    for location, value in ((df.INFO_SHAPE, properties.size),
                            (df.INFO_ORIGIN, properties.origin),
                            (df.INFO_DIRECTION, properties.direction),
                            (df.INFO_SPACING, properties.spacing)):
        self.writer.fill(location, value, expr.IndexExpression(subject_index))
def on_subject(self, params: dict):
    """Writes the image meta-information (shape, origin, direction, spacing) of one subject."""
    subject_index = params[defs.KEY_SUBJECT_INDEX]
    properties = params[defs.KEY_PLACEHOLDER_PROPERTIES.format(self.category)]  # type: conv.ImageProperties

    # one fill per stored property, each addressed by the subject index
    for location, value in ((defs.LOC_IMGPROP_SHAPE, properties.size),
                            (defs.LOC_IMGPROP_ORIGIN, properties.origin),
                            (defs.LOC_IMGPROP_DIRECTION, properties.direction),
                            (defs.LOC_IMGPROP_SPACING, properties.spacing)):
        self.writer.fill(location, value, expr.IndexExpression(subject_index))
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    subject_idx_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

    def read_prop(location):
        # reads one stored meta-information entry of the subject
        return reader.read(location, subject_idx_expr).tolist()

    shape = read_prop(defs.LOC_IMGPROP_SHAPE)
    direction = read_prop(defs.LOC_IMGPROP_DIRECTION)
    spacing = read_prop(defs.LOC_IMGPROP_SPACING)
    origin = read_prop(defs.LOC_IMGPROP_ORIGIN)

    # todo: everything in memory?
    # a dummy uint8 image is built solely to carry the meta-information
    dummy_image = sitk.Image(shape, sitk.sitkUInt8)
    dummy_image.SetDirection(direction)
    dummy_image.SetSpacing(spacing)
    dummy_image.SetOrigin(origin)
    # todo number_of_components_per_pixel and pixel_id
    img_properties = conv.ImageProperties(dummy_image)

    if self.do_pickle:
        # pickle to prevent from problems since own class
        img_properties = pickle.dumps(img_properties)

    extracted[defs.KEY_PROPERTIES] = img_properties
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    index_expr = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression
    subject_idx_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

    # the file root is identical for all subjects, so read it only once
    if self.cached_file_root is None:
        self.cached_file_root = byte_converter.convert_to_string(reader.read(defs.LOC_FILES_ROOT))
    file_root = self.cached_file_root

    for category in self.categories:
        rel_file_paths = byte_converter.convert_to_string(
            reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_idx_expr))

        # load every file of the category and stack along a new last axis
        loaded = [self.load_fn(os.path.join(file_root, rel_path), category) for rel_path in rel_file_paths]
        data = np.stack(loaded, axis=-1)

        if not self.ignore_indexing:
            data = data[index_expr.expression]
        extracted[category] = data
def on_subject(self, params: dict):
    """Writes the subject identifier of the current subject to the dataset."""
    subject_index = params['subject_index']
    subject_name = params['subject_files'][subject_index].subject
    self.writer.fill(df.SUBJECT, subject_name, expr.IndexExpression(subject_index))
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    subject_index = params[defs.KEY_SUBJECT_INDEX]
    extracted[defs.KEY_SUBJECT_INDEX] = subject_index
    raw_subject = reader.read(defs.LOC_SUBJECT, expr.IndexExpression(subject_index))
    extracted[defs.KEY_SUBJECT] = byte_converter.convert_to_string(raw_subject)
def fill(self, entry: str, data, index: expr.IndexExpression = None):
    """Fills the dataset entry ``entry`` with ``data`` at the position given by ``index``.

    Args:
        entry (str): Path of the dataset entry to write to.
        data: The data to write.
        index (.IndexExpression): Position within the entry; the full entry if None.
    """
    # special string handling (in order not to use length limited strings)
    # FIX: compare dtypes by equality, not identity — h5py/numpy may hand back a
    # distinct (but equal) dtype instance, so 'is' could silently skip this branch
    if self.h5[entry].dtype == self.str_type:
        data = np.asarray(data, dtype=object)

    if index is None:
        index = expr.IndexExpression()

    self.h5[entry][index.expression] = data
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Extracts the stored image shape of a subject, optionally with first/last dimension exchanged."""
    shape = reader.read(df.INFO_SHAPE, expr.IndexExpression(params['subject_index']))
    if self.numpy_format:
        # exchange first and last dimension — presumably converts the stored
        # (e.g. ITK) order to numpy order; confirm against the writer side
        shape[0], shape[-1] = shape[-1], shape[0]
    extracted['shape'] = tuple(shape.tolist())
def zero_pad(data: np.ndarray, pad_shape, sub_indexing):
    """Embeds ``data`` into a zero-initialized array of shape ``pad_shape``.

    Args:
        data (np.ndarray): The data to pad.
        pad_shape: Shape of the zero-initialized output array.
        sub_indexing (np.ndarray): (ndim, 2) array whose first column holds the
            per-dimension offsets at which ``data`` is placed inside the output.

    Returns:
        np.ndarray: Array of shape ``pad_shape`` containing ``data`` at the given offsets.
    """
    pad_data = np.zeros(pad_shape, dtype=data.dtype)
    # FIX: work on a copy so the caller's sub_indexing array is not silently
    # mutated (the original wrote the stop column back into the argument)
    sub_indexing = np.array(sub_indexing, copy=True)
    sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:sub_indexing.shape[0]]
    sub_index_expr = expr.IndexExpression(sub_indexing.tolist())
    pad_data[sub_index_expr.expression] = data
    return pad_data
def __call__(self, shape) -> typing.List[expr.IndexExpression]:
    """Computes one index expression per grid position of the first image_dimension axes."""
    if self.shape == shape:
        return self.indexing

    self.shape = shape  # remember the shape to skip recomputation on equal shapes

    grid_shape = shape[0:self.image_dimension]
    grid = np.indices(grid_shape)
    # flatten the grid to a (num_positions, image_dimension) list of coordinates
    positions = grid.reshape((grid.shape[0], -1)).transpose()
    self.indexing = [expr.IndexExpression(position.tolist()) for position in positions]
    return self.indexing
def on_subject(self, params: dict):
    """Writes, per category and file index, the file path relative to ``self.file_root``."""
    subject_index = params['subject_index']
    subject_file = params['subject_files'][subject_index]  # type: subj.SubjectFile

    for category in params['categories']:
        file_names = subject_file.categories[category].entries.values()
        for file_index, file_name in enumerate(file_names):
            rel_path = os.path.relpath(file_name, self.file_root)
            location = expr.IndexExpression(indexing=[subject_index, file_index], axis=(0, 1))
            self.writer.fill(df.FILES_PLACEHOLDER.format(category), rel_path, location)
def on_subject(self, params: dict):
    """Derives the grade from the directory layout and writes it to 'meta/grades'."""
    subject_index = params['subject_index']
    subject_file = params['subject_files'][subject_index]  # type: subj.SubjectFile

    first_image_path = list(subject_file.categories['images'].entries.values())[0]
    # the grade is encoded as the grandparent directory name of the image file
    grade_dir = os.path.dirname(os.path.dirname(first_image_path))
    grade_str = os.path.basename(grade_dir)

    self.writer.fill('meta/grades', grade_str, expr.IndexExpression(subject_index))
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    shape = reader.read(defs.LOC_IMGPROP_SHAPE, expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX]))
    if self.numpy_format:
        # exchange first and last dimension — presumably converts the stored
        # (e.g. ITK) order to numpy order; confirm against the writer side
        shape[0], shape[-1] = shape[-1], shape[0]
    extracted[defs.KEY_SHAPE] = tuple(shape.tolist())
def on_subject(self, params: dict):
    """Writes the subject name and, per category, the data shape; reserves the shape entries once."""
    subject_files = params[defs.KEY_SUBJECT_FILES]
    subject_index = params[defs.KEY_SUBJECT_INDEX]
    categories = params[defs.KEY_CATEGORIES]

    # subject identifier/name
    self.writer.fill(defs.LOC_SUBJECT, subject_files[subject_index].subject,
                     expr.IndexExpression(subject_index))

    # reserve memory for shape, not in on_start since ndim is not known there
    if not self.reserved_for_shape:
        for category in categories:
            self.writer.reserve(defs.LOC_SHAPE_PLACEHOLDER.format(category),
                                (len(subject_files), params[category].ndim),
                                dtype=np.uint16)
        self.reserved_for_shape = True

    for category in categories:
        self.writer.fill(defs.LOC_SHAPE_PLACEHOLDER.format(category),
                         params[category].shape,
                         expr.IndexExpression(subject_index))
def __call__(self, shape) -> typing.List[pymia_expr.IndexExpression]:
    # Produces index expressions of fixed length self.no_points along the first
    # dimension, plus one trailing expression anchored at the end of the axis
    # (so the last points are always covered even when size is not a multiple).
    if self.shape == shape:
        return self.indexing
    self.shape = shape  # save for later comparison to avoid calculating indices if the shape is equal

    size = shape[0]
    if size < self.no_points:
        raise ValueError('Shape of size {} contains not {} point'.format(size, self.no_points))

    self.indexing = []
    # full windows: (0, n), (n, 2n), ... up to the last complete window
    for idx in range(0, self.no_points * (size // self.no_points), self.no_points):
        # do expression
        self.indexing.append(pymia_expr.IndexExpression((idx, idx + self.no_points)))
    # NOTE(review): when size is an exact multiple of no_points this trailing
    # window duplicates the last one — confirm whether callers rely on the count
    self.indexing.append(pymia_expr.IndexExpression((size - self.no_points, size)))  # will overlap with last added
    return self.indexing
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    # Pad-aware extraction reading directly from the per-subject data entries:
    # enlarges the requested index range by self.index_diffs and zero-pads
    # wherever the enlarged range fell outside the dataset boundary.
    if self.entry_base_names is None:
        # cache the per-subject entry base names (last path component) once
        entries = reader.get_subject_entries()
        self.entry_base_names = [entry.rsplit('/', maxsplit=1)[1] for entry in entries]

    subject_index = params['subject_index']
    index_expr = params['index_expr']  # type: expr.IndexExpression

    # each row of padded_indexing is a (start, stop) pair shifted by index_diffs
    padded_indexing = np.asarray(index_expr.get_indexing()) + self.index_diffs
    padded_shape = tuple((padded_indexing[:, 1] - padded_indexing[:, 0]).tolist())

    # sub_indexing holds the (positive) front offsets where the padded range went
    # negative, i.e. where zero-padding must be inserted before the data
    sub_indexing = padded_indexing.copy()
    sub_indexing[padded_indexing > 0] = 0
    sub_indexing = -sub_indexing

    padded_indexing[padded_indexing < 0] = 0  # cannot slice outside the boundary
    padded_index_expr = expr.IndexExpression(padded_indexing.tolist())

    base_name = self.entry_base_names[subject_index]
    for category in self.categories:
        data = reader.read('{}/{}'.format(df.DATA_PLACEHOLDER.format(category), base_name), padded_index_expr)
        # append the non-indexed (e.g. channel) dimensions to the padded shape
        full_pad_shape = padded_shape + data.shape[len(padded_shape):]
        pad_data = np.zeros(full_pad_shape, dtype=data.dtype)
        # place the actually extracted data at its offset inside the zero block
        sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:sub_indexing.shape[0]]
        sub_index_expr = expr.IndexExpression(sub_indexing.tolist())
        pad_data[sub_index_expr.expression] = data
        extracted[category] = pad_data
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Extracts the file root and, per category, the file paths of a subject."""
    subject_idx_expr = expr.IndexExpression(params['subject_index'])

    # serve the file root from the cache when caching is enabled and populated
    if self.cache and self.cached_file_root is not None:
        file_root = self.cached_file_root
    else:
        file_root = reader.read(df.FILES_ROOT)
        self.cached_file_root = file_root

    extracted['file_root'] = file_root
    for category in self.categories:
        files = reader.read(df.FILES_PLACEHOLDER.format(category), subject_idx_expr)
        extracted['{}_files'.format(category)] = files
def on_subject(self, params: dict):
    """see :meth:`.Callback.on_subject`."""
    subject_index = params[defs.KEY_SUBJECT_INDEX]
    subject_file = params[defs.KEY_SUBJECT_FILES][subject_index]  # type: subj.SubjectFile

    for category in params[defs.KEY_CATEGORIES]:
        file_names = subject_file.categories[category].entries.values()
        for file_index, file_name in enumerate(file_names):
            # store the path relative to the common file root
            rel_path = os.path.relpath(file_name, self.file_root)
            location = expr.IndexExpression(indexing=[subject_index, file_index], axis=(0, 1))
            self.writer.fill(defs.LOC_FILES_PLACEHOLDER.format(category), rel_path, location)
def __call__(self, shape) -> typing.List[expr.IndexExpression]:
    """Computes non-overlapping patch index expressions tiling the first image_dimension axes."""
    if shape == self.prev_shape:
        return self.prev_indexing

    grid_shape = shape[:self.image_dimension]
    # number of patches per dimension; floor drops incomplete border patches,
    # ceil keeps them (their upper bound may then exceed the image extent)
    counts = np.divide(grid_shape, self.patch_shape)
    counts = (np.floor(counts) if self.ignore_incomplete else np.ceil(counts)).astype('int')

    patch_indices = np.indices(counts).reshape(counts.size, -1).T
    # build (start, stop) pairs per dimension and scale by the patch shape
    ranges = np.stack([patch_indices, patch_indices + 1], axis=-1)
    ranges *= np.asarray(self.patch_shape)[np.newaxis, :, np.newaxis]

    result = [expr.IndexExpression(patch_range.tolist()) for patch_range in ranges]
    self.prev_indexing = result
    self.prev_shape = shape
    return result
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    subject_idx_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

    # serve the file root from the cache when caching is enabled and populated
    if self.cache and self.cached_file_root is not None:
        file_root = self.cached_file_root
    else:
        file_root = reader.read(defs.LOC_FILES_ROOT)
        self.cached_file_root = file_root

    extracted[defs.KEY_FILE_ROOT] = file_root
    for category in self.categories:
        files = reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_idx_expr)
        extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = files
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """see :meth:`.Extractor.extract`"""
    index_expr = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression
    # Make sure all indexing is done with slices (Example: (16,) will be changed to (slice(16, 17, None),) which
    # is equivalent), otherwise the following steps will be wrong; .
    # NOTE(review): set_indexing mutates the expression shared via params — confirm
    # callers do not rely on the original integer indexing afterwards
    if any(isinstance(s, int) for s in index_expr.expression):
        index_expr.set_indexing([slice(s, s + 1) if isinstance(s, int) else s for s in index_expr.expression])

    # each row of padded_indexing is a (start, stop) pair shifted by index_diffs
    padded_indexing = np.asarray(index_expr.get_indexing()) + self.index_diffs
    padded_shape = tuple((padded_indexing[:, 1] - padded_indexing[:, 0]).tolist())

    # sub_indexing holds the (positive) front offsets where the padded range went
    # negative, i.e. where padding must be inserted before the data
    sub_indexing = padded_indexing.copy()
    sub_indexing[padded_indexing > 0] = 0
    sub_indexing = -sub_indexing

    padded_indexing[padded_indexing < 0] = 0  # cannot slice outside the boundary in negative (but positive works!)
    padded_index_expr = expr.IndexExpression(padded_indexing.tolist())

    padded_params = params.copy()
    padded_params[defs.KEY_INDEX_EXPR] = padded_index_expr
    self.extractor.extract(reader, padded_params, extracted)

    categories = self.extractor.categories if hasattr(self.extractor, 'categories') else [self.extractor.category]

    for category in categories:
        data = extracted[category]
        # append the non-indexed (e.g. channel) dimensions to the padded shape
        full_pad_shape = padded_shape + data.shape[len(padded_shape):]
        if full_pad_shape != data.shape:
            # we could not fully extract the padded shape, use pad_fn to pad data
            extracted[category] = self.pad_fn(data, full_pad_shape, sub_indexing)
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Extracts the image meta-information of a subject as :class:`conv.ImageProperties`."""
    subject_idx_expr = expr.IndexExpression(params['subject_index'])

    def read_prop(location):
        # reads one stored meta-information entry of the subject
        return reader.read(location, subject_idx_expr).tolist()

    shape = read_prop(df.INFO_SHAPE)
    direction = read_prop(df.INFO_DIRECTION)
    spacing = read_prop(df.INFO_SPACING)
    origin = read_prop(df.INFO_ORIGIN)

    # todo: everything in memory?
    # a dummy uint8 image is built solely to carry the meta-information
    dummy_image = sitk.Image(shape, sitk.sitkUInt8)
    dummy_image.SetDirection(direction)
    dummy_image.SetSpacing(spacing)
    dummy_image.SetOrigin(origin)
    # todo number_of_components_per_pixel and pixel_id
    img_properties = conv.ImageProperties(dummy_image)

    if self.do_pickle:
        # pickle to prevent from problems since own class
        img_properties = pickle.dumps(img_properties)

    extracted['properties'] = img_properties
def on_sample_ensure_index_expression_validity(params: dict):
    """Ensures the validity of index expressions and the data for array slicing.

    This callback can be used in case :py:class:`PatchWiseIndexing` is used with argument
    `ignore_incomplete=True`. Note that currently only the upper boundaries are checked
    as it is implemented in the :py:class:`PatchWiseIndexing`.

    Args:
        params (dict): Parameters containing the prediction data, the batch, the batch
            index, and the per-subject prediction arrays.

    Returns:
        tuple: The (possibly cropped) data and a valid index expression.
    """
    key = '__prediction'
    data = params[key]
    # FIX: the batch index previously shared the name 'idx' with the loop variable
    # below, which shadowed it — renamed for clarity
    sample_idx = params['batch_idx']
    batch = params['batch']
    predictions = params['predictions']

    subject_index = batch['subject_index'][sample_idx]
    index_expr = batch['index_expr'][sample_idx]
    if isinstance(index_expr, bytes):
        index_expr = pickle.loads(index_expr)

    valid_index_expr = []
    is_valid = True
    for dim, slicer in enumerate(index_expr.expression):
        # FIX: isinstance instead of a type identity check (idiomatic; slice cannot
        # be subclassed, so the behavior is unchanged)
        if isinstance(slicer, slice):
            bound = predictions[subject_index][key].shape[dim]
            if slicer.stop > bound:
                # clip the patch to the subject's prediction extent
                valid_stop = bound
                is_valid = False
            else:
                valid_stop = slicer.stop
            valid_index_expr.append([slicer.start, valid_stop])
        else:
            break

    if is_valid:
        return data, index_expr

    valid_index_expr = expr.IndexExpression(valid_index_expr)
    # NOTE(review): crops dimensions 1 and 2 only — assumes 2-D spatial patches
    # with a trailing channel dimension; confirm for other patch shapes
    valid_data = data[0:valid_index_expr.expression[1].stop - valid_index_expr.expression[1].start,
                      0:valid_index_expr.expression[2].stop - valid_index_expr.expression[2].start, :]
    return valid_data, valid_index_expr
def direct_extract(self, extractor: extr.Extractor, subject_index: int,
                   index_expr: expr.IndexExpression = None, transform: tfm.Transform = None):
    """Extract data directly, bypassing the extractors and transforms of the instance.

    The purpose of this method is to enable extraction of data that is not required for every
    data chunk (e.g., slice, patch, sub-volume) but only from time to time e.g., image shape, origin.

    Args:
        extractor (.Extractor): Extractor or multiple extractors (:class:`.ComposeExtractor`)
            extracting the desired data from the dataset.
        subject_index (int): Index of the subject to be extracted.
        index_expr (.IndexExpression): The indexing to extract a chunk of data only.
            Not required if only image related information (e.g., image shape, origin)
            should be extracted. Needed when desiring a chunk of data (e.g., slice, patch, sub-volume).
        transform (.Transform): Transformation(s) to be applied to the extracted data.

    Returns:
        dict: Extracted data in a dictionary. Keys are defined by the used :class:`.Extractor`.
    """
    if index_expr is None:
        index_expr = expr.IndexExpression()

    params = {defs.KEY_SUBJECT_INDEX: subject_index, defs.KEY_INDEX_EXPR: index_expr}
    extracted = {}

    if self.init_reader_once:
        # keep one reader open for the lifetime of the instance
        if self.reader is None:
            self.reader = rd.get_reader(self.dataset_path, direct_open=True)
        extractor.extract(self.reader, params, extracted)
    else:
        # open (and close) a reader for this single extraction
        with rd.get_reader(self.dataset_path) as reader:
            extractor.extract(reader, params, extracted)

    if transform:
        extracted = transform(extracted)

    return extracted
def direct_extract(self, extractor: extr.Extractor, subject_index: int,
                   index_expr: expr.IndexExpression = None, transform: tfm.Transform = None):
    """Extracts data for one subject directly with the given extractor, bypassing the instance's own
    extractors and transforms, and optionally applies ``transform`` to the result."""
    if index_expr is None:
        index_expr = expr.IndexExpression()

    params = {'subject_index': subject_index, 'index_expr': index_expr}
    extracted = {}

    if self.init_reader_once:
        # keep one reader open for the lifetime of the instance
        if self.reader is None:
            self.reader = rd.get_reader(self.dataset_path, direct_open=True)
        extractor.extract(self.reader, params, extracted)
    else:
        # open (and close) a reader for this single extraction
        with rd.get_reader(self.dataset_path) as reader:
            extractor.extract(reader, params, extracted)

    if transform:
        extracted = transform(extracted)

    return extracted
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Extracts the subject index and the stored subject identifier."""
    subject_index = params['subject_index']
    extracted['subject_index'] = subject_index
    extracted['subject'] = reader.read(df.SUBJECT, expr.IndexExpression(subject_index))
def __call__(self, shape) -> typing.List[expr.IndexExpression]:
    """Computes one index expression per slice position along each configured slice axis."""
    return [expr.IndexExpression(slice_idx, axis)
            for axis in self.slice_axis
            for slice_idx in range(shape[axis])]
def __call__(self, shape) -> typing.List[expr.IndexExpression]:
    """Returns a single empty index expression, i.e. the entire data is selected (no slicing)."""
    full_selection = expr.IndexExpression()
    return [full_selection]
def get_shape(self, subject_index: int) -> list:
    """see :meth:`.Reader.get_shape`"""
    location = defs.LOC_SHAPE_PLACEHOLDER.format(self.category)
    shape = self.read(location, expr.IndexExpression(subject_index))
    return shape.tolist()