Exemplo n.º 1
0
    def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
        """Run the wrapped extractor on an enlarged region and zero-fill what lies before index 0.

        The index expression found in ``params`` is widened by ``self.index_diffs``. The wrapped
        ``self.extractor`` is called with the widened region clipped to non-negative indices, and every
        extracted category is afterwards copied into a zero-initialised array of the full widened shape.

        Args:
            reader (rd.Reader): Reader handed through to the wrapped extractor.
            params (dict): Extraction parameters; must contain ``'index_expr'``. The stored index
                expression is modified in place when it contains integer entries.
            extracted (dict): Dictionary the results are written to, keyed by category.
        """
        index_expr = params['index_expr']  # type: expr.IndexExpression

        # Integer entries are rewritten as one-element slices ((16,) becomes (slice(16, 17, None),), which is
        # equivalent); the start/stop arithmetic below only works on slices.
        entries = index_expr.expression
        if any(isinstance(entry, int) for entry in entries):
            as_slices = [slice(entry, entry + 1) if isinstance(entry, int) else entry for entry in entries]
            index_expr.set_indexing(as_slices)

        padded_indexing = np.asarray(index_expr.get_indexing()) + self.index_diffs
        starts, stops = padded_indexing[:, 0], padded_indexing[:, 1]
        padded_shape = tuple((stops - starts).tolist())

        # amount by which each bound falls below zero (0 for non-negative bounds)
        sub_indexing = -np.minimum(padded_indexing, 0)

        # cannot slice outside the boundary, so negative bounds are clipped to 0
        clipped_indexing = np.maximum(padded_indexing, 0)
        padded_index_expr = expr.IndexExpression(clipped_indexing.tolist())

        padded_params = params.copy()
        padded_params['index_expr'] = padded_index_expr
        self.extractor.extract(reader, padded_params, extracted)

        # the wrapped extractor exposes either several categories or a single one
        if hasattr(self.extractor, 'categories'):
            categories = self.extractor.categories
        else:
            categories = [self.extractor.category]

        indexed_axes = sub_indexing.shape[0]
        for category in categories:
            data = extracted[category]

            # axes beyond the indexed ones keep the size of the extracted data
            target_shape = padded_shape + data.shape[len(padded_shape):]
            padded = np.zeros(target_shape, dtype=data.dtype)

            # the extracted data is placed at the offset that was cut off below zero
            sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:indexed_axes]
            target_region = expr.IndexExpression(sub_indexing.tolist()).expression

            padded[target_region] = data
            extracted[category] = padded
Exemplo n.º 2
0
    def on_subject(self, params: dict):
        """Write the image properties of the current subject into the dataset.

        Reads ``'subject_index'`` and ``'<category>_properties'`` from ``params`` and stores size, origin,
        direction and spacing of the properties at the subject's index.

        Args:
            params (dict): Callback parameters holding the subject index and the image properties.
        """
        subject_index = params['subject_index']
        properties = params['{}_properties'.format(self.category)]  # type: conv.ImageProperties

        # (dataset entry, attribute of the image properties), in the order they are written
        targets = (
            (df.INFO_SHAPE, 'size'),
            (df.INFO_ORIGIN, 'origin'),
            (df.INFO_DIRECTION, 'direction'),
            (df.INFO_SPACING, 'spacing'),
        )
        for entry, attribute in targets:
            self.writer.fill(entry, getattr(properties, attribute), expr.IndexExpression(subject_index))
Exemplo n.º 3
0
    def on_subject(self, params: dict):
        """Write the image properties of the current subject into the dataset.

        Reads the subject index and the category's image properties from ``params`` and stores size,
        origin, direction and spacing of the properties at the subject's index.

        Args:
            params (dict): Callback parameters holding the subject index and the image properties.
        """
        subject_index = params[defs.KEY_SUBJECT_INDEX]
        properties_key = defs.KEY_PLACEHOLDER_PROPERTIES.format(self.category)
        properties = params[properties_key]  # type: conv.ImageProperties

        # (dataset location, attribute of the image properties), in the order they are written
        targets = (
            (defs.LOC_IMGPROP_SHAPE, 'size'),
            (defs.LOC_IMGPROP_ORIGIN, 'origin'),
            (defs.LOC_IMGPROP_DIRECTION, 'direction'),
            (defs.LOC_IMGPROP_SPACING, 'spacing'),
        )
        for location, attribute in targets:
            self.writer.fill(location, getattr(properties, attribute),
                             expr.IndexExpression(subject_index))
Exemplo n.º 4
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """see :meth:`.Extractor.extract`

        Reads shape, direction, spacing and origin of the subject, builds an image carrying this geometry
        and stores its :class:`conv.ImageProperties` (pickled if ``self.do_pickle`` is set) under
        ``defs.KEY_PROPERTIES``.
        """
        subject_index_expr = expr.IndexExpression(
            params[defs.KEY_SUBJECT_INDEX])

        def read_as_list(location):
            # every image property is read at the subject's index and converted to a plain list
            return reader.read(location, subject_index_expr).tolist()

        shape = read_as_list(defs.LOC_IMGPROP_SHAPE)
        direction = read_as_list(defs.LOC_IMGPROP_DIRECTION)
        spacing = read_as_list(defs.LOC_IMGPROP_SPACING)
        origin = read_as_list(defs.LOC_IMGPROP_ORIGIN)

        # todo: everything in memory?
        # the image only serves as carrier of the geometry information
        image = sitk.Image(shape, sitk.sitkUInt8)
        image.SetDirection(direction)
        image.SetSpacing(spacing)
        image.SetOrigin(origin)
        # todo number_of_components_per_pixel and pixel_id

        img_properties = conv.ImageProperties(image)
        # pickle to prevent from problems since own class
        extracted[defs.KEY_PROPERTIES] = pickle.dumps(img_properties) if self.do_pickle else img_properties
Exemplo n.º 5
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """see :meth:`.Extractor.extract`

        For every category, the relative file paths of the subject are read from the dataset, each file is
        loaded with ``self.load_fn`` and the loaded arrays are stacked along a new last axis. Unless
        ``self.ignore_indexing`` is set, the stacked data is indexed with the index expression in ``params``.
        """
        index_expr = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression
        subject_index_expr = expr.IndexExpression(
            params[defs.KEY_SUBJECT_INDEX])

        # the file root is read once and reused for all later calls
        if self.cached_file_root is None:
            raw_root = reader.read(defs.LOC_FILES_ROOT)
            self.cached_file_root = byte_converter.convert_to_string(raw_root)
        file_root = self.cached_file_root

        for category in self.categories:
            raw_paths = reader.read(defs.LOC_FILES_PLACEHOLDER.format(category),
                                    subject_index_expr)
            rel_file_paths = byte_converter.convert_to_string(raw_paths)

            loaded = [self.load_fn(os.path.join(file_root, rel_file_path), category)
                      for rel_file_path in rel_file_paths]
            stacked = np.stack(loaded, axis=-1)

            extracted[category] = stacked if self.ignore_indexing else stacked[index_expr.expression]
Exemplo n.º 6
0
    def on_subject(self, params: dict):
        """Write the ``subject`` attribute of the current subject file into the dataset.

        Args:
            params (dict): Callback parameters; ``'subject_files'`` and ``'subject_index'`` are read.
        """
        files, current_index = params['subject_files'], params['subject_index']

        subject_name = files[current_index].subject
        target_index = expr.IndexExpression(current_index)
        self.writer.fill(df.SUBJECT, subject_name, target_index)
Exemplo n.º 7
0
 def extract(self, reader: rd.Reader, params: dict,
             extracted: dict) -> None:
     """see :meth:`.Extractor.extract`

     Copies the subject index from ``params`` into ``extracted`` and adds the subject entry read from
     the dataset, converted with ``byte_converter.convert_to_string``.
     """
     subject_index = params[defs.KEY_SUBJECT_INDEX]
     extracted[defs.KEY_SUBJECT_INDEX] = subject_index

     raw_subject = reader.read(defs.LOC_SUBJECT,
                               expr.IndexExpression(subject_index))
     extracted[defs.KEY_SUBJECT] = byte_converter.convert_to_string(raw_subject)
Exemplo n.º 8
0
    def fill(self, entry: str, data, index: expr.IndexExpression=None):
        """Write ``data`` into the dataset entry ``entry`` at the position given by ``index``.

        Args:
            entry (str): Name of the entry in ``self.h5`` to write to.
            data: The data to write.
            index (expr.IndexExpression): Position to write to. Defaults to a default-constructed
                :class:`expr.IndexExpression`.
        """
        target = self.h5[entry]

        # special string handling (in order not to use length limited strings)
        # NOTE(review): this is an identity check against self.str_type, not an equality check — confirm
        # the entry's dtype is really the very same object.
        if target.dtype is self.str_type:
            data = np.asarray(data, dtype=object)

        position = expr.IndexExpression() if index is None else index
        target[position.expression] = data
Exemplo n.º 9
0
    def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
        """Store the shape of the subject's image as a tuple under ``'shape'``.

        Args:
            reader (rd.Reader): Reader used to read the shape entry.
            params (dict): Extraction parameters; ``'subject_index'`` is read.
            extracted (dict): Dictionary the shape is written to.
        """
        stored_shape = reader.read(df.INFO_SHAPE, expr.IndexExpression(params['subject_index']))

        if self.numpy_format:
            # exchange the first and the last entry
            stored_shape[0], stored_shape[-1] = stored_shape[-1], stored_shape[0]

        extracted['shape'] = tuple(stored_shape.tolist())
Exemplo n.º 10
0
    def zero_pad(data: np.ndarray, pad_shape, sub_indexing):
        """Embed ``data`` into a zero-initialised array of shape ``pad_shape``.

        Args:
            data (np.ndarray): The data to embed.
            pad_shape: Shape of the returned array.
            sub_indexing: Array with one row per indexed axis. Column 0 holds the start offset of ``data``
                in the result; column 1 is overwritten in place with the matching stop offset.

        Returns:
            np.ndarray: Array of shape ``pad_shape`` and dtype ``data.dtype`` containing ``data``.
        """
        padded = np.zeros(pad_shape, dtype=data.dtype)

        # stop = start + extent of the data along each indexed axis
        indexed_axes = sub_indexing.shape[0]
        sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:indexed_axes]

        target_region = expr.IndexExpression(sub_indexing.tolist()).expression
        padded[target_region] = data
        return padded
Exemplo n.º 11
0
    def __call__(self, shape) -> typing.List[expr.IndexExpression]:
        """Return one index expression per coordinate of the first ``self.image_dimension`` axes of ``shape``.

        The result is cached: a call with a shape equal to the one processed last returns the stored list
        without recomputation.

        Args:
            shape: Shape to index; only ``shape[0:self.image_dimension]`` is used.

        Returns:
            list: The index expressions, one per coordinate, in the order produced by :func:`numpy.indices`.
        """
        if self.shape == shape:
            return self.indexing

        shape_without_voxel = shape[0:self.image_dimension]
        indices = np.indices(shape_without_voxel)
        # flatten the coordinate grid to (number of axes, number of coordinates), then one coordinate per row
        indices = indices.reshape((indices.shape[0], np.prod(indices.shape[1:])))
        indices = indices.transpose()
        indexing = [expr.IndexExpression(idx.tolist()) for idx in indices]

        # Update the cache only after the indexing was built successfully. Storing the shape first would,
        # on a failure above, leave a cache key whose indexing was never computed, so that the next call
        # with the same shape would silently return the indexing of an older shape.
        self.indexing = indexing
        self.shape = shape  # save for later comparison to avoid calculating indices if the shape is equal
        return self.indexing
Exemplo n.º 12
0
    def on_subject(self, params: dict):
        """Write the file paths of the current subject, relative to ``self.file_root``, into the dataset.

        For every category in ``params['categories']`` each file of the subject is written to position
        ``[subject_index, file position]`` of the category's files entry.

        Args:
            params (dict): Callback parameters; ``'subject_index'``, ``'subject_files'`` and
                ``'categories'`` are read.
        """
        subject_index = params['subject_index']
        subject_files = params['subject_files']

        subject_file = subject_files[subject_index]  # type: subj.SubjectFile

        for category in params['categories']:
            entry = df.FILES_PLACEHOLDER.format(category)
            file_names = subject_file.categories[category].entries.values()

            for file_position, file_name in enumerate(file_names):
                relative_path = os.path.relpath(file_name, self.file_root)
                target_index = expr.IndexExpression(indexing=[subject_index, file_position], axis=(0, 1))
                self.writer.fill(entry, relative_path, target_index)
    def on_subject(self, params: dict):
        """Store the grade of the current subject under ``'meta/grades'``.

        The grade is the name of the directory two levels above the first file of the subject's
        ``'images'`` category.

        Args:
            params (dict): Callback parameters; ``'subject_files'`` and ``'subject_index'`` are read.
        """
        subject_files = params['subject_files']
        subject_index = params['subject_index']

        subject_file = subject_files[subject_index]  # type: subj.SubjectFile
        image_paths = list(subject_file.categories['images'].entries.values())
        first_image_path = image_paths[0]

        # <grade directory>/<intermediate directory>/<image file>
        grade_directory = os.path.dirname(os.path.dirname(first_image_path))
        grade_str = os.path.basename(grade_directory)

        target_index = expr.IndexExpression(subject_index)
        self.writer.fill('meta/grades', grade_str, target_index)
Exemplo n.º 14
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """see :meth:`.Extractor.extract`

        Stores the shape of the subject's image as a tuple under ``defs.KEY_SHAPE``. With
        ``self.numpy_format`` set, the first and the last entry of the shape are exchanged.
        """
        subject_index = params[defs.KEY_SUBJECT_INDEX]
        stored_shape = reader.read(defs.LOC_IMGPROP_SHAPE,
                                   expr.IndexExpression(subject_index))

        if self.numpy_format:
            stored_shape[0], stored_shape[-1] = stored_shape[-1], stored_shape[0]

        extracted[defs.KEY_SHAPE] = tuple(stored_shape.tolist())
Exemplo n.º 15
0
    def on_subject(self, params: dict):
        """Write the subject's name and the shape of each of its categories into the dataset.

        On the first call, space for the shapes of all subjects is reserved per category.

        Args:
            params (dict): Callback parameters; the subject files, the subject index, the categories and
                one entry per category (providing ``ndim`` and ``shape``) are read.
        """
        subject_files = params[defs.KEY_SUBJECT_FILES]
        subject_index = params[defs.KEY_SUBJECT_INDEX]

        # subject identifier/name
        subject_name = subject_files[subject_index].subject
        self.writer.fill(defs.LOC_SUBJECT, subject_name,
                         expr.IndexExpression(subject_index))

        # reserve memory for shape, not in on_start since ndim not known
        if not self.reserved_for_shape:
            for category in params[defs.KEY_CATEGORIES]:
                # one row per subject, one column per dimension of the category's data
                # NOTE(review): uint16 limits a stored shape entry to 65535 — confirm this is sufficient.
                reserved_shape = (len(subject_files), params[category].ndim)
                self.writer.reserve(
                    defs.LOC_SHAPE_PLACEHOLDER.format(category),
                    reserved_shape,
                    dtype=np.uint16)
            self.reserved_for_shape = True

        for category in params[defs.KEY_CATEGORIES]:
            category_shape = params[category].shape
            shape_entry = defs.LOC_SHAPE_PLACEHOLDER.format(category)
            self.writer.fill(shape_entry, category_shape,
                             expr.IndexExpression(subject_index))
    def __call__(self, shape) -> typing.List[pymia_expr.IndexExpression]:
        """Split the first axis of ``shape`` into chunks of ``self.no_points`` entries.

        The chunks ``(0, no_points)``, ``(no_points, 2 * no_points)``, ... are followed by one final chunk
        ``(size - no_points, size)`` that covers the end of the axis. The result is cached: a call with a
        shape equal to the one processed last returns the stored list.

        Args:
            shape: Shape to index; only ``shape[0]`` is used.

        Returns:
            list: The index expressions of all chunks.

        Raises:
            ValueError: If ``shape[0]`` is smaller than ``self.no_points``.
        """
        if self.shape == shape:
            return self.indexing

        size = shape[0]
        if size < self.no_points:
            raise ValueError('Shape of size {} contains not {} point'.format(
                size, self.no_points))

        covered_by_full_chunks = self.no_points * (size // self.no_points)
        indexing = [
            pymia_expr.IndexExpression((start, start + self.no_points))
            for start in range(0, covered_by_full_chunks, self.no_points)
        ]
        # will overlap with last added
        # NOTE(review): if size is an exact multiple of no_points this repeats the last chunk — confirm
        # that the duplicate is intended.
        indexing.append(
            pymia_expr.IndexExpression((size - self.no_points, size)))

        # Update the cache only after validation and construction succeeded. Storing the shape before the
        # size check made a repeated call with the same too-small shape return the indexing of the
        # previously processed shape instead of raising again.
        self.indexing = indexing
        self.shape = shape  # save for later comparison to avoid calculating indices if the shape is equal
        return self.indexing
Exemplo n.º 17
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Read an enlarged region of every category and zero-fill what lies before index 0.

        The index expression in ``params`` is widened by ``self.index_diffs``. The widened region, clipped
        to non-negative indices, is read from the subject's data entry of each category and copied into a
        zero-initialised array of the full widened shape.

        Args:
            reader (rd.Reader): Reader used to list the subject entries and to read the data.
            params (dict): Extraction parameters; ``'subject_index'`` and ``'index_expr'`` are read.
            extracted (dict): Dictionary the padded data is written to, keyed by category.
        """
        # the part after the last '/' of every subject entry is determined once and reused
        if self.entry_base_names is None:
            self.entry_base_names = [
                entry.rsplit('/', maxsplit=1)[1]
                for entry in reader.get_subject_entries()
            ]

        subject_index = params['subject_index']
        index_expr = params['index_expr']  # type: expr.IndexExpression

        padded_indexing = np.asarray(
            index_expr.get_indexing()) + self.index_diffs
        starts, stops = padded_indexing[:, 0], padded_indexing[:, 1]
        padded_shape = tuple((stops - starts).tolist())

        # amount by which each bound falls below zero (0 for non-negative bounds)
        sub_indexing = -np.minimum(padded_indexing, 0)

        # cannot slice outside the boundary, so negative bounds are clipped to 0
        clipped_indexing = np.maximum(padded_indexing, 0)
        padded_index_expr = expr.IndexExpression(clipped_indexing.tolist())

        base_name = self.entry_base_names[subject_index]
        indexed_axes = sub_indexing.shape[0]
        for category in self.categories:
            entry = '{}/{}'.format(df.DATA_PLACEHOLDER.format(category),
                                   base_name)
            data = reader.read(entry, padded_index_expr)

            # axes beyond the indexed ones keep the size of the read data
            target_shape = padded_shape + data.shape[len(padded_shape):]
            padded = np.zeros(target_shape, dtype=data.dtype)

            # the read data is placed at the offset that was cut off below zero
            sub_indexing[:, 1] = sub_indexing[:, 0] + data.shape[:indexed_axes]
            target_region = expr.IndexExpression(sub_indexing.tolist()).expression

            padded[target_region] = data
            extracted[category] = padded
Exemplo n.º 18
0
    def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
        """Store the file root and, per category, the subject's file entries in ``extracted``.

        The file root is read from the dataset unless ``self.cache`` is set and a root was read before.

        Args:
            reader (rd.Reader): Reader used to read the file root and the file entries.
            params (dict): Extraction parameters; ``'subject_index'`` is read.
            extracted (dict): Receives ``'file_root'`` and one ``'<category>_files'`` entry per category.
        """
        subject_index_expr = expr.IndexExpression(params['subject_index'])

        if self.cache and self.cached_file_root is not None:
            file_root = self.cached_file_root
        else:
            file_root = reader.read(df.FILES_ROOT)
            self.cached_file_root = file_root

        extracted['file_root'] = file_root

        for category in self.categories:
            category_files = reader.read(df.FILES_PLACEHOLDER.format(category), subject_index_expr)
            extracted['{}_files'.format(category)] = category_files
Exemplo n.º 19
0
    def on_subject(self, params: dict):
        """see :meth:`.Callback.on_subject`.

        Writes the file paths of the current subject, relative to ``self.file_root``, to position
        ``[subject_index, file position]`` of each category's files entry.
        """
        subject_index = params[defs.KEY_SUBJECT_INDEX]
        subject_files = params[defs.KEY_SUBJECT_FILES]

        subject_file = subject_files[subject_index]  # type: subj.SubjectFile

        for category in params[defs.KEY_CATEGORIES]:
            location = defs.LOC_FILES_PLACEHOLDER.format(category)
            file_names = subject_file.categories[category].entries.values()

            for file_position, file_name in enumerate(file_names):
                relative_path = os.path.relpath(file_name, self.file_root)
                target_index = expr.IndexExpression(
                    indexing=[subject_index, file_position], axis=(0, 1))
                self.writer.fill(location, relative_path, target_index)
Exemplo n.º 20
0
    def __call__(self, shape) -> typing.List[expr.IndexExpression]:
        """Return the index expressions of all patches of size ``self.patch_shape`` within ``shape``.

        Only the first ``self.image_dimension`` axes of ``shape`` are tiled. With ``self.ignore_incomplete``
        set, the number of patches per axis is rounded down, otherwise it is rounded up. The result is
        cached for the shape processed last.

        Args:
            shape: Shape to be tiled into patches.

        Returns:
            list: One index expression per patch, given as start/stop pair per axis.
        """
        if shape == self.prev_shape:
            return self.prev_indexing

        rounding = np.floor if self.ignore_incomplete else np.ceil
        spatial_shape = shape[:self.image_dimension]
        patch_counts = rounding(np.divide(spatial_shape, self.patch_shape)).astype('int')

        # one row per patch, holding the patch number along every axis
        patch_numbers = np.indices(patch_counts).reshape(patch_counts.size, -1).T

        # [patch number, patch number + 1] scaled by the patch size gives [start, stop] along every axis
        index_ranges = np.stack([patch_numbers, patch_numbers + 1], axis=-1)
        index_ranges *= np.asarray(self.patch_shape)[np.newaxis, :, np.newaxis]

        patch_indexing = [expr.IndexExpression(patch_range.tolist()) for patch_range in index_ranges]

        self.prev_indexing = patch_indexing
        self.prev_shape = shape
        return patch_indexing
Exemplo n.º 21
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """see :meth:`.Extractor.extract`

        Stores the file root and, per category, the subject's file entries in ``extracted``. The file root
        is read from the dataset unless ``self.cache`` is set and a root was read before.
        """
        subject_index_expr = expr.IndexExpression(
            params[defs.KEY_SUBJECT_INDEX])

        if self.cache and self.cached_file_root is not None:
            file_root = self.cached_file_root
        else:
            file_root = reader.read(defs.LOC_FILES_ROOT)
            self.cached_file_root = file_root

        extracted[defs.KEY_FILE_ROOT] = file_root

        for category in self.categories:
            category_files = reader.read(
                defs.LOC_FILES_PLACEHOLDER.format(category),
                subject_index_expr)
            extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = category_files
Exemplo n.º 22
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """see :meth:`.Extractor.extract`

        The index expression in ``params`` is widened by ``self.index_diffs`` and the wrapped
        ``self.extractor`` is run on the widened region, clipped to non-negative indices. Every category
        whose extracted shape differs from the widened shape is passed to ``self.pad_fn``. The index
        expression in ``params`` is modified in place when it contains integer entries.
        """
        index_expr = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression

        # Integer entries are rewritten as one-element slices ((16,) becomes (slice(16, 17, None),), which is
        # equivalent); the start/stop arithmetic below only works on slices.
        entries = index_expr.expression
        if any(isinstance(entry, int) for entry in entries):
            as_slices = [
                slice(entry, entry + 1) if isinstance(entry, int) else entry
                for entry in entries
            ]
            index_expr.set_indexing(as_slices)

        padded_indexing = np.asarray(
            index_expr.get_indexing()) + self.index_diffs
        starts, stops = padded_indexing[:, 0], padded_indexing[:, 1]
        padded_shape = tuple((stops - starts).tolist())

        # amount by which each bound falls below zero (0 for non-negative bounds)
        sub_indexing = -np.minimum(padded_indexing, 0)

        # cannot slice outside the boundary in negative (but positive works!)
        clipped_indexing = np.maximum(padded_indexing, 0)
        padded_index_expr = expr.IndexExpression(clipped_indexing.tolist())

        padded_params = params.copy()
        padded_params[defs.KEY_INDEX_EXPR] = padded_index_expr
        self.extractor.extract(reader, padded_params, extracted)

        # the wrapped extractor exposes either several categories or a single one
        if hasattr(self.extractor, 'categories'):
            categories = self.extractor.categories
        else:
            categories = [self.extractor.category]

        for category in categories:
            data = extracted[category]

            # axes beyond the indexed ones keep the size of the extracted data
            target_shape = padded_shape + data.shape[len(padded_shape):]
            if target_shape == data.shape:
                continue

            # we could not fully extract the padded shape, use pad_fn to pad data
            extracted[category] = self.pad_fn(data, target_shape,
                                              sub_indexing)
Exemplo n.º 23
0
    def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
        """Store the image properties of the subject under ``'properties'``.

        Shape, direction, spacing and origin are read from the dataset and set on a new image, from which
        the :class:`conv.ImageProperties` are created. They are pickled if ``self.do_pickle`` is set.

        Args:
            reader (rd.Reader): Reader used to read the image information.
            params (dict): Extraction parameters; ``'subject_index'`` is read.
            extracted (dict): Dictionary the properties are written to.
        """
        subject_index_expr = expr.IndexExpression(params['subject_index'])

        def read_as_list(entry):
            # every image property is read at the subject's index and converted to a plain list
            return reader.read(entry, subject_index_expr).tolist()

        shape = read_as_list(df.INFO_SHAPE)
        direction = read_as_list(df.INFO_DIRECTION)
        spacing = read_as_list(df.INFO_SPACING)
        origin = read_as_list(df.INFO_ORIGIN)

        # todo: everything in memory?
        # the image only serves as carrier of the geometry information
        image = sitk.Image(shape, sitk.sitkUInt8)
        image.SetDirection(direction)
        image.SetSpacing(spacing)
        image.SetOrigin(origin)
        # todo number_of_components_per_pixel and pixel_id

        img_properties = conv.ImageProperties(image)
        # pickle to prevent from problems since own class
        extracted['properties'] = pickle.dumps(img_properties) if self.do_pickle else img_properties
Exemplo n.º 24
0
def on_sample_ensure_index_expression_validity(params: dict):
    """Ensures the validity of index expressions and the data for array slicing.

    This callback can be used in case :py:class:`PatchWiseIndexing` is used with argument `ignore_incomplete=True`.
    Note that currently only the upper boundaries are checked as it is implemented in the :py:class:`PatchWiseIndexing`.

    Args:
        params (dict): Must contain ``'__prediction'`` (the data of the sample), ``'batch_idx'``, ``'batch'``
            (with ``'subject_index'`` and ``'index_expr'``) and ``'predictions'``.

    Returns:
        tuple: The unchanged data and index expression if no slice stops beyond the subject's prediction,
        otherwise the cropped data and an index expression with the stops clipped to the prediction's shape.
    """
    key = '__prediction'
    data = params[key]
    batch_idx = params['batch_idx']
    batch = params['batch']
    predictions = params['predictions']

    subject_index = batch['subject_index'][batch_idx]

    index_expr = batch['index_expr'][batch_idx]
    if isinstance(index_expr, bytes):
        # NOTE(review): unpickling executes arbitrary code for crafted input — batches must be trusted.
        index_expr = pickle.loads(index_expr)

    clipped_ranges = []
    exceeds_boundary = False
    for axis, slicer in enumerate(index_expr.expression):
        # only the leading slices are checked; the first non-slice entry ends the check
        if not isinstance(slicer, slice):
            break

        if slicer.stop > predictions[subject_index][key].shape[axis]:
            clipped_stop = predictions[subject_index][key].shape[axis]
            exceeds_boundary = True
        else:
            clipped_stop = slicer.stop
        clipped_ranges.append([slicer.start, clipped_stop])

    if not exceeds_boundary:
        return data, index_expr

    valid_index_expr = expr.IndexExpression(clipped_ranges)
    # NOTE(review): the crop uses the extents of the second and third index entry for the first two data
    # axes — presumably the data has exactly three axes; confirm against the caller.
    second = valid_index_expr.expression[1]
    third = valid_index_expr.expression[2]
    valid_data = data[0:second.stop - second.start,
                      0:third.stop - third.start,
                      :]
    return valid_data, valid_index_expr
Exemplo n.º 25
0
    def direct_extract(self, extractor: extr.Extractor, subject_index: int, index_expr: expr.IndexExpression = None,
                       transform: tfm.Transform = None):
        """Extract data directly, bypassing the extractors and transforms of the instance.

        Useful for data that is not needed for every data chunk (e.g., slice, patch, sub-volume) but only
        occasionally, such as the image shape or origin.

        Args:
            extractor (.Extractor): Extractor or multiple extractors (:class:`.ComposeExtractor`) extracting the
                desired data from the dataset.
            subject_index (int): Index of the subject to be extracted.
            index_expr (.IndexExpression): The indexing to extract a chunk of data only. If omitted, a
                default-constructed index expression is used, which is sufficient if only image related
                information (e.g., image shape, origin) should be extracted.
            transform (.Transform): Transformation(s) to be applied to the extracted data.

        Returns:
            dict: Extracted data in a dictionary. Keys are defined by the used :class:`.Extractor`.
        """
        params = {
            defs.KEY_SUBJECT_INDEX: subject_index,
            defs.KEY_INDEX_EXPR: expr.IndexExpression() if index_expr is None else index_expr,
        }
        extracted = {}

        if self.init_reader_once:
            # the reader is opened on first use and then kept on the instance
            if self.reader is None:
                self.reader = rd.get_reader(self.dataset_path, direct_open=True)
            extractor.extract(self.reader, params, extracted)
        else:
            # a reader that only lives for this call
            with rd.get_reader(self.dataset_path) as reader:
                extractor.extract(reader, params, extracted)

        return transform(extracted) if transform else extracted
Exemplo n.º 26
0
    def direct_extract(self,
                       extractor: extr.Extractor,
                       subject_index: int,
                       index_expr: expr.IndexExpression = None,
                       transform: tfm.Transform = None):
        """Extract data of one subject directly with the given extractor.

        Args:
            extractor (extr.Extractor): Extractor called to fill the result dictionary.
            subject_index (int): Index of the subject to be extracted.
            index_expr (expr.IndexExpression): The indexing handed to the extractor. If omitted, a
                default-constructed index expression is used.
            transform (tfm.Transform): Optional transformation applied to the extracted data.

        Returns:
            dict: The extracted (and, if requested, transformed) data.
        """
        params = {
            'subject_index': subject_index,
            'index_expr': expr.IndexExpression() if index_expr is None else index_expr,
        }
        extracted = {}

        if self.init_reader_once:
            # the reader is opened on first use and then kept on the instance
            if self.reader is None:
                self.reader = rd.get_reader(self.dataset_path,
                                            direct_open=True)
            extractor.extract(self.reader, params, extracted)
        else:
            # a reader that only lives for this call
            with rd.get_reader(self.dataset_path) as reader:
                extractor.extract(reader, params, extracted)

        return transform(extracted) if transform else extracted
Exemplo n.º 27
0
 def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
     """Copy the subject index into ``extracted`` and add the subject entry read from the dataset.

     Args:
         reader (rd.Reader): Reader used to read the subject entry.
         params (dict): Extraction parameters; ``'subject_index'`` is read.
         extracted (dict): Receives ``'subject_index'`` and ``'subject'``.
     """
     subject_index = params['subject_index']
     extracted['subject_index'] = subject_index
     extracted['subject'] = reader.read(df.SUBJECT, expr.IndexExpression(subject_index))
Exemplo n.º 28
0
 def __call__(self, shape) -> typing.List[expr.IndexExpression]:
     """Return one index expression per position along every axis in ``self.slice_axis``.

     Args:
         shape: Shape to index; ``shape[axis]`` gives the number of positions along ``axis``.

     Returns:
         list: The index expressions, grouped by axis in the order of ``self.slice_axis``.
     """
     return [
         expr.IndexExpression(position, axis)
         for axis in self.slice_axis
         for position in range(shape[axis])
     ]
Exemplo n.º 29
0
 def __call__(self, shape) -> typing.List[expr.IndexExpression]:
     """Return a list holding a single default-constructed index expression; ``shape`` is not used."""
     default_expression = expr.IndexExpression()
     return [default_expression]
Exemplo n.º 30
0
 def get_shape(self, subject_index: int) -> list:
     """see :meth:`.Reader.get_shape`

     Reads the shape entry of ``self.category`` at the subject's index and returns it as a list.
     """
     location = defs.LOC_SHAPE_PLACEHOLDER.format(self.category)
     stored_shape = self.read(location, expr.IndexExpression(subject_index))
     return stored_shape.tolist()