示例#1
0
    def __init__(
            self,
            conf_path: str,
            patch_shape: Sequence[int],  # [z]yx
            transform: Callable = transforms.Identity(),
            bounds: Optional[Sequence[Sequence[int]]] = None,  # xyz
            mag: int = 1,
            in_memory: bool = True,
            epoch_size: int = 100,
            disable_memory_check: bool = False,
            verbose: bool = False):
        """Set up a raw-data source backed by a KNOSSOS dataset.

        Args:
            conf_path: Path handed to ``knossos_utils.KnossosDataset``.
            patch_shape: Patch shape in ``[z]yx`` order (2 or 3 entries).
            transform: Callable stored as ``self.transform``.
            bounds: Lower and upper corner (xyz) of the region to use.
                If ``None``, ``[0, 0, 0]`` and ``self.kd.boundary`` are used.
            mag: Stored as ``self.mag``.
            in_memory: If true, ``self._load_into_memory()`` is called at the
                end of construction.
            epoch_size: Stored as ``self.epoch_size``.
            disable_memory_check: Stored as ``self.disable_memory_check``.
            verbose: Forwarded as ``show_progress`` to the KNOSSOS dataset.
        """
        # Keep the constructor arguments around as attributes.
        self.conf_path = conf_path
        self.patch_shape = np.array(patch_shape)
        self.transform, self.mag = transform, mag
        self.in_memory, self.epoch_size = in_memory, epoch_size
        self.disable_memory_check, self.verbose = disable_memory_check, verbose

        self.kd = knossos_utils.KnossosDataset(
            self.conf_path, show_progress=self.verbose
        )

        # Patch geometry: flip zyx -> xyz and pad 2D patches with z=1.
        self.dim = len(self.patch_shape)
        flipped = self.patch_shape[::-1]
        self.patch_shape_xyz = (
            np.array([*flipped, 1]) if self.dim == 2 else flipped
        )

        # Region of interest; fall back to the whole dataset.
        region = [[0, 0, 0], self.kd.boundary] if bounds is None else bounds
        self.bounds = np.array(region)
        lower, upper = self.bounds[0], self.bounds[1]
        self.shape = upper - lower

        # Placeholder for the raw data; only filled in when in_memory is True.
        self.raw = None
        if self.in_memory:
            self._load_into_memory()
示例#2
0
    def __init__(
            self,
            inp_paths,
            target_paths,
            transform=transforms.Identity(),
            offset=None,
            in_memory=True,
            inp_dtype=np.float32,
            target_dtype=np.int64,
            epoch_multiplier=1,  # Pretend to have more data in one epoch
    ):
        """Store the dataset configuration and optionally preload all images.

        Args:
            inp_paths: Iterable of input image file names.
            target_paths: Iterable of target image file names.
            transform: Callable stored as ``self.transform``.
            offset: Stored as ``self.offset``; not used in this method.
            in_memory: If true, all input and target images are read with
                ``imageio.imread`` during construction and kept in
                ``self.inps`` / ``self.targets``.
            inp_dtype: dtype the preloaded input images are cast to.
            target_dtype: dtype the preloaded target images are cast to.
            epoch_multiplier: Stored as ``self.epoch_multiplier`` (pretend to
                have more data in one epoch).
        """
        super().__init__()
        self.inp_paths = inp_paths
        self.target_paths = target_paths
        self.transform = transform
        self.offset = offset
        self.in_memory = in_memory
        self.inp_dtype = inp_dtype
        self.target_dtype = target_dtype
        self.epoch_multiplier = epoch_multiplier

        if self.in_memory:
            # Cast to the requested dtypes. Previously float32/int64 were
            # hard-coded here, silently ignoring inp_dtype/target_dtype.
            self.inps = [
                # [None] prepends a channel axis: (H, W) -> (C=1, H, W)
                np.array(imageio.imread(fname)).astype(self.inp_dtype)[None]
                for fname in self.inp_paths
            ]
            self.targets = [
                np.array(imageio.imread(fname)).astype(self.target_dtype)
                for fname in self.target_paths
            ]
示例#3
0
    def __init__(
        self,
        inp_paths,
        target_paths,
        transform=transforms.Identity(),
        in_memory=True,
        inp_dtype=np.float32,
        target_dtype=np.int64,
    ):
        """Store the dataset configuration and optionally preload all images.

        Args:
            inp_paths: Iterable of input image file names.
            target_paths: Iterable of target image file names.
            transform: Callable stored as ``self.transform``.
            in_memory: If true, all input and target images are read with
                ``imageio.imread`` during construction and kept in
                ``self.inps`` / ``self.targets``.
            inp_dtype: dtype the preloaded input images are cast to.
            target_dtype: dtype the preloaded target images are cast to.
        """
        super().__init__()
        self.inp_paths = inp_paths
        self.target_paths = target_paths
        self.transform = transform
        self.in_memory = in_memory
        self.inp_dtype = inp_dtype
        self.target_dtype = target_dtype

        if self.in_memory:
            # Cast to the requested dtypes. Previously float32/int64 were
            # hard-coded here, silently ignoring inp_dtype/target_dtype.
            self.inps = [
                # [None] prepends a channel axis: (H, W) -> (C=1, H, W)
                np.array(imageio.imread(fname)).astype(self.inp_dtype)[None]
                for fname in self.inp_paths
            ]
            self.targets = [
                np.array(imageio.imread(fname)).astype(self.target_dtype)
                for fname in self.target_paths
            ]
示例#4
0
def get_preview_batch(h5data: Tuple[str, str],
                      preview_shape: Optional[Tuple[int, ...]] = None,
                      transform: Callable = transforms.Identity(),
                      in_memory: bool = False,
                      dim: Optional[int] = None) -> torch.Tensor:
    """Read a preview batch from an HDF5 dataset and return it as a tensor.

    Args:
        h5data: ``(file name, dataset key)`` of the HDF5 dataset to read.
        preview_shape: Shape of the region to cut from the center of the
            input. If ``None``, the input is sliced completely along its
            last ``dim`` axes.
        transform: Called as ``transform(inp, None)``; the first element of
            its result is used as the input batch.
        in_memory: If true, the whole dataset is read into memory before
            slicing.
        dim: Number of trailing (spatial) axes to slice (2 or 3). Defaults to
            ``len(preview_shape)``; required if ``preview_shape`` is ``None``.

    Returns:
        The transformed preview input as a ``torch.Tensor``.

    Raises:
        ValueError: If neither ``dim`` nor ``preview_shape`` is given, if
            they disagree, or if ``preview_shape`` is too big for the input.
    """
    # The spatial dimensionality can't be read off preview_shape if it is
    # None (len(None) used to fail here), so it has to be passed explicitly.
    if dim is None:
        if preview_shape is None:
            raise ValueError(
                'dim must be specified if preview_shape is None.')
        dim = len(preview_shape)
    elif preview_shape is not None and len(preview_shape) != dim:
        raise ValueError(
            f'preview_shape {preview_shape} does not have dim={dim} entries.')

    fname, key = h5data
    # Keep the file open only while reading, so the handle is not leaked.
    with h5py.File(fname, 'r') as h5file:
        inp_h5 = h5file[key]
        if in_memory:
            # Dataset.value was removed in h5py 3; [()] reads everything.
            inp_h5 = inp_h5[()]
        inp_shape = np.array(inp_h5.shape[-dim:])
        if preview_shape is None:  # Slice everything
            inp_lo = np.zeros_like(inp_shape)
            inp_hi = inp_shape
        else:  # Slice only a preview_shape-sized region from the center of the input
            halfshape = np.array(preview_shape) // 2
            inp_center = inp_shape // 2
            if np.any(inp_center < halfshape):
                raise ValueError(
                    'preview_shape is too big for shape of input source. '
                    f'Requested {preview_shape}, but can only deliver {tuple(inp_shape)}.'
                )
            inp_lo = inp_center - halfshape
            inp_hi = inp_center + halfshape
        memstr = ' (in memory)' if in_memory else ''
        logger.info(f'\nPreview data{memstr}:')
        logger.info(
            f'  input:       {fname}[{key}]: {inp_h5.shape} ({inp_h5.dtype})\n')
        inp_np = slice_h5(inp_h5, inp_lo, inp_hi, prepend_empty_axis=True)
    if inp_np.ndim == dim + 1:  # Should be dim + 2 for (N, C) dims
        inp_np = inp_np[:, None]  # Add missing C dim
    inp_np, _ = transform(inp_np, None)
    return torch.from_numpy(inp_np)
示例#5
0
    def __init__(
            self,
            conf_path: str,
            patch_shape: Sequence[int],  # [z]yx
            transform: Callable = transforms.Identity(),
            bounds: Optional[Sequence[Sequence[int]]] = None,  # xyz
            mag: int = 1,
            mode: str = 'in_memory',
            epoch_size: int = 100,
            disable_memory_check: bool = False,
            verbose: bool = False,
            cache_size: int = 50,
            cache_reuses: int = 10):
        """Set up a raw-data source backed by a KNOSSOS dataset.

        Args:
            conf_path: Path handed to ``knossos_utils.KnossosDataset``.
            patch_shape: Patch shape in ``[z]yx`` order (2 or 3 entries).
            transform: Callable stored as ``self.transform``.
            bounds: Lower and upper corner (xyz) of the region to use.
                If ``None``, ``[0, 0, 0]`` and ``self.kd.boundary`` are used.
            mag: Stored as ``self.mag``.
            mode: One of ``'in_memory'``, ``'caching'`` or ``'disk'``.
                ``'in_memory'`` triggers ``self._load_into_memory()`` and
                ``'caching'`` triggers ``self._fill_cache()``; ``'disk'``
                does no loading at construction time.
            epoch_size: Stored as ``self.epoch_size``.
            disable_memory_check: Stored as ``self.disable_memory_check``.
            verbose: Forwarded as ``show_progress`` to the KNOSSOS dataset.
            cache_size: Stored as ``self.cache_size``.
            cache_reuses: Stored as ``self.cache_reusages``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        # Keep the constructor arguments around as attributes.
        self.conf_path = conf_path
        self.patch_shape = np.array(patch_shape)
        self.transform, self.mag = transform, mag
        self.epoch_size = epoch_size
        self.disable_memory_check, self.verbose = disable_memory_check, verbose
        self.cache_size, self.cache_reusages = cache_size, cache_reuses

        supported_modes = ('in_memory', 'caching', 'disk')
        if mode not in supported_modes:
            raise ValueError(
                f'mode has to be one of ``in_memory``, ``caching`` or ``disk``, but is {mode}'
            )
        self.mode = mode

        self.kd = knossos_utils.KnossosDataset(
            self.conf_path, show_progress=self.verbose
        )

        # Patch geometry: flip zyx -> xyz and pad 2D patches with z=1.
        self.dim = len(self.patch_shape)
        flipped = self.patch_shape[::-1]
        self.patch_shape_xyz = (
            np.array([*flipped, 1]) if self.dim == 2 else flipped
        )

        # Region of interest; fall back to the whole dataset.
        region = [[0, 0, 0], self.kd.boundary] if bounds is None else bounds
        self.bounds = np.array(region)
        lower, upper = self.bounds[0], self.bounds[1]
        self.shape = upper - lower

        # Placeholder for the raw data; nothing is assigned to it here.
        self.raw = None

        # Eagerly load data where the mode asks for it ('disk' loads nothing).
        if self.mode == 'in_memory':
            self._load_into_memory()
        if self.mode == 'caching':
            self._fill_cache()
示例#6
0
    def __init__(
        self,
        inp_path=None,
        target_path=None,
        train=True,
        inp_key='raw',
        target_key='lab',
        # offset=(0, 0, 0),
        pool=(1, 1, 1),
        transform: Callable = transforms.Identity(),
        out_channels: Optional[int] = None,
    ):
        """Load an input/target cube pair from HDF5 files into memory.

        Args:
            inp_path: Path of the input HDF5 file. Defaults to
                ``~/neuro_data_cdhw/raw_{cube_id}.h5``.
            target_path: Path of the target HDF5 file. Defaults to
                ``~/neuro_data_cdhw/barrier_int16_{cube_id}.h5``.
            train: Selects the default cube: id 0 if true, id 2 otherwise.
            inp_key: Dataset key inside the input file.
            target_key: Dataset key inside the target file.
            pool: Per-axis stride with which the target is subsampled.
            transform: Callable stored as ``self.transform``.
            out_channels: Stored as ``self.out_channels``.

        Raises:
            ValueError: If the target is larger than the input along any
                spatial axis, so that the input can't be cropped to it.
        """
        super().__init__()
        self.transform = transform
        self.out_channels = out_channels
        cube_id = 0 if train else 2
        if inp_path is None:
            inp_path = f'~/neuro_data_cdhw/raw_{cube_id}.h5'
        if target_path is None:
            target_path = f'~/neuro_data_cdhw/barrier_int16_{cube_id}.h5'
        # "~" is expanded exactly once here, for default and custom paths alike
        self.inp_file = h5py.File(os.path.expanduser(inp_path), 'r')
        self.target_file = h5py.File(os.path.expanduser(target_path), 'r')
        self.inp = self.inp_file[inp_key][()].astype(np.float32)
        self.target = self.target_file[target_key][()].astype(np.int64)
        self.target = self.target[0]  # Squeeze superfluous first dimension
        # Handle pooling (dirty hack TODO)
        self.target = self.target[::pool[0], ::pool[1], ::pool[2]]

        # Cut inp and target to same size
        inp_shape = np.array(self.inp.shape[1:])
        target_shape = np.array(self.target.shape)
        diff = inp_shape - target_shape
        if np.any(diff < 0):
            raise ValueError(
                f'target shape {tuple(target_shape)} exceeds input shape '
                f'{tuple(inp_shape)}; cannot crop input to target size.')
        offset = diff // 2  # offset from image boundaries
        # Derive the stop index from the target shape. Using
        # ``inp_shape - offset`` left the input one voxel too large whenever
        # the size difference was odd.
        stop = offset + target_shape

        self.inp = self.inp[:, offset[0]:stop[0],
                            offset[1]:stop[1],
                            offset[2]:stop[2], ]

        # Using file contents from memory -> no need to keep the file open.
        self.close_files()
示例#7
0
    def __init__(
            self,
            inp_paths,
            target_paths,
            transform=transforms.Identity(),
            offset=None,
            in_memory=True,
            inp_dtype=np.float32,
            target_dtype=np.int64,
            epoch_multiplier=1,  # Pretend to have more data in one epoch
    ):
        """Store the dataset configuration and optionally preload all images.

        Args:
            inp_paths: Iterable of input image file names. Images may be
                single-channel (H, W) or multi-channel (H, W, C), but the two
                kinds must not be mixed.
            target_paths: Iterable of target image file names.
            transform: Callable stored as ``self.transform``.
            offset: Stored as ``self.offset``; not used in this method.
            in_memory: If true, all images are read with ``imageio.imread``
                during construction and kept in ``self.inps`` /
                ``self.targets``. Inputs are stored in (C, H, W) layout.
            inp_dtype: dtype the preloaded input images are cast to.
            target_dtype: dtype the preloaded target images are cast to.
            epoch_multiplier: Stored as ``self.epoch_multiplier`` (pretend to
                have more data in one epoch).

        Raises:
            RuntimeError: If single- and multi-channel inputs are mixed or an
                input image has an ndim other than 2 or 3.
        """
        super().__init__()
        self.inp_paths = inp_paths
        self.target_paths = target_paths
        self.transform = transform
        self.offset = offset
        self.in_memory = in_memory
        self.inp_dtype = inp_dtype
        self.target_dtype = target_dtype
        self.epoch_multiplier = epoch_multiplier

        if self.in_memory:
            self.inps = []
            # Remember one file of each kind so mixing is detected regardless
            # of the order in which the files are listed.
            rgb_fname = None
            gray_fname = None
            for fname in self.inp_paths:
                # Cast to the requested dtype (float32 used to be hard-coded)
                inp = imageio.imread(fname).astype(self.inp_dtype)
                if rgb_fname is not None and inp.ndim != 3:
                    raise RuntimeError(f'Mixed multi-channel {rgb_fname} and single-channel images {fname} in gt.')
                if gray_fname is not None and inp.ndim == 3:
                    raise RuntimeError(f'Mixed multi-channel {fname} and single-channel images {gray_fname} in gt.')
                if inp.ndim == 2:
                    gray_fname = fname
                    inp = inp[None]  # (H, W) -> (C=1, H, W)
                elif inp.ndim == 3:
                    rgb_fname = fname
                    inp = inp.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
                else:
                    raise RuntimeError(f'Image {fname} has shape {inp.shape}, but ndim should be 2 or 3.')
                self.inps.append(inp)
            self.targets = [
                # Cast to the requested dtype (int64 used to be hard-coded)
                np.array(imageio.imread(fname)).astype(self.target_dtype)
                for fname in self.target_paths
            ]
    def __init__(
            self,
            conf_path_label: str,
            conf_path_raw_data: str,
            dir_path_label: str,
            patch_shape: Sequence[int],  # [z]yx
            transform: Callable = transforms.Identity(),
            mag: int = 1,
            epoch_size: int = 100,
            label_names: Optional[Sequence[str]] = None,
            knossos_bounds: Optional[Sequence[Sequence[
                Sequence[int]]]] = None,  # xyz
            label_offset: int = 0,
            label_order: Optional[Sequence[int]] = None):
        """Set up a labelled-data source backed by KNOSSOS datasets.

        Args:
            conf_path_label: Path handed to ``knossos_utils.KnossosDataset``
                for the label data.
            conf_path_raw_data: Stored as ``self.conf_path_raw_data``.
            dir_path_label: Stored as ``self.dir_path``.
            patch_shape: Patch shape in ``[z]yx`` order (2 or 3 entries).
            transform: Callable stored as ``self.transform``.
            mag: Stored as ``self.mag``.
            epoch_size: Stored as ``self.epoch_size``.
            label_names: Passed to ``self._get_file_bounds``.
            knossos_bounds: Stored as ``self.knossos_bounds`` (xyz).
            label_offset: Stored as ``self.label_offset``.
            label_order: Stored as ``self.label_order``.
        """
        self.conf_path_label = conf_path_label
        self.conf_path_raw_data = conf_path_raw_data

        # Patch geometry: flip zyx -> xyz and pad 2D patches with z=1.
        self.patch_shape = np.array(patch_shape)
        self.dim = len(self.patch_shape)
        flipped = self.patch_shape[::-1]
        self.patch_shape_xyz = (
            np.array([*flipped, 1]) if self.dim == 2 else flipped
        )

        self.transform, self.mag, self.epoch_size = transform, mag, epoch_size
        self.kd = knossos_utils.KnossosDataset(
            self.conf_path_label, show_progress=False
        )

        # Empty containers; nothing is added to them directly in this method.
        self.inp_targets = []
        self.file_bounds = {}
        self.kzip_files_path = []

        self.dir_path = dir_path_label
        self.knossos_bounds = knossos_bounds
        # todo: verify correct handling of this offset
        self.label_offset = label_offset
        self.label_order = label_order

        self._get_file_bounds(label_names)
        self._get_data()
示例#9
0
    def __init__(
            self,
            inp_paths,
            target_paths,
            transform=transforms.Identity(),
            offset: Sequence[int] = (0, 0, 0),
            in_memory=True,
            inp_dtype=np.float32,
            target_dtype=np.int64,
            epoch_multiplier=1,  # Pretend to have more data in one epoch
    ):
        """Store the dataset configuration and optionally preload all images.

        Args:
            inp_paths: Iterable of input paths. Each entry is either an image
                file or a directory; all files of a directory (in sorted
                order) are loaded and concatenated along the channel axis
                into one multi-layer input.
            target_paths: Iterable of target image file names.
            transform: Callable stored as ``self.transform``.
            offset: Stored as ``self.offset``; not used in this method.
            in_memory: If true, all images are read with ``imageio.imread``
                during construction and kept in ``self.inputs`` /
                ``self.targets``. Inputs are stored in (C, H, W) layout.
            inp_dtype: dtype the preloaded input images are cast to.
            target_dtype: dtype the preloaded target images are cast to.
            epoch_multiplier: Stored as ``self.epoch_multiplier`` (pretend to
                have more data in one epoch).

        Raises:
            RuntimeError: If single-channel and 3-channel images are mixed
                within the same input layer, or an image has an ndim other
                than 2 or 3.
        """
        super().__init__()
        self.inp_paths = inp_paths
        self.target_paths = target_paths
        self.transform = transform
        self.offset = offset
        self.in_memory = in_memory
        self.inp_dtype = inp_dtype
        self.target_dtype = target_dtype
        self.epoch_multiplier = epoch_multiplier

        def load_image(fname):
            """Read one image file and return it in (C, H, W) layout."""
            # Cast to the requested dtype (float32 used to be hard-coded)
            inp = imageio.imread(fname).astype(self.inp_dtype)
            if inp.ndim == 2:
                inp = inp[None]  # (H, W) -> (C=1, H, W)
            elif inp.ndim == 3:
                inp = inp.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
            else:
                raise RuntimeError(
                    f'Image {fname} has shape {inp.shape}, but ndim should be 2 or 3.'
                )
            return inp

        if self.in_memory:
            self.inputs = []
            # Map layer index -> one file name seen with 3 / 1 channel(s).
            # Single-file inputs are tracked as layer 0.
            rgb_fnames = {}
            gray_fnames = {}
            for input_path in self.inp_paths:
                if os.path.isdir(input_path):
                    multi_input = []
                    for channel_idx, input_file in enumerate(
                            sorted(glob.glob(str(input_path) + '/*'))):
                        inp = load_image(str(input_file))
                        if inp.shape[0] == 1:
                            gray_fnames[channel_idx] = input_file
                        elif inp.shape[0] == 3:
                            rgb_fnames[channel_idx] = input_file
                        rgb_fname = rgb_fnames.get(channel_idx)
                        if rgb_fname is not None and inp.shape[0] == 1:
                            raise RuntimeError(
                                f'GT input layer {channel_idx} has mixed multi-channel ({rgb_fname}) and single-channel images ({input_file}).'
                            )
                        gray_fname = gray_fnames.get(channel_idx)
                        if gray_fname is not None and inp.shape[0] == 3:
                            raise RuntimeError(
                                f'GT input layer {channel_idx} has mixed multi-channel ({input_file}) and single-channel images ({gray_fname}).'
                            )
                        multi_input.append(inp)
                    self.inputs.append(np.concatenate(multi_input))
                else:
                    inp = load_image(input_path)
                    if inp.shape[0] == 1:
                        gray_fnames[0] = input_path
                    elif inp.shape[0] == 3:
                        rgb_fnames[0] = input_path
                    if len(rgb_fnames) > 0 and inp.shape[0] == 1 or len(
                            gray_fnames) > 0 and inp.shape[0] == 3:
                        # Both dicts are non-empty here, but key 0 may be
                        # missing if the other kind was only seen in a
                        # directory layer. Fall back to any recorded file so
                        # the intended RuntimeError is raised, not KeyError.
                        rgb_example = rgb_fnames.get(
                            0, next(iter(rgb_fnames.values())))
                        gray_example = gray_fnames.get(
                            0, next(iter(gray_fnames.values())))
                        raise RuntimeError(
                            f'Mixed multi-channel ({rgb_example}) and single-channel images ({gray_example}) in gt.'
                        )
                    self.inputs.append(inp)
            self.targets = [
                # Cast to the requested dtype (int64 used to be hard-coded)
                np.array(imageio.imread(fname)).astype(self.target_dtype)
                for fname in self.target_paths
            ]
示例#10
0
    def __init__(
            self,
            input_sources: List[Tuple[str, str]],
            patch_shape: Sequence[int],
            target_sources: Optional[List[Tuple[str, str]]] = None,
            offset: Sequence[int] = (0, 0, 0),
            cube_prios: Optional[Sequence[float]] = None,
            aniso_factor: int = 2,
            target_discrete_ix: Optional[List[int]] = None,
            input_discrete_ix: Optional[List[int]] = None,
            target_dtype: np.dtype = np.int64,
            train: bool = True,
            warp_prob: Union[bool, float] = False,
            warp_kwargs: Optional[Dict[str, Any]] = None,
            epoch_size: int = 100,
            transform: Callable = transforms.Identity(),
            in_memory: bool = False,
            cube_meta=_DefaultCubeMeta(),
    ):
        """Store the patch-sampling configuration and open the data sources.

        Args:
            input_sources: List of input data sources (pairs of strings;
                presumably file name and dataset key -- confirm against
                ``load_data``).
            patch_shape: Shape of the input patches.
            target_sources: Optional list of target data sources. If given,
                it must have the same length as ``input_sources``.
            offset: Per-axis margin by which the target patch is smaller than
                the input patch on each side
                (``target_patch_shape = patch_shape - 2 * offset``).
            cube_prios: Stored as ``self.cube_prios``.
            aniso_factor: Stored as ``self.aniso_factor``.
            target_discrete_ix: Stored as ``self.target_discrete_ix``.
            input_discrete_ix: Stored as ``self.input_discrete_ix``.
            target_dtype: Stored as ``self._target_dtype``.
            train: Whether this is training data. A warning is logged if
                warping is requested for non-training data.
            warp_prob: Stored as ``self.warp_prob``.
            warp_kwargs: Stored as ``self.warp_kwargs`` (``{}`` if ``None``).
            epoch_size: Stored as ``self.epoch_size``; the original value is
                also kept so that it can be reset later.
            transform: Callable stored as ``self.transform``.
            in_memory: Stored as ``self.in_memory``.
            cube_meta: Stored as ``self.cube_meta``.

        Raises:
            ValueError: If ``target_sources`` is given and its length differs
                from the length of ``input_sources``.
        """
        # Early checks
        if target_sources is not None and len(input_sources) != len(
                target_sources):
            raise ValueError(
                'If target_sources is not None, input_sources and '
                'target_sources must be lists of same length.')
        if not train:
            if warp_prob > 0:
                logger.warning(
                    'Augmentations should not be used on validation data.')

        # batch properties
        self.train = train
        self.warp_prob = warp_prob
        self.warp_kwargs = warp_kwargs if warp_kwargs is not None else {}

        # general properties
        self.input_sources = input_sources
        self.target_sources = target_sources
        self.cube_meta = cube_meta
        self.cube_prios = cube_prios
        self.aniso_factor = aniso_factor
        self.target_discrete_ix = target_discrete_ix
        self.input_discrete_ix = input_discrete_ix
        self.epoch_size = epoch_size
        self._orig_epoch_size = epoch_size  # Store original epoch_size so it can be reset later.
        self.in_memory = in_memory

        # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
        # selects the same default integer dtype.
        self.patch_shape = np.array(patch_shape, dtype=int)
        # NOTE(review): this is the ndim of the shape *array* (1 for a flat
        # patch_shape), not len(patch_shape) -- confirm this is intended.
        self.ndim = self.patch_shape.ndim
        self.offset = np.array(offset)
        self.target_patch_shape = self.patch_shape - self.offset * 2
        self._target_dtype = target_dtype
        self.transform = transform

        # Setup internal stuff
        self.pid = os.getpid()

        # The following fields will be filled when reading data
        self.n_labelled_pixels = 0
        self.inputs: List[DataSource] = []
        self.targets: List[DataSource] = []

        self.load_data()  # Open dataset files

        self.n_successful_warp = 0
        self.n_failed_warp = 0
        self._failed_warp_warned = False
示例#11
0
    def __init__(
            self,
            input_h5data: List[Tuple[str, str]],
            target_h5data: List[Tuple[str, str]],
            patch_shape: Sequence[int],
            cube_prios: Optional[Sequence[float]] = None,
            aniso_factor: int = 2,
            target_discrete_ix: Optional[List[int]] = None,
            train: bool = True,
            preview_shape: Optional[Sequence[int]] = None,
            warp: Union[bool, float] = False,
            warp_kwargs: Optional[Dict[str, Any]] = None,
            epoch_size: int = 100,
            transform: Callable = transforms.Identity(),
            classes: Optional[Sequence[int]] = None
    ):
        """Store the patch-sampling configuration and open the HDF5 data.

        Args:
            input_h5data: List of ``(file name, key)`` pairs for the inputs.
                ``~`` in file names is expanded.
            target_h5data: List of ``(file name, key)`` pairs for the
                targets. Must have the same length as ``input_h5data``.
            patch_shape: Shape of the patches.
            cube_prios: Stored as ``self.cube_prios``.
            aniso_factor: Stored as ``self.aniso_factor``.
            target_discrete_ix: Stored as ``self.target_discrete_ix``.
            train: Whether this is training data. A warning is logged if
                warping is requested for non-training data.
            preview_shape: Stored as ``self.preview_shape``. Must be ``None``
                if ``train`` is true.
            warp: Stored as ``self.warp``.
            warp_kwargs: Stored as ``self.warp_kwargs``.
            epoch_size: Stored as ``self.epoch_size``; the original value is
                also kept so that it can be reset later.
            transform: Callable stored as ``self.transform``. ``None`` is
                replaced by a one-argument identity function.
            classes: Currently only used for determining ``num_classes``.

        Raises:
            ValueError: If ``input_h5data`` and ``target_h5data`` differ in
                length, or if ``preview_shape`` is given for training data.
        """
        # Early checks
        if len(input_h5data) != len(target_h5data):
            raise ValueError("input_h5data and target_h5data must be lists of same length!")
        if not train:
            if warp:
                logger.warning(
                    'Augmentations should not be used on validation data.'
                )
        else:
            if preview_shape is not None:
                # This used to be a bare ValueError() without any message.
                raise ValueError(
                    'preview_shape must be None if train is True.'
                )

        # batch properties
        self.train = train
        self.warp = warp
        self.warp_kwargs = warp_kwargs

        # general properties
        input_h5data = [(expanduser(fn), key) for (fn, key) in input_h5data]
        target_h5data = [(expanduser(fn), key) for (fn, key) in target_h5data]
        self.input_h5data = input_h5data
        self.target_h5data = target_h5data
        self.cube_prios = cube_prios
        self.aniso_factor = aniso_factor
        self.target_discrete_ix = target_discrete_ix
        self.epoch_size = epoch_size
        self._orig_epoch_size = epoch_size  # Store original epoch_size so it can be reset later.
        # TODO: This is currently only used for determining num_classes. It
        #       could be used for adding support for targets that are not
        #       labelled in the expected order [0, 1, ..., num_classes - 1] or
        #       as a whitelist that excludes classes that should be ignored.
        self.classes = classes
        self.num_classes = None if classes is None else len(classes)

        # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
        # selects the same default integer dtype.
        self.patch_shape = np.array(patch_shape, dtype=int)
        # NOTE(review): this is the ndim of the shape *array* (1 for a flat
        # patch_shape), not len(patch_shape) -- confirm this is intended.
        self.ndim = self.patch_shape.ndim
        # TODO: Make strides and offsets for targets configurable
        # self.strides = ...
        #  strides will need to be applied *during* dataset iteration now
        #  (-> strided reading in slice_h5()... or should strides be applied
        #   with some fancy downscaling operator? Naively strided reading
        #   could mess up targets in unfortunate cases:
        #   e.g. ``[0, 1, 0, 1, 0, 1][::2] == [0, 0, 0]``, discarding all 1s).
        self.offsets = np.array([0, 0, 0])
        self.target_patch_size = self.patch_shape - self.offsets * 2
        self._target_dtype = np.int64
        # The following will be inferred when reading data
        self.n_labelled_pixels = 0

        # Actual data fields
        self.inputs = []
        self.targets = []

        self.preview_shape = preview_shape
        self._preview_batch = None

        # Setup internal stuff
        # Seed the RNG from the fractional part of the scaled current time.
        self.rng = np.random.RandomState(
            np.uint32((time.time() * 0.0001 - int(time.time() * 0.0001)) * 4294967295)
        )
        self.pid = os.getpid()

        self._sampling_weight = None
        self._training_count = None
        self._count = None
        self.n_successful_warp = 0
        self.n_failed_warp = 0
        self.n_read_failures = 0

        self.load_data()  # Open dataset files

        if transform is None:
            transform = lambda x: x
        self.transform = transform

        # Load preview data on initialization so read errors won't occur late
        # and reading doesn't have to be done by each background worker process separately.
        _ = self.preview_batch