Пример #1
0
    def _get_tiles_normal(self, tiling_scheme, dest_dtype):
        """
        Yield one DataTile per subslice returned by `self._get_subslices`.

        Each subslice is first read from the HDF5 dataset in the dataset's
        own dtype and then copied into a second buffer of `dest_dtype`.
        Both buffers are allocated once and re-used for every tile, so the
        data of a yielded tile is overwritten as soon as the generator is
        advanced.
        """
        with self._get_h5ds() as h5ds:
            # Letting HDF5 convert dtypes while reading can be quite slow,
            # so we read into a buffer that has the native HDF5 dtype ...
            native_flat = zeros_aligned(
                tiling_scheme.shape, dtype=h5ds.dtype,
            ).reshape((-1, ))

            # ... and convert into a second buffer, which backs the DataTile:
            converted_flat = zeros_aligned(
                tiling_scheme.shape, dtype=dest_dtype,
            ).reshape((-1, ))

            for idx, nd_slice in self._get_subslices(tiling_scheme=tiling_scheme):
                flat_slice = nd_slice.flatten_nav(self.meta.shape)

                # views of exactly the size needed for this subslice:
                n_items = nd_slice.shape.size
                native_view = native_flat[:n_items].reshape(nd_slice.shape)
                converted_view = converted_flat[:n_items].reshape(nd_slice.shape)

                h5ds.read_direct(native_view, source_sel=nd_slice.get())
                # the additional copy performs the dtype/endianness conversion:
                converted_view[:] = native_view

                tile_data = converted_view.reshape(flat_slice.shape)
                self._preprocess(tile_data, flat_slice)
                yield DataTile(
                    tile_data,
                    tile_slice=flat_slice,
                    scheme_idx=idx,
                )
Пример #2
0
    def _get_tiles_normal(self,
                          crop_to,
                          full_frames,
                          dest_dtype,
                          target_size=None):
        """
        Yield DataTiles that stack up to `stackheight` consecutive frames.

        The signal shape/origin are taken from `crop_to` if it is given,
        unless `full_frames` is set, in which case the full signal shape of
        the dataset is used. Stacks that do not intersect `crop_to` are
        skipped. Full-height stacks share a single buffer; a separate
        buffer is allocated for a shorter stack at the end of the range.
        """
        first_frame = self._start_frame
        frame_count = self._num_frames

        sig_shape = self.meta.shape.sig
        sig_origin = (0, ) * len(sig_shape)
        if crop_to is not None:
            sig_origin = tuple(crop_to.origin[-sig_shape.dims:])
            sig_shape = crop_to.shape.sig
        if full_frames:
            sig_shape = self.meta.shape.sig

        stackheight = self._get_stackheight(
            sig_shape=sig_shape,
            dest_dtype=dest_dtype,
            target_size=target_size,
        )
        full_tileshape = (stackheight, ) + tuple(sig_shape)
        full_buf = zeros_aligned(full_tileshape, dtype=dest_dtype)

        # one past the last frame of this partition:
        stop_frame = first_frame + frame_count

        with self._fileset as fileset:
            for outer_frame in range(first_frame, stop_frame, stackheight):
                remaining = stop_frame - outer_frame
                if remaining >= stackheight:
                    depth = stackheight
                    shape_now = full_tileshape
                    tile_buf = full_buf
                else:
                    # rest of the range is smaller than a full stack
                    depth = remaining
                    shape_now = (depth, ) + tuple(sig_shape)
                    tile_buf = zeros_aligned(shape_now, dtype=dest_dtype)

                tile_slice = Slice(
                    origin=(outer_frame, ) + sig_origin,
                    shape=Shape(shape_now, sig_dims=sig_shape.dims),
                )
                if (crop_to is not None
                        and tile_slice.intersection_with(crop_to).is_null()):
                    continue

                fileset.read_images_multifile(
                    start=outer_frame,
                    stop=outer_frame + depth,
                    out=tile_buf,
                    crop_to=crop_to,
                )
                yield DataTile(data=tile_buf, tile_slice=tile_slice)
Пример #3
0
    def __call__(self):
        """
        Sum all frames of the partition over the navigation axes.

        Accumulation happens in the partition dtype if that is a float or
        complex type, and in float32 otherwise. Returns a list containing a
        single SumResultTile.
        """
        dest_dtype = np.dtype(self.partition.dtype)
        is_inexact = dest_dtype.kind in ('c', 'f')
        if not is_inexact:
            dest_dtype = 'float32'

        sig_shape = self.partition.meta.shape.sig
        accumulated = zeros_aligned(sig_shape, dtype=dest_dtype)
        # scratch space for the per-tile sum, re-used for every tile:
        tile_sum = zeros_aligned(sig_shape, dtype=dest_dtype)

        tiles = self.partition.get_tiles(dest_dtype=dest_dtype, mmap=True)
        for tile in tiles:
            tile.data.sum(axis=0, out=tile_sum)
            sig_selector = tile.tile_slice.get(sig_only=True)
            accumulated[sig_selector] += tile_sum

        return [SumResultTile(data=accumulated, )]
Пример #4
0
 def __call__(self):
     """
     Apply all masks to every tile of the partition.

     The per-tile products are accumulated into a buffer of shape
     `(number of masks,) + navigation shape of the partition`, which is
     returned wrapped in a single-element list of MaskResultTile.
     """
     mask_count = len(self.masks)
     accum_shape = (mask_count, ) + tuple(self.partition.shape.nav)
     accum = zeros_aligned(accum_shape, dtype=self.dtype)

     tiles = self.partition.get_tiles(mmap=True, dest_dtype=self.read_dtype)
     for tile in tiles:
         frames_flat = tile.flat_data
         mask_stack = self.masks.get(tile, self.mask_dtype)

         if isinstance(mask_stack, sparse.SparseArray):
             product = sparse.dot(frames_flat, mask_stack)
         elif scipy.sparse.issparse(mask_stack):
             # scipy.sparse with the old matrix interface:
             # there, "*" means the dot product
             product = frames_flat * mask_stack
         elif self.use_torch:
             lhs = torch.from_numpy(frames_flat)
             rhs = torch.from_numpy(mask_stack)
             product = torch.mm(lhs, rhs).numpy()
         else:
             product = frames_flat.dot(mask_stack)

         rel_slice = tile.tile_slice.shift(self.partition.slice)
         shaped = self.reshaped_data(data=product, dest_slice=rel_slice)
         # the Ellipsis covers the leading "number of masks" axis
         nav_selector = (..., ) + rel_slice.get(nav_only=True)
         accum[nav_selector] += shaped

     result_tile = MaskResultTile(
         data=accum,
         dest_slice=self.partition.slice.get(nav_only=True),
     )
     return [result_tile]
Пример #5
0
 def read_full_frame(self, frame, buf, dtype="float32", crop_to=None):
     """
     Decode all blocks of one frame of this sector into `buf`.

     Parameters
     ----------
     frame
         Index of the frame to read; used to compute the byte offset of
         the frame's blocks relative to `self.first_block_offset`.
     buf
         Destination array; each decoded block is written to
         `buf[:, y:y + BLOCK_SHAPE[0], x:x + BLOCK_SHAPE[1]]`.
     dtype
         dtype of the intermediate buffer the blocks are decoded into.
     crop_to
         Accepted, but not used by this method.

     The memory map of the file is now closed when the method returns
     (also if decoding raises); previously it was never closed explicitly.
     """
     # FIXME: can we somehow get rid of this buffer?
     block_buf = zeros_aligned(BLOCK_SHAPE, dtype=dtype).reshape((-1, ))
     frame_offset = (self.first_block_offset +
                     frame * BLOCK_SIZE * BLOCKS_PER_SECTOR_PER_FRAME)

     # TODO: mmapping the whole file may confuse dask.distributed,
     # if the file is large in comparison to RAM.
     with mmap.mmap(
         fileno=self.f.fileno(),
         length=0,  # whole file
         access=mmap.ACCESS_READ,
     ) as raw_data:
         for blockidx in range(BLOCKS_PER_SECTOR_PER_FRAME):
             offset = frame_offset + blockidx * BLOCK_SIZE
             input_start = offset + HEADER_SIZE
             input_end = input_start + DATA_SIZE
             block_x = 256 - (16 * (blockidx % 16 + 1))
             block_y = 930 * (blockidx // 16)
             # slicing the mmap yields a bytes copy, so nothing refers
             # to the mapping any more once it is closed
             decode_uint12_le(
                 inp=raw_data[input_start:input_end],
                 out=block_buf,
             )
             buf[:, block_y:(block_y + BLOCK_SHAPE[0]),
                 block_x:(block_x + BLOCK_SHAPE[1])] = block_buf.reshape(
                     (1, ) + BLOCK_SHAPE)
Пример #6
0
 def __call__(self):
     """
     Apply all masks to every tile of the partition.

     The computation uses the partition dtype if that is a float or
     complex type, and float32 otherwise. Returns a single-element list
     with a MaskResultTile covering the navigation part of the partition.
     """
     mask_count = len(self.masks)

     dest_dtype = np.dtype(self.partition.dtype)
     is_inexact = dest_dtype.kind in ('c', 'f')
     if not is_inexact:
         dest_dtype = 'float32'

     accum_shape = (mask_count, ) + tuple(self.partition.shape.nav)
     accum = zeros_aligned(accum_shape, dtype=dest_dtype)

     tiles = self.partition.get_tiles(mmap=True, dest_dtype=dest_dtype)
     for tile in tiles:
         frames_flat = tile.flat_data
         mask_stack = self.masks[tile]

         if self.masks.use_sparse:
             product = sparse.dot(frames_flat, mask_stack)
         elif self.use_torch:
             lhs = torch.from_numpy(frames_flat)
             rhs = torch.from_numpy(mask_stack)
             product = torch.mm(lhs, rhs).numpy()
         else:
             product = frames_flat.dot(mask_stack)

         rel_slice = tile.tile_slice.shift(self.partition.slice)
         shaped = self.reshaped_data(data=product, dest_slice=rel_slice)
         # the Ellipsis covers the leading "number of masks" axis
         nav_selector = (..., ) + rel_slice.get(nav_only=True)
         accum[nav_selector] += shaped

     result_tile = MaskResultTile(
         data=accum,
         dest_slice=self.partition.slice.get(nav_only=True),
     )
     return [result_tile]
Пример #7
0
 def _read_full_frames(self, crop_to=None, dest_dtype="float32", roi=None):
     """
     Yield one DataTile per frame of this partition, assembled from all
     sectors.

     If `roi` is given, it is flattened and only frames that are set in
     it are read; the origin of each tile is then the number of selected
     frames before the partition plus the number of frames yielded so
     far. Frames whose slice does not intersect `crop_to` are skipped.

     The same frame buffer backs every yielded tile.
     """
     use_roi = roi is not None
     with contextlib.ExitStack() as stack:
         frame_buf = zeros_aligned((1, 1860, 2048), dtype=dest_dtype)
         sectors = [stack.enter_context(sec) for sec in self._sectors]

         selected_before = 0
         if use_roi:
             roi = roi.reshape((-1, ))
             selected_before = np.count_nonzero(roi[:self._start_frame])

         frames_read = 0
         stop_frame = self._start_frame + self._num_frames
         for frame in range(self._start_frame, stop_frame):
             if use_roi and not roi[frame]:
                 continue

             origin = (selected_before + frames_read) if use_roi else frame
             tile_slice = Slice(
                 origin=(origin, 0, 0),
                 shape=Shape(frame_buf.shape, sig_dims=2),
             )
             if (crop_to is not None
                     and tile_slice.intersection_with(crop_to).is_null()):
                 continue

             sector_width = SECTOR_SIZE[1]
             for sec in sectors:
                 # each sector fills its own column range of the frame
                 left = sec.idx * sector_width
                 right = (sec.idx + 1) * sector_width
                 sec.read_full_frame(
                     frame=frame,
                     buf=frame_buf[:, :, left:right],
                 )
             yield DataTile(data=frame_buf, tile_slice=tile_slice)
             frames_read += 1
Пример #8
0
    def __call__(self):
        """
        Collect the part of each selected frame that is described by
        `self._slice` into one result buffer.

        Tiles are requested frame by frame, restricted to `self._roi`, and
        each tile is stored in the result row given by the first entry of
        its slice origin. Returns a single-element list with a
        PickFrameResultTile.
        """
        # NOTE: stop-gap solution that should work until this is deprecated;
        # it is not optimized for performance...
        part_shape = self.partition.shape
        single_frame = Shape(
            (1, ) + tuple(part_shape.sig),  # one frame per tile
            sig_dims=part_shape.sig.dims,
        )
        scheme = TilingScheme.make_for_shape(
            tileshape=single_frame,
            dataset_shape=self.partition.meta.shape,
        )

        native_dtype = np.dtype(self.partition.dtype).newbyteorder(sys.byteorder)
        picked = zeros_aligned(self._slice.shape, dtype=native_dtype)
        # one flattened row per frame selected in the ROI:
        picked = picked.reshape((np.count_nonzero(self._roi)), -1)

        for tile in self.partition.get_tiles(
                tiling_scheme=scheme,
                dest_dtype=native_dtype,
                roi=self._roi,
        ):
            sig_selector = (..., ) + self._slice.get(sig_only=True)
            row = tile.tile_slice.origin[0]
            picked[row] = tile[sig_selector].reshape((-1, ))

        return [PickFrameResultTile(data=picked)]
Пример #9
0
 def __call__(self):
     """
     Copy the region described by `self._slice` out of the partition.

     For every tile, the intersection with `self._slice` is copied from
     the tile data into the matching position of the result buffer.
     Returns a single-element list with a PickFrameResultTile.
     """
     picked = zeros_aligned(self._slice.shape, dtype=self.partition.dtype)
     tiles = self.partition.get_tiles(crop_to=self._slice, mmap=True)
     for tile in tiles:
         overlap = tile.tile_slice.intersection_with(self._slice)
         # coordinates of the overlap relative to the tile ...
         in_tile = overlap.shift(tile.tile_slice)
         source = tile.data[in_tile.get()]
         # ... and relative to the requested slice:
         in_result = overlap.shift(self._slice)
         picked[in_result.get()] = source
     return [PickFrameResultTile(data=picked)]
Пример #10
0
    def _get_tiles_with_roi(self, crop_to, full_frames, dest_dtype, roi):
        """
        With a ROI, we yield tiles from a "compressed" navigation axis, relative to
        the beginning of the partition. Compressed means, only frames that have a 1
        in the ROI are considered, and the resulting tile slices are from a coordinate
        system that has the shape `(np.count_nonzero(roi),)`.

        Frames are read one at a time into a shared stack buffer; a tile is
        yielded whenever `stackheight` frames have been collected, plus one
        final, shorter tile for any remaining frames. All yielded tiles are
        views into the same buffer.
        """
        sig_shape = self.meta.shape.sig
        sig_origin = tuple([0] * len(sig_shape))
        if crop_to is not None:
            sig_origin = tuple(crop_to.origin[-sig_shape.dims:])
            sig_shape = crop_to.shape.sig
        if full_frames:
            sig_shape = self.meta.shape.sig
        stackheight = self._get_stackheight(sig_shape=sig_shape,
                                            dest_dtype=dest_dtype)
        tile_buf = zeros_aligned((stackheight, ) + tuple(sig_shape),
                                 dtype=dest_dtype)

        frames_read = 0  # total number of frames read so far
        tile_idx = 0  # number of frames in the stack currently being filled
        outer_frame = 0  # compressed nav index of the first frame of that stack
        indices = _roi_to_indices(roi, self._start_frame,
                                  self._start_frame + self._num_frames)

        with self._fileset as fileset:
            for frame_idx in indices:
                fileset.read_images_multifile(
                    start=frame_idx,
                    stop=frame_idx + 1,
                    out=tile_buf[tile_idx].reshape((1, ) + tuple(sig_shape)),
                    crop_to=crop_to,
                )

                tile_idx += 1
                frames_read += 1

                if tile_idx == stackheight:
                    tile_slice = Slice(origin=(outer_frame, ) + sig_origin,
                                       shape=Shape(
                                           (tile_idx, ) + tuple(sig_shape),
                                           sig_dims=sig_shape.dims))
                    yield DataTile(data=tile_buf[:tile_idx, ...],
                                   tile_slice=tile_slice)
                    tile_idx = 0
                    outer_frame = frames_read
        if tile_idx != 0:
            # last tile, size != stackheight
            tile_slice = Slice(origin=(outer_frame, ) + sig_origin,
                               shape=Shape((tile_idx, ) + tuple(sig_shape),
                                           sig_dims=sig_shape.dims))
            yield DataTile(data=tile_buf[:tile_idx, ...],
                           tile_slice=tile_slice)
0
    def get_macrotile(self, dest_dtype="float32", roi=None):
        '''
        Read the whole partition and return it as one single tile.

        This is useful to support process_partition() in UDFs and to
        construct dask arrays from datasets.

        Note
        ----

        For datasets that are compressed and chunked in the navigation
        axis, this can be inefficient: either the macrotiles are forced to
        be large, or there is read amplification, meaning that much more
        data is read from the HDF5 file than is actually needed.

        As an example, take a chunking of :code:`(32, 32, 32, 32)` in a
        dataset of shape :code:`(128, 128, 256, 256)`. Because partitions
        are contiguous in the navigation axis, the partition has to cover
        all of :code:`(32, 128, 256, 256)`.

        Alternatively, the partition can be kept smaller, for example
        :code:`(3, 128, 256, 256)`. Then only 3*32 of the 32*32 frames of
        each chunk read from HDF5 can be used, a read amplification of
        roughly 10x.
        '''
        partition_shape = self.shape
        chunks = self._chunks
        tileshape = partition_shape if chunks is None else chunks

        flat_tileshape = Shape(
            tileshape,
            sig_dims=self.slice.shape.sig.dims,
        ).flatten_nav()
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=flat_tileshape,
            dataset_shape=self.meta.shape,
        )

        macro_shape = self.slice.adjust_for_roi(roi).shape
        data = zeros_aligned(macro_shape, dtype=dest_dtype)

        tiles = self.get_tiles(
            tiling_scheme=tiling_scheme,
            dest_dtype=dest_dtype,
            roi=roi,
        )
        for tile in tiles:
            # position of the tile relative to the start of the partition
            within_partition = tile.tile_slice.shift(self.slice)
            data[within_partition.get()] = tile

        macro_slice = Slice(
            origin=(self.slice.origin[0], 0, 0),
            shape=Shape(data.shape, sig_dims=2),
        )
        return DataTile(
            data,
            tile_slice=macro_slice,
            scheme_idx=0,
        )
Пример #12
0
    def __call__(self):
        """
        Apply all masks to every tile of the partition.

        Tiles are read using `self.tiling_scheme`, or a scheme of single
        full frames if none is set. The loop runs inside
        `set_num_threads(1)`. Returns a single-element list with a
        MaskResultTile covering the navigation part of the partition.
        """
        mask_count = len(self.masks)
        accum_shape = (mask_count,) + tuple(self.partition.shape.nav)
        accum = zeros_aligned(accum_shape, dtype=self.dtype)

        # FIXME: tileshape negotiation!
        part_shape = self.partition.shape
        single_frame = Shape(
            (1,) + tuple(part_shape.sig),
            sig_dims=part_shape.sig.dims,
        )
        scheme = self.tiling_scheme
        if scheme is None:
            scheme = TilingScheme.make_for_shape(
                tileshape=single_frame,
                dataset_shape=part_shape,  # ...
            )

        tile_iter = self.partition.get_tiles(
            tiling_scheme=scheme,
            dest_dtype=self.read_dtype,
        )

        with set_num_threads(1):
            # torch is optional: fall back to None if it is not installed
            try:
                import torch
            except ImportError:
                torch = None

            for tile in tile_iter:
                frames_flat = tile.flat_data
                mask_stack = self.masks.get(tile, self.mask_dtype)

                if isinstance(mask_stack, sparse.SparseArray):
                    product = sparse.dot(frames_flat, mask_stack)
                elif scipy.sparse.issparse(mask_stack):
                    # scipy.sparse with the old matrix interface:
                    # there, "*" means the dot product
                    product = frames_flat * mask_stack
                elif self.use_torch:
                    lhs = torch.from_numpy(frames_flat)
                    rhs = torch.from_numpy(mask_stack)
                    product = torch.mm(lhs, rhs).numpy()
                else:
                    product = frames_flat.dot(mask_stack)

                rel_slice = tile.tile_slice.shift(self.partition.slice)
                shaped = self.reshaped_data(data=product, dest_slice=rel_slice)
                # the Ellipsis covers the leading "number of masks" axis
                nav_selector = (...,) + rel_slice.get(nav_only=True)
                accum[nav_selector] += shaped

            result_tile = MaskResultTile(
                data=accum,
                dest_slice=self.partition.slice.get(nav_only=True),
            )
            return [result_tile]
Пример #13
0
    def get_macrotile(self, mmap=False, dest_dtype="float32", roi=None):
        '''
        Read the whole partition and return it as one single tile.

        This is useful to support process_partition() in UDFs and to
        construct dask arrays from datasets.

        If `roi` is given, it is flattened, and only the frames of this
        partition that are set in it end up in the tile. The `mmap`
        parameter is accepted, but not used by this method.
        '''
        frame_count = self._num_frames
        if roi is not None:
            first = self._start_frame
            roi = roi.reshape((-1, ))
            in_partition = roi[first:first + frame_count]
            frame_count = np.count_nonzero(in_partition)

        macro_buf = zeros_aligned((frame_count, 1860, 2048), dtype=dest_dtype)
        frames = self._read_full_frames(dest_dtype=dest_dtype, roi=roi)
        for position, frame_tile in enumerate(frames):
            macro_buf[position] = frame_tile.data

        macro_slice = Slice(
            origin=(self._start_frame, 0, 0),
            shape=Shape(macro_buf.shape, sig_dims=2),
        )
        return DataTile(data=macro_buf, tile_slice=macro_slice)
Пример #14
0
 def pixel_data(self):
     """
     Read this block's pixels via `self.readinto` and return them as a
     uint16 array of shape (930, 16).

     Raises ValueError if `self.is_valid` is false.
     """
     if self.is_valid:
         pixels = zeros_aligned((930 * 16), dtype="uint16")
         self.readinto(pixels)
         return pixels.reshape(930, 16)
     raise ValueError("invalid block: %r" % self)
Пример #15
0
 def zeros(self, *args, **kwargs):
     """
     Allocate a zero-filled array: via `zeros_aligned` if
     `self._enable_direct` is set, via the parent class otherwise.
     All arguments are passed through unchanged.
     """
     if not self._enable_direct:
         return super().zeros(*args, **kwargs)
     return zeros_aligned(*args, **kwargs)
Пример #16
0
    def read_stacked(self,
                     start_at_frame,
                     num_frames,
                     stackheight=16,
                     dtype="float32",
                     crop_to=None):
        """
        Reads `stackheight` blocks into a single buffer.
        The blocks are read from consecutive frames, always
        from the same coordinates inside the sector of the frame.

        yields DataTiles of the shape (stackheight, 930, 16)
        (different tiles at the borders may be yielded if the stackheight doesn't evenly divide
        the total number of frames to read)

        Tiles that do not intersect `crop_to` are skipped. The yielded
        tiles share their buffer, so the data of a tile is only valid
        until the generator is advanced.

        The file is memory-mapped for the lifetime of the generator; the
        mapping is closed when the generator finishes, is closed early by
        the consumer, or when an exception is raised (previously it was
        only closed when the generator ran to completion).
        """
        tileshape = (stackheight, ) + BLOCK_SHAPE
        tile_buf_full = zeros_aligned(tileshape, dtype=dtype)
        assert DATA_SIZE % 3 == 0
        log.debug(
            "starting read_stacked with start_at_frame=%d, num_frames=%d, stackheight=%d",
            start_at_frame, num_frames, stackheight)

        # byte distance between the same block in two consecutive frames:
        frame_stride = BLOCK_SIZE * BLOCKS_PER_SECTOR_PER_FRAME
        end_frame = start_at_frame + num_frames

        with mmap.mmap(
            fileno=self.f.fileno(),
            length=0,  # whole file
            access=mmap.ACCESS_READ,
        ) as raw_data:
            for outer_frame in range(start_at_frame, end_frame, stackheight):
                # end of the selected frame range, calculate rest of stack:
                if end_frame - outer_frame < stackheight:
                    current_stackheight = end_frame - outer_frame
                    current_tileshape = (current_stackheight, ) + BLOCK_SHAPE
                    tile_buf = zeros_aligned(current_tileshape, dtype=dtype)
                else:
                    current_stackheight = stackheight
                    current_tileshape = tileshape
                    tile_buf = tile_buf_full
                for blockidx in range(BLOCKS_PER_SECTOR_PER_FRAME):
                    start_x = (self.idx + 1) * 256 - (16 * (blockidx % 16 + 1))
                    start_y = 930 * (blockidx // 16)
                    tile_slice = Slice(
                        origin=(
                            outer_frame,
                            start_y,
                            start_x,
                        ),
                        shape=Shape(current_tileshape, sig_dims=self.sig_dims),
                    )
                    if crop_to is not None:
                        intersection = tile_slice.intersection_with(crop_to)
                        if intersection.is_null():
                            continue
                    offset = (
                        self.first_block_offset +
                        outer_frame * frame_stride +
                        blockidx * BLOCK_SIZE)
                    for frame in range(current_stackheight):
                        block_offset = offset + frame * frame_stride
                        input_start = block_offset + HEADER_SIZE
                        input_end = input_start + DATA_SIZE
                        out = tile_buf[frame].reshape((-1, ))
                        # slicing the mmap yields a bytes copy, the tile
                        # data therefore never refers to the mapping
                        decode_uint12_le(
                            inp=raw_data[input_start:input_end],
                            out=out,
                        )
                    yield DataTile(data=tile_buf, tile_slice=tile_slice)
Пример #17
0
    def read_images_multifile(self, start, stop, out, crop_to=None):
        """
        Read the images [`start`, `stop`) of the dataset, pre-process them
        and write the result into `out`, which is also returned.

        start, stop: dataset-global indices; the range may cross file
        boundaries.

        The raw frames are read by the parent class and then go through
        these pre-processing steps:

        1) conversion into float
        2) offset correction (dark frame subtraction)
        3) folding
        4) application of the gain map
        5) un-binning
        """
        # 1) the raw buffer has the dtype of `out`, so the conversion
        # happens while the parent class writes into it
        raw_shape = (out.shape[0], ) + tuple(self._meta['raw_frame_size'])
        raw = zeros_aligned(raw_shape, dtype=out.dtype)
        super().read_images_multifile(
            start=start,
            stop=stop,
            out=raw,
            crop_to=crop_to,
        )

        # 2) offset correction
        dark = self._dark_frame
        if dark is not None:
            raw -= dark

        # 3) folding: the right part of the raw frame ends up below the
        # left part (imagine the bottom-right corner as a hinge)
        half_width = out.shape[2]
        assert out.shape[1] % 2 == 0
        half_height = out.shape[1] // 2

        left = raw[..., :half_width]
        # negative strides flip the right part in both x and y direction
        right = raw[..., half_width:][:, ::-1, ::-1]

        # 4) gain map, applied in-place through the views into `raw`
        gain_map = self._gain_map
        if gain_map is not None:
            split = gain_map.shape[0] // 2
            left *= gain_map[:split, ...]
            right *= gain_map[split:, ...]

        # 5) un-binning
        bin_factor = self._files[0].global_header['readoutmode']['bin']
        if bin_factor > 1:
            left = _unbin(left, factor=bin_factor)
            right = _unbin(right, factor=bin_factor)

        out[..., :half_height, :] = left
        out[..., half_height:, :] = right

        # FIXME: cropping is still to be implemented:
        assert crop_to is None or tuple(crop_to.shape.sig) == tuple(
            out.shape[1:])

        return out