Example #1
def _untangle_raw(data, hdr_info, stack_size):
    """Corrects for the tangled raw mib format.

    Only the case for quad chip is considered here.

    Parameters
    ----------
    data: dask array
        stack with the detector array not yet reshaped, e.g. for a single 512*512 frame: (1, 262144)
    hdr_info: dict
        info read from the header - output of the _parse_hdr function
    stack_size: int
        The number of frames in the data

    Returns
    -------
    untangled_data: dask array
        corrected dask array object reshaped on the detector plane, e.g. for a single frame case
        as above: (1, 512, 512)
    """
    width = hdr_info["width"]
    height = hdr_info["height"]
    width_height = width * height
    if (
        hdr_info["Counter Depth (number)"] == 24
        or hdr_info["Counter Depth (number)"] == 12
    ):
        cols = 4
    elif hdr_info["Counter Depth (number)"] == 1:
        cols = 64
    elif hdr_info["Counter Depth (number)"] == 6:
        cols = 8
    else:
        raise ValueError("Unsupported 'Counter Depth (number)' in header.")

    data = data.reshape((stack_size * width_height))

    data = data.reshape(stack_size, height * (height // cols), cols)

    data = da.flip(data, 2)

    if hdr_info["Assembly Size"] == "2x2":
        data = data.reshape((stack_size * width_height))
        data = data.reshape(stack_size, 512 // 2, 512 * 2)

        det1 = data[:, :, 0:256]
        det2 = data[:, :, 256:512]
        det3 = data[:, :, 512 : 512 + 256]
        det4 = data[:, :, 512 + 256 :]

        det3 = da.flip(det3, 2)
        det3 = da.flip(det3, 1)

        det4 = da.flip(det4, 2)
        det4 = da.flip(det4, 1)

        untangled_data = da.concatenate(
            (da.concatenate((det1, det3), 1), da.concatenate((det2, det4), 1)), 2
        )
    else:
        raise NotImplementedError(
            "Only the 2x2 (quad chip) assembly is handled here."
        )
    return untangled_data
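
A rough, self-contained sketch (not part of the original module) of the quadrant stitch performed above, applied to a synthetic 256x1024 "tangled" frame; all shapes and values here are illustrative only:

import dask.array as da
import numpy as np

# one synthetic "tangled" frame: (stack, rows, tangled columns)
frame = da.from_array(np.arange(256 * 1024).reshape(1, 256, 1024))
frame = da.flip(frame, 2)

det1 = frame[:, :, 0:256]
det2 = frame[:, :, 256:512]
det3 = da.flip(da.flip(frame[:, :, 512:768], 2), 1)   # rotate det3 by 180 degrees
det4 = da.flip(da.flip(frame[:, :, 768:], 2), 1)      # rotate det4 by 180 degrees

untangled = da.concatenate(
    (da.concatenate((det1, det3), 1), da.concatenate((det2, det4), 1)), 2
)
print(untangled.shape)  # (1, 512, 512)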
Example #2
def flip_dim_coord(cube, coord_name):
    """Flip (reverse) dimensional coordinate of cube."""
    logger.info("Flipping dimensional coordinate %s...", coord_name)
    coord = cube.coord(coord_name, dim_coords=True)
    coord_idx = cube.coord_dims(coord)[0]
    coord.points = np.flip(coord.points)
    if coord.bounds is not None:
        coord.bounds = np.flip(coord.bounds, axis=0)
    cube.data = da.flip(cube.core_data(), axis=coord_idx)
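
A minimal, iris-free sketch of the same idea (plain numpy/dask stand-ins replace the cube and its coordinate; names and shapes are illustrative): reverse the coordinate vector and flip the lazily evaluated data along the matching axis so values stay aligned with their coordinates.

import dask.array as da
import numpy as np

lats = np.linspace(-90, 90, 5)               # ascending dimension coordinate
data = da.random.random((5, 8), chunks=2)    # lazy (lat, lon) field

lats = np.flip(lats)                         # coordinate now descending
data = da.flip(data, axis=0)                 # flip data along the lat axis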
Example #3
    def get_image_chunk_mmap(self, im_start, buffer_number):
        # (("t_value"),("<u4")), (("Milliseconds"), ("<u2")), (("Microseconds"), ("<u2"))]
        record_dtype = [("Frame", np.uint16, (self.image_dict["ImageHeight"],
                                              self.image_dict["ImageWidth"])),
                        ("TimeStamps", bytes, 8)]
        off = 8192 + buffer_number * self.image_dict["GroupingBytes"]
        top = np.memmap(self.top,
                        dtype=record_dtype,
                        offset=off,
                        shape=self.segment_prebuffer)
        bottom = np.memmap(self.bottom,
                           dtype=record_dtype,
                           offset=off,
                           shape=self.segment_prebuffer)
        # flip the top half along the frame axis before stitching it to the bottom
        d = da.concatenate((da.flip(top["Frame"], axis=0), bottom["Frame"]),
                           axis=0)
        return d
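
A toy sketch of the top/bottom stitch, with small in-memory arrays standing in for the "Frame" fields of the two memory-mapped record arrays:

import dask.array as da
import numpy as np

top = da.from_array(np.zeros((16, 256, 512), dtype=np.uint16))
bottom = da.from_array(np.ones((16, 256, 512), dtype=np.uint16))

# flip the first half along axis 0 before joining, as in the reader above
stack = da.concatenate((da.flip(top, axis=0), bottom), axis=0)
print(stack.shape)  # (32, 256, 512)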
Example #4
    def get_dataset(self, dataset_id, ds_info):
        """Load a dataset."""
        file_key = ds_info.get('file_key', dataset_id['name'])
        dsname = 'Grid/' + file_key
        data = self.get(dsname)
        data = data.squeeze().transpose()
        if data.ndim >= 2:
            data = data.rename({data.dims[-2]: 'y', data.dims[-1]: 'x'})
        data.data = da.flip(data.data, axis=0)

        fill = data.attrs['_FillValue']
        data = data.where(data != fill)

        for key in list(data.attrs.keys()):
            val = data.attrs[key]
            if isinstance(val, h5py.h5r.Reference):
                del data.attrs[key]
            if isinstance(val, np.ndarray):
                if isinstance(val[0][0], h5py.h5r.Reference):
                    del data.attrs[key]
        return data
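
A minimal xarray sketch of the same pattern (the toy array, dims and attributes are illustrative): flip the dask-backed payload along the y axis, then mask the fill value.

import dask.array as da
import numpy as np
import xarray as xr

arr = xr.DataArray(da.from_array(np.arange(12.0).reshape(3, 4)),
                   dims=("y", "x"), attrs={"_FillValue": 0.0})
arr.data = da.flip(arr.data, axis=0)              # flip north/south
arr = arr.where(arr != arr.attrs["_FillValue"])   # mask the fill value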
Example #5
def _read_mib(fp, hdr_info, mmap_mode='r'):
    """Read a raw .mib file using memory mapping where the array
    is stored on disk and not directly loaded, but may be treated
    like a numpy.ndarray.

    Parameters
    ----------
    fp: str
        Filepath of .mib file to be loaded.

    hdr_info: dict
        A dictionary containing the keywords as parsed by read_hdr
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode
        (see `numpy.memmap`).

    Returns
    -------
    data : dask.array.Array
        Lazy dask array backed by the memory-mapped file.

    """

    reader_offset = 0

    width = hdr_info['width']
    height = hdr_info['height']

    offset = hdr_info['offset']
    data_length = hdr_info['data-length']
    data_type = hdr_info['data-type']
    endian = hdr_info['byte-order']
    record_by = hdr_info['record-by']

    depth = _get_mib_depth(hdr_info, fp)

    if data_type == 'signed':
        data_type = 'int'
    elif data_type == 'unsigned':
        data_type = 'uint'
    elif data_type == 'float':
        pass
    else:
        raise TypeError('Unknown "data-type" string.')

    # mib data always big-endian
    endian = '>'

    data_type += str(int(data_length))
    if data_type == 'uint1':
        data_type = 'uint8'
    data_type = np.dtype(data_type).newbyteorder(endian)
    if data_length == '1':
        hdr_multiplier = 1
    else:
        hdr_multiplier = (int(data_length) / 8)**-1

    hdr_bits = int(hdr_info['data offset'] * hdr_multiplier)

    data = np.memmap(fp, offset=reader_offset, dtype=data_type, mode=mmap_mode)
    data = da.from_array(data)

    if record_by == 'vector':  # spectral image
        size = (height, width, depth)
        try:
            data = data.reshape(size)
        # in case of incomplete frame:
        except ValueError:
            if hdr_info['raw'] == 'R64':

                data = data.reshape(depth)

    elif record_by == 'image':  # stack of images
        width_height = width * height

        size = (depth, height, width)

        # remove headers at the beginning of each frame and reshape

        if hdr_info['Assembly Size'] == '2x2':
            if hdr_info['Counter Depth (number)'] == 1:
                # RAW 1 bit data: the header bits are written as uint8 but the frames
                # are binary and need to be unpacked as such.
                data = data.reshape(-1, width_height // 8 + hdr_bits)
                data = data[:, hdr_bits:]
                data = np.unpackbits(data)
                data = data.reshape(depth, width, height)
            else:
                data = data.reshape(-1, width_height +
                                    hdr_bits)[:, -width_height:].reshape(
                                        depth, width, height)
        elif hdr_info['Assembly Size'] == '1x1':
            data = data.reshape(-1, width_height +
                                hdr_bits)[:, -width_height:].reshape(
                                    depth, width, height)
            data = data.reshape(depth, 256, 256)

        if hdr_info['raw'] == 'R64':
            if hdr_info['Counter Depth (number)'] == 24 or hdr_info[
                    'Counter Depth (number)'] == 12:
                COLS = 4

            if hdr_info['Counter Depth (number)'] == 1:
                COLS = 64

            if hdr_info['Counter Depth (number)'] == 6:
                COLS = 8

            data = data.reshape((depth * width_height))

            data = data.reshape(depth, height * (height // COLS), COLS)

            data = da.flip(data, 2)

            if hdr_info['Assembly Size'] == '2x2':

                data = data.reshape((depth * width_height))
                data = data.reshape(depth, 512 // 2, 512 * 2)

                det1 = data[:, :, 0:256]
                det2 = data[:, :, 256:512]
                det3 = data[:, :, 512:512 + 256]
                det4 = data[:, :, 512 + 256:]

                det3 = da.flip(det3, 2)
                det3 = da.flip(det3, 1)

                det4 = da.flip(det4, 2)
                det4 = da.flip(det4, 1)

                data = da.concatenate((da.concatenate(
                    (det1, det3), 1), da.concatenate((det2, det4), 1)), 2)

    elif record_by == 'dont-care':  # single image
        size = (height, width)
        data = data.reshape(size)

    return data
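
A simplified, self-contained sketch of the per-frame header stripping used above, with a synthetic in-memory stream instead of a memory-mapped .mib file (all sizes are toy values):

import dask.array as da
import numpy as np

width_height = 4 * 4    # pixels per toy frame (4 x 4)
hdr_bits = 3            # header elements preceding each frame
depth = 2               # number of frames

# synthetic raw stream: header followed by pixel data, repeated per frame
raw = np.arange(depth * (hdr_bits + width_height), dtype=">u2")
data = da.from_array(raw)
data = data.reshape(-1, width_height + hdr_bits)[:, -width_height:]
data = data.reshape(depth, 4, 4)
print(data.compute())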
Example #6
def get_poss_bergs_fr_raster(onedem, usedask):

    # trans=onedem.attrs['transform']
    flipax = []
    # if trans[0] < 0:
    #     flipax.append(1)
    # if trans[4] < 0:
    #     flipax.append(0)
    if pd.Series(onedem.x).is_monotonic_decreasing:
        flipax.append(1)
    if pd.Series(onedem.y).is_monotonic_increasing:
        flipax.append(0)

    fjord = onedem.attrs['fjord']
    min_area = fjord_props.get_min_berg_area(fjord)
    # Note: the pixel area will be inaccurate if the resolution is not the same in x and y
    res = onedem.attrs['res'][0]

    if usedask:
        # Daskify the iceberg segmentation process. Note that dask-image has some functionality to operate
        # directly on dask arrays (e.g. dask_image.ndfilters.sobel), which would need to be put into utils.raster.py
        # https://dask-image.readthedocs.io/en/latest/dask_image.ndfilters.html
        # However, as of yet there doesn't appear to be a way to easily implement the watershed segmentation, other than in chunks

        # print(onedem)
        # see else statement with non-dask version for descriptions of what each step is doing
        def seg_wrapper(tiles):
            return raster_ops.labeled_from_segmentation(tiles, [3, 10],
                                                        resolution=res,
                                                        min_area=min_area,
                                                        flipax=[])

        def filter_wrapper(tiles, elevs):
            return raster_ops.border_filtering(tiles, elevs, flipax=[])

        elev_copy = onedem.elevation.data  # should return a dask array
        for ax in flipax:
            elev_copy = da.flip(elev_copy, axis=ax)
        # print(type(elev_copy))

        elev_overlap = da.overlap.overlap(elev_copy,
                                          depth=10,
                                          boundary='nearest')
        seglabeled_overlap = da.map_overlap(
            seg_wrapper, elev_overlap,
            trim=False)  # including depth=10 here will ADD another overlap
        print("Got labeled raster of potential icebergs for an image")
        labeled_overlap = da.map_overlap(filter_wrapper,
                                         seglabeled_overlap,
                                         elev_overlap,
                                         trim=False,
                                         dtype='int32')
        labeled_arr = da.overlap.trim_overlap(labeled_overlap, depth=10)

        # re-flip the labeled_arr so it matches the orientation of the original elev data that's within the xarray
        for ax in flipax:
            labeled_arr = da.flip(labeled_arr, axis=ax)
        # import matplotlib.pyplot as plt
        # print(plt.imshow(labeled_arr))

        try:
            del elev_copy
            del elev_overlap
            del seglabeled_overlap
            del labeled_overlap
            print("deleted the intermediate steps")
        except NameError:
            pass

        # print(da.min(labeled_arr).compute())
        # print(da.max(labeled_arr).compute())

        print("about to get the list of possible bergs")
        print(
            'Please note the transform computation is very application specific (negative y coordinates) and may need generalizing'
        )
        print(
            "this transform computation is particularly sensitive to axis order (y,x) because it is accessed by index number"
        )

        poss_bergs_list = []
        '''
        # I think that by using concatenate=True, it might not actually be using dask for the computation
        
        def get_bergs(labeled_blocks):
            # Note: features.shapes returns a generator. However, if we try to iterate through it with a for loop, the StopIteration exception
            # is not passed up into the for loop and execution hangs when it hits the end of the for loop without completing the function
            block_bergs = list(poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
                                labeled_blocks.astype('int32'), transform=onedem.attrs['transform']))[:-1]
            poss_bergs_list.append(block_bergs)
        
        da.blockwise(get_bergs, '', labeled_arr, 'ij', 
                        meta=pd.DataFrame({'c':[]}), concatenate=True).compute()
        # print(poss_bergs_list[0])
        # print(type(poss_bergs_list))
        
        poss_bergs_gdf = gpd.GeoDataFrame({'geometry':[Polygon(poly) for poly in poss_bergs_list[0]]})
        
        # another approach could be to try and coerce the output from map_blocks into an array, but I suspect you'd still have the geospatial issue
        # https://github.com/dask/dask/issues/3590#issuecomment-464609620

        '''

        # URL: https://stackoverflow.com/questions/66232232/produce-vector-output-from-a-dask-array/66245347?noredirect=1#comment117119583_66245347

        @dask.delayed
        def get_bergs(labeled_blocks, pointer, chunk0, chunk1):

            print("running the dask delayed function")

            def getpx(chunkid, chunksz):
                amin = chunkid[0] * chunksz[0][0]
                amax = amin + chunksz[0][0]
                bmin = chunkid[1] * chunksz[1][0]
                bmax = bmin + chunksz[1][0]
                return (amin, amax, bmin, bmax)

            # order of all inputs (and outputs) should be y, x when axis order is used
            chunksz = (onedem.chunks['y'], onedem.chunks['x'])
            # rasterio_trans = rasterio.transform.guard_transform(onedem.attrs["transform"])
            # print(rasterio_trans)
            ymini, ymaxi, xmini, xmaxi = getpx((chunk0, chunk1), chunksz)

            # print(chunk0, chunk1)
            # print(xmini)
            # print(xmaxi)
            # print(ymini)
            # print(ymaxi)

            # use rasterio Windows and rioxarray to construct transform
            # https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html#window-transforms
            chwindow = rasterio.windows.Window(xmini, ymini, xmaxi - xmini,
                                               ymaxi - ymini)
            trans = onedem.rio.isel_window(chwindow).rio.transform(recalc=True)
            # print(trans)

            return list(
                poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
                    labeled_blocks.astype('int32'), transform=trans))[:-1]

        for __, obj in enumerate(labeled_arr.to_delayed()):
            for bl in obj:
                # get_bergs is already decorated with dask.delayed, so calling it
                # directly yields a lazy task for this block
                piece = get_bergs(bl, *bl.key)
                poss_bergs_list.append(piece)
                del piece

        poss_bergs_list = dask.compute(*poss_bergs_list)
        # tried working with this instead of the for loops above
        # poss_bergs_list = dask.compute([get_bergs(bl, *bl.key) for bl in obj for __, obj in enumerate(labeled_arr.to_delayed())])[0]
        # print(poss_bergs_list)

        # unnest the list of polygons returned by using dask to polygonize
        concat_list = [
            item for sublist in poss_bergs_list for item in sublist
            if len(item) != 0
        ]
        # print(concat_list)

        poss_bergs_gdf = gpd.GeoDataFrame(
            {'geometry': [Polygon(poly) for poly in concat_list]})

        # convert to a geodataframe, combine geometries (in case any bergs were on chunk borders), and generate new polygon list
        print(poss_bergs_gdf)
        # print(poss_bergs_gdf.geometry.plot())
        poss_berg_combined = gpd.overlay(poss_bergs_gdf,
                                         poss_bergs_gdf,
                                         how='union')
        # print(poss_berg_combined)
        # print(poss_berg_combined.geometry.plot())
        poss_bergs = [berg for berg in poss_berg_combined.geometry]
        # print(poss_bergs)
        print(len(poss_bergs))

        try:
            del labeled_arr
            del poss_bergs_list
            del concat_list
            del poss_berg_combined
        except NameError:
            pass

    else:
        print("NOT USING DASK")
        # create copy of elevation values so original dataset values are not impacted by image manipulations
        # and positive/negative coordinate systems can be ignored (note flipax=[] below)
        # something wonky is happening and when I ran this code on Pangeo I needed to NOT flip the elevation values here and then switch the bounding box y value order
        # Not entirely sure what's going on, but need to be aware of this!!
        # print("Note: check for proper orientation of results depending on compute environment. Pangeo results were upside down.")
        elev_copy = np.copy(np.flip(onedem.elevation.values, axis=flipax))
        # flipax=[]

        # generate a labeled array of potential iceberg features, excluding those that are too large or small
        seglabeled_arr = raster_ops.labeled_from_segmentation(
            elev_copy, [3, 10], resolution=res, min_area=min_area, flipax=[])
        print("Got labeled raster of potential icebergs for an image")
        # remove features whose borders are >50% no data values (i.e. the "iceberg" edge is really a DEM edge)
        labeled_arr = raster_ops.border_filtering(seglabeled_arr,
                                                  elev_copy,
                                                  flipax=[]).astype(
                                                      seglabeled_arr.dtype)
        # apparently rasterio can't handle int64 inputs, which is what border_filtering returns

        # import matplotlib.pyplot as plt
        # print(plt.imshow(labeled_arr))
        # create iceberg polygons
        # somehow a < 1 pixel berg made it into this list... I'm doing a secondary filtering by area in the iceberg filter step for now
        poss_bergs = list(
            poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
                labeled_arr, transform=onedem.attrs['transform']))[:-1]

        try:
            del elev_copy
            del seglabeled_arr
            del labeled_arr

        except NameError:
            pass

    return poss_bergs
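
A stripped-down sketch of the flip / process / flip-back pattern from the dask branch above; the doubling lambda stands in for the segmentation and border-filtering calls:

import dask.array as da
import numpy as np

elev = da.from_array(np.random.rand(100, 120), chunks=50)
flipax = [0]                                   # e.g. flip rows if y is ascending

for ax in flipax:                              # orient the raster for processing
    elev = da.flip(elev, axis=ax)

processed = da.map_overlap(lambda block: block * 2, elev,
                           depth=10, boundary="nearest")

for ax in flipax:                              # undo the flips afterwards
    processed = da.flip(processed, axis=ax)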
Example #7
    def advection_step(self, time, output_time=False):
        """Perform forward-backward advection at a single point in time.

        This routine is responsible for creating a new ParticleSet at
        the given time, and performing the forward and backward
        advection steps in the Lagrangian transformation.

        Args:
            time (float): The point in time at which to calculate filtered data.
            output_time (Optional[bool]): Whether to include "time" as
                a numpy array in the output dictionary, for doing manual analysis.

        Note:
            If ``output_time`` is True, the output object will not be compatible
            with the default filtering workflow, :func:`~filter_step`!

            If ``output_dt`` has not been set on the filtering object,
            it will default to the difference between successive time
            steps in the first grid defined in the parcels
            FieldSet. This may be a concern if using data which has
            been sampled at different frequencies in the input data
            files.

        Returns:
            Dict[str, Tuple[int, dask.array.Array]]: A dictionary of the advection
                data, mapping variable names to a pair. The first element is
                the index of the sampled timestep in the data, and the
                second element is a lazy dask array concatenating the forward
                and backward advection data.

        """

        # seed all particles at gridpoints
        ps = self.particleset(time)
        # execute the sample-only kernel to efficiently grab the initial condition
        ps.kernel = self.sample_kernel
        ps.execute(self.sample_kernel, runtime=0, dt=self.advection_dt)

        # set up the temporary output file for the initial condition and
        # forward advection
        outfile = self._advection_cache_class(ps, self.output_dt,
                                              self.sample_variables,
                                              **self._advection_cache_kwargs)

        # now the forward advection kernel can run
        outfile.set_group("forward")
        ps.kernel = self.kernel
        ps.execute(
            self.kernel,
            runtime=self.window_size,
            dt=self.advection_dt,
            output_file=outfile,
            recovery={
                parcels.ErrorCode.ErrorOutOfBounds:
                _recovery_kernel_out_of_bounds
            },
        )

        # reseed particles back on the grid, then advect backwards
        # we don't need any initial condition sampling since we've already done it
        outfile.set_group("backward")
        ps = self.particleset(time)
        ps.kernel = self.kernel
        ps.execute(
            self.kernel,
            runtime=self.window_size,
            dt=-self.advection_dt,
            output_file=outfile,
            recovery={
                parcels.ErrorCode.ErrorOutOfBounds:
                _recovery_kernel_out_of_bounds
            },
        )

        # stitch together and filter all sample variables from the temporary
        # output data
        da_out = {}
        for v in self.sample_variables:
            # load data lazily as dask arrays, for forward and backward segments
            var_array_forward = da.from_array(outfile.data("forward")[v],
                                              chunks=(None, "auto"))[:-1, :]
            var_array_backward = da.from_array(outfile.data("backward")[v],
                                               chunks=(None, "auto"))[:-1, :]

            # get an index into the middle of the array
            time_index_data = var_array_backward.shape[0] - 1

            # construct proper sequence by concatenating data and flipping the backward segment
            # for var_array_forward, skip the initial output for both the sample-only and
            # sample-advection kernels, which have meaningless data
            var_array = da.concatenate((da.flip(var_array_backward[1:, :],
                                                axis=0), var_array_forward))

            da_out[v] = (time_index_data, var_array)

        if output_time:
            da_out["time"] = np.concatenate((
                outfile.data("backward").attrs["time"][1:-1][::-1],
                outfile.data("forward").attrs["time"][:-1],
            ))

        return da_out
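
A toy illustration of the stitch performed in the loop above, using small synthetic arrays in place of the parcels output (axis 0 is time, axis 1 is particles):

import dask.array as da
import numpy as np

backward = da.from_array(np.arange(4 * 3).reshape(4, 3))   # rows: t0, t-1, t-2, t-3
forward = da.from_array(np.arange(4 * 3).reshape(4, 3))    # rows: t0, t1, t2, t3

# drop the duplicated initial sample from the backward segment, reverse it in
# time, and prepend it so time increases monotonically along axis 0
stitched = da.concatenate((da.flip(backward[1:, :], axis=0), forward))
time_index = backward.shape[0] - 1   # row of the originally sampled timestep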
Example #8
    def filter_step(self, time_index, time):
        """Perform forward-backward advection at a single timestep."""

        # seed all particles at gridpoints
        ps = self.particleset(time)
        # execute the sample-only kernel to efficiently grab the initial condition
        ps.kernel = self.sample_kernel
        ps.execute(self.sample_kernel, runtime=0, dt=self.advection_dt)

        # set up the temporary output file for the initial condition and
        # forward advection
        outfile = LagrangeParticleFile(ps, self.output_dt,
                                       self.sample_variables)

        # now the forward advection kernel can run
        outfile.set_group("forward")
        ps.kernel = self.kernel
        ps.execute(
            self.kernel,
            runtime=self.window_size,
            dt=self.advection_dt,
            output_file=outfile,
            recovery={
                parcels.ErrorCode.ErrorOutOfBounds:
                recovery_kernel_out_of_bounds
            },
        )

        # reseed particles back on the grid, then advect backwards
        # we don't need any initial condition sampling since we've already done it
        outfile.set_group("backward")
        ps = self.particleset(time)
        ps.kernel = self.kernel
        ps.execute(
            self.kernel,
            runtime=self.window_size,
            dt=-self.advection_dt,
            output_file=outfile,
            recovery={
                parcels.ErrorCode.ErrorOutOfBounds:
                recovery_kernel_out_of_bounds
            },
        )

        # stitch together and filter all sample variables from the temporary
        # output data
        da_out = {}
        for v in self.sample_variables:
            # load data lazily as dask arrays, for forward and backward segments
            var_array_forward = da.from_array(outfile.data("forward")[v],
                                              chunks=(None, "auto"))
            var_array_backward = da.from_array(outfile.data("backward")[v],
                                               chunks=(None, "auto"))

            # get an index into the middle of the array
            time_index_data = var_array_backward.shape[0]

            # construct proper sequence by concatenating data and flipping the backward segment
            # for var_array_forward, skip the initial output for both the sample-only and
            # sample-advection kernels, which have meaningless data
            var_array = da.concatenate((da.flip(var_array_backward[1:, :],
                                                axis=0), var_array_forward))

            def filter_select(x):
                return signal.filtfilt(*self.inertial_filter,
                                       x)[..., time_index_data]

            # apply scipy filter as a ufunc
            # mapping an array to scalar over the first axis, automatically vectorize execution
            # and allow rechunking (since we have a chunk boundary across the first axis)
            filtered = da.apply_gufunc(
                filter_select,
                "(i)->()",
                var_array,
                axis=0,
                output_dtypes=var_array.dtype,
                allow_rechunk=True,
            )

            da_out[v] = filtered.compute()

        return da_out
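
A self-contained sketch of the filtering step above, with synthetic data and an illustrative Butterworth filter standing in for self.inertial_filter:

import dask.array as da
import numpy as np
from scipy import signal

b, a = signal.butter(4, 0.2)                                # example low-pass filter
series = da.random.random((200, 1000), chunks=(200, 100))   # (time, particles)
centre = series.shape[0] // 2

def filter_select(x):
    # filter along the time axis (moved to the last position by apply_gufunc)
    # and keep only the central sample
    return signal.filtfilt(b, a, x)[..., centre]

filtered = da.apply_gufunc(filter_select, "(i)->()", series, axis=0,
                           output_dtypes=series.dtype, allow_rechunk=True)
result = filtered.compute()   # one filtered value per particle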