Example #1
def array2netcdf(
    dar: xr.DataArray, path: Optional[str] = None, **kwargs
) -> Optional[bytes]:
    """
    Write a data array to netcdf format.

    This is a special form of ``to_netcdf`` that serializes any Python
    objects in the attrs dict using cloudpickle.

    Parameters
    ----------
    dar
        Data Array containing continuous data
    path
        The path to which the file is saved; if None, a bytes representation
        is returned
    kwargs
        Keyword arguments passed to the to_netcdf function

    Notes
    -----
    See :meth:`xarray.DataArray.to_netcdf` for supported kwargs
    """
    byte_str = cloudpickle.dumps(dar.attrs)
    old_attrs = dar.attrs
    dar.attrs = {"attrs": np.frombuffer(byte_str, dtype=np.int8)}
    out = dar.to_netcdf(path=path, **kwargs)
    dar.attrs = old_attrs
    return out
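A minimal round-trip sketch to complement the example above (the reader-side helper netcdf2array and its use of xr.open_dataarray are assumptions, not part of the original source):

import cloudpickle
import numpy as np
import xarray as xr

def netcdf2array(path: str) -> xr.DataArray:
    # Hypothetical counterpart to array2netcdf: the pickled attrs were stored
    # as an int8 array, so convert them back to bytes and unpickle them.
    dar = xr.open_dataarray(path)
    dar.attrs = cloudpickle.loads(np.asarray(dar.attrs["attrs"]).tobytes())
    return dar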
Example #2
def set_attrs_for_station(
        field: xr.DataArray,
        field_record: typing.Dict
) -> xr.DataArray:
    """
    Set attributes for station vertical profile data.

    Parameters
    ----------
    field
        Vertical profile field extracted at a station point.
    field_record
        Record dict with at least the keys "name", "long_name" and "units".

    Returns
    -------
    xr.DataArray
        The same field with its attrs replaced.
    """
    if "lon_0" in field.attrs:
        lon_0 = field.attrs["lon_0"]
    else:
        lon_0 = field.longitude.item()

    if "lat_0" in field.attrs:
        lat_0 = field.attrs["lat_0"]
    else:
        lat_0 = field.latitude.item()

    field.attrs = {
        "name": field_record["name"],
        "long_name": field_record["long_name"],
        "units": field_record["units"],
        "lat_0": lon_0,
        "lon_0": lat_0,
    }
    return field
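A minimal usage sketch for set_attrs_for_station (the station coordinates and the field_record dict are illustrative assumptions):

import xarray as xr

profile = xr.DataArray(
    [280.0, 275.0, 270.0],
    dims="level",
    coords={"level": [1000, 850, 700], "longitude": 116.4, "latitude": 39.9},
)
record = {"name": "t", "long_name": "temperature", "units": "K"}
profile = set_attrs_for_station(profile, record)
print(profile.attrs)  # {'name': 't', ..., 'lat_0': 39.9, 'lon_0': 116.4}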
Example #3
    def get_dataset(self, key, info):
        """Load a dataset."""
        if self._channel != key.name:
            return

        logger.debug('Reading %s.', key.name)
        # FIXME: get this from MTD_MSIL1C.xml
        quantification_value = 10000.
        jp2 = glymur.Jp2k(self.filename)
        bitdepth = 0
        for seg in jp2.codestream.segment:
            try:
                bitdepth = max(bitdepth, seg.bitdepth[0])
            except AttributeError:
                pass

        jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

        # Initialize the jp2 reader / doesn't work in a multi-threaded context.
        # jp2[0, 0]
        # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

        data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
        data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

        proj = DataArray(data, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = '%'
        proj.attrs['platform_name'] = self.platform_name
        return proj
Example #4
    def from_tree(cls, tree, ctx):
        """
        Converts basic types representing YAML trees into an 'xarray.DataArray'.

        Parameters
        ----------
        tree :
            An instance of a basic Python type (possibly nested) that
            corresponds to a YAML subtree.
        ctx :
            An instance of the 'AsdfFile' object that is being constructed.

        Returns
        -------
        xarray.DataArray :
            An instance of the 'xarray.DataArray' type.

        """
        data = tree["data"].data
        dims = tree["data"].dimensions
        coords = {}
        for coordinate in tree["coordinates"]:
            coords[coordinate.name] = (coordinate.dimensions, coordinate.data)

        obj = DataArray(data=data, coords=coords, dims=dims)

        obj.attrs = tree["attributes"]

        return obj
Example #5
def scale_and_clip_dataarray(dataarray: xr.DataArray,
                             *,
                             scale_factor=1,
                             add_offset=0,
                             clip_range=None,
                             valid_range=None,
                             new_nodata=-999,
                             new_dtype='int16'):
    """Scale and offset a DataArray, optionally clip it and apply a valid range, cast to ``new_dtype``, and re-apply nodata."""
    orig_attrs = dataarray.attrs
    nodata = dataarray.attrs['nodata']

    mask = dataarray.data == nodata

    # add another mask here for if data > 10000 then also make that nodata
    dataarray = dataarray * scale_factor + add_offset

    if clip_range is not None:
        dataarray = dataarray.clip(*clip_range)

    dataarray = dataarray.astype(new_dtype)

    dataarray.data[mask] = new_nodata
    if valid_range is not None:
        valid_min, valid_max = valid_range
        dataarray = dataarray.where(dataarray >= valid_min, new_nodata)
        dataarray = dataarray.where(dataarray <= valid_max, new_nodata)
    dataarray.attrs = orig_attrs
    dataarray.attrs['nodata'] = new_nodata

    return dataarray
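A minimal usage sketch for scale_and_clip_dataarray (values are illustrative only; the function expects a 'nodata' entry in attrs):

import numpy as np
import xarray as xr

raw = xr.DataArray(
    np.array([[0.0, 0.25], [-999.0, 1.0]]),
    dims=("y", "x"),
    attrs={"nodata": -999.0},
)
scaled = scale_and_clip_dataarray(
    raw, scale_factor=10000, clip_range=(0, 10000), new_nodata=-999, new_dtype="int16"
)
print(scaled.values)           # [[0 2500] [-999 10000]]
print(scaled.attrs["nodata"])  # -999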
Example #6
    def read(self):
        """

        :return: DataArray objects populated with data read from eeg files. The size of the output is
        number of channels x number of start offsets x number of time series points
        The corresponding DataArray axes are: 'channels', 'start_offsets', 'offsets'

        """

        eventdata, read_ok_mask = self.read_file(self.dataroot, self.channels,
                                                 self.start_offsets,
                                                 self.read_size)
        # multiply by the gain
        eventdata *= self.params_dict['gain']

        eventdata = DataArray(
            eventdata,
            dims=[self.channel_name, 'start_offsets', 'offsets'],
            coords={
                self.channel_name: self.channels,
                'start_offsets': self.start_offsets.copy(),
                'offsets': np.arange(self.read_size),
                'samplerate': self.params_dict['samplerate']
            })

        from copy import deepcopy
        eventdata.attrs = deepcopy(self.params_dict)

        return eventdata, read_ok_mask
Example #7
    def get_dataset(self, key, info):
        """Load a dataset."""
        if self._channel != key['name']:
            return

        logger.debug('Reading %s.', key['name'])
        # FIXME: get this from MTD_MSIL1C.xml
        quantification_value = 10000.
        jp2 = glymur.Jp2k(self.filename)
        bitdepth = 0
        for seg in jp2.codestream.segment:
            try:
                bitdepth = max(bitdepth, seg.bitdepth[0])
            except AttributeError:
                pass

        jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

        # Initialize the jp2 reader / doesn't work in a multi-threaded context.
        # jp2[0, 0]
        # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

        data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
        data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

        proj = DataArray(data, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = '%'
        proj.attrs['platform_name'] = self.platform_name
        return proj
Example #8
    def read(self):
        """

        :return: DataArray objects populated with data read from eeg files. The size of the output is
        number of channels x number of start offsets x number of time series points
        The corresponding DataArray axes are: 'channels', 'start_offsets', 'offsets'

        """

        eventdata, read_ok_mask = self.read_file(self.dataroot, self.channels,
                                                 self.start_offsets, self.read_size)
        # multiply by the gain
        eventdata *= self.params_dict['gain']

        eventdata = DataArray(eventdata,
                              dims=[self.channel_name, 'start_offsets', 'offsets'],
                              coords={
                                  self.channel_name: self.channels,
                                  'start_offsets': self.start_offsets.copy(),
                                  'offsets': np.arange(self.read_size),
                                  'samplerate': self.params_dict['samplerate']

                              }
                              )

        from copy import deepcopy
        eventdata.attrs = deepcopy(self.params_dict)

        return eventdata, read_ok_mask
Example #9
def deg2mpm(da: xr.DataArray) -> xr.DataArray:
    """Convert ``xarray.Data[Array,set]`` from degree to meter/meter."""
    attrs = da.attrs
    da = np.tan(np.deg2rad(da))
    da.attrs = attrs
    da.name = "slope"
    da.attrs["units"] = "meters/meters"
    return da
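A minimal usage sketch for deg2mpm (the input values are illustrative):

import numpy as np
import xarray as xr

slope_deg = xr.DataArray(np.array([0.0, 30.0, 45.0]), dims="x", attrs={"units": "degrees"})
slope_mpm = deg2mpm(slope_deg)
print(slope_mpm.values)          # [0.0, 0.577..., 1.0]
print(slope_mpm.attrs["units"])  # meters/meters
print(slope_mpm.name)            # slope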
Example #10
    def _to_file(self, data: xr.DataArray) -> None:
        """Save the model sequences to file

        Create the desired parent directory and delete existing file with same name, if necessary, and then dump to file.
        """
        assert isinstance(data, xr.DataArray), "data must be data array"

        data.attrs = self._get_attributes()  # save all the model parameters
        if os.path.isfile(self._get_filename()):
            os.remove(self._get_filename())
        data.to_netcdf(self._get_filename(),
                       format="netCDF4",
                       engine="netcdf4")
Example #11
def set_attrs_for_lat_section(
        field: xr.DataArray,
        field_record: typing.Dict
) -> xr.DataArray:
    if "lon_0" in field.attrs:
        lon_0 = field.attrs["lon_0"]
    else:
        lon_0 = field.longitude.item()
    field.attrs = {
        "name": field_record["name"],
        "long_name": field_record["long_name"],
        "units": field_record["units"],
        "lon_0": lon_0,
    }
    return field
Example #12
    def get_dataset(self, key, info):
        """Get the dataset refered to by `key`."""
        angles = self._get_coarse_dataset(key, info)
        if angles is None:
            return

        # Fill gaps at edges of swath
        darr = DataArray(angles, dims=['y', 'x'])
        darr = darr.bfill('x')
        darr = darr.ffill('x')
        angles = darr.data

        res = self.interpolate_angles(angles, key['resolution'])

        proj = DataArray(res, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = 'degrees'
        proj.attrs['platform_name'] = self.platform_name
        return proj
Example #13
    def read(self):
        """Read EEG data.

        Returns
        -------
        event_data : DataArray
            Populated with data read from eeg files. The size of the output is
            number of channels * number of start offsets * number of time series
            points. The corresponding DataArray axes are: 'channels',
            'start_offsets', 'offsets'
        read_ok_mask : np.ndarray
            Mask of chunks that were properly read.

        Notes
        -----
        This method should *not* be overridden by subclasses. Instead, override
        the :meth:`read_file` method to implement new file types (see for
        example the HDF5 reader).

        """

        eventdata, read_ok_mask = self.read_file(self.dataroot,
                                                 self.channel_labels,
                                                 self.start_offsets,
                                                 self.read_size)
        # multiply by the gain
        eventdata *= self.params_dict['gain']

        eventdata = DataArray(eventdata,
                              dims=[self.channel_name, 'start_offsets', 'offsets'],
                              coords={
                                  self.channel_name: self.channels,
                                  'start_offsets': self.start_offsets.copy(),
                                  'offsets': np.arange(self.read_size),
                                  'samplerate': self.params_dict['samplerate']
                              }
                              )

        from copy import deepcopy
        eventdata.attrs = deepcopy(self.params_dict)

        return eventdata, read_ok_mask
Example #14
    def get_dataset(self, key, info):
        """Get the dataset refered to by `key`."""

        angles = self._get_coarse_dataset(key, info)
        if angles is None:
            return

        # Fill gaps at edges of swath
        darr = DataArray(angles, dims=['y', 'x'])
        darr = darr.bfill('x')
        darr = darr.ffill('x')
        angles = darr.data

        res = self.interpolate_angles(angles, key.resolution)

        proj = DataArray(res, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = 'degrees'
        proj.attrs['platform_name'] = self.platform_name
        return proj
Example #15
def agg_time(array: xr.DataArray,
             ndayagg: int = 1,
             method: str = 'mean',
             firstday: pd.Timestamp = None,
             rolling: bool = False) -> xr.DataArray:
    """
    Aggegates a daily time dimension, that should be continuous, otherwise non-neighbouring values are taken together. 
    It returns a left stamped aggregation of ndays
    For non-rolling aggregation it is possible to supply a firstday, to sync the blocks with another timeseries.
    Trailing Nan's are removed.
    """
    assert (np.diff(array.time) == np.timedelta64(1, 'D')).all(
    ), "time axis should be a continuous daily to be aggregated, though nan is allowed"
    if not firstday is None:
        array = array.sel(time=slice(firstday, None))
    if rolling:
        name = array.name
        attrs = array.attrs
        f = getattr(array.rolling({'time': ndayagg}, center=False),
                    method)  # Stamped right
        array = f()
        array = array.assign_coords(
            time=array.time - pd.Timedelta(str(ndayagg - 1) + 'D')).isel(
                time=slice(ndayagg - 1,
                           None))  # Left stamping, trailing nans removed
        array.name = name
        array.attrs = attrs
    else:
        input_length = len(array.time)
        f = getattr(
            array.resample(time=str(ndayagg) + 'D',
                           closed='left',
                           label='left'), method)
        array = f(dim='time', keep_attrs=True, skipna=False)

        if (input_length % ndayagg) != 0:
            array = array.isel(
                time=slice(0, -1, None)
            )  # Remove the last aggregation, if it has not been based on the full ndayagg

    return array
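A minimal usage sketch for agg_time (a synthetic daily series, not from the original source):

import numpy as np
import pandas as pd
import xarray as xr

daily = xr.DataArray(
    np.arange(10.0),
    dims="time",
    coords={"time": pd.date_range("2000-01-01", periods=10, freq="D")},
    attrs={"units": "K"},
    name="temperature",
)
blocks = agg_time(daily, ndayagg=5, method="mean")                  # two left-stamped 5-day means
rolling = agg_time(daily, ndayagg=5, method="mean", rolling=True)   # left-stamped rolling 5-day means
print(blocks.attrs)   # attrs are kept in both cases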
Example #16
def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs):

    weights = DataArray(np.random.randn(2, 2), attrs=dict(attr="weights"))
    data = DataArray(np.random.randn(2, 2))

    if as_dataset:
        data = data.to_dataset(name="data")

    data.attrs = dict(attr="weights")

    result = getattr(data.weighted(weights), operation)(keep_attrs=True)

    if operation == "sum_of_weights":
        assert weights.attrs == result.attrs
    else:
        assert data.attrs == result.attrs

    result = getattr(data.weighted(weights), operation)(keep_attrs=None)
    assert not result.attrs

    result = getattr(data.weighted(weights), operation)(keep_attrs=False)
    assert not result.attrs
Example #17
def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs):

    weights = DataArray(np.random.randn(2, 2), attrs=dict(attr="weights"))
    data = DataArray(np.random.randn(2, 2))

    if as_dataset:
        data = data.to_dataset(name="data")

    data.attrs = dict(attr="weights")

    kwargs = {"keep_attrs": keep_attrs}
    if operation == "quantile":
        kwargs["q"] = 0.5

    result = getattr(data.weighted(weights), operation)(**kwargs)

    if operation == "sum_of_weights":
        assert result.attrs == (weights.attrs if keep_attrs else {})
        assert result.attrs == (weights.attrs if keep_attrs else {})
    else:
        assert result.attrs == (weights.attrs if keep_attrs else {})
        assert result.attrs == (data.attrs if keep_attrs else {})
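A small illustration of the behaviour exercised by the test above (assumes an xarray version where weighted reductions accept keep_attrs):

import numpy as np
import xarray as xr

data = xr.DataArray(np.ones((2, 2)), attrs={"attr": "data"})
weights = xr.DataArray(np.ones((2, 2)), attrs={"attr": "weights"})

print(data.weighted(weights).mean(keep_attrs=True).attrs)   # {'attr': 'data'}
print(data.weighted(weights).mean(keep_attrs=False).attrs)  # {}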
Example #18
    def _get_data(data: xr.DataArray, dataset_id: dict) -> xr.DataArray:
        """Get a dataset."""
        if dataset_id.get('resolution'):
            data.attrs['resolution'] = dataset_id['resolution']

        attrs = data.attrs.copy()

        fill = attrs.get('_FillValue')
        factor = attrs.pop('scale_factor', (np.ones(1, dtype=data.dtype))[0])
        offset = attrs.pop('add_offset', (np.zeros(1, dtype=data.dtype))[0])
        valid_range = attrs.get('valid_range', [None])
        if isinstance(valid_range, np.ndarray):
            attrs["valid_range"] = valid_range.tolist()

        flags = not data.attrs.get("SCALED", 1) and any(
            data.attrs.get("flag_values", [None]))
        if not flags:
            data = data.where(data != fill)
            data = _CLAVRxHelper._scale_data(data, factor, offset)
            # don't need _FillValue if it has been applied.
            attrs.pop('_FillValue', None)

        if all(valid_range):
            valid_min = _CLAVRxHelper._scale_data(valid_range[0], factor,
                                                  offset)
            valid_max = _CLAVRxHelper._scale_data(valid_range[1], factor,
                                                  offset)
            if flags:
                data = data.where((data >= valid_min) & (data <= valid_max),
                                  fill)
            else:
                data = data.where((data >= valid_min) & (data <= valid_max))
            attrs['valid_range'] = [valid_min, valid_max]

        data.attrs = _remove_attributes(attrs)

        return data
Example #19
def grid_data(
    x,
    y,
    var,
    bins=None,
    how="mean",
    interp_lim=6,
    verbose=True,
    return_xarray=True,
):
    """
    Grids the input variable to bins for depth/dens (y) and time/dive (x).
    The bins can be specified to be non-uniform to adapt to variable sampling
    intervals of the profile. It is useful to use the ``gt.plot.bin_size``
    function to identify the sampling intervals. The bins are averaged (mean)
    by default, but ``median``, ``std`` or ``count`` can also be used.

    Parameters
    ----------
    x : np.array, dtype=float, shape=[n, ]
        The horizontal values by which to bin; these need to be in a
        pseudo-discrete format already. Dive number or
        ``time_average_per_dive`` are the standard inputs for this variable.
        Has ``p`` unique values.
    y : np.array, dtype=float, shape=[n, ]
        The vertical values that will be binned; typically depth, but can also
        be density or any other variable.
    bins : np.array, dtype=float; shape=[q, ], default=[0 : 1 : max_depth ]
        Define the bin edges for y with this function. If not defined, defaults
        to one meter bins.
    how : str, default='mean'
        The string form of a function that can be applied to pandas.Groupby
        objects. These include ``mean, median, std, count``.
    interp_lim : int, default=6
        Sets the maximum extent to which NaNs will be filled.
    verbose : bool, default=True
        If True, print the mean bin size and the depth-binned vertical
        sampling frequency.
    return_xarray : bool, default=True
        If True, return an ``xarray.DataArray``; otherwise return the gridded
        ``pandas.DataFrame``.

    Returns
    -------
    glider_section : xarray.DataArray, shape=[p, q]
        A 2D section in the format specified by the ``return_xarray`` input.

    Raises
    ------
    UserWarning
        Triggers when ``x`` does not have discrete values.
    """
    from numpy import array, c_, diff, unique
    from pandas import Series, cut
    from xarray import DataArray

    xvar, yvar = x.copy(), y.copy()
    z = Series(var)
    y = array(y)
    x = array(x)

    u = unique(x).size
    s = x.size
    if (u / s) > 0.2:
        raise UserWarning(
            "The x input array must be psuedo discrete (dives or dive_time). "
            "{:.0f}% of x is unique (max 20% unique)".format(u / s * 100))

    chunk_depth = 50
    # -DB this might not work if the user uses anything other than depth, example
    # density. Chunk_depth would in that case apply to density, which will
    # probably have a range that is much smaller than 50.
    optimal_bins, avg_sample_freq = get_optimal_bins(y, chunk_depth)
    if bins is None:
        bins = optimal_bins

    # warning if bin average is smaller than average bin size
    # -DB this is not being raised as a warning. Instead just seems like useful
    # information conveyed to user. Further none of this works out if y is not
    # depth, since avg_sample freq will not make sense otherwise.
    if verbose:
        avg_bin_size = diff(bins).mean()
        print(("Mean bin size = {:.2f}\n"
               "Mean depth binned ({} m) vertical sampling frequency = {:.2f}"
               ).format(avg_bin_size, chunk_depth, avg_sample_freq))

    labels = c_[bins[:-1],
                bins[1:]].mean(axis=1)  # -DB creates the mean bin values
    bins = cut(y, bins, labels=labels)
    # -DB creates a new variable where instead of variable the bin category
    # is mentioned (sort of like a discretization)

    grp = Series(z).groupby([x, bins
                             ])  # -DB put z into the many bins (like 2D hist)
    grp_agg = getattr(
        grp, how)()  # -DB basically does grp.how() or in this case grp.mean()
    gridded = grp_agg.unstack(level=0)
    gridded = gridded.reindex(labels.astype(float))

    if interp_lim > 0:
        gridded = gridded.interpolate(limit=interp_lim).bfill(limit=interp_lim)

    if not return_xarray:
        return gridded

    if return_xarray:
        dummy = transfer_nc_attrs(getframe(), var, var, "_vert_binned")

        xda = DataArray(gridded)
        if isinstance(var, DataArray):
            xda.attrs = dummy.attrs
            xda.name = dummy.name

        if isinstance(yvar, DataArray):
            y = xda.dims[0]
            xda[y].attrs = yvar.attrs
            xda = xda.rename({y: yvar.name})

        if isinstance(xvar, DataArray):
            x = xda.dims[1]
            xda[x].attrs = xvar.attrs
            xda = xda.rename({x: xvar.name})

        return xda
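A hypothetical usage sketch for grid_data (synthetic dive/depth/temperature arrays; assumes the module-level helpers used above, such as get_optimal_bins, are importable):

import numpy as np

dives = np.repeat(np.arange(1, 11), 100)        # 10 dives -> pseudo-discrete x
depth = np.tile(np.linspace(0, 99, 100), 10)    # 0-99 m sampled on every dive
temp = 20 - 0.1 * depth                         # synthetic temperature profile

# return_xarray=False returns the gridded pandas.DataFrame directly; pass
# return_xarray=True (the default) to get a DataArray with attrs transferred
# from DataArray inputs.
section = grid_data(dives, depth, temp, bins=np.arange(0, 101), return_xarray=False)
print(section.shape)   # (number of depth bins, number of dives)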
Example #20
    def read(self):
        """

        :return: DataArray objects populated with data read from eeg files. The size of the output is
        number of channels x number of start offsets x number of time series points
        The corresponding DataArray axes are: 'channels', 'start_offsets', 'offsets'

        """

        if self.read_size < 0:
            self.read_size = int(self.get_file_size() / self.file_format.data_size)

        # allocate space for data
        eventdata = np.empty((len(self.channels), len(self.start_offsets), self.read_size),
                             dtype=float) * np.nan

        read_ok_mask = np.ones(shape=(len(self.channels), len(self.start_offsets)), dtype=bool)

        # loop over channels
        for c, channel in enumerate(self.channels):
            try:
                eegfname = self.dataroot + '.' + channel
            except TypeError:
                eegfname = self.dataroot + '.' + channel.decode()

            with open(eegfname, 'rb') as efile:
                # loop over start offsets
                for e, start_offset in enumerate(self.start_offsets):
                    # rejecting negative offset
                    if start_offset < 0:
                        read_ok_mask[c, e] = False
                        print(('Cannot read from negative offset %d in file %s' % (start_offset, eegfname)))
                        continue

                    # seek to the position in the file
                    efile.seek(self.file_format.data_size * start_offset, 0)

                    # read the data
                    data = efile.read(int(self.file_format.data_size * self.read_size))

                    # convert from string to array based on the format
                    # hard-codes little endian
                    fmt = '<' + str(int(len(data) / self.file_format.data_size)) + self.file_format.format_string
                    data = np.array(struct.unpack(fmt, data))

                    # make sure we got some data
                    if len(data) < self.read_size:
                        read_ok_mask[c, e] = False

                        print((
                            'Cannot read full chunk of data for offset ' + str(start_offset) +
                            '. End of read interval is outside the bounds of file ' + str(eegfname)))
                    else:
                        # append it to the eventdata
                        eventdata[c, e, :] = data

        # multiply by the gain
        eventdata *= self.params_dict['gain']

        eventdata = DataArray(eventdata,
                              dims=['channels', 'start_offsets', 'offsets'],
                              coords={
                                  'channels': self.channels,
                                  'start_offsets': self.start_offsets.copy(),
                                  'offsets': np.arange(self.read_size),
                                  'samplerate': self.params_dict['samplerate']

                              }
                              )

        from copy import deepcopy
        eventdata.attrs = deepcopy(self.params_dict)

        return eventdata, read_ok_mask