Пример #1
0
 def convert(self, unit, density=998.2, molar_mass=12.011):
     """Convert the variable to ``unit`` in place using cf_units (UDUNITS2).

     Besides conversions UDUNITS can do directly, two shortcuts are handled:

     - ( M L-2 T-1 ) <--> ( L T-1 ), scaling by ``density`` (assuming water)
     - (       mol ) <--> (     M ), scaling by ``molar_mass`` (assuming carbon)

     If the variable carries a ``bounds`` attribute naming another variable
     of the dataset, that variable receives exactly the same scaling and
     unit conversion so that it stays consistent with the data.

     Parameters
     ----------
     unit : str
         The target unit; also stored as the new ``units`` attribute.
     density : float, optional
         Factor used when source and target differ by ``kg m-3``.
     molar_mass : float, optional
         Factor used when source and target differ by ``g mol-1``.

     Returns
     -------
     self
         The object itself, to allow chaining.

     Raises
     ------
     ValueError
         If the variable has no ``units`` attribute.
     """
     if 'units' not in self.ds[self.varname].attrs:
         msg = "Cannot convert the units of the DataArray lacking the 'units' attribute"
         raise ValueError(msg)
     src_unit = Unit(self.ds[self.varname].units)
     tar_unit = Unit(unit)
     mass_density = Unit("kg m-3")
     molar_density = Unit("g mol-1")

     # Record the extra scalings as (divide, factor) pairs so the very same
     # operations can be replayed on the bounds variable further down.
     scalings = []
     if ((src_unit / tar_unit) / mass_density).is_dimensionless():
         scalings.append((True, density))
         src_unit /= mass_density
     elif ((tar_unit / src_unit) / mass_density).is_dimensionless():
         scalings.append((False, density))
         src_unit *= mass_density
     if ((src_unit / tar_unit) / molar_density).is_dimensionless():
         scalings.append((True, molar_mass))
         src_unit /= molar_density
     elif ((tar_unit / src_unit) / molar_density).is_dimensionless():
         scalings.append((False, molar_mass))
         src_unit *= molar_density

     def _rescale(name):
         # Apply the recorded density / molar-mass factors to ``self.ds[name]``.
         for divide, factor in scalings:
             if divide:
                 self.ds[name] /= factor
             else:
                 self.ds[name] *= factor

     _rescale(self.varname)
     src_unit.convert(self.ds[self.varname].data, tar_unit, inplace=True)
     self.ds[self.varname].attrs['units'] = unit

     attrs = self.ds[self.varname].attrs
     if "bounds" in attrs and attrs['bounds'] in self.ds:
         bounds_name = attrs['bounds']
         # ``src_unit`` has already been adjusted for the scalings above, so
         # the bounds need the same numeric factors before conversion.
         _rescale(bounds_name)
         src_unit.convert(self.ds[bounds_name].data, tar_unit, inplace=True)
     return self
Пример #2
0
def unit_conversion_fac(from_unit, to_unit):
    """Return the multiplicative factor that converts one unit into another.

    The factor is obtained by converting the value ``1`` from ``from_unit``
    to ``to_unit``.

    Note
    ----
    A string ``from_unit`` is wrapped in :class:`cf_units.Unit`; any other
    object is used as given. ``to_unit`` is handed to ``convert`` unchanged.

    Parameters
    ----------
    from_unit : :obj:`cf_units.Unit`, or :obj:`str`
        unit to be converted
    to_unit : :obj:`cf_units.Unit`, or :obj:`str`
        final unit

    Returns
    --------
    float
        multiplicative conversion factor

    Raises
    ------
    ValueError
        if units cannot be converted into each other using cf_units package
    """
    source = Unit(from_unit) if isinstance(from_unit, str) else from_unit
    return source.convert(1, to_unit)
Пример #3
0
    def convert(self, tar_unit):
        """Convert ``self.data`` from ``self.unit`` to ``tar_unit``.

        The converted values replace ``self.data`` and ``self.unit`` is set
        to ``tar_unit``. Returns ``self`` so calls can be chained.
        """
        source, target = Unit(self.unit), Unit(tar_unit)
        self.data = source.convert(self.data, target)
        self.unit = tar_unit
        return self
Пример #4
0
def unit_converter(data, inunit, outunit):
    """
    Convert an array from one UDUNITS-compatible unit to another.

    ``cf_units`` is used when it can be imported, otherwise ``udunits2``.
    Only when neither package is importable does the function fall back to a
    fixed factor of 1/1000 (after printing a notice). Errors raised by an
    available backend, e.g. for invalid or incompatible units, are propagated
    instead of being silently replaced by that fallback factor.

    Parameters
    ----------
    data : array_like
           values to convert; a mask on a masked array is preserved
    inunit : string
             unit to convert from, must be UDUNITS-compatible string
    outunit : string
              unit to convert to, must be UDUNITS-compatible string

    Returns
    -------
    out : array_like
          ``numpy.ma.MaskedArray`` if ``data`` was one, else ``numpy.ndarray``

    Example
    -------
    >>> import numpy as np
    >>> unit_converter(np.array([1, 2]) * 1e12, "kg", "Gt")
    array([1., 2.])
    """

    inunit = str(inunit)
    outunit = str(outunit)
    mask = data.mask if isinstance(data, np.ma.MaskedArray) else None
    data = np.array(data)

    if inunit == outunit:
        outdata = data
    else:
        # Guard only the imports: a backend that is present but rejects the
        # units must not end up in the "module not found" fallback.
        # OSError is treated as "unavailable" too, in case a missing shared
        # library surfaces that way at import time.
        try:
            from cf_units import Unit
        except (ImportError, OSError):
            Unit = None

        if Unit is not None:
            outdata = Unit(inunit).convert(data, Unit(outunit))
        else:
            try:
                from udunits2 import Converter, System
                from udunits2 import Unit as UdUnit
            except (ImportError, OSError):
                print("Neither cf_units or udunits2 module found, you're on your own.")
                outdata = (1.0 / 1e3) * data
            else:
                system = System()
                converter = Converter(
                    (UdUnit(system, inunit), UdUnit(system, outunit))
                )
                outdata = converter(data)

    if mask is not None:
        return np.ma.array(outdata, mask=mask)
    return outdata
Пример #5
0
def unit_converter(data, inunit, outunit):
    '''
    Unit converter. Takes an (numpy) array, valid udunits inunits and outunits
    as strings, and returns the array in outunits.

    The conversion backend is ``cf_units`` if importable, else ``udunits2``.
    If neither can be imported a notice is printed and the data are scaled
    by the fixed factor 1/1000. A backend that is installed but fails to
    convert (for example because the units are invalid) raises its own
    error rather than silently triggering that fallback.

    Parameters
    ----------
    data : array_like
           values to convert; the mask of a masked array is carried over
    inunit : string
             unit to convert from, must be UDUNITS-compatible string
    outunit : string
              unit to convert to, must be UDUNITS-compatible string

    Returns
    -------
    out : array_like
          masked array when ``data`` was masked, plain ndarray otherwise

    Example
    -------
    >>> import numpy as np
    >>> unit_converter(np.array([1, 2]) * 1e12, "kg", "Gt")
    array([1., 2.])
    '''

    inunit = str(inunit)
    outunit = str(outunit)
    if isinstance(data, np.ma.MaskedArray):
        mask = data.mask
    else:
        mask = None
    data = np.array(data)

    if inunit == outunit:
        outdata = data
    else:
        backend = None
        # Only the imports are guarded, so genuine conversion errors are not
        # mistaken for a missing module. OSError is included in case a
        # missing shared library is reported that way on import.
        try:
            from cf_units import Unit
            backend = "cf_units"
        except (ImportError, OSError):
            try:
                from udunits2 import Converter, System, Unit
                backend = "udunits2"
            except (ImportError, OSError):
                pass

        if backend == "cf_units":
            in_unit = Unit(inunit)
            out_unit = Unit(outunit)
            outdata = in_unit.convert(data, out_unit)
        elif backend == "udunits2":
            system = System()
            c = Converter((Unit(system, inunit), Unit(system, outunit)))
            outdata = c(data)
        else:
            print(
                "Neither cf_units or udunits2 module found, you're on your own.")
            c = 1. / 1e3
            outdata = c * data

    if mask is not None:
        return np.ma.array(outdata, mask=mask)
    return outdata
Пример #6
0
    def _apply_minimum_precip_rate(self, precip_cube, cube):
        """Cap reduced precipitation rates at the minimum precipitation rate.

        Only acts when ``self.operation`` is "subtract"; otherwise ``cube``
        is returned untouched.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing a precipitation rate input field.
            cube (iris.cube.Cube):
                Cube containing the precipitation rate field after combining
                with orographic enhancement. Modified in place.

        Returns:
            iris.cube.Cube:
                ``cube``, in which every point whose input rate was at or
                above the minimum rate but whose combined rate is at or
                below it has been set to the minimum rate.

        """
        if self.operation != "subtract":
            return cube

        # The minimum rate is defined in mm/hr; express it in the units of
        # each cube so that the two cubes may use different units.
        mm_per_hour = Unit("mm/hr")
        min_rate_cube_units = mm_per_hour.convert(
            self.min_precip_rate_mmh, cube.units
        )
        min_rate_precip_units = mm_per_hour.convert(
            self.min_precip_rate_mmh, precip_cube.units
        )

        # Silence "invalid value" warnings, e.g. from NaNs in the comparisons.
        with np.errstate(invalid="ignore"):
            input_at_or_above = precip_cube.data >= min_rate_precip_units
            result_at_or_below = cube.data <= min_rate_cube_units
            cube.data[input_at_or_above & result_at_or_below] = (
                min_rate_cube_units
            )
        return cube
Пример #7
0
    def get_vertical_extent(self, ds, data_dict):
        """Add ``vertical_min`` / ``vertical_max`` extras, in meters, to ``data_dict``.

        The values are read from the ``NC_GLOBAL`` entry of
        ``ds.metadata_mapping`` (``geospatial_vertical_min`` / ``_max``),
        converted from ``geospatial_vertical_units`` to meters, and negated
        when ``geospatial_vertical_positive`` is "up". An extra whose key is
        already present in ``data_dict["extras"]`` is left alone, and NaN
        results are skipped with a warning.

        Missing attributes, unconvertible values and an unrecognized
        ``geospatial_vertical_positive`` value are logged, not raised.

        Args:
            ds: object exposing ``metadata_mapping["NC_GLOBAL"]``.
            data_dict (dict): has an ``"extras"`` list of
                ``{"key": ..., "value": ...}`` dicts; appended to in place.

        Returns:
            None
        """
        global_atts = ds.metadata_mapping["NC_GLOBAL"]
        try:
            positive = global_atts["geospatial_vertical_positive"]
            if positive == "down":
                sign = 1
            elif positive == "up":
                sign = -1
            else:
                # Previously ``sign`` stayed unbound here and the resulting
                # UnboundLocalError escaped the handler below.
                raise ValueError(
                    "Unrecognized geospatial_vertical_positive value: "
                    "{!r}".format(positive))

            m = Unit("meters")
            # Scaling the unit by ``sign`` flips the sign of converted values.
            orig_units = Unit(global_atts["geospatial_vertical_units"]) * sign

            extra_keys = {kvp["key"] for kvp in data_dict["extras"]}
            for key, att_name in (
                    ("vertical_min", "geospatial_vertical_min"),
                    ("vertical_max", "geospatial_vertical_max")):
                if key in extra_keys:
                    continue
                converted = orig_units.convert(float(global_atts[att_name]), m)
                if isnan(converted):
                    log.warning("%s was NaN, skipping", key)
                else:
                    data_dict["extras"].append({
                        "key": key,
                        "value": str(converted)
                    })
            log.info("PASS")
        except (AttributeError, ValueError, KeyError):
            log.exception(
                "Encountered attribute error when attempting to get vertical bounds of OPeNDAP dataset"
            )
Пример #8
0
    def _getGridInformation(self):
        """Populate cell areas and land areas from the model output.

        Cell areas come from an ``areacella`` (preferred) or ``area``
        variable, converted to m2. Without either, they are built from
        ``lat_bnds`` / ``lon_bnds``; if those are missing too, the method
        returns without setting anything. Land areas are the cell areas
        multiplied by a ``sftlf`` (preferred) or ``landfrac`` fraction when
        one exists, else the cell areas themselves.

        Sets ``self.cell_areas``, ``self.land_areas``, ``self.land_area`` and,
        when a fraction variable exists, ``self.land_fraction``.

        Raises:
            ValueError: if the cell areas and land fraction shapes differ.
        """
        def _wrap_longitude(lon):
            # Map longitudes above 180 into the negative range.
            return (lon <=
                    180) * lon + (lon > 180) * (lon - 360) + (lon < -180) * 360

        # Later candidates win, so "areacella" takes precedence over "area".
        area_name = None
        for candidate in ("area", "areacella"):
            if candidate in self.variables.keys():
                area_name = candidate

        if area_name is None:
            have_bounds = ("lat_bnds" in self.variables.keys()
                           and "lon_bnds" in self.variables.keys())
            if not have_bounds:
                return
            with Dataset(self.variables["lat_bnds"][0]) as f:
                lat_bounds = f.variables["lat_bnds"][...]
            with Dataset(self.variables["lon_bnds"][0]) as f:
                lon_bounds = f.variables["lon_bnds"][...]
                # Roll so the row with the smallest mean longitude is first.
                start = lon_bounds.mean(axis=1).argmin()
                lon_bounds = np.roll(_wrap_longitude(lon_bounds), -start, axis=0)
                # Repair end cells whose bounds ended up in descending order.
                if lon_bounds[0, 0] > lon_bounds[0, 1]:
                    lon_bounds[0, 0] = -180.
                if lon_bounds[-1, 0] > lon_bounds[-1, 1]:
                    lon_bounds[-1, 1] = +180.
            self.cell_areas = il.CellAreas(None, None,
                                           lat_bnds=lat_bounds,
                                           lon_bnds=lon_bounds)
        else:
            with Dataset(self.variables[area_name][0]) as f:
                area_var = f.variables[area_name]
                if "units" in area_var.ncattrs():
                    area_unit = Unit(area_var.units)
                else:
                    area_unit = Unit("m2")
                self.cell_areas = area_unit.convert(area_var[...], "m2",
                                                    inplace=True)

        # Same precedence rule for land fractions: "sftlf" beats "landfrac".
        frac_name = None
        for candidate in ("landfrac", "sftlf"):
            if candidate in self.variables.keys():
                frac_name = candidate

        if frac_name is None:
            self.land_areas = self.cell_areas
        else:
            with Dataset(self.variables[frac_name][0]) as f:
                self.land_fraction = f.variables[frac_name][...]
            # some models represent the fraction as a percent
            if np.ma.max(self.land_fraction) > 10:
                self.land_fraction *= 0.01
            with np.errstate(over='ignore', under='ignore'):
                shapes_match = np.allclose(self.cell_areas.shape,
                                           self.land_fraction.shape)
                if not shapes_match:
                    msg = "The model %s has areacella %s which is a different shape than sftlf %s" % (
                        self.name, str(self.cell_areas.shape),
                        str(self.land_fraction.shape))
                    raise ValueError(msg)
                self.land_areas = self.cell_areas * self.land_fraction
        self.land_area = np.ma.sum(self.land_areas)
        return
Пример #9
0
 def test_gregorian_calendar_conversion_setup(self):
     """Build the expected and actual arrays for a gregorian conversion.

     The calendar is passed as a ``self.MyStr`` wrapper of the gregorian
     calendar constant, i.e. an object that is a different object from the
     constant itself. This reproduces the case where an identity (`is`)
     comparison against the calendar string gives the wrong answer.

     Returns a ``(expected, result)`` tuple of float32 arrays.
     """
     gregorian = cf_units.CALENDAR_GREGORIAN
     wrapped_calendar = self.MyStr(gregorian)
     # The wrapper must be a distinct object for the scenario to be valid.
     self.assertIsNot(wrapped_calendar, gregorian)

     source = Unit('hours since 1970-01-01 00:00:00',
                   calendar=wrapped_calendar)
     target = Unit('hours since 1969-11-30 00:00:00',
                   calendar=wrapped_calendar)
     points = np.array([8.], dtype=np.float32)
     result = source.convert(points, target)
     expected = np.array([776.], dtype=np.float32)
     return expected, result
Пример #10
0
 def test_gregorian_calendar_conversion_setup(self):
     """Prepare a gregorian-calendar time conversion and its expected value.

     Both units use a calendar given as ``self.MyStr(...)`` around the
     gregorian calendar constant, so the calendar is not the same object
     as the constant. This covers code that wrongly compares the calendar
     with `is` / `is not`.

     Returns:
         tuple: ``(expected, result)`` float32 arrays.
     """
     calendar_constant = cf_units.CALENDAR_GREGORIAN
     calendar = self.MyStr(calendar_constant)
     self.assertIsNot(calendar, calendar_constant)

     def _hours_since(reference):
         # Time unit counted in hours from ``reference`` on the test calendar.
         return Unit('hours since ' + reference, calendar=calendar)

     unit_from = _hours_since('1970-01-01 00:00:00')
     unit_to = _hours_since('1969-11-30 00:00:00')
     start_point = np.array([8.], dtype=np.float32)
     expected = np.array([776.], dtype=np.float32)
     return expected, unit_from.convert(start_point, unit_to)
Пример #11
0
 def setUp(self):
     """Create the cubes shared by the tests.

     Stores a precipitation cube, an orographic enhancement cube that has
     been zeroed wherever the precipitation rate is below the minimum
     rate, and their sum (``added_cube``) and difference
     (``subtracted_cube``)."""
     precip_cube = set_up_precipitation_rate_cubelist()[0]
     self.precip_cube = precip_cube
     oe_cube = set_up_orographic_enhancement_cube()[0]

     # Express the mm/hr minimum rate in the precipitation cube's units and
     # remove enhancement where the precipitation rate is below it.
     min_rate = Unit("mm/hr").convert(MIN_PRECIP_RATE_MMH, precip_cube.units)
     below_minimum = precip_cube.data < min_rate
     oe_cube.data[below_minimum] = 0.

     self.oe_cube = oe_cube
     self.added_cube = precip_cube + oe_cube
     self.subtracted_cube = precip_cube - oe_cube
Пример #12
0
    def _apply_orographic_enhancement(self, precip_cube, oe_cube):
        """Add or subtract orographic enhancement to/from a precipitation cube.

        Note that ``oe_cube`` is modified in place: its units are converted
        to those of ``precip_cube`` and its data are zeroed where the
        precipitation rate is below the minimum rate.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing the input precipitation field.
            oe_cube (iris.cube.Cube):
                Cube containing the orographic enhancement field matching
                the validity time of the precipitation cube.

        Returns:
            iris.cube.Cube:
                Copy of ``precip_cube`` whose data have had the orographic
                enhancement added or subtracted, per ``self.operation``.

        Raises:
            ValueError: if ``self.operation`` is neither "add" nor
                "subtract".

        """
        # Work in the units of the precipitation rate cube.
        oe_cube.convert_units(precip_cube.units)

        # The minimum rate is held in mm/hr; express it in the cube's units.
        min_rate = Unit("mm/hr").convert(
            self.min_precip_rate_mmh, precip_cube.units
        )

        # No enhancement where the precipitation rate is below the minimum.
        # Invalid-value warnings (e.g. NaN in the comparison) are ignored.
        with np.errstate(invalid="ignore"):
            below_minimum = precip_cube.data < min_rate
            oe_cube.data[below_minimum] = 0.0

        combined = precip_cube.copy()
        if self.operation not in ("add", "subtract"):
            msg = (
                "Operation '{}' not supported for combining "
                "precipitation rate and "
                "orographic enhancement.".format(self.operation)
            )
            raise ValueError(msg)

        if self.operation == "add":
            combined.data = combined.data + oe_cube.data
        else:
            combined.data = combined.data - oe_cube.data
        return combined
Пример #13
0
    def test_non_gregorian_calendar_conversion_dtype(self):
        """Check the result dtype of a 360_day calendar time conversion.

        Floating-point inputs are expected to keep their dtype; integer
        inputs are expected to come back as ``np.int64``.
        """
        cases = (
            (np.float32, True),
            (np.float64, True),
            (np.int32, False),
            (np.int64, False),
            # Builtin ``int`` replaces ``np.int``, which was only an alias
            # for it and no longer exists in current NumPy releases.
            (int, False),
        )
        for start_dtype, exp_convert in cases:
            data = np.arange(4, dtype=start_dtype)
            u1 = Unit("hours since 2000-01-01 00:00:00", calendar="360_day")
            u2 = Unit("hours since 2000-01-02 00:00:00", calendar="360_day")
            result = u1.convert(data, u2)

            if exp_convert:
                self.assertEqual(result.dtype, start_dtype)
            else:
                self.assertEqual(result.dtype, np.int64)
Пример #14
0
    def _apply_orographic_enhancement(self, precip_cube, oe_cube):
        """Combine a precipitation rate cube with orographic enhancement.

        A copy of ``oe_cube`` is used: it is passed through
        ``check_cube_coordinates`` against ``precip_cube``, converted to the
        precipitation cube's units, and zeroed where the precipitation rate
        is below the minimum rate. The two cubes are then combined by
        ``CubeCombiner`` using ``self.operation``.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing the input precipitation field.
            oe_cube (iris.cube.Cube):
                Cube containing the orographic enhancement field matching
                the validity time of the precipitation cube.

        Returns:
            cube (iris.cube.Cube):
                Cube containing the precipitation rate field modified by the
                orographic enhancement cube.

        """
        # Align the (copied) enhancement cube with the precipitation cube.
        enhancement = check_cube_coordinates(precip_cube, oe_cube.copy())
        enhancement.convert_units(precip_cube.units)

        # The minimum rate is held in mm/hr; express it in the cube's units.
        min_rate = Unit("mm/hr").convert(
            self.min_precip_rate_mmh, precip_cube.units)

        # No enhancement where the precipitation rate is below the minimum.
        # Invalid-value warnings (e.g. NaN in the comparison) are ignored.
        with np.errstate(invalid='ignore'):
            below_minimum = precip_cube.data < min_rate
            enhancement.data[below_minimum] = 0.

        cubes = iris.cube.CubeList([precip_cube, enhancement])
        combiner = CubeCombiner(self.operation)
        return combiner.process(cubes, precip_cube.name())
Пример #15
0
    def parse_date(datestr):
        '''
        Parse the time query param.

        Strings starting with ``now-`` are read as ``now-<integer><unit>``
        (optional whitespace before the unit) and yield the current UTC time
        minus that amount, converted to hours. Anything else is handed to
        ``dateparse``. Any failure results in ``None``.
        '''
        try:
            if not datestr.startswith('now-'):
                return dateparse(datestr)

            match = re.search(r'^now-(?P<val>\d+)\s*(?P<units>\w+)$', datestr)
            # A non-matching string leaves ``match`` as None; the attribute
            # error that follows is turned into None by the handler below.
            amount = int(match.group('val'))
            unit_name = match.group('units')
            # Constructing the unit fails for unknown unit names.
            offset_hours = Unit(unit_name).convert(amount, Unit('hours'))
            return datetime.now(tz=timezone.utc) - timedelta(hours=offset_hours)
        except Exception:
            return None
Пример #16
0
def convert_unit(value, unit, new_unit):
    """
    Convert a value from one unit to another in a single call.

    Parameters
    ----------
    value: ``float``
        The value to convert, expressed in ``unit``.

    unit: ``str``
        The unit ``value`` is currently in.

    new_unit: ``str``
        The unit to convert to.

    Returns
    -------
    new_value: ``float``
        ``value`` expressed in ``new_unit``.
    """
    return Unit(unit).convert(value, new_unit)
Пример #17
0
from cf_units import Unit
import numpy as np
import matplotlib.pyplot as plt
from sympy import Symbol

# Quick check of a plain conversion: convert the value 0 from the "deg_c"
# unit to the "deg_k" unit. The result is not stored.
c = Unit("deg_c")
k = Unit("deg_k")
c.convert(0, k)

# Now we test the actual use case
# for a plot.

# Base units used to build the sample data below.
second = Unit("second")
minute = Unit("minute")
meter = Unit("meter")
# xs: the units 1*second ... 9*second; ys: meter divided by each of those.
xs = [i * second for i in range(1, 10)]
ys = [meter / x for x in xs]


def plot_with_units(ax, xt, yt):
    """Plot unit-carrying x/y samples on ``ax`` in the requested units.

    ``xt`` and ``yt`` are ``(samples, unit)`` pairs. Each sample is turned
    into a number by converting the value 1 from the sample to the pair's
    unit. The numbers are printed, the axis labels are set to the string
    form of the units, and the numbers are plotted.
    """
    x_samples, x_unit = xt
    y_samples, y_unit = yt
    x_values = [sample.convert(1, x_unit) for sample in x_samples]
    y_values = [sample.convert(1, y_unit) for sample in y_samples]
    print(x_values, y_values)

    x_label = str(x_unit)
    ax.set_xlabel(x_label)
    y_label = str(y_unit)
    ax.set_ylabel(y_label)
    ax.plot(x_values, y_values)


def auto_plot_with_units(ax, xt, yt):
Пример #18
0
class BasicThreshold(object):
    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership function
    around the threshold values provided. A cube will be returned with a new
    threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """
    def __init__(self,
                 thresholds,
                 fuzzy_factor=None,
                 fuzzy_bounds=None,
                 threshold_units=None,
                 below_thresh_ok=False):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).  If fuzzy_factor is not None, fuzzy
        bounds are calculated using the threshold value in the units in which
        it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded value
        of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore, in this
        case, the thresholded exceedance probabilities between 4.5 mm/hr and
        7.5 mm/hr would follow the pattern:

        ::

            Data value | Probability
            ------------|-------------
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of floats or float):
                The threshold points for 'significant' datapoints.

        Keyword Args:
            fuzzy_factor (float):
                Specifies lower bound for fuzzy membership value when
                multiplied by each threshold. Upper bound is equivalent linear
                distance above threshold. If None, no fuzzy_factor is applied.
            fuzzy_bounds (list of tuples):
                Lower and upper bounds for fuzziness.
                List should be of same length as thresholds.
                Each entry in list should be a tuple of two floats
                representing the lower and upper bounds respectively.
                A single tuple is accepted and wrapped in a list.
                If None, no fuzzy_bounds are applied.
            threshold_units (string):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            below_thresh_ok (boolean):
                True to count points as significant if *below* the threshold,
                False to count points as significant if *above* the threshold.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a fuzzy
                        factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                        than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set
                        as this is ambiguous.
            AssertionError: If a bounds entry does not have two values or
                        does not enclose its threshold.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor

        # Set fuzzy-bounds.  If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value.  A test
        # of this equality is used later to determine whether to process with
        # a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                # for negative thresholds the scaled values swap order
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds

        # ensure fuzzy_bounds is a list of tuples
        if isinstance(fuzzy_bounds, tuple):
            self.fuzzy_bounds = [fuzzy_bounds]

        # check that thresholds and fuzzy_bounds are self-consistent
        # (kept as assertions so callers still see AssertionError)
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            assert len(bounds) == 2, ("Invalid bounds for one threshold: {}. "
                                      "Expected 2 floats.".format(bounds))
            bounds_msg = ("Threshold must be within bounds: "
                          "!( {} <= {} <= {} )".format(bounds[0], thr,
                                                       bounds[1]))
            assert bounds[0] <= thr, bounds_msg
            assert bounds[1] >= thr, bounds_msg

        self.below_thresh_ok = below_thresh_ok

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return ('<BasicThreshold: thresholds {}, ' + 'fuzzy_bounds {}, ' +
                'below_thresh_ok: {}>').format(self.thresholds,
                                               self.fuzzy_bounds,
                                               self.below_thresh_ok)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        The coordinate is named with ``self.threshold_coord_name`` as its
        standard name; if that is rejected as an invalid standard name, it
        is used as the long name instead.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (np.float32):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis

        Raises:
            ValueError: if the coordinate cannot be created for a reason
                other than an invalid standard name.
        """
        try:
            coord = iris.coords.DimCoord(
                np.array([threshold], dtype=np.float32),
                standard_name=self.threshold_coord_name,
                var_name="threshold",
                units=cube.units)
        except ValueError as cause:
            if 'is not a valid standard_name' in str(cause):
                coord = iris.coords.DimCoord(
                    np.array([threshold], dtype=np.float32),
                    long_name=self.threshold_coord_name,
                    var_name="threshold",
                    units=cube.units)
            else:
                # re-raise the original error with its traceback intact
                raise

        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied.  If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds into
        the units of the input cube.

        The converted values are kept local to this call, so the plugin's
        configured thresholds and bounds stay in ``threshold_units`` and the
        plugin can be applied to several cubes in turn.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            cube (iris.cube.Cube):
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                probability_of_X_above(or below)_threshold (where X is
                the diagnostic under consideration)
                * Threshold dimension coordinate with same units as input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.

        """
        # Record input cube data type to ensure consistent output, though
        # integer data must become float to enable fuzzy thresholding.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind == 'i':
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # If necessary, convert thresholds and fuzzy bounds into cube units.
        # Local copies are used on purpose: writing the converted values back
        # to self would convert them a second time on the next call.
        thresholds = self.thresholds
        fuzzy_bounds = self.fuzzy_bounds
        if self.threshold_units is not None:
            thresholds = [
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in self.thresholds
            ]
            fuzzy_bounds = [
                tuple([
                    self.threshold_units.convert(bound, input_cube.units)
                    for bound in bounds
                ]) for bounds in self.fuzzy_bounds
            ]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(thresholds, fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = cube.data > threshold
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                truth_value = np.where(
                    cube.data < threshold,
                    rescale(cube.data,
                            data_range=(bounds[0], threshold),
                            scale_range=(0., 0.5),
                            clip=True),
                    rescale(cube.data,
                            data_range=(threshold, bounds[1]),
                            scale_range=(0.5, 1.),
                            clip=True),
                )
            truth_value = truth_value.astype(input_cube_dtype)
            # if requirement is for probabilities below threshold (rather than
            # above), invert the exceedance probability
            if self.below_thresh_ok:
                truth_value = 1. - truth_value

            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        cube, = thresholded_cubes.concatenate()
        # TODO: Correct when formal cf-standards exists
        # Force the metadata to temporary conventions
        if self.below_thresh_ok:
            cube.attributes.update({'relative_to_threshold': 'below'})
            cube.rename("probability_of_{}_below_threshold".format(
                cube.name()))
        else:
            cube.attributes.update({'relative_to_threshold': 'above'})
            cube.rename("probability_of_{}_above_threshold".format(
                cube.name()))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(cube,
                                           ["realization", "percentile_over"])

        return cube
Пример #19
0
class BasicThreshold(BasePlugin):

    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership function
    around the threshold values provided. A cube will be returned with a new
    threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(self, thresholds, fuzzy_factor=None,
                 fuzzy_bounds=None, threshold_units=None,
                 comparison_operator='>'):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).  If fuzzy_factor is not None, fuzzy
        bounds are calculated using the threshold value in the units in which
        it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded value
        of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore, in this
        case, the thresholded exceedance probabilities between 4.5 mm/hr and
        7.5 mm/hr would follow the pattern:

        ::

            Data value | Probability
            ------------|-------------
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of float or float):
                The threshold points for 'significant' datapoints.
            fuzzy_factor (float):
                Specifies lower bound for fuzzy membership value when
                multiplied by each threshold. Upper bound is equivalent linear
                distance above threshold. If None, no fuzzy_factor is applied.
            fuzzy_bounds (list of tuple):
                Lower and upper bounds for fuzziness.
                List should be of same length as thresholds.
                Each entry in list should be a tuple of two floats
                representing the lower and upper bounds respectively.
                A single tuple is accepted for a single threshold.
                If None, no fuzzy_bounds are applied.
            threshold_units (str):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            comparison_operator (str):
                Indicates the comparison_operator to use with the threshold.
                e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
                evaluate data < threshold. When using fuzzy thresholds, there
                is no difference between < and <= or > and >=.
                Valid choices: > >= < <= gt ge lt le.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a fuzzy
                        factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                        than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set
                        as this is ambiguous.
            ValueError: If the number of fuzzy_bounds entries differs from
                        the number of thresholds.
            ValueError: If an entry of fuzzy_bounds does not hold exactly two
                        values, or does not enclose its threshold.
            ValueError: If comparison_operator is not one of the valid
                        choices.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None; it is set from the
        # input cube name each time process() is called
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor

        # Set fuzzy-bounds.  If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value.  A test
        # of this equality is used later to determine whether to process with
        # a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                # for negative thresholds the multiplication flips the order
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds

        # ensure fuzzy_bounds is a list of tuples
        if isinstance(fuzzy_bounds, tuple):
            self.fuzzy_bounds = [fuzzy_bounds]

        # Thresholds and bounds are paired with zip(), which would silently
        # drop any unmatched threshold, so insist on a one-to-one pairing.
        if len(self.fuzzy_bounds) != len(self.thresholds):
            raise ValueError(
                "Invalid fuzzy_bounds: expected one (lower, upper) pair per "
                "threshold, got {} bounds for {} thresholds".format(
                    len(self.fuzzy_bounds), len(self.thresholds)))

        # check that thresholds and fuzzy_bounds are self-consistent
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            if len(bounds) != 2:
                raise ValueError("Invalid bounds for one threshold: {}."
                                 " Expected 2 floats.".format(bounds))
            if bounds[0] > thr or bounds[1] < thr:
                bounds_msg = ("Threshold must be within bounds: "
                              "!( {} <= {} <= {} )".format(bounds[0],
                                                           thr, bounds[1]))
                raise ValueError(bounds_msg)

        # Dict of known logical comparisons. Each key contains a dict of
        # {'function': The operator function for this comparison_operator,
        #  'spp_string': Comparison_Operator string for use in CF-convention
        #                meta-data}
        self.comparison_operator_dict = {}
        self.comparison_operator_dict.update(dict.fromkeys(
            ['ge', 'GE', '>='], {'function': operator.ge,
                                 'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['gt', 'GT', '>'], {'function': operator.gt,
                                'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['le', 'LE', '<='], {'function': operator.le,
                                 'spp_string': 'below'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['lt', 'LT', '<'], {'function': operator.lt,
                                'spp_string': 'below'}))
        self.comparison_operator_string = comparison_operator
        self._decode_comparison_operator_string()

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return (
            '<BasicThreshold: thresholds {}, ' +
            'fuzzy_bounds {}, ' +
            'method: data {} threshold>'
        ).format(self.thresholds, self.fuzzy_bounds,
                 self.comparison_operator_string)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        The coordinate is named after self.threshold_coord_name, which is
        set by process() from the name of the input cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (float):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis
        """
        coord = iris.coords.DimCoord(np.array([threshold], dtype=np.float32),
                                     units=cube.units)
        coord.rename(self.threshold_coord_name)
        coord.var_name = "threshold"

        # Use an spp__relative_to_threshold attribute, as an extension to the
        # CF-conventions.
        coord.attributes.update({'spp__relative_to_threshold':
                                 self.comparison_operator['spp_string']})

        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def _decode_comparison_operator_string(self):
        """Sets self.comparison_operator based on
        self.comparison_operator_string. This is a dict containing the keys
        'function' and 'spp_string'.
        Raises errors if invalid options are found.

        Raises:
            ValueError: If self.comparison_operator_string does not match a
                        defined method.
        """
        try:
            self.comparison_operator = self.comparison_operator_dict[
                self.comparison_operator_string]
        except KeyError:
            msg = (f'String "{self.comparison_operator_string}" '
                   'does not match any known comparison_operator method')
            raise ValueError(msg)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied.  If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds into
        the units of the input cube.

        The unit conversion is applied to per-call copies only, so the
        plugin's configured thresholds and fuzzy_bounds remain in
        threshold_units and the plugin can safely be applied to several cubes.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            iris.cube.Cube:
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                probability_of_X_above(or below)_threshold (where X is
                the diagnostic under consideration)
                * Threshold dimension coordinate with same units as input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.

        """
        # Record input cube data type to ensure consistent output, though
        # integer data (signed or unsigned) must become float to enable fuzzy
        # thresholding; casting probabilities back to an integer type would
        # truncate them.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind in ('i', 'u'):
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # if necessary, convert thresholds and fuzzy bounds into cube units.
        # Work on local copies: overwriting the instance attributes would
        # cause already-converted values to be converted again on the next
        # call to process().
        thresholds = self.thresholds
        fuzzy_bounds = self.fuzzy_bounds
        if self.threshold_units is not None:
            thresholds = [
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in self.thresholds]
            fuzzy_bounds = [
                tuple(self.threshold_units.convert(bound, input_cube.units)
                      for bound in bounds)
                for bounds in self.fuzzy_bounds]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(thresholds, fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = self.comparison_operator['function'](
                    cube.data, threshold)
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                truth_value = np.where(
                    cube.data < threshold,
                    rescale(cube.data,
                            data_range=(bounds[0], threshold),
                            scale_range=(0., 0.5),
                            clip=True),
                    rescale(cube.data,
                            data_range=(threshold, bounds[1]),
                            scale_range=(0.5, 1.),
                            clip=True),
                )
                # if requirement is for probabilities below threshold (rather
                # than above), invert the exceedance probability
                if 'below' in self.comparison_operator['spp_string']:
                    truth_value = 1. - truth_value
            truth_value = truth_value.astype(input_cube_dtype)

            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        # exactly one cube is expected once the per-threshold cubes are
        # joined along the new threshold axis
        cube, = thresholded_cubes.concatenate()

        cube.rename(
            "probability_of_{}_{}_threshold".format(
                cube.name(),
                self.comparison_operator['spp_string']))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(
            cube, ["realization", "percentile"])

        return cube
Пример #20
0
    def create_data_object(self, filenames, variable):
        """Read ``variable`` from ``filenames`` and return it as gridded data.

        Longitude, latitude, GPS time, pressure, altitude and the header date
        are read alongside the requested variable. The resulting cube has
        time as dimension 0 and altitude as dimension 1; latitude and
        longitude are auxiliary coordinates along the time dimension, and air
        pressure (declared in 'atm', then converted to 'hPa') spans both
        dimensions. Invalid data values are masked.

        :param filenames: Files to read, passed straight to
            ``read_many_files_individually``.
        :param variable: Name of the data variable to read.
        :return: The result of ``GriddedData.make_from_cube`` for the cube.
        """
        logging.debug("Creating data object for variable " + variable)

        variables = [("ER2_IMU/Longitude", "x"), ("ER2_IMU/Latitude", "y"),
                     ("ER2_IMU/gps_time", "t"), ("State/Pressure", "p"),
                     ("DataProducts/Altitude", "z"), ("header/date", ""),
                     (variable, '')]

        logging.info("Listing coordinates: " + str(variables))

        requested_names = [name for name, _ in variables]
        var_data = read_many_files_individually(filenames, requested_names)

        def _joined(name):
            # Concatenate the per-file arrays read for one variable.
            return utils.concatenate(
                [get_data(item) for item in var_data[name]])

        # Each file carries its own flight date, so the GPS times are
        # converted file by file before being joined.
        converted_times = []
        for gps_times, flight_date in zip(var_data['ER2_IMU/gps_time'],
                                          var_data['header/date']):
            # The date is an array of floats formatted as yyyymmdd; only the
            # first element is used.
            yyyymmdd = str(int(flight_date[0]))
            year, month, day = yyyymmdd[0:4], yyyymmdd[4:6], yyyymmdd[6:8]
            hours_since_midnight = Unit(
                'hours since {}-{}-{} 00:00:00'.format(year, month, day))
            converted_times.append(
                hours_since_midnight.convert(get_data(gps_times),
                                             cis_standard_time_unit))

        # Flatten the data by taking the 0th column of the transpose
        time_coord = DimCoord(utils.concatenate(converted_times).T[0],
                              standard_name='time',
                              units=cis_standard_time_unit)

        # TODO This won't work for multiple files since the altitude bins are
        # different for each flight...
        altitudes = _joined("DataProducts/Altitude")
        alt_coord = DimCoord(altitudes[0], standard_name='altitude', units='m')

        pressures = _joined("State/Pressure")
        pres_coord = AuxCoord(pressures,
                              standard_name='air_pressure',
                              units='atm')
        # Fix the air-pressure units
        pres_coord.convert_units('hPa')

        latitudes = _joined('ER2_IMU/Latitude')
        lat_coord = AuxCoord(latitudes.T[0], standard_name='latitude')

        longitudes = _joined('ER2_IMU/Longitude')
        lon_coord = AuxCoord(longitudes.T[0], standard_name='longitude')

        values = _joined(variable)
        # Metadata is taken from the first file's copy of the variable.
        metadata = get_metadata(var_data[variable][0])

        dim_coords = [(alt_coord, 1), (time_coord, 0)]
        aux_coords = [(lat_coord, (0, )),
                      (lon_coord, (0, )),
                      (pres_coord, (0, 1))]
        cube = Cube(np.ma.masked_invalid(values),
                    long_name=metadata.misc['Description'],
                    units=self.clean_units(metadata.units),
                    dim_coords_and_dims=dim_coords,
                    aux_coords_and_dims=aux_coords)
        return GriddedData.make_from_cube(cube)
Пример #21
0
 def test_non_gregorian_calendar_conversion_dtype(self):
     """Converting float32 values between two 360-day-calendar time units
     must return float32 values."""
     source_unit = Unit('hours since 2000-01-01 00:00:00',
                        calendar='360_day')
     target_unit = Unit('hours since 2000-01-02 00:00:00',
                        calendar='360_day')
     hours = np.arange(4, dtype=np.float32)
     converted = source_unit.convert(hours, target_unit)
     self.assertEqual(converted.dtype, np.float32)
Пример #22
0
 def test_non_gregorian_calendar_conversion_dtype(self):
     """Check that a conversion between two time units on the 360-day
     calendar preserves the float32 dtype of the input array."""
     calendar = '360_day'
     from_unit = Unit('hours since 2000-01-01 00:00:00', calendar=calendar)
     to_unit = Unit('hours since 2000-01-02 00:00:00', calendar=calendar)
     values = np.arange(4, dtype=np.float32)
     self.assertEqual(from_unit.convert(values, to_unit).dtype, np.float32)
Пример #23
0
def extract_vars(nc_file,
                 var_name,
                 time_idx=None,
                 target_units=None,
                 time_variable='time',
                 vertical_variable='height'):
    """
    Return a :class:`xarray.DataArray` object for the desired variable in a single NetCDF file object.

    Adapted from wrf.util

    :param nc_file: (:class:`netCDF4.Dataset`, :class:`Nio.NioFile`): An open netCDF file
    :param var_name: (:obj:`str`) The variable name.
    :param time_idx: (:obj:`int` or :data:`wrf.ALL_TIMES`, optional): The desired time index. This value can be a positive integer, negative integer,
            or None to return all times in the file or sequence. The default is None (return all idxs).
    :param target_units: (:obj:`str`) If not None, attempt to convert units to this format using cf_units.
            When the conversion succeeds, the ``units`` attribute of the result is set to ``target_units``.
    :param time_variable: (:obj:`str`) Name of the time dimension. If it is the leading dimension of the
            result, the times returned by ``extract_times`` are attached as a coordinate.
    :param vertical_variable: (:obj:`str`) Name of the vertical dimension. If the result is two-dimensional
            with this as its second dimension, the variable of that name (converted to metres)
            is attached as the ``height`` coordinate.
    :returns: :class:`xarray.DataArray`:  An array object that contains metadata, named after ``var_name``.
    :raises ValueError: If ``var_name`` is not a variable of ``nc_file``.

    """
    multitime = is_multi_time(time_idx)
    time_idx_or_slice = time_idx if not multitime else slice(None)
    try:
        var = nc_file.variables[var_name]
    except KeyError:
        raise ValueError('No variable named {} available in {}'.format(
            var_name, nc_file.filepath()))  # TODO: refactor to ValidationError
    if len(var.shape) > 1:
        data = var[time_idx_or_slice, :]
    else:
        data = var[time_idx_or_slice]

    # Remember whether the data really ended up in target_units so that the
    # metadata returned below can be kept consistent with the values.
    units_converted = False
    if target_units is not None and hasattr(var, 'units'):
        try:
            u = Unit(var.units)
            data = u.convert(data, target_units)
            units_converted = True
        except ValueError:
            # Best effort: keep the data in its original units.
            logger.warning('Could not parse units "{}" for variable {}'.format(
                var.units, var_name))

    # Want to preserve the time dimension
    if not multitime:
        if len(var.shape) > 1:
            data = data[np.newaxis, :]
        else:
            data = data[np.newaxis]

    attrs = OrderedDict()
    for dkey, val in var.__dict__.items():
        # scipy.io adds these but don't want them
        if dkey in ("data", "_shape", "_size", "_typecode", "_attributes",
                    "maskandscale", "dimensions"):
            continue

        _dkey = dkey if isinstance(dkey, str) else dkey.decode()
        attrs[_dkey] = val

    if units_converted:
        # The attributes were copied from the file and still describe the
        # source units; report the units the data is actually in.
        attrs['units'] = str(target_units)

    dimnames = var.dimensions[-data.ndim:]

    coords = OrderedDict()

    # TODO needs to work around this step for ozone climatology
    if dimnames[0] == time_variable:
        t = extract_times(nc_file, time_idx, time_variable)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t

    if len(dimnames) == 2 and dimnames[1] == vertical_variable:
        t = extract_vars(nc_file,
                         vertical_variable,
                         slice(None),
                         target_units='m')
        # NOTE(review): the coordinate key is fixed to 'height' even when
        # vertical_variable has another name - confirm this is intended.
        coords['height'] = t

    # Name the array after the variable it holds (not the file object).
    data_array = DataArray(data,
                           name=var_name,
                           dims=dimnames,
                           coords=coords,
                           attrs=attrs)

    return data_array