def convert(self, unit, density=998.2, molar_mass=12.011):
    """Using cf_units (UDUNITS2), convert the unit in place.

    - handles ( M L-2 T-1 ) --> ( L T-1 ), assuming water
    - handles ( mol ) --> ( M ), assuming carbon
    """
    if 'units' not in self.ds[self.varname].attrs:
        msg = "Cannot convert the units of a DataArray lacking the 'units' attribute"
        raise ValueError(msg)
    src_unit = Unit(self.ds[self.varname].units)
    tar_unit = Unit(unit)
    mass_density = Unit("kg m-3")
    molar_density = Unit("g mol-1")
    # If the source and target units differ by a mass density, assume the
    # field is water and scale the data by the density before converting.
    if ((src_unit / tar_unit) / mass_density).is_dimensionless():
        self.ds[self.varname] /= density
        src_unit /= mass_density
    elif ((tar_unit / src_unit) / mass_density).is_dimensionless():
        self.ds[self.varname] *= density
        src_unit *= mass_density
    # Likewise, if they differ by a molar mass, assume carbon.
    if ((src_unit / tar_unit) / molar_density).is_dimensionless():
        self.ds[self.varname] /= molar_mass
        src_unit /= molar_density
    elif ((tar_unit / src_unit) / molar_density).is_dimensionless():
        self.ds[self.varname] *= molar_mass
        src_unit *= molar_density
    src_unit.convert(self.ds[self.varname].data, tar_unit, inplace=True)
    self.ds[self.varname].attrs['units'] = unit
    # Convert the bounds variable as well, if one is referenced and present.
    if "bounds" in self.ds[self.varname].attrs:
        if self.ds[self.varname].attrs['bounds'] in self.ds:
            src_unit.convert(
                self.ds[self.ds[self.varname].attrs['bounds']].data,
                tar_unit, inplace=True)
    return self
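# A minimal sketch of the dimensionless-ratio check used above, assuming only
# that cf_units is installed; the units chosen here are illustrative. When the
# ratio of the source and target units divided by a mass density is
# dimensionless, dividing the data by a water density bridges the gap before
# the final cf_units conversion.
from cf_units import Unit

src = Unit("kg m-2 s-1")   # mass flux, e.g. precipitation
tar = Unit("m s-1")        # volume flux
mass_density = Unit("kg m-3")
print(((src / tar) / mass_density).is_dimensionless())  # True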
def unit_conversion_fac(from_unit, to_unit):
    """Returns the multiplicative unit conversion factor for the input units.

    Note
    ----
    Inputs must be either instances of the :class:`cf_units.Unit` class or
    strings.

    Parameters
    ----------
    from_unit : :obj:`cf_units.Unit` or :obj:`str`
        unit to be converted
    to_unit : :obj:`cf_units.Unit` or :obj:`str`
        final unit

    Returns
    -------
    float
        multiplicative conversion factor

    Raises
    ------
    ValueError
        if the units cannot be converted into each other using the cf_units
        package
    """
    if isinstance(from_unit, str):
        from_unit = Unit(from_unit)
    return from_unit.convert(1, to_unit)
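# Usage sketch for unit_conversion_fac, assuming cf_units is available:
# converting the value 1 yields the multiplicative factor between two
# commensurable units.
fac = unit_conversion_fac("km", "m")
print(fac)        # 1000.0
print(2.5 * fac)  # 2.5 km expressed in metres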
def convert(self, tar_unit):
    unit0 = Unit(self.unit)
    unit1 = Unit(tar_unit)
    data = self.data
    self.data = unit0.convert(data, unit1)
    self.unit = tar_unit
    return self
def unit_converter(data, inunit, outunit):
    """
    Unit converter. Takes a (numpy) array and valid UDUNITS in- and out-units
    as strings, and returns the array in the out-units.

    Parameters
    ----------
    data : array_like
    inunit : string
        unit to convert from, must be a UDUNITS-compatible string
    outunit : string
        unit to convert to, must be a UDUNITS-compatible string

    Returns
    -------
    out : array_like

    Example
    -------
    >>> import numpy as np
    >>> unit_converter(np.array([1, 2]) * 1e12, "kg", "Gt")
    array([ 1.,  2.])
    """
    inunit = str(inunit)
    outunit = str(outunit)
    if isinstance(data, np.ma.MaskedArray):
        mask = data.mask
    else:
        mask = None
    data = np.array(data)
    if inunit != outunit:
        try:
            try:
                from cf_units import Unit
                in_unit = Unit(inunit)
                out_unit = Unit(outunit)
                outdata = in_unit.convert(data, out_unit)
            except ImportError:
                from udunits2 import Converter, System, Unit
                sys = System()
                c = Converter((Unit(sys, inunit), Unit(sys, outunit)))
                outdata = c(data)
        except ImportError:
            print("Neither cf_units nor udunits2 module found, you're on your own.")
            c = 1.0 / 1e3
            outdata = c * data
    else:
        outdata = data
    if mask is not None:
        return np.ma.array(outdata, mask=mask)
    return outdata
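# A short usage sketch for unit_converter: the input mask is preserved
# through the conversion.
import numpy as np

masked = np.ma.array([1e12, 2e12, 3e12], mask=[False, True, False])
print(unit_converter(masked, "kg", "Gt"))  # [1.0 -- 3.0]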
def _apply_minimum_precip_rate(self, precip_cube, cube):
    """Ensure that negative precipitation rates are capped at the defined
    minimum precipitation rate.

    Args:
        precip_cube (iris.cube.Cube):
            Cube containing a precipitation rate input field.
        cube (iris.cube.Cube):
            Cube containing the precipitation rate field after combining
            with orographic enhancement.

    Returns:
        iris.cube.Cube:
            Cube containing the precipitation rate field where any negative
            precipitation rates have been capped at the defined minimum
            precipitation rate.
    """
    if self.operation == "subtract":
        original_units = Unit("mm/hr")
        threshold_in_cube_units = original_units.convert(
            self.min_precip_rate_mmh, cube.units
        )
        threshold_in_precip_cube_units = original_units.convert(
            self.min_precip_rate_mmh, precip_cube.units
        )
        # Ignore invalid warnings generated if e.g. a NaN is encountered
        # within the comparisons below.
        with np.errstate(invalid="ignore"):
            # Create a mask of points where the input precipitation cube is
            # greater than or equal to the threshold, while combining the
            # precipitation rate input cube with the orographic enhancement
            # has produced precipitation rates at or below the threshold.
            mask = (precip_cube.data >= threshold_in_precip_cube_units) & (
                cube.data <= threshold_in_cube_units
            )
        # Set any such values to be equal to the minimum precipitation rate.
        cube.data[mask] = threshold_in_cube_units
    return cube
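# The threshold conversion used above, in isolation: a rate given in mm/hr
# expressed in SI precipitation-rate units. The 1/32 mm/hr value mirrors the
# minimum rate referenced in the related snippets below and is illustrative.
from cf_units import Unit

min_precip_rate_mmh = 1 / 32.0
print(Unit("mm/hr").convert(min_precip_rate_mmh, "m s-1"))  # ~8.68e-09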
def get_vertical_extent(self, ds, data_dict):
    global_atts = ds.metadata_mapping["NC_GLOBAL"]
    try:
        if global_atts["geospatial_vertical_positive"] == "down":
            sign = 1
        elif global_atts["geospatial_vertical_positive"] == "up":
            sign = -1
        # If the units are not valid, an exception will be raised here.
        units = Unit(global_atts["geospatial_vertical_units"])
        # Convert to meters.
        m = Unit("meters")
        orig_units = Unit(global_atts["geospatial_vertical_units"]) * sign
        extra_keys = {kvp["key"] for kvp in data_dict["extras"]}
        if "vertical_min" not in extra_keys:
            converted_min = orig_units.convert(
                float(global_atts["geospatial_vertical_min"]), m)
            if not isnan(converted_min):
                data_dict["extras"].append({
                    "key": "vertical_min",
                    "value": str(converted_min)
                })
            else:
                log.warning("vertical_min was NaN, skipping")
        if "vertical_max" not in extra_keys:
            converted_max = orig_units.convert(
                float(global_atts["geospatial_vertical_max"]), m)
            if not isnan(converted_max):
                data_dict["extras"].append({
                    "key": "vertical_max",
                    "value": str(converted_max)
                })
            else:
                log.warning("vertical_max was NaN, skipping")
        log.info("PASS")
    except (AttributeError, ValueError, KeyError):
        log.exception(
            "Encountered an error when attempting to get the vertical "
            "bounds of the OPeNDAP dataset")
def _getGridInformation(self):
    """Looks in the model output for cell areas as well as land fractions."""

    def _shiftLon(lon):
        return (lon <= 180) * lon + (lon > 180) * (lon - 360) + (lon < -180) * 360

    # Are there cell areas associated with this model?
    area_name = None
    area_name = "area" if "area" in self.variables.keys() else area_name
    area_name = "areacella" if "areacella" in self.variables.keys() else area_name
    if area_name is not None:
        with Dataset(self.variables[area_name][0]) as f:
            A = f.variables[area_name]
            unit = Unit(A.units) if "units" in A.ncattrs() else Unit("m2")
            self.cell_areas = unit.convert(A[...], "m2", inplace=True)
    else:
        if not ("lat_bnds" in self.variables.keys() and
                "lon_bnds" in self.variables.keys()):
            return
        with Dataset(self.variables["lat_bnds"][0]) as f:
            x = f.variables["lat_bnds"][...]
        with Dataset(self.variables["lon_bnds"][0]) as f:
            y = f.variables["lon_bnds"][...]
        s = y.mean(axis=1).argmin()
        y = np.roll(_shiftLon(y), -s, axis=0)
        if y[0, 0] > y[0, 1]:
            y[0, 0] = -180.
        if y[-1, 0] > y[-1, 1]:
            y[-1, 1] = +180.
        self.cell_areas = il.CellAreas(None, None, lat_bnds=x, lon_bnds=y)

    # Now we do the same for land fractions
    frac_name = None
    frac_name = "landfrac" if "landfrac" in self.variables.keys() else frac_name
    frac_name = "sftlf" if "sftlf" in self.variables.keys() else frac_name
    if frac_name is None:
        self.land_areas = self.cell_areas
    else:
        with Dataset(self.variables[frac_name][0]) as f:
            self.land_fraction = f.variables[frac_name][...]
        # some models represent the fraction as a percent
        if np.ma.max(self.land_fraction) > 10:
            self.land_fraction *= 0.01
        with np.errstate(over='ignore', under='ignore'):
            if not np.allclose(self.cell_areas.shape,
                               self.land_fraction.shape):
                msg = ("The model %s has areacella %s which is a different "
                       "shape than sftlf %s" %
                       (self.name, str(self.cell_areas.shape),
                        str(self.land_fraction.shape)))
                raise ValueError(msg)
            self.land_areas = self.cell_areas * self.land_fraction
    self.land_area = np.ma.sum(self.land_areas)
    return
def test_gregorian_calendar_conversion_setup(self):
    # Reproduces a situation where a unit's gregorian calendar would not
    # match (using the `is` operator) the literal string 'gregorian',
    # causing an `is not` test to return a false negative.
    cal_str = cf_units.CALENDAR_GREGORIAN
    calendar = self.MyStr(cal_str)
    self.assertIsNot(calendar, cal_str)
    u1 = Unit('hours since 1970-01-01 00:00:00', calendar=calendar)
    u2 = Unit('hours since 1969-11-30 00:00:00', calendar=calendar)
    u1point = np.array([8.], dtype=np.float32)
    expected = np.array([776.], dtype=np.float32)
    result = u1.convert(u1point, u2)
    return expected, result
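# Sanity check on the expected value above: 1969-11-30 to 1970-01-01 spans
# 32 days = 768 hours, so 8 hours past the first epoch is 776 hours past the
# second. A standalone reproduction, assuming plain calendar strings:
import numpy as np
from cf_units import Unit

u1 = Unit('hours since 1970-01-01 00:00:00', calendar='gregorian')
u2 = Unit('hours since 1969-11-30 00:00:00', calendar='gregorian')
print(u1.convert(np.array([8.], dtype=np.float32), u2))  # [776.]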
def setUp(self):
    """Set up cubes for testing. This includes a 'subtracted_cube'
    containing some negative precipitation values that should be set to a
    minimum precipitation rate threshold."""
    self.precip_cube = set_up_precipitation_rate_cubelist()[0]
    oe_cube = set_up_orographic_enhancement_cube()[0]
    # Cap orographic enhancement to be zero where there is a precipitation
    # rate of zero.
    original_units = Unit("mm/hr")
    threshold_in_cube_units = original_units.convert(
        MIN_PRECIP_RATE_MMH, self.precip_cube.units)
    oe_cube.data[self.precip_cube.data < threshold_in_cube_units] = 0.
    self.oe_cube = oe_cube
    self.added_cube = self.precip_cube + oe_cube
    self.subtracted_cube = self.precip_cube - oe_cube
def _apply_orographic_enhancement(self, precip_cube, oe_cube):
    """Combine the precipitation rate cube and the orographic enhancement
    cube.

    Args:
        precip_cube (iris.cube.Cube):
            Cube containing the input precipitation field.
        oe_cube (iris.cube.Cube):
            Cube containing the orographic enhancement field matching
            the validity time of the precipitation cube.

    Returns:
        iris.cube.Cube:
            Cube containing the precipitation rate field modified by the
            orographic enhancement cube.
    """
    # Convert orographic enhancement into the units of the precipitation
    # rate cube.
    oe_cube.convert_units(precip_cube.units)

    # Set orographic enhancement to be zero for points with a
    # precipitation rate of < 1/32 mm/hr.
    original_units = Unit("mm/hr")
    threshold_in_cube_units = original_units.convert(
        self.min_precip_rate_mmh, precip_cube.units
    )

    # Ignore invalid warnings generated if e.g. a NaN is encountered
    # within the less than (<) comparison.
    with np.errstate(invalid="ignore"):
        oe_cube.data[precip_cube.data < threshold_in_cube_units] = 0.0

    # Add / subtract orographic enhancement where data is not masked
    cube = precip_cube.copy()
    if self.operation == "add":
        cube.data = cube.data + oe_cube.data
    elif self.operation == "subtract":
        cube.data = cube.data - oe_cube.data
    else:
        msg = (
            "Operation '{}' not supported for combining "
            "precipitation rate and "
            "orographic enhancement.".format(self.operation)
        )
        raise ValueError(msg)
    return cube
def test_non_gregorian_calendar_conversion_dtype(self):
    for start_dtype, exp_convert in (
        (np.float32, True),
        (np.float64, True),
        (np.int32, False),
        (np.int64, False),
        (int, False),
    ):
        data = np.arange(4, dtype=start_dtype)
        u1 = Unit("hours since 2000-01-01 00:00:00", calendar="360_day")
        u2 = Unit("hours since 2000-01-02 00:00:00", calendar="360_day")
        result = u1.convert(data, u2)
        if exp_convert:
            self.assertEqual(result.dtype, start_dtype)
        else:
            self.assertEqual(result.dtype, np.int64)
def _apply_orographic_enhancement(self, precip_cube, oe_cube):
    """Combine the precipitation rate cube and the orographic enhancement
    cube.

    Args:
        precip_cube (iris.cube.Cube):
            Cube containing the input precipitation field.
        oe_cube (iris.cube.Cube):
            Cube containing the orographic enhancement field matching
            the validity time of the precipitation cube.

    Returns:
        cube (iris.cube.Cube):
            Cube containing the precipitation rate field modified by the
            orographic enhancement cube.
    """
    # Ensure the orographic enhancement cube matches the
    # dimensions of the precip_cube.
    oe_cube = check_cube_coordinates(precip_cube, oe_cube.copy())

    # Ensure that orographic enhancement is in the units of the
    # precipitation rate cube.
    oe_cube.convert_units(precip_cube.units)

    # Set orographic enhancement to be zero for points with a
    # precipitation rate of < 1/32 mm/hr.
    original_units = Unit("mm/hr")
    threshold_in_cube_units = original_units.convert(
        self.min_precip_rate_mmh, precip_cube.units)

    # Ignore invalid warnings generated if e.g. a NaN is encountered
    # within the less than (<) comparison.
    with np.errstate(invalid='ignore'):
        oe_cube.data[precip_cube.data < threshold_in_cube_units] = 0.

    # Use CubeCombiner to combine the cubes.
    temp_cubelist = iris.cube.CubeList([precip_cube, oe_cube])
    cube = CubeCombiner(self.operation).process(temp_cubelist,
                                                precip_cube.name())
    return cube
def parse_date(datestr):
    ''' Parse the time query param '''
    try:
        if datestr.startswith('now-'):
            p = re.compile(r'^now-(?P<val>\d+)\s*(?P<units>\w+)$')
            match = p.search(datestr)
            val = int(match.group('val'))
            units = match.group('units')
            # If the units are not valid, an exception will be raised here.
            unknown_unit = Unit(units)
            hrs = Unit('hours')
            # convert to hours
            num_hrs = unknown_unit.convert(val, hrs)
            dt_now = datetime.now(tz=timezone.utc)
            return dt_now - timedelta(hours=num_hrs)
        return dateparse(datestr)
    except Exception:
        return None
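# Usage sketch for parse_date: relative offsets accept any time unit that
# UDUNITS can convert to hours; the inputs here are illustrative.
print(parse_date("now-3 days"))   # ~72 hours before the current UTC time
print(parse_date("2020-01-01"))   # parsed by dateutil's dateparse
print(parse_date("not a date"))   # None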
def convert_unit(value, unit, new_unit):
    """
    One-line unit conversion.

    Parameters
    ----------
    value: ``float``
        The starting value for the conversion.
    unit: ``str``
        The starting unit for the conversion.
    new_unit: ``str``
        The desired unit for the conversion.

    Returns
    -------
    new_value: ``float``
        The starting value, but converted to the new unit.
    """
    start_unit = Unit(unit)
    return start_unit.convert(value, new_unit)
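# Example: cf_units understands both pressure units, so this returns ~1.0.
print(convert_unit(1013.25, "hPa", "atm"))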
from cf_units import Unit
import numpy as np
import matplotlib.pyplot as plt
from sympy import Symbol

c = Unit("deg_c")
k = Unit("deg_k")
c.convert(0, k)

# Now we test the actual use case for a plot.
second = Unit("second")
minute = Unit("minute")
meter = Unit("meter")
xs = [i * second for i in range(1, 10)]
ys = [meter / x for x in xs]


def plot_with_units(ax, xt, yt):
    xs, x_unit = xt
    ys, y_unit = yt
    xnums = [x.convert(1, x_unit) for x in xs]
    ynums = [y.convert(1, y_unit) for y in ys]
    print(xnums, ynums)
    ax.set_xlabel(str(x_unit))
    ax.set_ylabel(str(y_unit))
    ax.plot(xnums, ynums)


def auto_plot_with_units(ax, xt, yt):
    ...
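# A usage sketch for plot_with_units, assuming xs and ys are the lists of
# cf_units quantities built above; converting each quantity to the axis unit
# recovers plain numbers for matplotlib.
fig, ax = plt.subplots()
plot_with_units(ax, (xs, second), (ys, meter / second))
plt.show()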
class BasicThreshold(object):
    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership
    function around the threshold values provided. A cube will be returned
    with a new threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(self, thresholds, fuzzy_factor=None,
                 fuzzy_bounds=None, threshold_units=None,
                 below_thresh_ok=False):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).

        If fuzzy_factor is not None, fuzzy bounds are calculated using the
        threshold value in the units in which it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded
        value of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore,
        in this case, the thresholded exceedance probabilities between
        4.5 mm/hr and 7.5 mm/hr would follow the pattern:

        ::

            Data value  | Probability
            ------------|-------------
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of floats or float):
                The threshold points for 'significant' datapoints.

        Keyword Args:
            fuzzy_factor (float):
                Specifies the lower bound for the fuzzy membership value when
                multiplied by each threshold. The upper bound is the
                equivalent linear distance above the threshold. If None, no
                fuzzy_factor is applied.
            fuzzy_bounds (list of tuples):
                Lower and upper bounds for fuzziness. The list should be of
                the same length as thresholds. Each entry in the list should
                be a tuple of two floats representing the lower and upper
                bounds respectively. If None, no fuzzy_bounds are applied.
            threshold_units (string):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            below_thresh_ok (boolean):
                True to count points as significant if *below* the threshold,
                False to count points as significant if *above* the
                threshold.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a
                fuzzy factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set as
                this is ambiguous.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor

        # Set fuzzy bounds. If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value.
        # A test of this equality is used later to determine whether to
        # process with a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds
            # ensure fuzzy_bounds is a list of tuples
            if isinstance(fuzzy_bounds, tuple):
                self.fuzzy_bounds = [fuzzy_bounds]

        # check that thresholds and fuzzy_bounds are self-consistent
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            assert len(bounds) == 2, ("Invalid bounds for one threshold: {}."
                                      " Expected 2 floats.".format(bounds))
            bounds_msg = ("Threshold must be within bounds: "
                          "!( {} <= {} <= {} )".format(bounds[0], thr,
                                                       bounds[1]))
            assert bounds[0] <= thr, bounds_msg
            assert bounds[1] >= thr, bounds_msg

        self.below_thresh_ok = below_thresh_ok

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return ('<BasicThreshold: thresholds {}, ' +
                'fuzzy_bounds {}, ' +
                'below_thresh_ok: {}>').format(self.thresholds,
                                               self.fuzzy_bounds,
                                               self.below_thresh_ok)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (np.float32):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis
        """
        try:
            coord = iris.coords.DimCoord(
                np.array([threshold], dtype=np.float32),
                standard_name=self.threshold_coord_name,
                var_name="threshold", units=cube.units)
        except ValueError as cause:
            if 'is not a valid standard_name' in str(cause):
                coord = iris.coords.DimCoord(
                    np.array([threshold], dtype=np.float32),
                    long_name=self.threshold_coord_name,
                    var_name="threshold", units=cube.units)
            else:
                raise ValueError(cause)
        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied. If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds
        into the units of the input cube.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            cube (iris.cube.Cube):
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                  probability_of_X_above(or below)_threshold (where X is the
                  diagnostic under consideration)
                * Threshold dimension coordinate with same units as
                  input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.
        """
        # Record the input cube data type to ensure consistent output, though
        # integer data must become float to enable fuzzy thresholding.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind == 'i':
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # if necessary, convert thresholds and fuzzy bounds into cube units
        if self.threshold_units is not None:
            self.thresholds = [
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in self.thresholds
            ]
            self.fuzzy_bounds = [
                tuple([
                    self.threshold_units.convert(threshold, input_cube.units)
                    for threshold in bounds
                ]) for bounds in self.fuzzy_bounds
            ]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(self.thresholds, self.fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = cube.data > threshold
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                truth_value = np.where(
                    cube.data < threshold,
                    rescale(cube.data,
                            data_range=(bounds[0], threshold),
                            scale_range=(0., 0.5),
                            clip=True),
                    rescale(cube.data,
                            data_range=(threshold, bounds[1]),
                            scale_range=(0.5, 1.),
                            clip=True),
                )
            truth_value = truth_value.astype(input_cube_dtype)
            # if the requirement is for probabilities below the threshold
            # (rather than above), invert the exceedance probability
            if self.below_thresh_ok:
                truth_value = 1. - truth_value
            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        cube, = thresholded_cubes.concatenate()
        # TODO: Correct when formal cf-standards exists
        # Force the metadata to temporary conventions
        if self.below_thresh_ok:
            cube.attributes.update({'relative_to_threshold': 'below'})
            cube.rename("probability_of_{}_below_threshold".format(
                cube.name()))
        else:
            cube.attributes.update({'relative_to_threshold': 'above'})
            cube.rename("probability_of_{}_above_threshold".format(
                cube.name()))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(cube,
                                           ["realization", "percentile_over"])
        return cube
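# A hedged usage sketch for this version of the plugin; `rate_cube` is a
# hypothetical iris rainfall-rate cube and the numbers are illustrative.
# The threshold is given in mm/hr and converted into the cube's units by the
# plugin via cf_units.
plugin = BasicThreshold([6.0], fuzzy_factor=0.75, threshold_units="mm hr-1")
probability_cube = plugin.process(rate_cube)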
class BasicThreshold(BasePlugin):
    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership
    function around the threshold values provided. A cube will be returned
    with a new threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(self, thresholds, fuzzy_factor=None,
                 fuzzy_bounds=None, threshold_units=None,
                 comparison_operator='>'):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).

        If fuzzy_factor is not None, fuzzy bounds are calculated using the
        threshold value in the units in which it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded
        value of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore,
        in this case, the thresholded exceedance probabilities between
        4.5 mm/hr and 7.5 mm/hr would follow the pattern:

        ::

            Data value  | Probability
            ------------|-------------
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of float or float):
                The threshold points for 'significant' datapoints.
            fuzzy_factor (float):
                Specifies the lower bound for the fuzzy membership value when
                multiplied by each threshold. The upper bound is the
                equivalent linear distance above the threshold. If None, no
                fuzzy_factor is applied.
            fuzzy_bounds (list of tuple):
                Lower and upper bounds for fuzziness. The list should be of
                the same length as thresholds. Each entry in the list should
                be a tuple of two floats representing the lower and upper
                bounds respectively. If None, no fuzzy_bounds are applied.
            threshold_units (str):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            comparison_operator (str):
                Indicates the comparison_operator to use with the threshold.
                e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
                evaluate data < threshold. When using fuzzy thresholds, there
                is no difference between < and <= or > and >=.
                Valid choices: > >= < <= gt ge lt le.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a
                fuzzy factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set as
                this is ambiguous.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor
        # Set fuzzy bounds. If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value. A test
        # of this equality is used later to determine whether to process with
        # a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds
            # ensure fuzzy_bounds is a list of tuples
            if isinstance(fuzzy_bounds, tuple):
                self.fuzzy_bounds = [fuzzy_bounds]

        # check that thresholds and fuzzy_bounds are self-consistent
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            if len(bounds) != 2:
                raise ValueError("Invalid bounds for one threshold: {}."
                                 " Expected 2 floats.".format(bounds))
            if bounds[0] > thr or bounds[1] < thr:
                bounds_msg = ("Threshold must be within bounds: "
                              "!( {} <= {} <= {} )".format(bounds[0], thr,
                                                           bounds[1]))
                raise ValueError(bounds_msg)

        # Dict of known logical comparisons. Each key contains a dict of
        # {'function': The operator function for this comparison_operator,
        #  'spp_string': Comparison_Operator string for use in CF-convention
        #                meta-data}
        self.comparison_operator_dict = {}
        self.comparison_operator_dict.update(dict.fromkeys(
            ['ge', 'GE', '>='], {'function': operator.ge,
                                 'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['gt', 'GT', '>'], {'function': operator.gt,
                                'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['le', 'LE', '<='], {'function': operator.le,
                                 'spp_string': 'below'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['lt', 'LT', '<'], {'function': operator.lt,
                                'spp_string': 'below'}))
        self.comparison_operator_string = comparison_operator
        self._decode_comparison_operator_string()

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return ('<BasicThreshold: thresholds {}, ' +
                'fuzzy_bounds {}, ' +
                'method: data {} threshold>').format(
                    self.thresholds, self.fuzzy_bounds,
                    self.comparison_operator_string)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (float):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis
        """
        coord = iris.coords.DimCoord(np.array([threshold], dtype=np.float32),
                                     units=cube.units)
        coord.rename(self.threshold_coord_name)
        coord.var_name = "threshold"
        # Use an spp__relative_to_threshold attribute, as an extension to the
        # CF-conventions.
        coord.attributes.update({'spp__relative_to_threshold':
                                 self.comparison_operator['spp_string']})
        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def _decode_comparison_operator_string(self):
        """Sets self.comparison_operator based on
        self.comparison_operator_string. This is a dict containing the keys
        'function' and 'spp_string'.
        Raises errors if invalid options are found.

        Raises:
            ValueError: If self.comparison_operator_string does not match a
                defined method.
        """
        try:
            self.comparison_operator = self.comparison_operator_dict[
                self.comparison_operator_string]
        except KeyError:
            msg = (f'String "{self.comparison_operator_string}" '
                   'does not match any known comparison_operator method')
            raise ValueError(msg)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied. If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds
        into the units of the input cube.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            iris.cube.Cube:
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                  probability_of_X_above(or below)_threshold (where X is the
                  diagnostic under consideration)
                * Threshold dimension coordinate with same units as
                  input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.
        """
        # Record the input cube data type to ensure consistent output, though
        # integer data must become float to enable fuzzy thresholding.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind == 'i':
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # if necessary, convert thresholds and fuzzy bounds into cube units
        if self.threshold_units is not None:
            self.thresholds = [self.threshold_units.convert(
                threshold, input_cube.units)
                for threshold in self.thresholds]
            self.fuzzy_bounds = [tuple([
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in bounds]) for bounds in self.fuzzy_bounds]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(self.thresholds, self.fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = self.comparison_operator['function'](
                    cube.data, threshold)
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                truth_value = np.where(
                    cube.data < threshold,
                    rescale(cube.data,
                            data_range=(bounds[0], threshold),
                            scale_range=(0., 0.5),
                            clip=True),
                    rescale(cube.data,
                            data_range=(threshold, bounds[1]),
                            scale_range=(0.5, 1.),
                            clip=True),
                )
                # if the requirement is for probabilities below the threshold
                # (rather than above), invert the exceedance probability
                if 'below' in self.comparison_operator['spp_string']:
                    truth_value = 1. - truth_value
            truth_value = truth_value.astype(input_cube_dtype)
            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        cube, = thresholded_cubes.concatenate()
        cube.rename("probability_of_{}_{}_threshold".format(
            cube.name(), self.comparison_operator['spp_string']))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(
            cube, ["realization", "percentile"])
        return cube
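# Usage sketch for the comparison_operator form; `temperature_cube` is a
# hypothetical iris cube of air temperatures and the threshold value is
# illustrative. The plugin converts 0 degC into the cube's units before
# thresholding.
plugin = BasicThreshold([0.0], comparison_operator='<=',
                        threshold_units='degC')
below_freezing_probs = plugin.process(temperature_cube)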
def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)
    variables = [("ER2_IMU/Longitude", "x"),
                 ("ER2_IMU/Latitude", "y"),
                 ("ER2_IMU/gps_time", "t"),
                 ("State/Pressure", "p"),
                 ("DataProducts/Altitude", "z"),
                 ("header/date", ""),
                 (variable, '')]

    logging.info("Listing coordinates: " + str(variables))

    var_data = read_many_files_individually(filenames,
                                            [v[0] for v in variables])

    date_times = []
    for times, date in zip(var_data['ER2_IMU/gps_time'],
                           var_data['header/date']):
        # Date is stored as an array (of length 92??) of floats with
        # format: yyyymmdd
        date_str = str(int(date[0]))
        t_unit = Unit('hours since {}-{}-{} 00:00:00'.format(
            date_str[0:4], date_str[4:6], date_str[6:8]))
        date_times.append(
            t_unit.convert(get_data(times), cis_standard_time_unit))

    # time_data = utils.concatenate([get_data(i)
    #                                for i in var_data['ER2_IMU/gps_time']])
    # date_str = str(int(var_data['header/date'][0][0]))

    # Flatten the data by taking the 0th column of the transpose
    time_coord = DimCoord(utils.concatenate(date_times).T[0],
                          standard_name='time',
                          units=cis_standard_time_unit)

    # TODO: This won't work for multiple files since the altitude bins are
    # different for each flight...
    alt_data = utils.concatenate(
        [get_data(i) for i in var_data["DataProducts/Altitude"]])
    alt_coord = DimCoord(alt_data[0], standard_name='altitude', units='m')

    pres_data = utils.concatenate(
        [get_data(i) for i in var_data["State/Pressure"]])
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure',
                          units='atm')
    # Fix the air-pressure units
    pres_coord.convert_units('hPa')

    lat_data = utils.concatenate(
        [get_data(i) for i in var_data['ER2_IMU/Latitude']])
    lat_coord = AuxCoord(lat_data.T[0], standard_name='latitude')

    lon_data = utils.concatenate(
        [get_data(i) for i in var_data['ER2_IMU/Longitude']])
    lon_coord = AuxCoord(lon_data.T[0], standard_name='longitude')

    data = utils.concatenate([get_data(i) for i in var_data[variable]])
    metadata = get_metadata(var_data[variable][0])

    cube = Cube(np.ma.masked_invalid(data),
                long_name=metadata.misc['Description'],
                units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)),
                                     (lon_coord, (0,)),
                                     (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd
def test_non_gregorian_calendar_conversion_dtype(self):
    data = np.arange(4, dtype=np.float32)
    u1 = Unit('hours since 2000-01-01 00:00:00', calendar='360_day')
    u2 = Unit('hours since 2000-01-02 00:00:00', calendar='360_day')
    result = u1.convert(data, u2)
    self.assertEqual(result.dtype, np.float32)
def extract_vars(nc_file, var_name, time_idx=None, target_units=None,
                 time_variable='time', vertical_variable='height'):
    """
    Return a :class:`xarray.DataArray` object for the desired variable in a
    single NetCDF file object. Adapted from wrf.util.

    :param nc_file: (:class:`netCDF4.Dataset`, :class:`Nio.NioFile`): An open
        netCDF file
    :param var_name: (:obj:`str`) The variable name.
    :param time_idx: (:obj:`int` or :data:`wrf.ALL_TIMES`, optional): The
        desired time index. This value can be a positive integer, negative
        integer, or None to return all times in the file or sequence. The
        default is None (return all indexes).
    :param target_units: (:obj:`str`) If not None, attempt to convert units
        to this format using cf_units.
    :param time_variable: (:obj:`str`) Name of the time variable.
    :param vertical_variable: (:obj:`str`) Name of the vertical variable.
    :returns: :class:`xarray.DataArray`: An array object that contains
        metadata.
    """
    multitime = is_multi_time(time_idx)
    time_idx_or_slice = time_idx if not multitime else slice(None)
    try:
        var = nc_file.variables[var_name]
    except KeyError:
        # TODO: refactor to ValidationError
        raise ValueError('No variable named {} available in {}'.format(
            var_name, nc_file.filepath()))

    if len(var.shape) > 1:
        data = var[time_idx_or_slice, :]
    else:
        data = var[time_idx_or_slice]

    if target_units is not None and hasattr(var, 'units'):
        try:
            u = Unit(var.units)
            data = u.convert(data, target_units)
        except ValueError:
            logger.warning('Could not parse units "{}" for variable '
                           '{}'.format(var.units, var_name))

    # Want to preserve the time dimension
    if not multitime:
        if len(var.shape) > 1:
            data = data[np.newaxis, :]
        else:
            data = data[np.newaxis]

    attrs = OrderedDict()
    for dkey, val in var.__dict__.items():
        # scipy.io adds these, but we don't want them
        if dkey in ("data", "_shape", "_size", "_typecode", "_attributes",
                    "maskandscale", "dimensions"):
            continue
        _dkey = dkey if isinstance(dkey, str) else dkey.decode()
        attrs[_dkey] = val

    dimnames = var.dimensions[-data.ndim:]

    coords = OrderedDict()
    # TODO: needs a workaround for this step for the ozone climatology
    if dimnames[0] == time_variable:
        t = extract_times(nc_file, time_idx, time_variable)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t
    if len(dimnames) == 2 and dimnames[1] == vertical_variable:
        t = extract_vars(nc_file, vertical_variable, slice(None),
                         target_units='m')
        coords['height'] = t

    data_array = DataArray(data, name=var_name, dims=dimnames, coords=coords,
                           attrs=attrs)
    return data_array
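# Hypothetical call, assuming a netCDF file "example.nc" containing a
# 'temperature' variable with a UDUNITS-parsable units attribute; cf_units
# rescales the data to degC on the way out.
from netCDF4 import Dataset

with Dataset("example.nc") as nc:
    temperature = extract_vars(nc, "temperature", target_units="degC")
print(temperature.dims, temperature.shape)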