def _regrid_variable(self, var_cube, unit):
    """
    Regrid an input variable onto the topography grid in the required units.

    The spatial coordinates are sorted into ascending order, the cube is
    arranged as (y, x), its data are cast to float32 and it is linearly
    regridded onto ``self.topography`` before the units are converted.
    The helpers are used through their return values and the regrid acts
    on a copy, so the input cube is not intended to be modified.

    Args:
        var_cube (iris.cube.Cube):
            Cube containing input variable data
        unit (str):
            Required unit for this variable

    Returns:
        iris.cube.Cube:
            Cube containing regridded variable data
    """
    # Sort along x first, then y; each call's result feeds the next.
    for axis_name in ('x', 'y'):
        spatial_coord = var_cube.coord(axis=axis_name)
        var_cube = sort_coord_in_cube(var_cube, spatial_coord)

    yx_names = [
        var_cube.coord(axis='y').name(),
        var_cube.coord(axis='x').name(),
    ]
    var_cube = enforce_coordinate_ordering(var_cube, yx_names)

    # Regrid a float32 copy so the caller's data array is left alone.
    float_cube = var_cube.copy(var_cube.data.astype(np.float32))
    out_cube = float_cube.regrid(self.topography, iris.analysis.Linear())
    out_cube.convert_units(unit)
    return out_cube
def _setup_coords_for_broadcast(self, cube_list):
    """
    Give every cube in cube_list each of the coords named in
    self.broadcast_coords, as a length-one dimension where missing.

    The first cube in the list is the reference: each broadcast coord is
    looked up on it, and any cube lacking that coord gets a copy of it
    (single point 0, no bounds) promoted to a new axis, after which the
    cube's dimensions are ordered like the reference cube's.

    Args:
        cube_list (iris.cube.CubeList)

    Returns:
        iris.cube.CubeList
            Updated version of cube_list

    Raises:
        CoordinateNotFoundError: if a broadcast coord is missing from the
            first cube in the list.
        TypeError: if a later cube has the coord, but not as a dimension
            coordinate.
    """
    for coord in self.broadcast_coords:
        target_cube = cube_list[0]
        try:
            target_coord = (
                find_threshold_coordinate(target_cube)
                if coord == "threshold"
                else target_cube.coord(coord)
            )
        except CoordinateNotFoundError:
            raise CoordinateNotFoundError(
                f"Cannot find coord {coord} in {repr(target_cube)} to broadcast to."
            )

        broadcast_ready = CubeList([])
        for cube in cube_list:
            try:
                existing = cube.coord(target_coord)
            except CoordinateNotFoundError:
                existing = None

            if existing is None:
                # Coord absent: add a scalar copy and promote it to an axis.
                scalar_coord = target_coord.copy([0], bounds=None)
                cube = cube.copy()
                cube.add_aux_coord(scalar_coord, None)
                cube = iris.util.new_axis(cube, scalar_coord)
                target_dim_names = [
                    dim.name() for dim in target_cube.coords(dim_coords=True)
                ]
                enforce_coordinate_ordering(cube, target_dim_names)
            elif existing not in cube.dim_coords:
                # A scalar version of the coord would mean the
                # broadcast-from cube is only valid for part of the new
                # dimension, so it is rejected rather than broadcast.
                raise TypeError(
                    f"Cannot broadcast to coord {coord} as it already exists as an AuxCoord"
                )
            broadcast_ready.append(cube)

        cube_list = broadcast_ready
    return cube_list
def test_do_not_promote_scalar(self):
    """Test that a scalar probabilistic coordinate is left as a scalar
    (not promoted to a dimension) and the data are returned unchanged."""
    scalar_realization_cube = self.cube[0, :, :, :]
    reordered = enforce_coordinate_ordering(
        scalar_realization_cube, "realization")
    realization_dims = reordered.coord_dims("realization")
    # An empty tuple of dimensions means the coordinate is still scalar.
    self.assertFalse(realization_dims)
    self.assertArrayAlmostEqual(
        reordered.data, scalar_realization_cube.data)
def remove_cube_halo(cube, halo_radius):
    """
    Remove halo of halo_radius from a cube.

    The halo radius is converted into a number of grid points along the
    x and y axes. remove_halo_from_cube is applied to each (y, x) slice,
    the trimmed slices are merged, and any dimension coordinates of the
    input that are no longer dimensions are re-added so that the result
    has the same dimension coordinates, in the same order, as the input.

    Args:
        cube (iris.cube.Cube):
            Cube on extended grid
        halo_radius (float):
            Size of border to remove, in metres

    Returns:
        iris.cube.Cube:
            New cube with the halo removed.
    """
    halo_size_x = convert_distance_into_number_of_grid_cells(
        cube, halo_radius, axis='x')
    halo_size_y = convert_distance_into_number_of_grid_cells(
        cube, halo_radius, axis='y')

    yx_coords = [cube.coord(axis='y'), cube.coord(axis='x')]
    trimmed_slices = iris.cube.CubeList([
        remove_halo_from_cube(yx_slice, halo_size_x, halo_size_y)
        for yx_slice in cube.slices(yx_coords)
    ])
    result = trimmed_slices.merge_cube()

    # Length-one dimensions become scalar in slice / merge: promote them
    # back to dimensions.
    req_dims = [dim.name() for dim in cube.coords(dim_coords=True)]
    present_dims = [dim.name() for dim in result.coords(dim_coords=True)]
    lost_dims = [name for name in req_dims if name not in present_dims]
    for name in lost_dims:
        result = iris.util.new_axis(result, name)

    # Re-added axes may be in the wrong position; restore the input order.
    enforce_coordinate_ordering(result, req_dims)
    return result
def _setup_coords_for_broadcast(self, cube_list):
    """
    Give every cube in cube_list a threshold dimension matching the first
    cube, adding a length-one threshold dimension where it is missing.

    Cubes without the threshold coordinate receive a copy of it (single
    point 0, no bounds) promoted to a new axis, and their dimensions are
    then ordered like those of the first cube.

    Args:
        cube_list (list of iris.cube.Cube)

    Returns:
        iris.cube.CubeList
            Updated version of cube_list

    Raises:
        CoordinateNotFoundError: if there is no threshold coordinate on the
            first cube in the list
        TypeError: if there is a scalar threshold coordinate on any of the
            later cubes, which would indicate that the cube is only valid for
            a single threshold and should not be broadcast to all thresholds.
    """
    target_cube = cube_list[0]
    try:
        target_coord = find_threshold_coordinate(target_cube)
    except CoordinateNotFoundError:
        raise CoordinateNotFoundError(
            f"Cannot find coord threshold in {repr(target_cube)} to broadcast to"
        )

    broadcast_ready = CubeList([])
    for cube in cube_list:
        try:
            existing = cube.coord(target_coord)
        except CoordinateNotFoundError:
            existing = None

        if existing is None:
            # No threshold coord: add a scalar copy and promote it.
            scalar_coord = target_coord.copy([0], bounds=None)
            cube = cube.copy()
            cube.add_aux_coord(scalar_coord, None)
            cube = iris.util.new_axis(cube, scalar_coord)
            target_dim_names = [
                dim.name() for dim in target_cube.coords(dim_coords=True)
            ]
            enforce_coordinate_ordering(cube, target_dim_names)
        elif existing not in cube.dim_coords:
            msg = "Cannot broadcast to coord threshold as it already exists as an AuxCoord"
            raise TypeError(msg)
        broadcast_ready.append(cube)

    return broadcast_ready
def test_move_coordinate_to_start_with_list(self):
    """Test that the time coordinate becomes the leading dimension, with
    the data transposed to match, when its name is given in a list."""
    transposed = self.cube.copy()
    transposed.transpose([1, 0, 2, 3])
    coord_names = ["time"]
    reordered = enforce_coordinate_ordering(self.cube, coord_names)
    leading_time_dim = reordered.coord_dims("time")[0]
    self.assertEqual(leading_time_dim, 0)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_force_promotion_of_scalar(self):
    """Test that a scalar probabilistic coordinate is promoted to the
    leading dimension when promote_scalar=True, and that the data gain
    the corresponding leading axis."""
    scalar_realization_cube = self.cube[0, :, :, :]
    promoted = enforce_coordinate_ordering(
        scalar_realization_cube, "realization", promote_scalar=True)
    realization_dim = promoted.coord_dims("realization")[0]
    self.assertEqual(realization_dim, 0)
    # Wrapping the data in a list adds the expected leading axis.
    expected_data = [scalar_realization_cube.data]
    self.assertArrayAlmostEqual(promoted.data, expected_data)
def test_move_coordinate_to_end(self):
    """Test that the realization coordinate becomes the trailing
    dimension, with the data transposed to match, when anchored to the
    end."""
    transposed = self.cube.copy()
    transposed.transpose([1, 2, 3, 0])
    input_cube = self.cube.copy()
    reordered = enforce_coordinate_ordering(
        input_cube, "realization", anchor="end")
    realization_dim = reordered.coord_dims("realization")[0]
    self.assertEqual(realization_dim, 3)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def load_cube(filepath, constraints=None, no_lazy_load=False):
    """Load the filepath provided using Iris into a cube.

    Any cube whose long_name is 'prefixes' is excluded from the load, the
    'bald__isPrefixedBy' attribute is dropped from the loaded cube, and
    the dimensions are arranged with the probabilistic coordinates first
    and the y and x coordinates last.

    Args:
        filepath (str):
            Filepath that will be loaded.
        constraints (iris.Constraint, str or None):
            Constraint to be applied when loading from the input filepath.
            This can be in the form of an iris.Constraint or could be a string
            that is intended to match the name of the cube.
            The default is None.
        no_lazy_load (bool):
            If True, bypass cube deferred (lazy) loading and load the whole
            cube into memory. This can increase performance at the cost of
            memory. If False (default) then lazy load.

    Returns:
        iris.cube.Cube:
            Cube that has been loaded from the input filepath given the
            constraints provided.
    """
    def _not_prefix_cube(candidate):
        """Return True for any cube other than the metadata prefix cube."""
        return candidate.long_name != 'prefixes'

    # Combine the prefix-cube filter with whatever the caller supplied.
    constraints = iris.Constraint(cube_func=_not_prefix_cube) & constraints
    cube = iris.load_cube(filepath, constraint=constraints)

    # Drop the metadata prefix attribute when it is present.
    cube.attributes.pop('bald__isPrefixedBy', None)

    # Probabilistic coordinates lead, in this order.
    probabilistic_order = ["realization", "percentile_over", "threshold"]
    cube = enforce_coordinate_ordering(cube, probabilistic_order)

    # The y and x dimensions trail.
    spatial_order = [cube.coord(axis="y").name(), cube.coord(axis="x").name()]
    cube = enforce_coordinate_ordering(cube, spatial_order, anchor="end")

    if no_lazy_load:
        # Touching .data forces the cube's data into memory.
        _ = cube.data
    return cube
def test_partial_names(self):
    """Test that coordinates are reordered correctly when only partial
    coordinate names ("tim", "realiz") are supplied."""
    transposed = self.cube.copy()
    transposed.transpose([1, 0, 2, 3])
    input_cube = self.cube.copy()
    reordered = enforce_coordinate_ordering(input_cube, ["tim", "realiz"])
    # The full coordinate names should occupy dimensions 0 and 1 in turn.
    for position, full_name in enumerate(("time", "realization")):
        self.assertEqual(reordered.coord_dims(full_name)[0], position)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_include_extra_coordinates(self):
    """Test that requested coordinate names that are absent from the cube
    are ignored, and the coordinates that are present are reordered."""
    transposed = self.cube.copy()
    transposed.transpose([1, 0, 2, 3])
    requested_names = ["time", "realization", "nonsense"]
    reordered = enforce_coordinate_ordering(self.cube, requested_names)
    # Only the names that exist on the cube can be checked for position.
    for position, present_name in enumerate(("time", "realization")):
        self.assertEqual(reordered.coord_dims(present_name)[0], position)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_partial_names(self):
    """Test that coordinates are reordered correctly when only partial
    coordinate names ("tim", "realiz") are supplied."""
    # "tim" would otherwise also match forecast_reference_time, so drop
    # the coordinate that causes multiple partial matches.
    # TODO remove this functionality and test (never called operationally)
    self.cube.remove_coord("forecast_reference_time")
    transposed = self.cube.copy()
    transposed.transpose([1, 0, 2, 3])
    reordered = enforce_coordinate_ordering(self.cube, ["tim", "realiz"])
    for position, full_name in enumerate(("time", "realization")):
        self.assertEqual(reordered.coord_dims(full_name)[0], position)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_full_reordering(self):
    """Test that every dimension ends up in the requested position, with
    the data transposed to match, when all coordinate names are given."""
    transposed = self.cube.copy()
    transposed.transpose([2, 0, 3, 1])
    requested_order = ["latitude", "realization", "longitude", "time"]
    reordered = enforce_coordinate_ordering(self.cube, requested_order)
    # Each coordinate should sit at its index in the requested order.
    for position, name in enumerate(
            ("latitude", "realization", "longitude", "time")):
        self.assertEqual(reordered.coord_dims(name)[0], position)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_move_multiple_coordinate_to_end_with_list(self):
    """Test that the time and realization coordinates become the last two
    dimensions, in that order, when given as a list anchored to the end."""
    transposed = self.cube.copy()
    transposed.transpose([2, 3, 1, 0])
    trailing_names = ["time", "realization"]
    reordered = enforce_coordinate_ordering(
        self.cube, trailing_names, anchor="end")
    for name, position in (("time", 2), ("realization", 3)):
        self.assertEqual(reordered.coord_dims(name)[0], position)
    self.assertArrayAlmostEqual(reordered.data, transposed.data)
def test_coordinate_reordering_with_different_alphas(self):
    """Test that x and y alphas still apply to the right coordinate when
    the input cube spatial dimensions are (x, y) not (y, x)"""
    dimension_order = ["realization", "longitude", "latitude"]
    alpha_y = 0.5 * self.alpha_x
    # Put the input cube into (realization, x, y) order, in place.
    enforce_coordinate_ordering(
        self.cube, ["realization", "longitude", "latitude"])
    recursive_filter = RecursiveFilter(
        alpha_x=self.alpha_x, alpha_y=alpha_y, iterations=self.iterations)
    result = recursive_filter.process(self.cube)

    expected_result = np.array([
        [0.01620921, 0.03978802, 0.10592333, 0.03978982, 0.01621686],
        [0.02866841, 0.06457599, 0.15184643, 0.06457873, 0.02868005],
        [0.05077430, 0.10290188, 0.19869247, 0.10290585, 0.05079120],
        [0.02881413, 0.06486591, 0.15238355, 0.06486866, 0.02882582],
        [0.01657352, 0.04051282, 0.10726611, 0.04051464, 0.01658128],
    ])

    # The output must keep the (realization, x, y) order of the input.
    result_dim_names = [dim.name() for dim in result.coords(dim_coords=True)]
    self.assertSequenceEqual(result_dim_names, dimension_order)
    self.assertArrayAlmostEqual(result.data[0], expected_result)
def process(self, input_cubes: CubeList) -> Cube:
    """Check the input cubes, then calculate a probability of freezing
    rain diagnostic.

    If a realization coordinate is present on the resulting cube, it is
    made the leading dimension.

    Args:
        input_cubes:
            Contains exactly three cubes, a rain rate or accumulation, a
            sleet rate or accumulation, and an instantaneous or period
            temperature. Accumulations and periods must all represent the
            same length of time.

    Returns:
        Cube of freezing rain probabilities.
    """
    # The first two steps prepare state on the plugin that the
    # calculation step then uses; their order matters.
    self._get_input_cubes(input_cubes)
    self._extract_common_realizations()
    probability_cube = self._calculate_freezing_rain_probability()

    # In-place reordering: realization (if any) becomes the first dimension.
    enforce_coordinate_ordering(
        probability_cube, "realization", anchor_start=True)
    return probability_cube
def extract_diagnostic_data(coordinate_cube, diagnostic_cube):
    """
    Extracts diagnostic data from the desired grid points in the
    diagnostic cube.

    The neighbour finding routine that produces the coordinate cube works
    in x-y order. The diagnostic cube is therefore reordered in place to
    put x before y before the indices are used to extract data.

    Args:
        coordinate_cube (iris.cube.Cube):
            A cube containing the x and y grid coordinates for the grid
            point neighbours.
        diagnostic_cube (iris.cube.Cube):
            A cube of diagnostic data from which spot data is being taken.
            Its dimension order is changed by this function.

    Returns:
        numpy.ndarray:
            An array of diagnostic values at the grid coordinates found
            within the coordinate cube.
    """
    x_name = diagnostic_cube.coord(axis='x').name()
    y_name = diagnostic_cube.coord(axis='y').name()
    enforce_coordinate_ordering(diagnostic_cube, [x_name, y_name])

    # Transposing the coordinate data gives one index array per
    # dimension, which as a tuple selects individual grid points.
    grid_indices = tuple(coordinate_cube.data.T)
    return diagnostic_cube.data[grid_indices]
def remove_cube_halo(cube, halo_radius):
    """
    Remove halo of halo_radius from a cube.

    The halo radius is converted into a number of grid points along the
    x and y axes. remove_halo_from_cube is applied to each (y, x) slice,
    the trimmed slices are merged, and the input's dimension coordinates
    are then restored in their original order, promoting any that have
    become scalar.

    Args:
        cube (iris.cube.Cube):
            Cube on extended grid
        halo_radius (float):
            Size of border to remove, in metres

    Returns:
        iris.cube.Cube:
            New cube with the halo removed.
    """
    halo_size_x, halo_size_y = convert_distance_into_number_of_grid_cells(
        cube, halo_radius)

    yx_coords = [cube.coord(axis='y'), cube.coord(axis='x')]
    trimmed_slices = iris.cube.CubeList([
        remove_halo_from_cube(yx_slice, halo_size_x, halo_size_y)
        for yx_slice in cube.slices(yx_coords)
    ])
    merged = trimmed_slices.merge_cube()

    # Restore the input's dimension coordinates, in their original order.
    req_coords = [dim.name() for dim in cube.coords(dim_coords=True)]
    return enforce_coordinate_ordering(
        merged, req_coords, promote_scalar=True)
def _mean_and_variance_to_percentiles(calibrated_forecast_predictor,
                                      calibrated_forecast_variance,
                                      percentiles):
    """
    Function returning percentiles based on the supplied
    mean and variance. The percentiles are created by assuming a
    Gaussian distribution and calculating the value of the phenomenon at
    specific points within the distribution.

    Args:
        calibrated_forecast_predictor (iris.cube.Cube):
            Predictor for the calibrated forecast i.e. the mean.
        calibrated_forecast_variance (iris.cube.Cube):
            Variance for the calibrated forecast.
        percentiles (list):
            Percentiles (as percentages) at which to calculate the value
            of the phenomenon.

    Returns:
        iris.cube.Cube:
            Cube containing the values for the phenomenon at each of the
            percentiles requested.

    Raises:
        ValueError: if NaNs remain in the result after zero-variance
            points have been filled with the mean. The message names the
            percentile being processed when the NaNs were detected.
    """
    calibrated_forecast_predictor = enforce_coordinate_ordering(
        calibrated_forecast_predictor, "realization")
    calibrated_forecast_variance = enforce_coordinate_ordering(
        calibrated_forecast_variance, "realization")

    calibrated_forecast_predictor_data = (
        calibrated_forecast_predictor.data.flatten())
    calibrated_forecast_variance_data = (
        calibrated_forecast_variance.data.flatten())

    # Keep the caller's percentages for error reporting, and convert
    # percentiles into fractions for the percent point function.
    percentages = list(percentiles)
    fractions = np.array(
        [x / 100.0 for x in percentages], dtype=np.float32)

    result = np.zeros(
        (len(fractions), calibrated_forecast_predictor_data.shape[0]),
        dtype=np.float32)

    # Loop over percentiles, and use a normal distribution with the mean
    # and variance to calculate the values at each percentile.
    for index, (percentage, fraction) in enumerate(
            zip(percentages, fractions)):
        percentile_list = np.repeat(
            fraction, len(calibrated_forecast_predictor_data))
        result[index, :] = norm.ppf(
            percentile_list, loc=calibrated_forecast_predictor_data,
            scale=np.sqrt(calibrated_forecast_variance_data))
        # If percent point function (PPF) returns NaNs, fill in
        # mean instead of NaN values. NaN will only be generated if the
        # variance is zero. Therefore, if the variance is zero, the mean
        # value is used for all gridpoints with a NaN.
        if np.any(calibrated_forecast_variance_data == 0):
            nan_index = np.argwhere(np.isnan(result[index, :]))
            result[index, nan_index] = (
                calibrated_forecast_predictor_data[nan_index])
        if np.any(np.isnan(result)):
            msg = ("NaNs are present within the result for the {} "
                   "percentile. Unable to calculate the percent point "
                   "function.")
            # The placeholder was previously left unformatted, so the
            # message never said which percentile had failed.
            raise ValueError(msg.format(percentage))

    # Convert percentiles back into percentages.
    percentiles = [x * 100.0 for x in fractions]

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    result = restore_non_probabilistic_dimensions(
        result, calibrated_forecast_predictor, "realization",
        len(percentiles))

    # Use the first realization slice, without its realization
    # coordinate, as the template for the output cube.
    for template_cube in calibrated_forecast_predictor.slices_over(
            "realization"):
        template_cube.remove_coord("realization")
        break

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, result)
    # Remove cell methods associated with finding the ensemble mean,
    # which are no longer relevant.
    percentile_cube.cell_methods = {}
    return percentile_cube
def _probabilities_to_percentiles(self, forecast_probabilities, percentiles,
                                  bounds_pairing):
    """
    Conversion of probabilities to percentiles through the construction
    of a cumulative distribution function. This is effectively
    constructed by linear interpolation from the probabilities associated
    with each threshold to a set of percentiles.

    Args:
        forecast_probabilities (iris.cube.Cube):
            Cube with a threshold coordinate.
        percentiles (numpy.ndarray):
            Array of percentiles, at which the corresponding values will be
            calculated.
        bounds_pairing (tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.

    Returns:
        iris.cube.Cube:
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.

    Raises:
        NotImplementedError: if the cube's relative_to_threshold attribute
            is neither 'above' nor 'below'.

    Warns:
        UserWarning: if the probabilities used to build the CDF are not
            ascending along the threshold axis.
    """
    threshold_coord = forecast_probabilities.coord("threshold")
    threshold_name = threshold_coord.name()
    threshold_unit = threshold_coord.units
    threshold_points = threshold_coord.points

    # Ensure that the threshold dimension is first, so that the
    # conversion to a 2d array produces data in the desired order.
    forecast_probabilities = enforce_coordinate_ordering(
        forecast_probabilities, threshold_name)
    prob_slices = convert_cube_data_to_2d(
        forecast_probabilities, coord=threshold_name)

    # The requirement below for a monotonically changing probability
    # across thresholds can be thwarted by precision errors of order 1E-10,
    # as such, here we round to a precision of 9 decimal places.
    prob_slices = np.around(prob_slices, 9)

    # Invert probabilities for data thresholded above thresholds.
    relation = forecast_probabilities.attributes['relative_to_threshold']
    if relation == 'above':
        probabilities_for_cdf = 1 - prob_slices
    elif relation == 'below':
        probabilities_for_cdf = prob_slices
    else:
        msg = ("Probabilities to percentiles only implemented for "
               "thresholds above or below a given value. "
               "The relation to threshold is given as {}".format(relation))
        raise NotImplementedError(msg)

    threshold_points, probabilities_for_cdf = (
        self._add_bounds_to_thresholds_and_probabilities(
            threshold_points, probabilities_for_cdf, bounds_pairing))

    if np.any(np.diff(probabilities_for_cdf) < 0):
        msg = ("The probability values used to construct the "
               "Cumulative Distribution Function (CDF) "
               "must be ascending i.e. in order to yield "
               "a monotonically increasing CDF. "
               "The probabilities are {}".format(probabilities_for_cdf))
        # Only a warning: processing continues, although np.interp below
        # expects increasing x-coordinates.
        warnings.warn(msg)

    # Convert percentiles into fractions.
    percentiles = np.array(
        [x / 100.0 for x in percentiles], dtype=np.float32)

    forecast_at_percentiles = np.empty(
        (len(percentiles), probabilities_for_cdf.shape[0]),
        dtype=np.float32)
    # Each row of the CDF array is interpolated to the requested
    # fractions independently.
    for index, cdf_row in enumerate(probabilities_for_cdf):
        forecast_at_percentiles[:, index] = np.interp(
            percentiles, cdf_row, threshold_points)

    # Convert percentiles back into percentages.
    percentiles = np.array(
        [x * 100.0 for x in percentiles], dtype=np.float32)

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    forecast_at_percentiles = restore_non_probabilistic_dimensions(
        forecast_at_percentiles, forecast_probabilities,
        threshold_name, len(percentiles))

    # Use the first threshold slice as the template for the output cube,
    # stripped of its probability-specific name, coord and attribute.
    for template_cube in forecast_probabilities.slices_over(threshold_name):
        template_cube.rename(
            template_cube.name().replace("probability_of_", ""))
        template_cube.remove_coord(threshold_name)
        template_cube.attributes.pop('relative_to_threshold')
        break

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, forecast_at_percentiles,
        custom_name='percentile', cube_unit=threshold_unit)
    return percentile_cube
def _interpolate_percentiles(self, forecast_at_percentiles,
                             desired_percentiles, bounds_pairing,
                             percentile_coord):
    """
    Interpolation of forecast for a set of percentiles from an initial
    set of percentiles to a new set of percentiles. This is constructed
    by linearly interpolating between the original set of percentiles
    to a new set of percentiles.

    Args:
        forecast_at_percentiles (iris.cube.Cube):
            Cube expected to contain a percentile coordinate.
        desired_percentiles (numpy.ndarray):
            Array of the desired percentiles.
        bounds_pairing (tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.
        percentile_coord (str):
            Name of required percentile coordinate.

    Returns:
        iris.cube.Cube:
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.
    """
    source_percentiles = (
        forecast_at_percentiles.coord(percentile_coord).points)

    # Put the percentile dimension first, so that flattening to a 2d
    # array produces data in the desired order.
    forecast_at_percentiles = enforce_coordinate_ordering(
        forecast_at_percentiles, percentile_coord)
    flattened_forecast = convert_cube_data_to_2d(
        forecast_at_percentiles, coord=percentile_coord)

    source_percentiles, flattened_forecast = (
        self._add_bounds_to_percentiles_and_forecast_at_percentiles(
            source_percentiles, flattened_forecast, bounds_pairing))

    n_rows = flattened_forecast.shape[0]
    interpolated = np.empty(
        (len(desired_percentiles), n_rows), dtype=np.float32)
    # Each row of the flattened forecast is interpolated independently
    # onto the desired percentiles.
    for index, forecast_row in enumerate(flattened_forecast):
        interpolated[:, index] = np.interp(
            desired_percentiles, source_percentiles, forecast_row)

    # Reshape so the percentiles dimension is first, and any other
    # dimension coordinates follow.
    forecast_at_percentiles_data = restore_non_probabilistic_dimensions(
        interpolated, forecast_at_percentiles, percentile_coord,
        len(desired_percentiles))

    # Use the first percentile slice, without its percentile coordinate,
    # as the template for the output cube.
    for template_cube in forecast_at_percentiles.slices_over(
            percentile_coord):
        template_cube.remove_coord(percentile_coord)
        break

    return create_cube_with_percentiles(
        desired_percentiles, template_cube, forecast_at_percentiles_data,
        custom_name=percentile_coord)
def process(self, post_processed_forecast, raw_forecast,
            random_ordering=False, random_seed=None):
    """
    Reorder post-processed forecast using the ordering of the
    raw ensemble.

    Args:
        post_processed_forecast (iris.cube.Cube or iris.cube.CubeList):
            The cube or cubelist containing the post-processed
            forecast percentiles.
        raw_forecast (iris.cube.Cube or iris.cube.CubeList):
            The cube or cubelist containing the raw (not post-processed)
            forecast.
        random_ordering (bool):
            If random_ordering is True, the post-processed forecasts are
            reordered randomly, rather than using the ordering of the
            raw ensemble.
        random_seed (int or None):
            If random_seed is an integer, the integer value is used for
            the random seed.
            If random_seed is None, no random seed is set, so the random
            values generated are not reproducible.

    Returns:
        iris.cube.Cube:
            Cube containing the new ensemble realizations where all points
            within the dataset have been reordered in comparison to the
            input percentiles.
    """
    # For a CubeList the percentile coordinate is read from the first cube.
    representative_cube = (
        post_processed_forecast[0]
        if isinstance(post_processed_forecast, iris.cube.CubeList)
        else post_processed_forecast
    )
    percentile_coord = find_percentile_coordinate(
        representative_cube).name()

    percentile_forecast = concatenate_cubes(
        post_processed_forecast, coords_to_slice_over=[percentile_coord])
    percentile_forecast = enforce_coordinate_ordering(
        percentile_forecast, percentile_coord)

    raw_realizations = concatenate_cubes(raw_forecast)
    raw_realizations = enforce_coordinate_ordering(
        raw_realizations, "realization")
    raw_realizations = self._recycle_raw_ensemble_realizations(
        percentile_forecast, raw_realizations, percentile_coord)

    reordered = self.rank_ecc(
        percentile_forecast, raw_realizations,
        random_ordering=random_ordering, random_seed=random_seed)
    reordered = RebadgePercentilesAsRealizations.process(reordered)
    return enforce_coordinate_ordering(reordered, "realization")
def process(self, input_cube):
    """Convert each point to a truth value based on provided threshold
    values. The truth value may or may not be fuzzy depending upon if
    fuzzy_bounds are supplied. If the plugin has a "threshold_units"
    member, this is used to convert both thresholds and fuzzy bounds into
    the units of the input cube.

    Note that the unit conversion overwrites self.thresholds and
    self.fuzzy_bounds, and self.threshold_coord_name is set from the
    input cube's name.

    Args:
        input_cube (iris.cube.Cube):
            Cube to threshold. The code is dimension-agnostic.

    Returns:
        iris.cube.Cube:
            Cube after a threshold has been applied. The data within this
            cube will contain values between 0 and 1 to indicate whether
            a given threshold has been exceeded or not.

            The cube meta-data will contain:
            * Input_cube name prepended with
            probability_of_X_above(or below)_threshold (where X is
            the diagnostic under consideration)
            * Threshold dimension coordinate with same units as input_cube
            * Threshold attribute ("greater_than",
            "greater_than_or_equal_to", "less_than", or
            less_than_or_equal_to" depending on the operator)
            * Cube units set to (1).

    Raises:
        ValueError: if a np.nan value is detected within the input cube.
    """
    if np.isnan(input_cube.data).any():
        raise ValueError("Error: NaN detected in input cube data")

    if self.threshold_units is not None:
        convert = self.threshold_units.convert
        cube_units = input_cube.units
        self.thresholds = [
            convert(value, cube_units) for value in self.thresholds
        ]
        self.fuzzy_bounds = [
            tuple([convert(value, cube_units) for value in pair])
            for pair in self.fuzzy_bounds
        ]

    self.threshold_coord_name = input_cube.name()

    thresholded_cubes = iris.cube.CubeList()
    for threshold, bounds in zip(self.thresholds, self.fuzzy_bounds):
        cube = input_cube.copy()
        if bounds[0] == bounds[1]:
            # Equal bounds: deterministic 0/1 truth value straight from
            # the comparison operator.
            compare = self.comparison_operator["function"]
            truth_value = compare(cube.data, threshold)
        else:
            # Fuzzy case: scale exceedance probabilities linearly between
            # 0/1 at the min/max fuzzy bounds and 0.5 at the threshold.
            is_below_threshold = cube.data < threshold
            lower_ramp = rescale(
                cube.data,
                data_range=(bounds[0], threshold),
                scale_range=(0.0, 0.5),
                clip=True,
            )
            upper_ramp = rescale(
                cube.data,
                data_range=(threshold, bounds[1]),
                scale_range=(0.5, 1.0),
                clip=True,
            )
            truth_value = np.where(is_below_threshold, lower_ramp, upper_ramp)
            # The ramps describe exceedance; invert them when the
            # requirement is less_than or less_than_or_equal_to.
            if "less_than" in self.comparison_operator["spp_string"]:
                truth_value = 1.0 - truth_value

        truth_value = truth_value.astype(FLOAT_DTYPE)

        if np.ma.is_masked(cube.data):
            # update unmasked points only
            unmasked = ~input_cube.data.mask
            cube.data[unmasked] = truth_value[unmasked]
        else:
            cube.data = truth_value

        self._add_threshold_coord(cube, threshold)

        for per_threshold_func in self.each_threshold_func:
            cube = per_threshold_func(cube)

        thresholded_cubes.append(cube)

    # The per-threshold cubes must merge into exactly one cube.
    (cube,) = thresholded_cubes.merge()

    self._update_metadata(cube)
    enforce_coordinate_ordering(cube, ["realization", "percentile"])
    return cube
def process(self, cube, weights=None):
    """Calculate weighted blend across the chosen coord, for either
    probabilistic or percentile data. If there is a percentile
    coordinate on the cube, it will blend using the
    PercentileBlendingAggregator but the percentile coordinate must
    have at least two points.

    Note that self.blend_coord is replaced by the result of
    find_blend_dim_coord for the input cube.

    Args:
        cube (iris.cube.Cube):
            Cube to blend across the coord.
        weights (iris.cube.Cube):
            Cube of blending weights. This will have 1 or 3 dimensions,
            corresponding either to blend dimension on the input cube with or
            without and additional 2 spatial dimensions. If None, the input cube
            is blended with equal weights across the blending dimension.

    Returns:
        iris.cube.Cube:
            Containing the weighted blend across the chosen coordinate (typically
            forecast reference time or model).

    Raises:
        TypeError : If the first argument not a cube.
        CoordinateNotFoundError : If coordinate to be collapsed not found
                                  in cube.
        CoordinateNotFoundError : If coordinate to be collapsed not found in
                                  provided weights cube.
        ValueError : If coordinate to be collapsed is not a dimension
                     (presumably raised by a helper; not raised directly
                     here).
    """
    if not isinstance(cube, iris.cube.Cube):
        msg = ("The first argument must be an instance of iris.cube.Cube "
               "but is {}.".format(type(cube)))
        raise TypeError(msg)

    if not cube.coords(self.blend_coord):
        msg = "Coordinate to be collapsed not found in cube."
        raise CoordinateNotFoundError(msg)

    # Dimension order the output should have: that of a single slice
    # over the blend coordinate.
    first_slice = next(cube.slices_over(self.blend_coord))
    output_dims = get_dim_coord_names(first_slice)
    self.blend_coord = find_blend_dim_coord(cube, self.blend_coord)

    # Ensure input cube and weights cube are ordered equivalently along
    # blending coordinate.
    cube = sort_coord_in_cube(cube, self.blend_coord)
    if weights is not None:
        if not weights.coords(self.blend_coord):
            msg = "Coordinate to be collapsed not found in weights cube."
            raise CoordinateNotFoundError(msg)
        weights = sort_coord_in_cube(weights, self.blend_coord)

    # Check that the time coordinate is single valued if required.
    self.check_compatible_time_points(cube)

    # Pick the leading coordinates and blending routine, then blend.
    if self.check_percentile_coord(cube):
        leading_coords = [self.blend_coord, "percentile"]
        blend = self.percentile_weighted_mean
    else:
        leading_coords = [self.blend_coord]
        blend = self.weighted_mean
    enforce_coordinate_ordering(cube, leading_coords)
    result = blend(cube, weights)

    # Reorder resulting dimensions to match input
    enforce_coordinate_ordering(result, output_dims)
    return result
def process(self, temperature, orography, land_sea_mask, model_id_attr=None):
    """Calculate the lapse rate from the temperature and orography cubes.

    Unit conversions are applied to copies, so neither the temperature nor
    the orography input cube is modified by this method.

    Args:
        temperature (iris.cube.Cube):
            Cube of air temperatures (K).
        orography (iris.cube.Cube):
            Cube containing orography data (metres)
        land_sea_mask (iris.cube.Cube):
            Cube containing a binary land-sea mask. True for land-points
            and False for Sea.
        model_id_attr (str):
            Name of the attribute used to identify the source model for
            blending. This is inherited from the input temperature cube.

    Returns:
        iris.cube.Cube:
            Cube containing lapse rate (K m-1)

    Raises:
        TypeError: If input cubes are not cubes
        ValueError: If input cubes are the wrong units.
    """
    labelled_inputs = (
        ("Temperature", temperature),
        ("Orography", orography),
        ("Land/Sea mask", land_sea_mask),
    )
    for label, candidate in labelled_inputs:
        if not isinstance(candidate, iris.cube.Cube):
            raise TypeError(
                "{} input is not a cube, but {}".format(label, type(candidate))
            )

    # Convert units on copies so that the caller's cubes are left untouched.
    temperature_cube = temperature.copy()
    temperature_cube.convert_units("K")
    orography_cube = orography.copy()
    orography_cube.convert_units("metres")

    # Extract x/y co-ordinates.
    x_coord = temperature_cube.coord(axis="x").name()
    y_coord = temperature_cube.coord(axis="y").name()

    # Extract orography and land/sea mask data.
    orography_data = next(orography_cube.slices([y_coord, x_coord])).data
    land_sea_mask_data = next(land_sea_mask.slices([y_coord, x_coord])).data
    # Fill sea points with NaN values.
    orography_data = np.where(land_sea_mask_data, orography_data, np.nan)

    # Create list of arrays over "realization" coordinate
    original_dimension_order = None
    has_realization_dimension = bool(
        temperature_cube.coords("realization", dim_coords=True)
    )
    if has_realization_dimension:
        original_dimension_order = get_dim_coord_names(temperature_cube)
        enforce_coordinate_ordering(temperature_cube, "realization")
        temp_data_slices = temperature_cube.data
    else:
        temp_data_slices = [temperature_cube.data]

    # Calculate lapse rate for each realization
    lapse_rate_data = np.array(
        [
            self._generate_lapse_rate_array(
                temperature_data, orography_data, land_sea_mask_data
            )
            for temperature_data in temp_data_slices
        ]
    )
    if not has_realization_dimension:
        # Drop only the artificial leading axis added above. np.squeeze
        # would also remove any genuine length-1 spatial dimension and so
        # break the match with the template cube's shape.
        lapse_rate_data = lapse_rate_data[0]

    attributes = generate_mandatory_attributes(
        [temperature], model_id_attr=model_id_attr
    )
    lapse_rate_cube = create_new_diagnostic_cube(
        "air_temperature_lapse_rate",
        "K m-1",
        temperature_cube,
        attributes,
        data=lapse_rate_data,
    )

    # Restore the dimension order of the input temperature cube.
    if original_dimension_order:
        enforce_coordinate_ordering(lapse_rate_cube, original_dimension_order)

    return lapse_rate_cube
def process(
    self,
    neighbour_cube: Cube,
    diagnostic_cube: Cube,
    new_title: Optional[str] = None,
) -> Cube:
    """
    Build a spot data cube holding diagnostic values extracted at the
    grid point neighbours given by the neighbour cube.

    .. See the documentation for more details about the inputs and output.
    .. include:: /extended_documentation/spotdata/spot_extraction/
       spot_extraction_examples.rst

    Args:
        neighbour_cube:
            A cube containing information about the spot data sites and
            their grid point neighbours.
        diagnostic_cube:
            A cube of diagnostic data from which spot data is being taken.
            Its dimensions are reordered in place so that y and x are the
            trailing dimensions.
        new_title:
            New title for spot-extracted data.  If None, this attribute is
            reset to a default value, since it has no prescribed standard
            and may therefore contain grid information that is no longer
            correct after spot-extraction.

    Returns:
        A cube containing diagnostic data for each spot site, as well as
        information about the sites themselves.
    """
    # Check we are using a matched neighbour/diagnostic cube pair
    check_grid_match([neighbour_cube, diagnostic_cube])

    # Get the unique_site_id if it is present on the neighbour cube
    unique_site_id, unique_site_id_key = None, None
    unique_site_id_data = self.check_for_unique_id(neighbour_cube)
    if unique_site_id_data:
        unique_site_id = unique_site_id_data[0]
        unique_site_id_key = unique_site_id_data[1]

    # Ensure diagnostic cube is y-x order as neighbour cube expects.
    y_name = diagnostic_cube.coord(axis="y").name()
    x_name = diagnostic_cube.coord(axis="x").name()
    enforce_coordinate_ordering(
        diagnostic_cube, [y_name, x_name], anchor_start=False
    )

    coordinate_cube = self.extract_coordinates(neighbour_cube)
    x_indices, y_indices = coordinate_cube.data
    spot_values = diagnostic_cube.data[..., y_indices, x_indices]

    # Any leading (non-spatial) dimensions are carried onto the spot cube.
    if spot_values.ndim > 1:
        additional_dims = diagnostic_cube.dim_coords[:-2]
    else:
        additional_dims = []

    scalar_coords, nonscalar_coords = self.get_aux_coords(
        diagnostic_cube, x_indices, y_indices
    )
    spotdata_cube = self.build_diagnostic_cube(
        neighbour_cube,
        diagnostic_cube,
        spot_values,
        scalar_coords=scalar_coords,
        auxiliary_coords=nonscalar_coords,
        additional_dims=additional_dims,
        unique_site_id=unique_site_id,
        unique_site_id_key=unique_site_id_key,
    )

    # Take the attributes describing the data's provenance from the
    # diagnostic cube, and the grid hash from the neighbour cube.
    spotdata_cube.attributes = diagnostic_cube.attributes
    grid_hash = neighbour_cube.attributes["model_grid_hash"]
    spotdata_cube.attributes["model_grid_hash"] = grid_hash

    # Remove the unique_site_id coordinate attribute as it is internal
    # metadata only
    if unique_site_id is not None:
        id_coord = spotdata_cube.coord(unique_site_id_key)
        id_coord.attributes.pop(UNIQUE_ID_ATTRIBUTE)

    # Remove grid attributes and update title
    for attr in MOSG_GRID_ATTRIBUTES:
        spotdata_cube.attributes.pop(attr, None)
    if new_title is None:
        title = MANDATORY_ATTRIBUTE_DEFAULTS["title"]
    else:
        title = new_title
    spotdata_cube.attributes["title"] = title

    # Copy cell methods
    spotdata_cube.cell_methods = diagnostic_cube.cell_methods
    return spotdata_cube
def process(self, cube: Cube, mask: Optional[Cube] = None) -> CubeList:
    """
    Create the smoothing_coefficient cubes: one for the x direction and
    one for the y direction.

    The GradientBetweenAdjacentGridSquares plugin is used to calculate
    gradients of the orography, which are then used to calculate
    "smoothing_coefficient" arrays that are normalised between a
    user-specified max and min.

    The input cube and mask are not modified: any reordering of their
    dimensions needed for processing is applied to copies.

    Args:
        cube:
            A 2D field of orography on the grid for which
            smoothing_coefficients are to be generated.
        mask:
            A mask that defines where the smoothing coefficients should
            be zeroed. The mask must have the same spatial dimensions as
            the orography cube. How the mask is used to zero smoothing
            coefficients is determined by the plugin configuration
            arguments.

    Returns:
        - A cube of orography-dependent smoothing_coefficients calculated
          in the x direction.
        - A cube of orography-dependent smoothing_coefficients calculated
          in the y direction.

    Raises:
        ValueError: If ``cube`` is not an iris cube.
        ValueError: If the orography data is not two-dimensional.
        ValueError: If a mask is provided whose dimension coordinates
            differ from those of the orography cube.
    """
    if not isinstance(cube, iris.cube.Cube):
        raise ValueError(
            "OrographicSmoothingCoefficients() expects cube "
            "input, got {}".format(type(cube))
        )

    if len(cube.data.shape) != 2:
        raise ValueError(
            "Expected orography on 2D grid, got {} dims".format(
                len(cube.data.shape)
            )
        )

    if mask is not None and (
        mask.coords(dim_coords=True) != cube.coords(dim_coords=True)
    ):
        raise ValueError(
            "If a mask is provided it must have the same grid as the "
            "orography field."
        )

    # Enforce coordinate order for simpler processing. The reordering is
    # done on a copy so the caller's cube keeps its original layout,
    # matching the layout the returned coefficients are restored to.
    original_order = [crd.name() for crd in cube.coords(dim_coords=True)]
    target_order = [cube.coord(axis="y").name(), cube.coord(axis="x").name()]
    cube = cube.copy()
    enforce_coordinate_ordering(cube, target_order)

    # Returns two cubes, ordered gradient in x and gradient in y.
    gradients = GradientBetweenAdjacentGridSquares()(cube)

    # Calculate unscaled smoothing coefficients.
    smoothing_coefficients = iris.cube.CubeList()
    names = ["smoothing_coefficient_x", "smoothing_coefficient_y"]
    for gradient, name in zip(gradients, names):
        coefficient_data = self.unnormalised_smoothing_coefficients(gradient)
        smoothing_coefficients.append(
            self.create_coefficient_cube(
                coefficient_data, gradient, name, cube.attributes.copy()
            )
        )

    # Scale the smoothing coefficients between provided values.
    smoothing_coefficients = self.scale_smoothing_coefficients(
        smoothing_coefficients
    )

    # If a mask has been provided, zero coefficients where required.
    # As with the orography, reorder a copy rather than the caller's mask.
    if mask is not None:
        mask = mask.copy()
        enforce_coordinate_ordering(mask, target_order)
        self.zero_masked(*smoothing_coefficients, mask)

    # Return the coefficients in the dimension order of the input cube.
    for smoothing_coefficient in smoothing_coefficients:
        enforce_coordinate_ordering(smoothing_coefficient, original_order)

    return smoothing_coefficients
def load_cube(filepath, constraints=None, no_lazy_load=False, allow_none=False):
    """Load one or more files with Iris and return a single cube.

    Args:
        filepath (str or list):
            Filepath that will be loaded or list of filepaths that can be
            merged into a single cube upon loading.
        constraints (iris.Constraint, str or None):
            Constraint to be applied when loading from the input filepath.
            This can be in the form of an iris.Constraint or could be a
            string that is intended to match the name of the cube.
            The default is None.
        no_lazy_load (bool):
            If True, bypass cube deferred (lazy) loading and load the whole
            cube into memory. This can increase performance at the cost of
            memory. If False (default) then lazy load.
        allow_none (bool):
            If True, when the filepath is None, returns None.
            If False, normal error handling applies.
            Default is False.

    Returns:
        cube (iris.cube.Cube):
            Cube that has been loaded from the input filepath given the
            constraints provided.

    Raises:
        ValueError: If no cubes are found using the constraints.
    """
    if allow_none and filepath is None:
        return None

    # Remove metadata prefix cube if present
    exclude_prefix_cube = iris.Constraint(
        cube_func=lambda cube: cube.long_name != 'prefixes')
    constraints = exclude_prefix_cube & constraints

    # Load each file individually to avoid partial merging (not used
    # iris.load_raw() due to issues with time representation)
    paths = [filepath] if isinstance(filepath, str) else filepath
    cubes = iris.cube.CubeList([])
    for path in paths:
        cubes.extend(iris.load(path, constraints=constraints))

    # Merge loaded cubes
    if not cubes:
        raise ValueError(f"No cubes found using contraints {constraints}")
    cube = cubes[0] if len(cubes) == 1 else merge_cubes(cubes)

    # Remove metadata prefix cube attributes
    cube.attributes.pop('bald__isPrefixedBy', None)

    # Ensure the probabilistic coordinates are the first coordinates within
    # a cube and are in the specified order.
    probabilistic_coords = ["realization", "percentile", "threshold"]
    cube = enforce_coordinate_ordering(cube, probabilistic_coords)

    # Ensure the y and x dimensions are the last dimensions within the cube.
    spatial_coords = [cube.coord(axis="y").name(), cube.coord(axis="x").name()]
    cube = enforce_coordinate_ordering(cube, spatial_coords, anchor="end")

    if no_lazy_load:
        # Force the cube's data into memory by touching the .data attribute.
        cube.data
    return cube
def crps_minimiser_wrapper(self, initial_guess, forecast_predictor, truth,
                           forecast_var, predictor_of_mean_flag,
                           distribution):
    """
    Pass a given minimisation function to the scipy minimize function
    (Nelder-Mead method) to estimate optimised values for the
    coefficients.

    Warnings are issued if the minimisation does not report success, or
    if the change in any coefficient over the final iteration exceeds
    ``self.TOLERATED_PERCENTAGE_CHANGE``.

    Args:
        initial_guess (List):
            List of optimised coefficients.
            Order of coefficients is [c, d, a, b].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the field containing the ensemble variance.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        distribution (String):
            String used to access the appropriate minimisation function
            within self.minimisation_dict.

    Returns:
        optimised_coeffs (List):
            List of optimised coefficients.
            Order of coefficients is [c, d, a, b].

    Raises:
        KeyError: If the requested distribution is not a key of
            self.minimisation_dict.
    """

    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is printed.

        Args:
            allvecs : List
                List of numpy arrays containing the optimised coefficients,
                after each iteration.
        """
        if len(allvecs) < 2:
            # With fewer than two recorded iterates there is no previous
            # iteration to compare against.
            return

        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
        if np.any(last_iteration_percentage_change >
                  self.TOLERATED_PERCENTAGE_CHANGE):
            # Suppress scientific notation only while building the message,
            # so the process-wide numpy print options are left unchanged.
            with np.printoptions(suppress=True):
                msg = ("\nThe final iteration resulted in a percentage "
                       "change that is greater than the accepted "
                       "threshold of 5% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been "
                       "achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           last_iteration_percentage_change,
                           allvecs[-1], allvecs[-2],
                           np.absolute(allvecs[-2] - allvecs[-1]))
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        msg = ("Distribution requested {} is not supported in {}. "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        raise KeyError(msg) from err

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)

    if predictor_of_mean_flag.lower() in ["mean"]:
        forecast_predictor_data = forecast_predictor.data.flatten()
        truth_data = truth.data.flatten()
        forecast_var_data = forecast_var.data.flatten()
    elif predictor_of_mean_flag.lower() in ["realizations"]:
        truth_data = truth.data.flatten()
        forecast_predictor = (
            enforce_coordinate_ordering(
                forecast_predictor, "realization"))
        forecast_predictor_data = convert_cube_data_to_2d(
            forecast_predictor)
        forecast_var_data = forecast_var.data.flatten()

    # Use float32 throughout the minimisation inputs.
    initial_guess = np.array(initial_guess, dtype=np.float32)
    forecast_predictor_data = forecast_predictor_data.astype(np.float32)
    forecast_var_data = forecast_var_data.astype(np.float32)
    truth_data = truth_data.astype(np.float32)
    sqrt_pi = np.sqrt(np.pi).astype(np.float32)

    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor_of_mean_flag),
        method="Nelder-Mead",
        # return_all is needed so that allvecs is available below.
        options={"maxiter": self.MAX_ITERATIONS, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.MAX_ITERATIONS, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x
def _apply_params(self, forecast_predictors, forecast_vars, optimised_coeffs,
                  coeff_names, predictor_of_mean_flag):
    """
    Function to apply EMOS coefficients to all required dates.

    For each time slice, the coefficients for that date are used to
    compute a calibrated predictor (predicted mean) and a calibrated
    variance. Where no coefficients exist for a date, a warning is issued
    and copies of the raw forecast predictor and variance are returned
    for that date instead.

    Args:
        forecast_predictors (Iris cube):
            Cube containing the forecast predictor e.g. ensemble mean
            or ensemble realizations.
        forecast_vars (Iris cube.):
            Cube containing the forecast variance e.g. ensemble variance.
        optimised_coeffs (List):
            Coefficients for all dates, looked up by date. Note that an
            entry of NaN values is added in place for every date that has
            no coefficients.
        coeff_names (List):
            Coefficient names.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.

    Returns:
        (tuple) : tuple containing:
            **calibrated_forecast_predictor_all_dates** (CubeList):
                List of cubes containing the calibrated forecast predictor.
            **calibrated_forecast_var_all_dates** (CubeList):
                List of cubes containing the calibrated forecast variance.
            **calibrated_forecast_coefficients_all_dates** (CubeList):
                List of cubes containing the coefficients used for
                calibration.

    Raises:
        ValueError: If there are fewer coefficients for a date than there
            are coefficient names.
        ValueError: If predictor_of_mean_flag is neither "mean" nor
            "realizations" when coefficients are applied.
    """
    calibrated_forecast_predictor_all_dates = iris.cube.CubeList()
    calibrated_forecast_var_all_dates = iris.cube.CubeList()
    calibrated_forecast_coefficients_all_dates = iris.cube.CubeList()

    for forecast_predictor, forecast_var in zip(
            forecast_predictors.slices_over("time"),
            forecast_vars.slices_over("time")):
        date = iris_time_to_datetime(
            forecast_predictor.coord("time").copy())[0]
        constr = iris.Constraint(time=date)
        forecast_predictor_at_date = forecast_predictor.extract(constr)
        forecast_var_at_date = forecast_var.extract(constr)

        # If the coefficients are not available for the date, use the
        # raw ensemble forecast as the calibrated ensemble forecast.
        if date not in optimised_coeffs:
            msg = ("Ensemble calibration not available "
                   "for forecasts with start time of {}. "
                   "Coefficients not available".format(
                       date.strftime("%Y%m%d%H%M")))
            warnings.warn(msg)
            calibrated_forecast_predictor_at_date = (
                forecast_predictor_at_date.copy())
            calibrated_forecast_var_at_date = forecast_var_at_date.copy()
            # Record NaN coefficients for this date and build the
            # coefficient cubes from that date's values, in the same way
            # as when coefficients are available.
            optimised_coeffs[date] = np.full(len(coeff_names), np.nan)
            coeff_cubes = self._create_coefficient_cube(
                forecast_predictor_at_date,
                optimised_coeffs[date], coeff_names)
        else:
            optimised_coeffs_at_date = optimised_coeffs[date]

            # Assigning coefficients to coefficient names.
            if len(optimised_coeffs_at_date) == len(coeff_names):
                optimised_coeffs_at_date = dict(
                    zip(coeff_names, optimised_coeffs_at_date))
            elif len(optimised_coeffs_at_date) > len(coeff_names):
                # Surplus coefficients are treated as additional beta
                # values and appended to the named "beta" coefficient.
                excess_beta = (
                    optimised_coeffs_at_date[len(coeff_names):].tolist())
                optimised_coeffs_at_date = dict(
                    zip(coeff_names, optimised_coeffs_at_date))
                optimised_coeffs_at_date["beta"] = np.array(
                    [optimised_coeffs_at_date["beta"]] + excess_beta)
            else:
                msg = ("Number of coefficient names {} with names {} "
                       "is not equal to the number of "
                       "optimised_coeffs_at_date values {} "
                       "with values {} or the number of "
                       "coefficients is not greater than the "
                       "number of coefficient names. Can not continue "
                       "if the number of coefficient names out number "
                       "the number of coefficients".format(
                           len(coeff_names), coeff_names,
                           len(optimised_coeffs_at_date),
                           optimised_coeffs_at_date))
                raise ValueError(msg)

            if predictor_of_mean_flag.lower() in ["mean"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble mean. In this case, b = beta.
                beta = [optimised_coeffs_at_date["a"],
                        optimised_coeffs_at_date["beta"]]
                forecast_predictor_flat = (
                    forecast_predictor_at_date.data.flatten())
                new_col = np.ones(forecast_predictor_flat.shape)
                all_data = np.column_stack(
                    (new_col, forecast_predictor_flat))
                predicted_mean = np.dot(all_data, beta)
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date)
            elif predictor_of_mean_flag.lower() in ["realizations"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble realizations. In this case, b = beta^2.
                beta = np.concatenate(
                    [[optimised_coeffs_at_date["a"]],
                     optimised_coeffs_at_date["beta"]**2])
                # Reorder the cube that is actually flattened and
                # collapsed below.
                forecast_predictor_at_date = (
                    enforce_coordinate_ordering(
                        forecast_predictor_at_date, "realization"))
                forecast_predictor_flat = (
                    convert_cube_data_to_2d(forecast_predictor_at_date))
                forecast_var_flat = forecast_var_at_date.data.flatten()
                new_col = np.ones(forecast_var_flat.shape)
                all_data = np.column_stack(
                    (new_col, forecast_predictor_flat))
                predicted_mean = np.dot(all_data, beta)
                # Calculate mean of ensemble realizations, as only the
                # calibrated ensemble mean will be returned.
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date.collapsed(
                        "realization", iris.analysis.MEAN))
            else:
                # Fail clearly rather than hitting an unbound
                # predicted_mean further down.
                raise ValueError(
                    "Unsupported predictor_of_mean_flag {!r}: expected "
                    "'mean' or 'realizations'.".format(
                        predictor_of_mean_flag))

            xlen = len(forecast_predictor_at_date.coord(axis="x").points)
            ylen = len(forecast_predictor_at_date.coord(axis="y").points)
            predicted_mean = np.reshape(predicted_mean, (ylen, xlen))
            calibrated_forecast_predictor_at_date.data = predicted_mean

            # Calculating the predicted variance, based on the
            # raw variance S^2, where predicted variance = c + dS^2,
            # where c = (gamma)^2 and d = (delta)^2
            predicted_var = (optimised_coeffs_at_date["gamma"]**2 +
                             optimised_coeffs_at_date["delta"]**2 *
                             forecast_var_at_date.data)

            calibrated_forecast_var_at_date = forecast_var_at_date
            calibrated_forecast_var_at_date.data = predicted_var

            coeff_cubes = self._create_coefficient_cube(
                calibrated_forecast_predictor_at_date,
                optimised_coeffs[date], coeff_names)

        calibrated_forecast_predictor_all_dates.append(
            calibrated_forecast_predictor_at_date)
        calibrated_forecast_var_all_dates.append(
            calibrated_forecast_var_at_date)
        calibrated_forecast_coefficients_all_dates.extend(coeff_cubes)

    return (calibrated_forecast_predictor_all_dates,
            calibrated_forecast_var_all_dates,
            calibrated_forecast_coefficients_all_dates)