def test_percentile_length_too_short(self):
    """
    Test that a ValueError mentioning "Unequal lengths" is raised when
    only two percentiles are supplied alongside the fixture data.
    """
    cube = self.current_temperature_forecast_cube
    cube_data = self.cube_data + 2
    # NOTE(review): presumably the fixture data has a leading dimension
    # longer than two, which is what makes these lengths unequal - confirm
    # against the fixture set-up.
    percentiles = [10, 50]
    msg = "Unequal lengths"
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, msg):
        create_cube_with_percentiles(percentiles, cube, cube_data)
def test_incompatible_percentiles(self):
    """
    Check that a ValueError is raised when the requested percentile
    values are strings that cannot be converted to numbers.
    """
    cube = self.current_temperature_forecast_cube
    percentiles = ["cat", "dog", "elephant"]
    # Zero-filled data: one leading entry per requested percentile,
    # followed by the lengths of the time, latitude and longitude
    # coordinates of the input cube.
    trailing_shape = [
        len(cube.coord(name).points)
        for name in ("time", "latitude", "longitude")]
    cube_data = np.zeros([len(percentiles)] + trailing_shape)
    msg = "could not convert string to float"
    with self.assertRaisesRegex(ValueError, msg):
        create_cube_with_percentiles(percentiles, cube, cube_data)
def test_basic(self):
    """Check that create_cube_with_percentiles returns a Cube instance."""
    template = self.current_temperature_forecast_cube
    data = self.cube_data + 2
    result = create_cube_with_percentiles([10, 50, 90], template, data)
    self.assertIsInstance(result, Cube)
def test_resulting_cube_units(self):
    """Check that the returned cube has the same units as the input cube."""
    template = self.current_temperature_forecast_cube
    data = self.cube_data + 2
    result = create_cube_with_percentiles([10, 50, 90], template, data)
    expected_units = template.units
    self.assertEqual(result.units, expected_units)
def test_changed_cube_units(self):
    """
    Check that passing cube_unit='1' yields a cube whose units equal
    Unit('1').
    """
    template = self.current_temperature_forecast_cube
    data = self.cube_data + 2
    requested_percentiles = [10, 50, 90]
    result = create_cube_with_percentiles(
        requested_percentiles, template, data, cube_unit='1')
    self.assertEqual(result.units, Unit('1'))
def test_metadata_copy(self):
    """
    Check that the metadata of the output cube, viewed as a dictionary,
    equals the metadata of the input cube.
    """
    template = self.current_temperature_forecast_cube
    # Give the input cube an attribute so the comparison covers it.
    template.attributes = {"source": "ukv"}
    data = self.cube_data + 2
    result = create_cube_with_percentiles([10, 50, 90], template, data)
    expected_metadata = template.metadata._asdict()
    actual_metadata = result.metadata._asdict()
    self.assertDictEqual(expected_metadata, actual_metadata)
def test_percentile_points(self):
    """
    Check that the returned cube has a "percentile" coordinate that is a
    DimCoord and whose points match the requested percentiles.
    """
    template = self.current_temperature_forecast_cube
    data = self.cube_data + 2
    requested_percentiles = [10, 50, 90]
    result = create_cube_with_percentiles(
        requested_percentiles, template, data)
    percentile_coord = result.coord("percentile")
    self.assertIsInstance(percentile_coord, DimCoord)
    self.assertArrayAlmostEqual(
        percentile_coord.points, requested_percentiles)
def test_many_percentiles(self):
    """
    Check that a cube built from 100 evenly spaced percentiles has data
    of the same shape as the array that was supplied.
    """
    cube = self.current_temperature_forecast_cube
    percentiles = np.linspace(0, 100, 100)
    # Zero-filled data with a leading axis of one entry per percentile,
    # then the time, latitude and longitude lengths of the input cube.
    trailing_shape = [
        len(cube.coord(name).points)
        for name in ("time", "latitude", "longitude")]
    cube_data = np.zeros([len(percentiles)] + trailing_shape)
    result = create_cube_with_percentiles(percentiles, cube, cube_data)
    self.assertEqual(cube_data.shape, result.data.shape)
def test_coordinate_copy(self):
    """
    Check that every coordinate on the input cube is also found on the
    output cube; a CoordinateNotFoundError is raised for the first one
    that is missing.
    """
    template = self.current_temperature_forecast_cube
    template.attributes = {"source": "ukv"}
    data = self.cube_data + 2
    result = create_cube_with_percentiles([10, 50, 90], template, data)
    # Locate the first input coordinate absent from the result, if any.
    missing = next(
        (coord for coord in template.coords()
         if coord not in result.coords()),
        None)
    if missing is not None:
        msg = ("Coordinate: {} not found in cube {}".format(
            missing, result))
        raise CoordinateNotFoundError(msg)
def test_spot_forecasts_percentile_points(self):
    """
    Check that, for the spot forecast fixture, the result is a Cube whose
    "percentile_over_realization" coordinate is a DimCoord with points
    matching the requested percentiles.
    """
    template = self.current_temperature_spot_forecast_cube
    data = self.cube_spot_data + 2
    requested_percentiles = [10, 50, 90]
    result = create_cube_with_percentiles(
        requested_percentiles, template, data)
    self.assertIsInstance(result, Cube)
    percentile_coord = result.coord("percentile_over_realization")
    self.assertIsInstance(percentile_coord, DimCoord)
    self.assertArrayAlmostEqual(
        percentile_coord.points, requested_percentiles)
def _mean_and_variance_to_percentiles(calibrated_forecast_predictor,
                                      calibrated_forecast_variance,
                                      percentiles):
    """
    Function returning percentiles based on the supplied mean and
    variance. The percentiles are created by assuming a Gaussian
    distribution and calculating the value of the phenomenon at specific
    points within the distribution.

    Args:
        calibrated_forecast_predictor (cube):
            Predictor for the calibrated forecast i.e. the mean.
        calibrated_forecast_variance (cube):
            Variance for the calibrated forecast.
        percentiles (List):
            Percentiles at which to calculate the value of the phenomenon
            at.

    Returns:
        percentile_cube (Iris cube):
            Cube containing the values for the phenomenon at each of the
            percentiles requested.

    Raises:
        ValueError: If NaNs remain in the values computed for a
            percentile after zero-variance points have been filled with
            the mean.
    """
    calibrated_forecast_predictor = enforce_coordinate_ordering(
        calibrated_forecast_predictor, "realization")
    calibrated_forecast_variance = enforce_coordinate_ordering(
        calibrated_forecast_variance, "realization")

    calibrated_forecast_predictor_data = (
        calibrated_forecast_predictor.data.flatten())
    calibrated_forecast_variance_data = (
        calibrated_forecast_variance.data.flatten())

    # Convert percentiles into fractions.
    percentiles = np.array(
        [x / 100.0 for x in percentiles], dtype=np.float32)

    result = np.zeros(
        (len(percentiles), calibrated_forecast_predictor_data.shape[0]),
        dtype=np.float32)

    # These depend only on the variance, so compute them once rather than
    # once per percentile.
    standard_deviation = np.sqrt(calibrated_forecast_variance_data)
    has_zero_variance = np.any(calibrated_forecast_variance_data == 0)

    # Loop over percentiles, and use a normal distribution with the mean
    # and variance to calculate the values at each percentile.
    for index, percentile in enumerate(percentiles):
        percentile_list = np.repeat(
            percentile, len(calibrated_forecast_predictor_data))
        result[index, :] = norm.ppf(
            percentile_list, loc=calibrated_forecast_predictor_data,
            scale=standard_deviation)
        # If the percent point function (PPF) returns NaNs at points where
        # the variance is zero, fill in the mean instead of the NaN values.
        if has_zero_variance:
            nan_mask = np.isnan(result[index, :])
            result[index, nan_mask] = (
                calibrated_forecast_predictor_data[nan_mask])
        # Rows for earlier percentiles have already passed this check and
        # later rows are still zero, so only the current row can hold NaNs.
        if np.any(np.isnan(result[index, :])):
            # The fraction is converted back to a percentage so that the
            # placeholder in the message is actually filled in.
            msg = ("NaNs are present within the result for the {} "
                   "percentile. Unable to calculate the percent point "
                   "function.".format(percentile * 100.0))
            raise ValueError(msg)

    # Convert percentiles back into percentages.
    percentiles = [x * 100.0 for x in percentiles]

    # Reshape the result, so the percentiles dimension is first, and any
    # other dimension coordinates follow.
    result = restore_non_probabilistic_dimensions(
        result, calibrated_forecast_predictor, "realization",
        len(percentiles))

    # Use the first realization slice, with its realization coordinate
    # removed, as the template for the percentile cube.
    template_cube = next(
        iter(calibrated_forecast_predictor.slices_over("realization")))
    template_cube.remove_coord("realization")

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, result)
    # Remove cell methods associated with finding the ensemble mean, which
    # are no longer relevant. An empty tuple is used rather than a dict
    # literal, as cell methods are a sequence.
    percentile_cube.cell_methods = ()
    return percentile_cube
def _probabilities_to_percentiles(self, forecast_probabilities,
                                  percentiles, bounds_pairing):
    """
    Conversion of probabilities to percentiles through the construction
    of a cumulative distribution function. This is effectively
    constructed by linear interpolation from the probabilities associated
    with each threshold to a set of percentiles.

    Args:
        forecast_probabilities (Iris cube):
            Cube with a threshold coordinate and a
            'relative_to_threshold' attribute of 'above' or 'below'.
        percentiles (Numpy array):
            Array of percentiles, at which the corresponding values will
            be calculated.
        bounds_pairing (Tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.

    Returns:
        percentile_cube (Iris cube):
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.

    Raises:
        NotImplementedError: If the 'relative_to_threshold' attribute is
            neither 'above' nor 'below'.

    Warns:
        UserWarning: If the probabilities used to construct the CDF are
            not ascending along the threshold axis.
    """
    threshold_coord = forecast_probabilities.coord("threshold")
    threshold_unit = threshold_coord.units
    threshold_points = threshold_coord.points

    # Ensure that the threshold dimension is first, so that the
    # conversion to a 2d array produces data in the desired order.
    forecast_probabilities = enforce_coordinate_ordering(
        forecast_probabilities, threshold_coord.name())
    prob_slices = convert_cube_data_to_2d(
        forecast_probabilities, coord=threshold_coord.name())

    # The requirement below for a monotonically changing probability
    # across thresholds can be thwarted by precision errors of order 1E-10,
    # as such, here we round to a precision of 9 decimal places.
    prob_slices = np.around(prob_slices, 9)

    # Invert probabilities for data thresholded above thresholds.
    relation = forecast_probabilities.attributes['relative_to_threshold']
    if relation == 'above':
        probabilities_for_cdf = 1 - prob_slices
    elif relation == 'below':
        probabilities_for_cdf = prob_slices
    else:
        # Trailing space added so the two sentences do not run together.
        msg = ("Probabilities to percentiles only implemented for "
               "thresholds above or below a given value. "
               "The relation to threshold is given as {}".format(relation))
        raise NotImplementedError(msg)

    threshold_points, probabilities_for_cdf = (
        self._add_bounds_to_thresholds_and_probabilities(
            threshold_points, probabilities_for_cdf, bounds_pairing))

    if np.any(np.diff(probabilities_for_cdf) < 0):
        # Trailing space added so the two sentences do not run together.
        msg = ("The probability values used to construct the "
               "Cumulative Distribution Function (CDF) "
               "must be ascending i.e. in order to yield "
               "a monotonically increasing CDF. "
               "The probabilities are {}".format(probabilities_for_cdf))
        warnings.warn(msg)

    # Convert percentiles into fractions.
    percentiles = np.array(
        [x / 100.0 for x in percentiles], dtype=np.float32)

    forecast_at_percentiles = np.empty(
        (len(percentiles), probabilities_for_cdf.shape[0]),
        dtype=np.float32)
    # Each row of probabilities_for_cdf is interpolated independently
    # against the threshold points to give one column of the output.
    for index in range(probabilities_for_cdf.shape[0]):
        forecast_at_percentiles[:, index] = np.interp(
            percentiles, probabilities_for_cdf[index, :],
            threshold_points)

    # Convert percentiles back into percentages.
    percentiles = np.array(
        [x * 100.0 for x in percentiles], dtype=np.float32)

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    forecast_at_percentiles = restore_non_probabilistic_dimensions(
        forecast_at_percentiles, forecast_probabilities,
        threshold_coord.name(), len(percentiles))

    # Use the first threshold slice as the template: strip the probability
    # prefix from its name and drop the threshold-specific metadata.
    template_cube = next(
        iter(forecast_probabilities.slices_over(threshold_coord.name())))
    template_cube.rename(
        template_cube.name().replace("probability_of_", ""))
    template_cube.remove_coord(threshold_coord.name())
    template_cube.attributes.pop('relative_to_threshold')

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, forecast_at_percentiles,
        custom_name='percentile', cube_unit=threshold_unit)
    return percentile_cube
def _interpolate_percentiles(self, forecast_at_percentiles,
                             desired_percentiles, bounds_pairing,
                             percentile_coord):
    """
    Linearly interpolate a forecast from the percentiles it is currently
    defined at onto a new set of percentiles.

    Args:
        forecast_at_percentiles (Iris Cube):
            Cube that has the coordinate named by percentile_coord.
        desired_percentiles (Numpy array):
            Percentiles at which forecast values are required.
        bounds_pairing (Tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.
        percentile_coord (String):
            Name of the percentile coordinate on the input cube.

    Returns:
        percentile_cube (iris cube.Cube):
            Cube containing values for the required diagnostic e.g.
            air_temperature at the desired percentiles.
    """
    source_percentiles = (
        forecast_at_percentiles.coord(percentile_coord).points)

    # Put the percentile dimension first so that flattening to 2d yields
    # the data in the order expected below.
    forecast_at_percentiles = enforce_coordinate_ordering(
        forecast_at_percentiles, percentile_coord)
    forecast_2d = convert_cube_data_to_2d(
        forecast_at_percentiles, coord=percentile_coord)

    padded = self._add_bounds_to_percentiles_and_forecast_at_percentiles(
        source_percentiles, forecast_2d, bounds_pairing)
    source_percentiles, forecast_2d = padded

    n_desired = len(desired_percentiles)
    n_rows = forecast_2d.shape[0]
    interpolated = np.empty((n_desired, n_rows), dtype=np.float32)
    # Each row of the 2d forecast is interpolated onto the desired
    # percentiles and stored as one column of the output.
    for row_index in range(n_rows):
        interpolated[:, row_index] = np.interp(
            desired_percentiles, source_percentiles,
            forecast_2d[row_index, :])

    # Restore the original dimensions, with the percentile dimension
    # leading and the remaining dimension coordinates following.
    reshaped_data = restore_non_probabilistic_dimensions(
        interpolated, forecast_at_percentiles, percentile_coord,
        n_desired)

    # Take the first slice over the percentile coordinate, without that
    # coordinate, as the template for the output cube.
    for template_cube in forecast_at_percentiles.slices_over(
            percentile_coord):
        template_cube.remove_coord(percentile_coord)
        break

    return create_cube_with_percentiles(
        desired_percentiles, template_cube, reshaped_data,
        custom_name=percentile_coord)
def _interpolate_percentiles(
        self, forecast_at_percentiles, desired_percentiles,
        bounds_pairing):
    """
    Linearly interpolate a forecast from the percentiles it is currently
    defined at onto a new set of percentiles.

    Parameters
    ----------
    forecast_at_percentiles : Iris Cube
        Cube that has a "percentile_over_realization" coordinate.
    desired_percentiles : Numpy array
        Percentiles at which forecast values are required.
    bounds_pairing : Tuple
        Lower and upper bound to be used as the ends of the
        cumulative distribution function.

    Returns
    -------
    percentile_cube : Iris cube
        Cube containing values for the required diagnostic e.g.
        air_temperature at the desired percentiles.

    """
    coord_name = "percentile_over_realization"
    source_percentiles = forecast_at_percentiles.coord(coord_name).points

    # Put the percentile dimension first so that flattening to 2d yields
    # the data in the order expected below.
    forecast_at_percentiles = ensure_dimension_is_the_zeroth_dimension(
        forecast_at_percentiles, coord_name)
    forecast_2d = convert_cube_data_to_2d(
        forecast_at_percentiles, coord=coord_name)

    padded = self._add_bounds_to_percentiles_and_forecast_at_percentiles(
        source_percentiles, forecast_2d, bounds_pairing)
    source_percentiles, forecast_2d = padded

    n_desired = len(desired_percentiles)
    n_rows = forecast_2d.shape[0]
    interpolated = np.empty((n_desired, n_rows))
    # Each row of the 2d forecast is interpolated onto the desired
    # percentiles and stored as one column of the output.
    for row_index in range(n_rows):
        interpolated[:, row_index] = np.interp(
            desired_percentiles, source_percentiles,
            forecast_2d[row_index, :])

    # Restore the original dimensions, with the percentile dimension
    # leading and the remaining dimension coordinates following.
    reshaped_data = restore_non_probabilistic_dimensions(
        interpolated, forecast_at_percentiles, coord_name, n_desired)

    # Take the first slice over the percentile coordinate, without that
    # coordinate, as the template for the output cube.
    for template_cube in forecast_at_percentiles.slices_over(coord_name):
        template_cube.remove_coord(coord_name)
        break

    return create_cube_with_percentiles(
        desired_percentiles, template_cube, reshaped_data)
def _probabilities_to_percentiles(self, forecast_probabilities,
                                  percentiles, bounds_pairing):
    """
    Conversion of probabilities to percentiles through the construction
    of a cumulative distribution function. This is effectively
    constructed by linear interpolation from the probabilities associated
    with each threshold to a set of percentiles.

    Parameters
    ----------
    forecast_probabilities : Iris cube
        Cube with a threshold coordinate.
    percentiles : Numpy array
        Array of percentiles, at which the corresponding values will be
        calculated.
    bounds_pairing : Tuple
        Lower and upper bound to be used as the ends of the
        cumulative distribution function.

    Returns
    -------
    percentile_cube : Iris cube
        Cube containing values for the required diagnostic e.g.
        air_temperature at the required percentiles.

    Raises
    ------
    ValueError
        If the probabilities used to construct the CDF are not ascending
        along the threshold axis.

    """
    threshold_coord = forecast_probabilities.coord("threshold")
    threshold_points = threshold_coord.points

    # Ensure that the threshold dimension is first, so that the
    # conversion to a 2d array produces data in the desired order.
    forecast_probabilities = ensure_dimension_is_the_zeroth_dimension(
        forecast_probabilities, threshold_coord.name())
    prob_slices = convert_cube_data_to_2d(
        forecast_probabilities, coord=threshold_coord.name())

    # Invert probabilities
    probabilities_for_cdf = 1 - prob_slices

    threshold_points, probabilities_for_cdf = (
        self._add_bounds_to_thresholds_and_probabilities(
            threshold_points, probabilities_for_cdf, bounds_pairing))

    if np.any(np.diff(probabilities_for_cdf) < 0):
        # Trailing space added so the two sentences do not run together.
        msg = ("The probability values used to construct the "
               "Cumulative Distribution Function (CDF) "
               "must be ascending i.e. in order to yield "
               "a monotonically increasing CDF. "
               "The probabilities are {}".format(probabilities_for_cdf))
        raise ValueError(msg)

    # Convert percentiles into fractions.
    percentiles = [x / 100.0 for x in percentiles]

    forecast_at_percentiles = np.empty(
        (len(percentiles), probabilities_for_cdf.shape[0]))
    # Each row of probabilities_for_cdf is interpolated independently
    # against the threshold points to give one column of the output.
    for index in range(probabilities_for_cdf.shape[0]):
        forecast_at_percentiles[:, index] = np.interp(
            percentiles, probabilities_for_cdf[index, :],
            threshold_points)

    # Convert percentiles back into percentages.
    percentiles = [x * 100.0 for x in percentiles]

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    forecast_at_percentiles = restore_non_probabilistic_dimensions(
        forecast_at_percentiles, forecast_probabilities,
        threshold_coord.name(), len(percentiles))

    # Use the first threshold slice, with its threshold coordinate
    # removed, as the template for the percentile cube.
    template_cube = next(
        iter(forecast_probabilities.slices_over(threshold_coord.name())))
    template_cube.remove_coord(threshold_coord.name())

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, forecast_at_percentiles)
    return percentile_cube