def test_unknown_sampling_option(self):
    """
    Test that a ValueError carrying the expected message is raised
    when an unrecognised sampling option is requested.
    """
    expected_msg = "The unknown sampling option is not yet implemented"
    requested_count = 3
    with self.assertRaisesRegex(ValueError, expected_msg):
        choose_set_of_percentiles(requested_count, sampling="unknown")
def process(self, calibrated_forecast_predictor_and_variance,
            no_of_percentiles):
    """
    Generate ensemble percentiles from the mean and variance.

    Args:
        calibrated_forecast_predictor_and_variance (Iris CubeList):
            Two-element sequence that unpacks into the calibrated
            forecast predictor followed by the calibrated forecast
            variance. Either element may itself be a CubeList, in which
            case it is merged into a single cube before use.
        no_of_percentiles (int):
            Number of percentiles to generate. Passed unchanged to
            choose_set_of_percentiles, using that function's default
            sampling option.

    Returns:
        calibrated_forecast_percentiles (iris.cube.Cube):
            Cube for calibrated percentiles.
            The percentile coordinate is always the zeroth dimension.

    """
    predictor, variance = calibrated_forecast_predictor_and_variance

    # Merging is only needed when a CubeList, rather than a single
    # cube, has been supplied for either component.
    if isinstance(predictor, iris.cube.CubeList):
        predictor = predictor.merge_cube()
    if isinstance(variance, iris.cube.CubeList):
        variance = variance.merge_cube()

    percentiles = choose_set_of_percentiles(no_of_percentiles)
    return self._mean_and_variance_to_percentiles(
        predictor, variance, percentiles)
def test_basic(self):
    """
    Test that a list holding the requested number of percentiles
    is returned.
    """
    expected_count = 3
    percentiles = choose_set_of_percentiles(expected_count)
    self.assertIsInstance(percentiles, list)
    self.assertEqual(len(percentiles), expected_count)
def test_data(self):
    """
    Test that the returned percentiles hold the expected values
    when three percentiles are requested.
    """
    expected = np.array([25, 50, 75])
    percentiles = choose_set_of_percentiles(3)
    self.assertArrayAlmostEqual(percentiles, expected)
def process(self, forecast_at_percentiles, no_of_percentiles=None,
            sampling="quantile"):
    """
    Re-sample a percentile forecast onto a different set of percentiles.

    The steps are:

    1. Concatenate the cubes that have a percentile coordinate.
    2. Create the list of target percentiles.
    3. Look up the lower and upper bound pair for the forecast values,
       which are passed on to the interpolation step.
    4. Interpolate the percentile coordinate onto the target
       percentiles.

    Args:
        forecast_at_percentiles (Iris CubeList or Iris Cube):
            Cube or CubeList expected to contain a percentile coordinate.
        no_of_percentiles (Integer or None):
            Number of percentiles.
            If None, the number of points on the percentile coordinate
            of the input is used.
        sampling (String):
            Type of sampling of the distribution to produce a set of
            percentiles e.g. quantile or random.
            Accepted options for sampling are:

            * Quantile: A regular set of equally-spaced percentiles aimed
              at dividing a Cumulative Distribution Function into
              blocks of equal probability.
            * Random: A random set of ordered percentiles.

    Returns:
        forecast_at_percentiles (iris.cube.Cube):
            Cube with forecast values at the desired set of percentiles.
            The percentile coordinate is always the zeroth dimension.

    """
    forecast_at_percentiles = concatenate_cubes(forecast_at_percentiles)
    located_coord = find_percentile_coordinate(forecast_at_percentiles)
    percentile_coord = located_coord.name()

    if no_of_percentiles is None:
        # Default to the number of percentiles already on the input.
        existing_points = forecast_at_percentiles.coord(
            percentile_coord).points
        no_of_percentiles = len(existing_points)

    percentiles = choose_set_of_percentiles(
        no_of_percentiles, sampling=sampling)

    cube_units = forecast_at_percentiles.units
    diagnostic_name = forecast_at_percentiles.name()
    bounds_pairing = get_bounds_of_distribution(diagnostic_name, cube_units)

    return self._interpolate_percentiles(
        forecast_at_percentiles, percentiles, bounds_pairing,
        percentile_coord)
def process(self, forecast_probabilities, no_of_percentiles=None,
            sampling="quantile"):
    """
    Convert a probability forecast into values at a set of percentiles.

    1. Concatenates cubes with a threshold coordinate.
    2. Creates a list of percentiles.
    3. Accesses the lower and upper bound pair to find the ends of the
       cumulative distribution function.
    4. Converts the threshold coordinate into values at a set of
       percentiles using linear interpolation,
       see Figure 1 from Flowerdew, 2014.

    Parameters
    ----------
    forecast_probabilities : Iris CubeList or Iris Cube
        Cube or CubeList expected to contain a threshold coordinate.
    no_of_percentiles : Integer or None
        Number of percentiles.
        If None, the number of points on the threshold coordinate of
        the input is used as the number of percentiles.
    sampling : String
        Type of sampling of the distribution to produce a set of
        percentiles e.g. quantile or random.
        Accepted options for sampling are:
        Quantile: A regular set of equally-spaced percentiles aimed
        at dividing a Cumulative Distribution Function into
        blocks of equal probability.
        Random: A random set of ordered percentiles.

    Returns
    -------
    forecast_at_percentiles : Iris cube
        Cube with forecast values at the desired set of percentiles.
        The threshold coordinate is always the zeroth dimension.

    """
    forecast_probabilities = concatenate_cubes(forecast_probabilities)
    threshold_coord = forecast_probabilities.coord("threshold")

    # The diagnostic name is the cube name without its probability prefix.
    cube_name = forecast_probabilities.name()
    phenom_name = cube_name.replace("probability_of_", "")

    if no_of_percentiles is None:
        no_of_percentiles = len(threshold_coord.points)

    percentiles = choose_set_of_percentiles(
        no_of_percentiles, sampling=sampling)

    # The bounds are looked up in the units of the threshold coordinate.
    bounds_pairing = get_bounds_of_distribution(
        phenom_name, threshold_coord.units)

    return self._probabilities_to_percentiles(
        forecast_probabilities, percentiles, bounds_pairing)
def test_random(self):
    """
    Test that a list holding the requested number of percentiles is
    returned when the random sampling option is selected.
    """
    expected_count = 3
    percentiles = choose_set_of_percentiles(
        expected_count, sampling="random")
    self.assertIsInstance(percentiles, list)
    self.assertEqual(len(percentiles), expected_count)
def process(self, calibrated_forecast_predictor,
            calibrated_forecast_variance, no_of_percentiles=None,
            percentiles=None):
    """
    Generate ensemble percentiles from the mean and variance.

    Args:
        calibrated_forecast_predictor (iris.cube.Cube):
            Cube containing the calibrated forecast predictor.
        calibrated_forecast_variance (iris.cube.Cube):
            Cube containing the calibrated forecast variance.

    Kwargs:
        no_of_percentiles (int):
            Number of percentiles to calculate from the mean and
            variance. When truthy, the percentiles are produced by
            choose_set_of_percentiles.
        percentiles (list):
            Explicit list of percentiles to generate from the mean and
            variance provided.

    Returns:
        calibrated_forecast_percentiles (iris.cube.Cube):
            Cube for calibrated percentiles.
            The percentile coordinate is always the zeroth dimension.

    Raises:
        ValueError: If both "no_of_percentiles" and "percentiles" are
            supplied (with truthy values) at the same time.

    """
    # The two ways of choosing percentiles are mutually exclusive.
    if no_of_percentiles and percentiles:
        template = (
            "Please specify either the number of percentiles or "
            "provide a list of percentiles. The number of percentiles "
            "provided was {} and the list of percentiles "
            "provided was {}")
        raise ValueError(template.format(no_of_percentiles, percentiles))

    if no_of_percentiles:
        percentiles = choose_set_of_percentiles(no_of_percentiles)

    return self._mean_and_variance_to_percentiles(
        calibrated_forecast_predictor,
        calibrated_forecast_variance,
        percentiles)
def process(self, calibrated_forecast_predictor_and_variance,
            no_of_percentiles):
    """
    Generate ensemble percentiles from the mean and variance.

    Parameters
    ----------
    calibrated_forecast_predictor_and_variance : Iris CubeList
        Two-element sequence that unpacks into the calibrated forecast
        predictor followed by the calibrated forecast variance. Each
        element is passed through concatenate_cubes before use.
    no_of_percentiles : Integer
        Number of percentiles to generate. Passed unchanged to
        choose_set_of_percentiles, using that function's default
        sampling option.

    Returns
    -------
    calibrated_forecast_percentiles : Iris cube
        Cube for calibrated percentiles.
        The percentile coordinate is always the zeroth dimension.

    """
    predictor, variance = calibrated_forecast_predictor_and_variance

    predictor = concatenate_cubes(predictor)
    variance = concatenate_cubes(variance)

    percentiles = choose_set_of_percentiles(no_of_percentiles)
    return self._mean_and_variance_to_percentiles(
        predictor, variance, percentiles)
def process(self, forecast_probabilities, no_of_percentiles=None,
            percentiles=None, sampling="quantile"):
    """
    Convert a probability forecast into values at a set of percentiles.

    1. Concatenates cubes with a threshold coordinate.
    2. Creates a list of percentiles.
    3. Accesses the lower and upper bound pair to find the ends of the
       cumulative distribution function.
    4. Converts the threshold coordinate into values at a set of
       percentiles using linear interpolation,
       see Figure 1 from Flowerdew, 2014.

    Args:
        forecast_probabilities (Iris CubeList or Iris Cube):
            Cube or CubeList expected to contain a threshold coordinate.
        no_of_percentiles (Integer or None):
            Number of percentiles. If None and percentiles is not set,
            the number of thresholds within the input
            forecast_probabilities cube is used as the number of
            percentiles. This argument is mutually exclusive with
            percentiles.
        percentiles (list of floats):
            The desired percentile values in the interval [0, 100].
            A value that is neither a list nor a tuple is wrapped in a
            single-element list. This argument is mutually exclusive
            with no_of_percentiles.
        sampling (String):
            Type of sampling of the distribution to produce a set of
            percentiles e.g. quantile or random.
            Accepted options for sampling are:

            * Quantile: A regular set of equally-spaced percentiles aimed
              at dividing a Cumulative Distribution Function into
              blocks of equal probability.
            * Random: A random set of ordered percentiles.

    Returns:
        forecast_at_percentiles (Iris cube):
            Cube with forecast values at the desired set of percentiles.
            The threshold coordinate is always the zeroth dimension.

    Raises:
        ValueError: If both no_of_percentiles and percentiles are given.

    """
    neither_or_one_given = no_of_percentiles is None or percentiles is None
    if not neither_or_one_given:
        raise ValueError(
            "Cannot specify both no_of_percentiles and percentiles to "
            "GeneratePercentilesFromProbabilities")

    forecast_probabilities = concatenate_cubes(
        forecast_probabilities,
        coords_to_slice_over="threshold",
        coordinates_for_association=[])
    threshold_coord = forecast_probabilities.coord("threshold")

    # The diagnostic name is the cube name without its probability prefix.
    cube_name = forecast_probabilities.name()
    phenom_name = cube_name.replace("probability_of_", "")

    if no_of_percentiles is None:
        no_of_percentiles = len(threshold_coord.points)

    if percentiles is None:
        percentiles = choose_set_of_percentiles(
            no_of_percentiles, sampling=sampling)
    elif not isinstance(percentiles, (tuple, list)):
        # Allow a single value to be supplied in place of a sequence.
        percentiles = [percentiles]
    percentiles = np.array(percentiles, dtype=np.float32)

    # The bounds are looked up in the units of the threshold coordinate.
    bounds_pairing = get_bounds_of_distribution(
        phenom_name, threshold_coord.units)

    # If a cube still has multiple realizations, slice over these to reduce
    # the memory requirements into manageable chunks.
    try:
        realization_slices = forecast_probabilities.slices_over(
            "realization")
    except CoordinateNotFoundError:
        realization_slices = [forecast_probabilities]

    converted = iris.cube.CubeList([
        self._probabilities_to_percentiles(
            realization_slice, percentiles, bounds_pairing)
        for realization_slice in realization_slices
    ])
    return converted.merge_cube()
def process(cube, coordinates=None, ecc_bounds_warning=False,
            percentiles=None, no_of_percentiles=None):
    r"""Collapses cube coordinates and calculate percentiled data.

    Calculate percentiled data over a given coordinate by collapsing that
    coordinate. Typically used to convert realization data into percentiled
    data, but may calculate over any dimension coordinate. Alternatively
    calling this with a dataset containing probabilities will convert those
    to percentiles using the ensemble copula coupling plugin. If no particular
    percentiles are given at which to calculate values and no
    'number of percentiles' to calculate are specified, the
    following defaults will be used.
    '[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]'

    Args:
        cube (iris.cube.Cube):
            A Cube for processing. If its data is a masked array with
            no masked points, the data is replaced in place by the
            underlying unmasked array.
        coordinates (str or list):
            Coordinate or coordinates over which to collapse data and
            calculate percentiles; e.g. 'realization' or
            'latitude longitude'. This argument must be provided when
            collapsing a coordinate or coordinates to create percentiles,
            but is redundant when converting probabilities to percentiles
            and may be omitted. This coordinate(s) will be removed
            and replaced by a percentile coordinate.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC
            bounds range, raises a warning rather than an exception.
            Default is False.
        percentiles (list or None):
            Optional definition of percentiles at which to calculate data.
            Ignored when no_of_percentiles is given.
            Default is None.
        no_of_percentiles (int):
            Optional definition of the number of percentiles to be
            generated, these distributed regularly with the aim of
            dividing into blocks of equal probability.
            Default is None.

    Returns:
        result (iris.cube.Cube):
            The processed Cube.

    Raises:
        ValueError:
            If the cube name does not contain 'probability_of\_' and
            coordinates isn't used.

    Warns:
        Warning:
            If 'probability_of\_' is in the cube name and coordinates
            is used.

    """
    if no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(
            no_of_percentiles, sampling="quantile")

    # TODO: Correct when formal cf-standards exists
    if 'probability_of_' in cube.name():
        plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning)
        result = plugin.process(cube, percentiles=percentiles)
        if coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")
        return result

    if not coordinates:
        raise ValueError("To collapse a coordinate to calculate "
                         "percentiles, a coordinate or list of "
                         "coordinates must be provided.")

    # Switch back to use the slow scipy method if the cube contains masked
    # data which the numpy method cannot handle.
    has_masked_points = np.ma.is_masked(cube.data)
    if not has_masked_points and np.ma.isMaskedArray(cube.data):
        # A masked array with an empty mask is replaced by a non-masked
        # array so that the fast method can be used.
        cube.data = cube.data.data

    converter = PercentileConverter(
        coordinates,
        percentiles=percentiles,
        fast_percentile_method=not has_masked_points)
    return converter.process(cube)
def main(argv=None):
    """Parse the command line, compute percentiles and save the result.

    Loads the cube at the input path. If the cube name contains
    'probability_of_' the probabilities are converted to percentiles;
    otherwise the requested coordinates are collapsed to calculate
    percentiles. The result is saved to the output path as NetCDF.

    Args:
        argv (list of str or None):
            Command line arguments, passed as ``args`` to the parser's
            ``parse_args``. Default is None.

    Raises:
        ValueError:
            If the cube name does not contain 'probability_of_' and no
            coordinates were provided.

    Warns:
        Warning:
            If 'probability_of_' is in the cube name and coordinates
            were provided.

    """
    parser = ArgParser(
        description="Calculate percentiled data over a given coordinate by "
        "collapsing that coordinate. Typically used to convert realization "
        "data into percentiled data, but may calculate over any "
        "dimension coordinate. Alternatively, calling this CLI with a dataset"
        " containing probabilities will convert those to percentiles using "
        "the ensemble copula coupling plugin. If no particular percentiles "
        "are given at which to calculate values and no 'number of percentiles'"
        " to calculate are specified, the following defaults will be used: "
        "[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]")
    parser.add_argument("input_filepath", metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("--coordinates", metavar="COORDINATES_TO_COLLAPSE",
                        nargs="+",
                        help="Coordinate or coordinates over which to collapse"
                             " data and calculate percentiles; e.g. "
                             "'realization' or 'latitude longitude'. This argument "
                             "must be provided when collapsing a coordinate or "
                             "coordinates to create percentiles, but is redundant "
                             "when converting probabilities to percentiles and may "
                             "be omitted. This coordinate(s) will be removed "
                             "and replaced by a percentile coordinate.")
    parser.add_argument('--ecc_bounds_warning',
                        default=False,
                        action='store_true',
                        help='If True, where calculated percentiles are '
                             'outside the ECC bounds range, raise a warning '
                             'rather than an exception.')

    # The two ways of choosing percentiles cannot be combined.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--percentiles", metavar="PERCENTILES",
                       nargs="+", default=None, type=float,
                       help="Optional definition of percentiles at which to "
                            "calculate data, e.g. "
                            "--percentiles 0 33.3 66.6 100")
    group.add_argument('--no-of-percentiles', default=None, type=int,
                       metavar='NUMBER_OF_PERCENTILES',
                       help="Optional definition of the number of percentiles "
                            "to be generated, these distributed regularly with the "
                            "aim of dividing into blocks of equal probability.")

    args = parser.parse_args(args=argv)

    cube = load_cube(args.input_filepath)

    percentiles = args.percentiles
    if args.no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(args.no_of_percentiles,
                                                sampling="quantile")

    # TODO: Correct when formal cf-standards exists
    if 'probability_of_' in cube.name():
        if args.coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")

        result = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning).process(
                cube, percentiles=percentiles)
    else:
        if not args.coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            args.coordinates, percentiles=percentiles,
            fast_percentile_method=fast_percentile_method).process(cube)

    save_netcdf(result, args.output_filepath)