def process(self, current_forecast, historic_forecast, truth):
    """
    Performs ensemble calibration through the following steps:
    1. Estimate optimised coefficients from training period.
    2. Apply optimised coefficients to current forecast.

    Parameters
    ----------
    current_forecast : Iris Cube or CubeList
        The Cube or CubeList that provides the input forecast for
        the current cycle.
    historic_forecast : Iris Cube or CubeList
        The Cube or CubeList that provides the input historic forecasts
        for calibration.
    truth : Iris Cube or CubeList
        The Cube or CubeList that provides the input truth for
        calibration with dates matching the historic forecasts.

    Returns
    -------
    * calibrated_forecast_predictor_and_variance : CubeList
        CubeList containing the calibrated forecast predictor and
        calibrated forecast variance.

    Raises
    ------
    ValueError
        If self.calibration_method or self.distribution is not one of
        the supported values.

    """
    def format_calibration_method(calibration_method):
        """Lowercase input string, and replace underscores with spaces."""
        return calibration_method.lower().replace("_", " ")

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    calibration_method = format_calibration_method(self.calibration_method)
    if calibration_method not in [
            "ensemble model output statistics",
            "nonhomogeneous gaussian regression"]:
        msg = ("Other calibration methods are not available. "
               "{} is not available".format(calibration_method))
        raise ValueError(msg)

    # Fail early with a clear error: without this guard an unsupported
    # distribution would leave the coefficients unassigned and surface
    # as an UnboundLocalError further down.
    distribution = format_calibration_method(self.distribution)
    if distribution not in ["gaussian", "truncated gaussian"]:
        msg = ("Other distributions are not available. "
               "{} is not available".format(distribution))
        raise ValueError(msg)

    ec = EstimateCoefficientsForEnsembleCalibration(
        self.distribution, self.desired_units,
        predictor_of_mean_flag=self.predictor_of_mean_flag)
    optimised_coeffs, coeff_names = ec.estimate_coefficients_for_ngr(
        current_forecast, historic_forecast, truth)

    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast, optimised_coeffs, coeff_names,
        predictor_of_mean_flag=self.predictor_of_mean_flag)
    # The third element (the calibrated coefficients) is not returned
    # by this method.
    (calibrated_forecast_predictor, calibrated_forecast_variance,
     _) = ac.apply_params_entry()

    calibrated_forecast_predictor_and_variance = iris.cube.CubeList(
        [calibrated_forecast_predictor, calibrated_forecast_variance])
    return calibrated_forecast_predictor_and_variance
def __init__(self, distribution, current_cycle, desired_units=None,
             predictor_of_mean_flag="mean"):
    """
    Create an ensemble calibration plugin that, for Nonhomogeneous
    Gaussian Regression, calculates coefficients based on historical
    forecasts and applies the coefficients to the current forecast.

    Args:
        distribution (str):
            Name of distribution. Assume that the current forecast can
            be represented using this distribution.
        current_cycle (str):
            The current cycle in YYYYMMDDTHHMMZ format
            e.g. 20171122T0100Z. This is used to create a
            forecast_reference_time coordinate on the resulting EMOS
            coefficients cube.

    Kwargs:
        desired_units (str or cf_units.Unit):
            The unit that you would like the calibration to be
            undertaken in. The current forecast, historical forecast
            and truth will be converted as required.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated
            mean. Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.

    Warns:
        ImportWarning: If statsmodels cannot be found and the
            predictor_of_mean_flag is "realizations".

    """
    # Function-scope import: the deprecated ``imp`` module used
    # previously is no longer available from Python 3.12 onwards.
    import importlib.util

    self.distribution = distribution
    self.current_cycle = current_cycle
    self.desired_units = desired_units
    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)
    self.predictor_of_mean_flag = predictor_of_mean_flag
    self.minimiser = ContinuousRankedProbabilityScoreMinimisers()
    # Setting default values for coeff_names. Beta is the final
    # coefficient name in the list, as there can potentially be
    # multiple beta coefficients if the ensemble realizations, rather
    # than the ensemble mean, are provided as the predictor.
    self.coeff_names = ["gamma", "delta", "alpha", "beta"]

    # find_spec returns None (rather than raising) when the package
    # is not installed.
    statsmodels_found = (
        importlib.util.find_spec("statsmodels") is not None)
    if statsmodels_found:
        import statsmodels.api as sm
        self.sm = sm
    elif predictor_of_mean_flag.lower() == "realizations":
        msg = (
            "The statsmodels can not be imported. "
            "Will not be able to calculate an initial guess from "
            "the individual ensemble realizations. "
            "A default initial guess will be used without "
            "estimating coefficients from a linear model.")
        warnings.warn(msg, ImportWarning)
    self.statsmodels_found = statsmodels_found
def test_foo(self):
    """
    Check that a ValueError is raised when the predictor_of_mean_flag
    is "foo", i.e. a value that is not one of the accepted
    predictor_of_mean_flag names.
    """
    expected_message = "The requested value for the predictor_of_mean_flag"
    with self.assertRaisesRegex(ValueError, expected_message):
        check_predictor_of_mean_flag("foo")
def test_realizations(self):
    """
    Check that no ValueError is raised when the predictor_of_mean_flag
    is "realizations".
    """
    try:
        check_predictor_of_mean_flag("realizations")
    except ValueError as error:
        # Report the unexpected exception as a test failure rather
        # than letting it propagate as an error.
        template = ("_check_predictor_of_mean_flag raised "
                    "ValueError unexpectedly."
                    "Message is {}")
        self.fail(template.format(error))
def __init__(self, current_forecast, coefficients_cube,
             predictor_of_mean_flag="mean"):
    """
    Set up the plugin that, for Nonhomogeneous Gaussian Regression,
    applies previously estimated coefficients to the current forecast.

    Args:
        current_forecast (iris.cube.Cube):
            The cube containing the current forecast.
        coefficients_cube (iris.cube.Cube):
            Cube containing the coefficients estimated using EMOS.
            The cube contains a coefficient_index dimension coordinate
            where the points of the coordinate are integer values and
            a coefficient_name auxiliary coordinate where the points of
            the coordinate are e.g. gamma, delta, alpha, beta.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated
            mean. Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.

    Raises:
        ValueError: If a forecast_period, time or
            forecast_reference_time coordinate is present on both
            cubes but differs between them.

    """
    self.current_forecast = current_forecast
    self.coefficients_cube = coefficients_cube

    for coord_name in ["forecast_period", "time",
                       "forecast_reference_time"]:
        try:
            forecast_coord = self.current_forecast.coord(coord_name)
            coefficients_coord = self.coefficients_cube.coord(coord_name)
        except CoordinateNotFoundError:
            # A coordinate missing from either cube is not compared.
            continue
        if forecast_coord != coefficients_coord:
            msg = ("The {} coordinate of the current forecast cube "
                   "and coefficients cube differs. "
                   "current forecast: {}, "
                   "coefficients cube: {}").format(
                       coord_name, forecast_coord, coefficients_coord)
            raise ValueError(msg)

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)
    self.predictor_of_mean_flag = predictor_of_mean_flag
def apply_params_entry(self):
    """
    Wrapping function to calculate the forecast predictor and forecast
    variance prior to applying coefficients to the current forecast.

    Returns:
        (tuple) : tuple containing the three values returned by
        self._apply_params:
            **calibrated_forecast_predictor**:
                The calibrated version of the ensemble predictor.
            **calibrated_forecast_variance**:
                The calibrated version of the ensemble variance.
            **calibrated_forecast_coefficients**:
                The coefficients used for calibrating the ensemble.

    Raises:
        ValueError: If the predictor_of_mean_flag passes validation but
            is not one of the values handled here.

    """
    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    rename_coordinate(
        self.current_forecast, "ensemble_member_id", "realization")

    current_forecast_cubes = concatenate_cubes(self.current_forecast)

    predictor = self.predictor_of_mean_flag.lower()
    if predictor == "mean":
        forecast_predictors = current_forecast_cubes.collapsed(
            "realization", iris.analysis.MEAN)
    elif predictor in ("members", "realizations"):
        # "realizations" is the spelling used by the other methods and
        # tests in this module; "members" is kept for backward
        # compatibility.
        forecast_predictors = current_forecast_cubes
    else:
        # Previously this fell through and failed later with an
        # unbound local variable.
        msg = ("The predictor_of_mean_flag {} is not supported when "
               "applying coefficients.".format(
                   self.predictor_of_mean_flag))
        raise ValueError(msg)

    forecast_vars = current_forecast_cubes.collapsed(
        "realization", iris.analysis.VARIANCE)

    (calibrated_forecast_predictor, calibrated_forecast_var,
     calibrated_forecast_coefficients) = self._apply_params(
         forecast_predictors, forecast_vars, self.optimised_coeffs,
         self.coeff_names, self.predictor_of_mean_flag)
    return (calibrated_forecast_predictor,
            calibrated_forecast_var,
            calibrated_forecast_coefficients)
def crps_minimiser_wrapper(self, initial_guess, forecast_predictor, truth,
                           forecast_var, predictor_of_mean_flag,
                           distribution):
    """
    Function to pass a given minimisation function to the scipy minimize
    function to estimate optimised values for the coefficients.

    Args:
        initial_guess (List):
            List of optimised coefficients.
            Order of coefficients is [c, d, a, b].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the ensemble variance.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated
            mean. Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        distribution (String):
            String used to access the appropriate minimisation function
            within self.minimisation_dict.

    Returns:
        optimised_coeffs (List):
            List of optimised coefficients.
            Order of coefficients is [c, d, a, b].

    Raises:
        KeyError: If the distribution is not a key of
            self.minimisation_dict.

    Warns:
        Warning: If the minimisation did not converge, or if the final
            iteration changed a coefficient by more than
            self.TOLERATED_PERCENTAGE_CHANGE percent.

    """
    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is printed.

        Args:
            allvecs : List
                List of numpy arrays containing the optimised
                coefficients, after each iteration.
        """
        # With fewer than two recorded vectors there is no previous
        # iteration to compare against, and allvecs[-2] would raise
        # an IndexError.
        if len(allvecs) < 2:
            return
        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
        if (np.any(last_iteration_percentage_change >
                   self.TOLERATED_PERCENTAGE_CHANGE)):
            # Note: this changes numpy's global print options.
            np.set_printoptions(suppress=True)
            msg = ("\nThe final iteration resulted in a percentage change "
                   "that is greater than the accepted threshold of 5% "
                   "i.e. {}. "
                   "\nA satisfactory minimisation has not been achieved. "
                   "\nLast iteration: {}, "
                   "\nLast-but-one iteration: {}"
                   "\nAbsolute difference: {}\n").format(
                       last_iteration_percentage_change, allvecs[-1],
                       allvecs[-2],
                       np.absolute(allvecs[-2] - allvecs[-1]))
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        msg = ("Distribution requested {} is not supported in {} "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        # Chain the original lookup failure for easier debugging.
        raise KeyError(msg) from err

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)

    # The truth and variance are flattened identically for both
    # supported predictors; only the predictor handling differs.
    truth_data = truth.data.flatten()
    forecast_var_data = forecast_var.data.flatten()
    if predictor_of_mean_flag.lower() == "mean":
        forecast_predictor_data = forecast_predictor.data.flatten()
    elif predictor_of_mean_flag.lower() == "realizations":
        forecast_predictor = enforce_coordinate_ordering(
            forecast_predictor, "realization")
        forecast_predictor_data = convert_cube_data_to_2d(
            forecast_predictor)

    initial_guess = np.array(initial_guess, dtype=np.float32)
    forecast_predictor_data = forecast_predictor_data.astype(np.float32)
    forecast_var_data = forecast_var_data.astype(np.float32)
    truth_data = truth_data.astype(np.float32)
    sqrt_pi = np.sqrt(np.pi).astype(np.float32)

    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor_of_mean_flag),
        method="Nelder-Mead",
        options={"maxiter": self.MAX_ITERATIONS, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.MAX_ITERATIONS, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x
def estimate_coefficients_for_ngr(self, current_forecast,
                                  historic_forecast, truth):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main contents of this method is:

    1. Metadata checks to ensure that the current forecast, historic
       forecast and truth exist in a form that can be processed.
    2. Loop through times within the concatenated current forecast cube:

       1. Extract the desired forecast period from the historic
          forecasts to match the current forecasts. Apply unit
          conversion to ensure that historic forecasts have the desired
          units for calibration.
       2. Extract the relevant truth to co-incide with the time within
          the historic forecasts. Apply unit conversion to ensure
          that the truth has the desired units for calibration.
       3. Calculate mean and variance.
       4. Calculate initial guess at coefficient values by performing a
          linear regression, if requested, otherwise default values are
          used.
       5. Perform minimisation.

    Args:
        current_forecast (Iris Cube or CubeList):
            The cube containing the current forecast.
        historic_forecast (Iris Cube or CubeList):
            The cube or cubelist containing the historical forecasts
            used for calibration.
        truth (Iris Cube or CubeList):
            The cube or cubelist containing the truth used for
            calibration.

    Returns:
        (tuple): tuple containing:
            **optimised_coeffs** (Dictionary):
                Dictionary containing a list of the optimised
                coefficients for each date.
            **coeff_names** (List):
                The name of each coefficient.

    Warns:
        Warning: If an input is not a Cube or CubeList, if any input is
            empty, or if no historic forecast or truth data can be
            extracted for a time point.

    """
    def convert_to_cubelist(cubes, cube_type="forecast"):
        """
        Convert cube to cubelist, if necessary.

        Args:
            cubes (Iris Cube or Iris CubeList):
                Cube to be converted to CubeList.
            cube_type (String):
                String to describe the cube, which is being converted
                to a CubeList.

        Raises
        ------
        TypeError: The input cube is not an Iris cube.

        """
        if not isinstance(cubes, iris.cube.CubeList):
            cubes = iris.cube.CubeList([cubes])
        for cube in cubes:
            if not isinstance(cube, iris.cube.Cube):
                msg = ("The input data within the {} "
                       "is not an Iris Cube.".format(cube_type))
                raise TypeError(msg)
        return cubes

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    # Setting default values for optimised_coeffs and coeff_names.
    optimised_coeffs = {}
    coeff_names = ["gamma", "delta", "a", "beta"]

    for var in [current_forecast, historic_forecast, truth]:
        if not isinstance(var, (iris.cube.Cube, iris.cube.CubeList)):
            msg = ("{} is not a Cube or CubeList."
                   "Returning default values for optimised_coeffs {} "
                   "and coeff_names {}.").format(
                       var, optimised_coeffs, coeff_names)
            warnings.warn(msg)
            return optimised_coeffs, coeff_names

    current_forecast_cubes = convert_to_cubelist(
        current_forecast, cube_type="current forecast")
    historic_forecast_cubes = convert_to_cubelist(
        historic_forecast, cube_type="historic forecast")
    truth_cubes = convert_to_cubelist(truth, cube_type="truth")

    if (len(current_forecast_cubes) == 0 or
            len(historic_forecast_cubes) == 0 or len(truth_cubes) == 0):
        msg = ("Insufficient input data present to estimate "
               "coefficients using NGR. "
               "\nNumber of current_forecast_cubes: {}"
               "\nNumber of historic_forecast_cubes: {}"
               "\nNumber of truth_cubes: {}".format(
                   len(current_forecast_cubes),
                   len(historic_forecast_cubes), len(truth_cubes)))
        warnings.warn(msg)
        return optimised_coeffs, coeff_names

    current_forecast_cubes = concatenate_cubes(current_forecast_cubes)
    historic_forecast_cubes = concatenate_cubes(historic_forecast_cubes)
    truth_cubes = concatenate_cubes(truth_cubes)

    # None marks "no guess available yet"; once a time point has been
    # minimised its result seeds the next time point.
    initial_guess = None
    nan_in_initial_guess = False

    for current_forecast_cube in current_forecast_cubes.slices_over(
            "time"):
        time_coord = current_forecast_cube.coord("time")
        date = unit.num2date(
            time_coord.points, time_coord.units.name,
            time_coord.units.calendar)[0]

        # Extract desired forecast_period from historic_forecast_cubes.
        forecast_period_constr = iris.Constraint(
            forecast_period=current_forecast_cube.coord(
                "forecast_period").points)
        historic_forecast_cube = historic_forecast_cubes.extract(
            forecast_period_constr)
        if historic_forecast_cube is None:
            # Mirror the handling of missing truth below rather than
            # failing with an AttributeError on None.
            msg = ("Unable to calibrate for the date {} "
                   "as no historic forecast data is available for the "
                   "matching forecast period. Moving on to try to "
                   "calibrate next time point.".format(date))
            warnings.warn(msg)
            continue

        # Extract truth matching the time of the historic forecast.
        reference_time = iris_time_to_datetime(
            historic_forecast_cube.coord("time").copy())
        truth_constr = iris.Constraint(
            forecast_reference_time=reference_time)
        truth_cube = truth_cubes.extract(truth_constr)

        if truth_cube is None:
            msg = ("Unable to calibrate for the time points {} "
                   "as no truth data is available."
                   "Moving on to try to calibrate "
                   "next time point.".format(
                       historic_forecast_cube.coord("time").points))
            warnings.warn(msg)
            continue

        # Make sure inputs have the same units.
        historic_forecast_cube.convert_units(self.desired_units)
        truth_cube.convert_units(self.desired_units)

        if self.predictor_of_mean_flag.lower() in ["mean"]:
            no_of_realizations = None
            forecast_predictor = historic_forecast_cube.collapsed(
                "realization", iris.analysis.MEAN)
        elif self.predictor_of_mean_flag.lower() in ["realizations"]:
            no_of_realizations = len(
                historic_forecast_cube.coord("realization").points)
            forecast_predictor = historic_forecast_cube

        forecast_var = historic_forecast_cube.collapsed(
            "realization", iris.analysis.VARIANCE)

        # Computing initial guess for EMOS coefficients
        # If no initial guess from a previous iteration, or if there
        # are NaNs in the initial guess, calculate an initial guess.
        if initial_guess is None or nan_in_initial_guess:
            initial_guess = self.compute_initial_guess(
                truth_cube, forecast_predictor,
                self.predictor_of_mean_flag,
                self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
                no_of_realizations=no_of_realizations)

        # Re-evaluate on every time point: previously the flag was only
        # ever set to True, so one NaN guess disabled the minimisation
        # for all later time points even when a valid guess was found.
        nan_in_initial_guess = bool(np.any(np.isnan(initial_guess)))

        if not nan_in_initial_guess:
            # Need to access the x attribute returned by the
            # minimisation function.
            optimised_coeffs[date] = (
                self.minimiser.crps_minimiser_wrapper(
                    initial_guess, forecast_predictor,
                    truth_cube, forecast_var,
                    self.predictor_of_mean_flag,
                    self.distribution.lower()))
            initial_guess = optimised_coeffs[date]
        else:
            optimised_coeffs[date] = initial_guess

    return optimised_coeffs, coeff_names
def process(self, current_forecast, historic_forecast, truth):
    """
    Performs ensemble calibration through the following steps:
    1. Estimate optimised coefficients from training period.
    2. Apply optimised coefficients to current forecast.

    Args:
        current_forecast (iris.cube.Cube):
            The cube that provides the input forecast for
            the current cycle.
        historic_forecast (iris.cube.Cube):
            The cube that provides the input historic forecasts for
            calibration.
        truth (iris.cube.Cube):
            The cube that provides the input truth for calibration
            with dates matching the historic forecasts.

    Returns:
        (tuple): tuple containing:
            **calibrated_forecast_predictor** (iris.cube.Cube):
                Cube containing the calibrated forecast predictor.
            **calibrated_forecast_variance** (iris.cube.Cube):
                Cube containing the calibrated forecast variance.

    Raises:
        ValueError: If self.calibration_method or self.distribution is
            not one of the supported values.

    """
    def format_calibration_method(calibration_method):
        """Lowercase input string, and replace underscores with spaces."""
        return calibration_method.lower().replace("_", " ")

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    calibration_method = format_calibration_method(self.calibration_method)
    if calibration_method not in [
            "ensemble model output statistics",
            "nonhomogeneous gaussian regression"]:
        msg = ("Other calibration methods are not available. "
               "{} is not available".format(calibration_method))
        raise ValueError(msg)

    # Fail early with a clear error: without this guard an unsupported
    # distribution would leave coefficient_cube unassigned and surface
    # as an UnboundLocalError further down.
    distribution = format_calibration_method(self.distribution)
    if distribution not in ["gaussian", "truncated gaussian"]:
        msg = ("Other distributions are not available. "
               "{} is not available".format(distribution))
        raise ValueError(msg)

    # The cycle time of the current forecast is handed to the
    # coefficient estimation plugin.
    current_cycle = datetime_to_cycletime(
        iris_time_to_datetime(
            current_forecast.coord("forecast_reference_time"))[0])
    ec = EstimateCoefficientsForEnsembleCalibration(
        self.distribution, current_cycle=current_cycle,
        desired_units=self.desired_units,
        predictor_of_mean_flag=self.predictor_of_mean_flag)
    coefficient_cube = ec.estimate_coefficients_for_ngr(
        historic_forecast, truth)

    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast, coefficient_cube,
        predictor_of_mean_flag=self.predictor_of_mean_flag)
    (calibrated_forecast_predictor,
     calibrated_forecast_variance) = ac.apply_params_entry()

    # TODO: track down where np.float64 promotion takes place.
    calibrated_forecast_predictor.data = (
        calibrated_forecast_predictor.data.astype(np.float32))
    calibrated_forecast_variance.data = (
        calibrated_forecast_variance.data.astype(np.float32))

    return calibrated_forecast_predictor, calibrated_forecast_variance
def estimate_coefficients_for_ngr(self, historic_forecast, truth):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main contents of this method is:

    1. Check that the predictor_of_mean_flag is valid.
    2. Convert the historic forecasts and truth to the desired units,
       if any were specified, and check that their units match.
    3. Calculate the variance of the historic forecasts. If the chosen
       predictor is the mean, also calculate the mean of the historic
       forecasts.
    4. Calculate an initial guess at the coefficient values.
    5. Perform minimisation, unless the initial guess contains NaNs, in
       which case the initial guess is used as the coefficients.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used for
            calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.

    Returns:
        coefficients_cube (iris.cube.Cube):
            Cube containing the coefficients estimated using EMOS.
            The cube contains a coefficient_index dimension coordinate
            and a coefficient_name auxiliary coordinate.

    Raises:
        ValueError: If the units of the historic forecast and truth
            cubes do not match.

    """
    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    # Make sure inputs have the same units.
    if self.desired_units:
        historic_forecast.convert_units(self.desired_units)
        truth.convert_units(self.desired_units)

    if historic_forecast.units != truth.units:
        # The placeholders were previously never filled in, so the
        # message showed literal "{}" instead of the units.
        msg = ("The historic forecast units of {} do not match "
               "the truth units {}. These units must match, so that "
               "the coefficients can be estimated.".format(
                   historic_forecast.units, truth.units))
        raise ValueError(msg)

    if self.predictor_of_mean_flag.lower() == "mean":
        no_of_realizations = None
        forecast_predictor = historic_forecast.collapsed(
            "realization", iris.analysis.MEAN)
    elif self.predictor_of_mean_flag.lower() == "realizations":
        no_of_realizations = len(
            historic_forecast.coord("realization").points)
        forecast_predictor = historic_forecast

    forecast_var = historic_forecast.collapsed(
        "realization", iris.analysis.VARIANCE)

    # Computing initial guess for EMOS coefficients.
    initial_guess = self.compute_initial_guess(
        truth, forecast_predictor, self.predictor_of_mean_flag,
        self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
        no_of_realizations=no_of_realizations)

    # Only minimise when the initial guess is free of NaNs; otherwise
    # the initial guess itself is used as the coefficients.
    if np.any(np.isnan(initial_guess)):
        optimised_coeffs = initial_guess
    else:
        # Need to access the x attribute returned by the
        # minimisation function.
        optimised_coeffs = self.minimiser.crps_minimiser_wrapper(
            initial_guess, forecast_predictor,
            truth, forecast_var,
            self.predictor_of_mean_flag,
            self.distribution.lower())

    coefficients_cube = self.create_coefficients_cube(
        optimised_coeffs, historic_forecast)
    return coefficients_cube
def process(self, historic_forecast, truth, landsea_mask=None):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main contents of this method is:

    1. Check that the predictor_of_mean_flag is valid.
    2. Filter the historic forecasts and truth to ensure that these
       inputs match in validity time.
    3. Apply unit conversion to ensure that the historic forecasts and
       truth have the desired units for calibration.
    4. Calculate the variance of the historic forecasts. If the chosen
       predictor is the mean, also calculate the mean of the historic
       forecasts.
    5. If a land-sea mask is provided then mask out sea points in the
       truth and predictor from the historic forecasts.
    6. Calculate initial guess at coefficient values by performing a
       linear regression, if requested, otherwise default values are
       used.
    7. Perform minimisation.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used
            for calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        landsea_mask (iris.cube.Cube):
            The optional cube containing a land-sea mask. If provided,
            only land points are used to calculate the coefficients.
            Within the land-sea mask cube land points should be
            specified as ones, and sea points as zeros.

    Returns:
        coefficients_cube (iris.cube.Cube):
            Cube containing the coefficients estimated using EMOS.
            The cube contains a coefficient_index dimension coordinate
            and a coefficient_name auxiliary coordinate.

    Raises:
        ValueError: If the units of the historic and truth cubes do
            not match.

    """
    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    historic_forecast, truth = self._filter_non_matching_cubes(
        historic_forecast, truth)

    # Make sure inputs have the same units.
    if self.desired_units:
        historic_forecast.convert_units(self.desired_units)
        truth.convert_units(self.desired_units)

    if historic_forecast.units != truth.units:
        # The placeholders were previously never filled in, so the
        # message showed literal "{}" instead of the units.
        msg = ("The historic forecast units of {} do not match "
               "the truth units {}. These units must match, so that "
               "the coefficients can be estimated.".format(
                   historic_forecast.units, truth.units))
        raise ValueError(msg)

    if self.predictor_of_mean_flag.lower() == "mean":
        no_of_realizations = None
        forecast_predictor = historic_forecast.collapsed(
            "realization", iris.analysis.MEAN)
    elif self.predictor_of_mean_flag.lower() == "realizations":
        no_of_realizations = len(
            historic_forecast.coord("realization").points)
        forecast_predictor = historic_forecast

    forecast_var = historic_forecast.collapsed(
        "realization", iris.analysis.VARIANCE)

    # If a landsea_mask is provided mask out the sea points
    if landsea_mask:
        self.mask_cube(forecast_predictor, landsea_mask)
        self.mask_cube(forecast_var, landsea_mask)
        self.mask_cube(truth, landsea_mask)

    # Computing initial guess for EMOS coefficients
    initial_guess = self.compute_initial_guess(
        truth, forecast_predictor, self.predictor_of_mean_flag,
        self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
        no_of_realizations=no_of_realizations)

    # Calculate coefficients if there are no nans in the initial guess.
    if np.any(np.isnan(initial_guess)):
        optimised_coeffs = initial_guess
    else:
        optimised_coeffs = self.minimiser.process(
            initial_guess, forecast_predictor,
            truth, forecast_var,
            self.predictor_of_mean_flag,
            self.distribution.lower())

    coefficients_cube = self.create_coefficients_cube(
        optimised_coeffs, historic_forecast)
    return coefficients_cube
def __init__(self, distribution, current_cycle, desired_units=None,
             predictor_of_mean_flag="mean", max_iterations=1000):
    """
    Create an ensemble calibration plugin that, for Nonhomogeneous
    Gaussian Regression, calculates coefficients based on historical
    forecasts and applies the coefficients to the current forecast.

    Args:
        distribution (str):
            Name of distribution. Assume that the current forecast can
            be represented using this distribution.
        current_cycle (str):
            The current cycle in YYYYMMDDTHHMMZ format
            e.g. 20171122T0100Z. This is used to create a
            forecast_reference_time coordinate on the resulting EMOS
            coefficients cube.
        desired_units (str or cf_units.Unit):
            The unit that you would like the calibration to be
            undertaken in. The current forecast, historical forecast
            and truth will be converted as required.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated
            mean. Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        max_iterations (int):
            The maximum number of iterations allowed until the
            minimisation has converged to a stable solution. If the
            maximum number of iterations is reached, but the
            minimisation has not yet converged to a stable solution,
            then the available solution is used anyway, and a warning
            is raised. If the predictor_of_mean is "realizations",
            then the number of iterations may require increasing, as
            there will be more coefficients to solve for.

    Raises:
        ValueError: If the given distribution is not valid.

    Warns:
        ImportWarning: If the statsmodels module can't be imported.

    """
    # Function-scope import: the deprecated ``imp`` module used
    # previously is no longer available from Python 3.12 onwards.
    import importlib.util

    valid_distributions = (
        ContinuousRankedProbabilityScoreMinimisers().
        minimisation_dict.keys())
    if distribution not in valid_distributions:
        msg = ("Given distribution {} not available. Available "
               "distributions are {}".format(
                   distribution, valid_distributions))
        raise ValueError(msg)
    self.distribution = distribution
    self.current_cycle = current_cycle
    self.desired_units = desired_units
    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)
    self.predictor_of_mean_flag = predictor_of_mean_flag
    self.max_iterations = max_iterations
    self.minimiser = ContinuousRankedProbabilityScoreMinimisers(
        max_iterations=self.max_iterations)
    # Setting default values for coeff_names. Beta is the final
    # coefficient name in the list, as there can potentially be
    # multiple beta coefficients if the ensemble realizations, rather
    # than the ensemble mean, are provided as the predictor.
    self.coeff_names = ["gamma", "delta", "alpha", "beta"]

    # find_spec returns None (rather than raising) when the package
    # is not installed.
    statsmodels_found = (
        importlib.util.find_spec("statsmodels") is not None)
    if statsmodels_found:
        import statsmodels.api as sm
        self.sm = sm
    elif predictor_of_mean_flag.lower() == "realizations":
        msg = ("The statsmodels can not be imported. "
               "Will not be able to calculate an initial guess from "
               "the individual ensemble realizations. "
               "A default initial guess will be used without "
               "estimating coefficients from a linear model.")
        warnings.warn(msg, ImportWarning)
    self.statsmodels_found = statsmodels_found
def process(self, initial_guess, forecast_predictor, truth, forecast_var,
            predictor_of_mean_flag, distribution):
    """
    Function to pass a given function to the scipy minimize
    function to estimate optimised values for the coefficients.

    If the predictor_of_mean_flag is the ensemble mean, this function
    estimates values for alpha, beta, gamma and delta based on the
    equation:
    N(alpha + beta * ensemble_mean, gamma + delta * ensemble_variance),
    where N is a chosen distribution.

    If the predictor_of_mean_flag is the ensemble realizations, this
    function estimates values for alpha, beta, gamma and delta based on
    the equation:

    .. math::
      N(alpha + beta0 * realization0 + beta1 * realization1,

      gamma + delta * ensemble\\_variance)

    where N is a chosen distribution and the number of beta terms
    depends on the number of realizations provided.

    Args:
        initial_guess (list):
            List of optimised coefficients.
            Order of coefficients is [gamma, delta, alpha, beta].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the ensemble variance.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated
            mean. Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        distribution (str):
            String used to access the appropriate function for use in
            the minimisation within self.minimisation_dict.

    Returns:
        optimised_coeffs (list):
            List of optimised coefficients.
            Order of coefficients is [gamma, delta, alpha, beta].

    Raises:
        KeyError: If the distribution is not supported.

    Warns:
        Warning: If the minimisation did not converge.

    """
    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is printed.

        Args:
            allvecs (list):
                List of numpy arrays containing the optimised
                coefficients, after each iteration.

        Warns:
            Warning: If a satisfactory minimisation has not been
                achieved.
        """
        # With fewer than two recorded vectors there is no previous
        # iteration to compare against, and allvecs[-2] would raise
        # an IndexError.
        if len(allvecs) < 2:
            return
        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
        if (np.any(last_iteration_percentage_change >
                   self.TOLERATED_PERCENTAGE_CHANGE)):
            # Note: this changes numpy's global print options.
            np.set_printoptions(suppress=True)
            msg = ("The final iteration resulted in a percentage change "
                   "that is greater than the accepted threshold of 5% "
                   "i.e. {}. "
                   "\nA satisfactory minimisation has not been achieved. "
                   "\nLast iteration: {}, "
                   "\nLast-but-one iteration: {}"
                   "\nAbsolute difference: {}\n").format(
                       last_iteration_percentage_change, allvecs[-1],
                       allvecs[-2],
                       np.absolute(allvecs[-2] - allvecs[-1]))
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        msg = ("Distribution requested {} is not supported in {} "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        # Chain the original lookup failure for easier debugging.
        raise KeyError(msg) from err

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)

    # Flatten the data arrays and remove any missing data.
    truth_data = flatten_ignoring_masked_data(truth.data)
    forecast_var_data = flatten_ignoring_masked_data(forecast_var.data)
    if predictor_of_mean_flag.lower() == "mean":
        forecast_predictor_data = flatten_ignoring_masked_data(
            forecast_predictor.data)
    elif predictor_of_mean_flag.lower() == "realizations":
        forecast_predictor = enforce_coordinate_ordering(
            forecast_predictor, "realization")
        # Need to transpose this array so there are columns for each
        # ensemble member rather than rows.
        forecast_predictor_data = flatten_ignoring_masked_data(
            forecast_predictor.data, preserve_leading_dimension=True).T

    # Increased precision is needed for stable coefficient calculation.
    # The resulting coefficients are cast to float32 prior to output.
    initial_guess = np.array(initial_guess, dtype=np.float64)
    forecast_predictor_data = forecast_predictor_data.astype(np.float64)
    forecast_var_data = forecast_var_data.astype(np.float64)
    truth_data = truth_data.astype(np.float64)
    sqrt_pi = np.sqrt(np.pi).astype(np.float64)

    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor_of_mean_flag),
        method="Nelder-Mead",
        options={"maxiter": self.max_iterations, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.max_iterations, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x.astype(np.float32)