def setUp(self):
    """Create a forecast cube of ones and two coefficients cubes.

    The forecast cube wraps a 3x3x3 float32 array of ones with
    realizations 0, 1 and 2. One coefficients cube is built with the
    default predictor settings and one with
    predictor_of_mean_flag="realizations".
    """
    cycle = "20171110T0000Z"
    forecast = set_up_variable_cube(
        np.ones((3, 3, 3), dtype=np.float32), realizations=[0, 1, 2])
    self.current_temperature_forecast_cube = forecast

    # Four coefficients are supplied for the default (mean) predictor.
    mean_coeffs = [
        4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00]
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        mean_coeffs, forecast)

    # Six coefficients are supplied when the realizations are the predictor.
    realization_coeffs = np.array([
        4.55819380e-06, -8.02401974e-09, 1.66667055e+00,
        1.00000011e+00, 1.00000011e+00, 1.00000011e+00])
    realization_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_realizations = (
        realization_estimator.create_coefficients_cube(
            realization_coeffs, forecast))
def setUp(self):
    """Create a Kelvin forecast cube and two coefficients cubes.

    The forecast data are nine evenly spaced values from -45 to 45, tiled
    three times into a 3x3x3 array and shifted by 273.15. The three slices
    along the leading axis are then offset by -2, +2 and +4 respectively
    before conversion to float32.
    """
    cycle = "20171110T0000Z"

    values = np.tile(np.linspace(-45.0, 45.0, 9), 3).reshape(3, 3, 3)
    values = values + 273.15
    # Give each slice along the leading axis its own offset.
    for index, offset in enumerate((-2, 2, 4)):
        values[index] += offset
    values = values.astype(np.float32)

    forecast = set_up_variable_cube(
        values, units="Kelvin", realizations=[0, 1, 2])
    self.current_temperature_forecast_cube = forecast

    # Coefficients cube for the default (mean) predictor.
    mean_coeffs = [
        4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00]
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        mean_coeffs, forecast)

    # Coefficients cube for the realizations predictor.
    realization_coeffs = np.array([5, 1, 0, 0.57, 0.6, 0.6])
    realization_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_realizations = (
        realization_estimator.create_coefficients_cube(
            realization_coeffs, forecast))
def setUp(self):
    """Build the coefficients cubes used by the tests.

    After the parent class set-up has run, three coefficients cubes are
    created from the expected Gaussian coefficients held on the test case
    (the original notes say these come from the SetupExpectedCoefficients
    class): one for the ensemble mean predictor, and two for the ensemble
    realizations predictor, with coefficients estimated with and without
    statsmodels respectively. All are built against the same forecast
    cube, so applying them would be expected to give similar results.
    """
    super().setUp()

    cycle = "20171110T0000Z"
    forecast = self.current_temperature_forecast_cube

    # Ensemble mean as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        self.expected_mean_predictor_gaussian, forecast)

    # Ensemble realizations as the predictor; coefficients estimated
    # using statsmodels.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_statsmodels, forecast))

    # Ensemble realizations as the predictor; coefficients estimated
    # without using statsmodels.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_no_statsmodels, forecast))
def process(historic_forecast, truth, combined, historic_forecast_dict,
            truth_dict, distribution, cycletime, units=None,
            predictor_of_mean='mean', max_iterations=1000):
    """Estimate coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The historical forecasts and truths can be supplied either as two
    separate inputs, or as one combined input together with the two
    metadata dictionaries needed to separate it. The estimated
    coefficients are returned as a cube.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used for
            calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        combined (iris.cube.CubeList):
            A cubelist containing a combination of historic forecasts and
            associated truths.
        historic_forecast_dict (dict):
            Dictionary specifying the metadata that defines the historic
            forecast. For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_ens"
                    }
                }
        truth_dict (dict):
            Dictionary specifying the metadata that defines the truth.
            For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_det"
                    }
                }
        distribution (str):
            The distribution that will be used for calibration. This will
            be dependent upon the input phenomenon.
        cycletime (str):
            The cycle at which forecasts will be calibrated using the
            calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this
            cycletime. The format is YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
            Default is None.
        predictor_of_mean (str):
            The input used to calculate the calibrated mean. Currently the
            ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported. Default is 'mean'.
        max_iterations (int):
            The maximum number of iterations allowed until the
            minimisation has converged to a stable solution. If this
            number is reached before convergence, the available solution
            is used anyway and a warning is raised. With the
            "realizations" predictor the number of iterations may require
            increasing, as there are more coefficients to solve.
            Default is 1000.

    Returns:
        result (iris.cube.Cube or None):
            Cube containing the coefficients estimated using EMOS, with a
            coefficient_index dimension coordinate and a coefficient_name
            auxiliary coordinate. None is returned when splitting the
            combined input finds nothing matching the supplied metadata.

    Raises:
        ValueError: If the historic forecast and truth inputs are
            specified, then the combined input, historic forecast
            dictionary and truth dictionary should not be specified.
        ValueError: If one of the historic forecast or truth inputs are
            specified, then they should both be specified.
        ValueError: All of the combined_filepath,
            historic_forecast_identifier and truth_identifier arguments
            should be specified if one of the arguments are specified.

    Warns:
        UserWarning: The metadata to identify the desired historic
            forecast or truth has found nothing matching the metadata
            information supplied.
    """
    # Exactly one of the historic forecast / truth pair being supplied is
    # an error: they are only usable together.
    if bool(historic_forecast) != bool(truth):
        msg = ("Both the historic_filepath and truth_filepath arguments "
               "should be specified if one of these arguments are "
               "specified.")
        raise ValueError(msg)

    combined_inputs = [combined, historic_forecast_dict, truth_dict]

    # When the pair has been supplied directly, none of the inputs used to
    # split a combined cubelist may be supplied as well.
    if historic_forecast and any(combined_inputs):
        msg = ("If the historic_filepath and truth_filepath arguments "
               "are specified then none of the the combined_filepath, "
               "historic_forecast_identifier and truth_identifier "
               "arguments should be specified.")
        raise ValueError(msg)

    # Separating the combined input requires all three related inputs.
    if any(combined_inputs) and not all(combined_inputs):
        msg = ("All of the combined_filepath, "
               "historic_forecast_identifier and truth_identifier "
               "arguments should be specified if one of the arguments are "
               "specified.")
        raise ValueError(msg)

    if combined is not None:
        try:
            splitter = SplitHistoricForecastAndTruth(
                historic_forecast_dict, truth_dict)
            historic_forecast, truth = splitter.process(combined)
        except ValueError as err:
            # Only the "nothing matched the metadata" failure is downgraded
            # to a warning; any other ValueError is propagated unchanged.
            message = str(err)
            if not message.startswith("The metadata to identify the desired"):
                raise
            warnings.warn(message)
            return None

    plugin = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        max_iterations=max_iterations)
    return plugin.process(historic_forecast, truth)
def process(*cubes: cli.inputcube,
            distribution, truth_attribute, cycletime, units=None,
            predictor_of_mean='mean', tolerance: float = 0.01,
            max_iterations: int = 1000):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided:
    historical forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the
            same cube name and will be separated based on the truth
            attribute. Optionally this may also contain a single land-sea
            mask cube on the same domain as the historic forecasts and
            truth (where land points are set to one and sea points are set
            to zero).
        distribution (str):
            The distribution that will be used for calibration. This will
            be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes. Only the first
            "=" separates the attribute from the value, so the value itself
            may contain "=".
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated
            using the calculated EMOS coefficients. The validity time in
            the output coefficients cube will be calculated relative to
            this cycletime. This cycletime is in the format
            YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple
            iterations result in a CRPS equal to the same value within the
            specified tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the
            minimisation has converged to a stable solution. If the
            maximum number of iterations is reached but the minimisation
            has not yet converged to a stable solution, then the available
            solution is used anyway, and a warning is raised. If the
            predictor_of_mean is "realizations", then the number of
            iterations may require increasing, as there will be more
            coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The
            cube contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.

    Raises:
        RuntimeError:
            An unexpected number of distinct cube names were passed in.
        RuntimeError:
            More than one cube was identified as a land-sea mask.
        RuntimeError:
            Missing truth or historical forecast in input cubes.
        ValueError:
            The truth_attribute is not in the "attribute=value" format.

    """
    from improver.utilities.cube_manipulation import MergeCubes
    from improver.ensemble_calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration)

    # Group the inputs by cube name, preserving the order in which each
    # name was first seen.
    cubes_by_name = {}
    for cube in cubes:
        cubes_by_name.setdefault(cube.name(), []).append(cube)

    if len(cubes_by_name) == 1:
        # Only one group - all forecast/truth cubes.
        land_sea_mask = None
        (input_cubes,) = cubes_by_name.values()
    elif len(cubes_by_name) == 2:
        # Two groups - the smaller one should be the land-sea mask, since
        # more than 2 cubes are required in the forecast/truth group.
        # sorted() is stable, so groups of equal size keep their input
        # order.
        mask_cubes, input_cubes = sorted(cubes_by_name.values(), key=len)
        if len(mask_cubes) != 1:
            raise RuntimeError('Expected one cube for land-sea mask.')
        land_sea_mask = mask_cubes[0]
    else:
        raise RuntimeError('Must have cubes with 1 or 2 distinct names.')

    # Split on the first "=" only, so that attribute values which
    # themselves contain "=" are matched intact rather than failing on
    # tuple unpacking.
    truth_key, separator, truth_value = truth_attribute.partition('=')
    if not separator:
        raise ValueError(
            'The truth_attribute must be in the format "attribute=value", '
            'but was: {!r}'.format(truth_attribute))

    # Split non-land_sea_mask cubes on forecast vs truth.
    grouped_cubes = {'truth': [], 'historical forecast': []}
    for cube in input_cubes:
        is_truth = cube.attributes.get(truth_key) == truth_value
        group = 'truth' if is_truth else 'historical forecast'
        grouped_cubes[group].append(cube)

    missing_inputs = ' and '.join(k for k, v in grouped_cubes.items() if not v)
    if missing_inputs:
        raise RuntimeError('Missing ' + missing_inputs + ' input.')

    truth = MergeCubes()(grouped_cubes['truth'])
    forecast = MergeCubes()(grouped_cubes['historical forecast'])

    return EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        tolerance=tolerance, max_iterations=max_iterations).process(
            forecast, truth, landsea_mask=land_sea_mask)
def setUp(self):
    """Build the coefficients cubes and expected parameter data.

    After the parent class set-up has run, three coefficients cubes are
    created from the expected Gaussian coefficients held on the test case
    (the original notes say these come from the SetupExpectedCoefficients
    class): one for the ensemble mean predictor, and two for the ensemble
    realizations predictor, with coefficients estimated with and without
    statsmodels respectively. Expected location and scale parameter
    arrays, and cubes wrapping the mean-predictor arrays, are stored for
    use in various tests.
    """
    super().setUp()

    cycle = "20171110T0000Z"
    forecast = self.current_temperature_forecast_cube

    # Ensemble mean as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        self.expected_mean_predictor_gaussian, forecast)

    # Ensemble realizations as the predictor; coefficients estimated
    # using statsmodels.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_statsmodels, forecast))

    # Ensemble realizations as the predictor; coefficients estimated
    # without using statsmodels.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_no_statsmodels, forecast))

    # Expected data that are used in various tests.
    loc_param_mean = np.array(
        [[273.7854, 274.6913, 275.4461],
         [276.8652, 277.6502, 278.405],
         [279.492, 280.1562, 280.9715]], dtype=np.float32)
    scale_param_mean = np.array(
        [[0.1952, 0.1974, 0.0117],
         [0.0226, 0.0197, 0.0117],
         [0.0532, 0.0029, 0.0007]], dtype=np.float32)
    self.expected_loc_param_mean = loc_param_mean
    self.expected_scale_param_mean = scale_param_mean

    self.expected_loc_param_statsmodels_realizations = np.array(
        [[274.1395, 275.0975, 275.258],
         [276.9771, 277.3487, 278.3144],
         [280.0085, 280.2506, 281.1632]], dtype=np.float32)
    self.expected_loc_param_no_statsmodels_realizations = np.array(
        [[273.4695, 274.4673, 275.3034],
         [276.8648, 277.733, 278.5632],
         [279.7562, 280.4913, 281.3889]], dtype=np.float32)

    # Output cubes holding the expected mean-predictor data.
    self.expected_loc_param_mean_cube = set_up_variable_cube(
        loc_param_mean, name="location_parameter", units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
    self.expected_scale_param_mean_cube = set_up_variable_cube(
        scale_param_mean, name="scale_parameter", units="Kelvin^2",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
def setUp(self):
    """Build the coefficients cubes and expected calibrated data.

    After the parent class set-up has run, three coefficients cubes are
    created from the expected Gaussian coefficients held on the test case
    (the original notes say these come from the SetupExpectedCoefficients
    class): one for the ensemble mean predictor, and two for the ensemble
    realizations predictor, with coefficients estimated with and without
    statsmodels respectively. Expected calibrated predictor and variance
    arrays for each of the three cases are stored for use in various
    tests.
    """
    super().setUp()

    cycle = "20171110T0000Z"
    forecast = self.current_temperature_forecast_cube

    # Ensemble mean as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        self.expected_mean_predictor_gaussian, forecast)

    # Ensemble realizations as the predictor; coefficients estimated
    # using statsmodels.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_statsmodels, forecast))

    # Ensemble realizations as the predictor; coefficients estimated
    # without using statsmodels.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_no_statsmodels, forecast))

    # Expected data for the ensemble mean predictor.
    self.expected_calibrated_predictor_mean = np.array(
        [[273.7854, 274.6913, 275.4461],
         [276.8652, 277.6502, 278.405],
         [279.492, 280.1562, 280.9715]])
    self.expected_calibrated_variance_mean = np.array(
        [[0.1952, 0.1974, 0.0117],
         [0.0226, 0.0197, 0.0117],
         [0.0532, 0.0029, 0.0007]])

    # Expected data for the realizations predictor (statsmodels).
    self.expected_calibrated_predictor_statsmodels_realizations = np.array(
        [[274.1395, 275.0975, 275.258],
         [276.9771, 277.3487, 278.3144],
         [280.0085, 280.2506, 281.1632]])
    self.expected_calibrated_variance_statsmodels_realizations = np.array(
        [[0.8973, 0.9073, 0.0536],
         [0.1038, 0.0904, 0.0536],
         [0.2444, 0.0134, 0.0033]])

    # Expected data for the realizations predictor (no statsmodels).
    self.expected_calibrated_predictor_no_statsmodels_realizations = (
        np.array(
            [[273.4695, 274.4673, 275.3034],
             [276.8648, 277.733, 278.5632],
             [279.7562, 280.4913, 281.3889]]))
    self.expected_calibrated_variance_no_statsmodels_realizations = (
        np.array(
            [[0.9344, 0.9448, 0.0558],
             [0.1081, 0.0941, 0.0558],
             [0.2545, 0.0139, 0.0035]]))
def main(argv=None):
    """Run the EMOS coefficient estimation command line interface.

    Parses the command line arguments, loads the historical forecast and
    historical truth cubes from the given NetCDF paths, estimates the
    coefficients for Ensemble Model Output Statistics (EMOS), otherwise
    known as Non-homogeneous Gaussian Regression (NGR), and writes the
    resulting coefficients to the output NetCDF path.

    Args:
        argv (list of str or None):
            Command line arguments, passed to the parser as its ``args``.
    """
    parser = ArgParser(
        description='Estimate coefficients for Ensemble Model Output '
                    'Statistics (EMOS), otherwise known as Non-homogeneous '
                    'Gaussian Regression (NGR)')

    # Arguments are registered in this order: positional arguments first
    # (their order defines the command line order), then the options.
    argument_table = [
        ('distribution', {
            'metavar': 'DISTRIBUTION',
            'choices': ['gaussian', 'truncated gaussian'],
            'help': 'The distribution that will be used for '
                    'calibration. This will be dependent upon the '
                    'input phenomenon. This has to be supported by '
                    'the minimisation functions in '
                    'ContinuousRankedProbabilityScoreMinimisers.'}),
        ('cycletime', {
            'metavar': 'CYCLETIME',
            'type': str,
            'help': 'This denotes the cycle at which forecasts '
                    'will be calibrated using the calculated '
                    'EMOS coefficients. The validity time in the '
                    'output coefficients cube will be calculated '
                    'relative to this cycletime. '
                    'This cycletime is in the format '
                    'YYYYMMDDTHHMMZ.'}),
        # Filepaths for historic and truth data.
        ('historic_filepath', {
            'metavar': 'HISTORIC_FILEPATH',
            'help': 'A path to an input NetCDF file containing the '
                    'historic forecast(s) used for calibration.'}),
        ('truth_filepath', {
            'metavar': 'TRUTH_FILEPATH',
            'help': 'A path to an input NetCDF file containing the '
                    'historic truth analyses used for calibration.'}),
        ('output_filepath', {
            'metavar': 'OUTPUT_FILEPATH',
            'help': 'The output path for the processed NetCDF'}),
        # Optional arguments.
        ('--units', {
            'metavar': 'UNITS',
            'help': 'The units that calibration should be undertaken '
                    'in. The historical forecast and truth will be '
                    'converted as required.'}),
        ('--predictor_of_mean', {
            'metavar': 'PREDICTOR_OF_MEAN',
            'choices': ['mean', 'realizations'],
            'default': 'mean',
            'help': 'String to specify the predictor used to '
                    'calibrate the forecast mean. Currently the '
                    'ensemble mean ("mean") and the ensemble '
                    'realizations ("realizations") are supported as '
                    'options. Default: "mean".'}),
        ('--max_iterations', {
            'metavar': 'MAX_ITERATIONS',
            'type': np.int32,
            'default': 1000,
            'help': 'The maximum number of iterations allowed '
                    'until the minimisation has converged to a '
                    'stable solution. If the maximum number '
                    'of iterations is reached, but the '
                    'minimisation has not yet converged to a '
                    'stable solution, then the available solution '
                    'is used anyway, and a warning is raised.'
                    'This may be modified for testing purposes '
                    'but otherwise kept fixed. If the '
                    'predictor_of_mean is "realizations", '
                    'then the number of iterations may require '
                    'increasing, as there will be more coefficients '
                    'to solve for.'}),
    ]
    for flag, options in argument_table:
        parser.add_argument(flag, **options)

    parsed = parser.parse_args(args=argv)

    forecast_cube = load_cube(parsed.historic_filepath)
    truth_cube = load_cube(parsed.truth_filepath)

    # Estimate coefficients using Ensemble Model Output Statistics (EMOS).
    plugin = EstimateCoefficientsForEnsembleCalibration(
        parsed.distribution, parsed.cycletime,
        desired_units=parsed.units,
        predictor_of_mean_flag=parsed.predictor_of_mean,
        max_iterations=parsed.max_iterations)
    save_netcdf(
        plugin.process(forecast_cube, truth_cube), parsed.output_filepath)