def test_mean_predictor_estimate_coefficients(self):
    """Check the linear-model initial guess with the ensemble mean predictor.

    The initial guess for the calibration coefficients is estimated with a
    linear model. During the training period the truth is offset from the
    forecast by one, so the regression is expected to give a gradient of 1
    and an intercept of 1.
    """
    expected = np.array([0., 1., 1., 1.], dtype=np.float32)
    use_linear_model = True
    estimator = Plugin(self.distribution, self.desired_units)
    initial_guess = estimator.compute_initial_guess(
        self.truth,
        self.current_forecast_predictor_mean,
        self.predictor_of_mean_flag,
        use_linear_model,
    )
    self.assertArrayAlmostEqual(initial_guess, expected)
def test_coefficient_values_for_fake_distribution(self):
    """Check that an unavailable distribution raises a KeyError.

    The requested distribution ("fake") has no matching minimisation
    function, so estimating the coefficients must fail with a KeyError
    whose message matches "Distribution requested".
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    truth_cube = self.temperature_truth_cube
    estimator = Plugin("fake", "degreesC")
    with self.assertRaisesRegex(KeyError, "Distribution requested"):
        estimator.estimate_coefficients_for_ngr(
            forecast_cube, historic_cube, truth_cube)
def test_truth_data_is_none(self):
    """Check that a TypeError is raised when the truth holds None.

    The truth is supplied as a CubeList containing None rather than a cube,
    and the error message is expected to match "The input data within the".
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    empty_truth = iris.cube.CubeList([None])
    estimator = Plugin("gaussian", "degreesC")
    with self.assertRaisesRegex(TypeError, "The input data within the"):
        estimator.estimate_coefficients_for_ngr(
            forecast_cube, historic_cube, empty_truth)
def test_coefficients_truncated_gaussian_realizations_statsmodels(self):
    """Check coefficients for a truncated Gaussian with realization predictors.

    Both the optimised coefficient values and the coefficient names must
    match the expected values when the ensemble realizations are used as
    the predictor of the mean.
    """
    estimator = Plugin(
        "truncated gaussian",
        self.current_cycle,
        predictor_of_mean_flag="realizations",
    )
    coefficients = estimator.process(
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube,
    )
    self.assertEMOSCoefficientsAlmostEqual(
        coefficients.data,
        self.expected_realizations_truncated_gaussian_statsmodels,
    )
    self.assertArrayEqual(
        coefficients.coord("coefficient_name").points,
        self.coeff_names_realizations,
    )
def test_coefficient_values_for_gaussian_distribution_realizations(self):
    """Check optimised coefficients for a Gaussian with realization predictors.

    Ensure that the values generated within optimised_coeffs match the
    expected values, and that the coefficient names also match the expected
    values. The expected values depend on whether statsmodels is installed.
    """
    # The "imp" module is deprecated and was removed in Python 3.12, so
    # importlib is used to detect whether statsmodels is available.
    import importlib.util

    statsmodels_found = importlib.util.find_spec('statsmodels') is not None
    if statsmodels_found:
        data = [-0.00114, -0.00006, 1.00037, -0.00196, 0.99999, -0.00315]
    else:
        data = [
            4.30804737e-02, 1.39042785e+00, 8.99047025e-04,
            2.02661310e-01, 9.27197381e-01, 3.17407626e-01
        ]

    current_forecast = self.current_temperature_forecast_cube
    historic_forecasts = self.historic_temperature_forecast_cube
    truth = self.temperature_truth_cube

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "realizations"

    plugin = Plugin(distribution, desired_units,
                    predictor_of_mean_flag=predictor_of_mean_flag)
    result = plugin.estimate_coefficients_for_ngr(
        current_forecast, historic_forecasts, truth)
    optimised_coeffs, coeff_names = result

    for key in optimised_coeffs.keys():
        self.assertArrayAlmostEqual(optimised_coeffs[key], data, decimal=5)
    self.assertListEqual(coeff_names, ["gamma", "delta", "a", "beta"])
def test_realizations_predictor_estimate_coefficients(self):
    """Check the linear-model initial guess with realization predictors.

    The initial guess for the calibration coefficients is estimated with a
    linear model using the ensemble realizations as the predictors. The
    regression is expected to give an intercept of 0.333333 and different
    weights per realization, because some realizations are closer to the
    truth than others in this instance.
    """
    expected = [0., 1., 0.333333, 0., 0.333333, 0.666667]
    predictor = "realizations"
    use_linear_model = True
    estimator = Plugin(self.distribution, self.desired_units)
    initial_guess = estimator.compute_initial_guess(
        self.truth,
        self.current_forecast_predictor_realizations,
        predictor,
        use_linear_model,
        no_of_realizations=self.no_of_realizations,
    )
    self.assertArrayAlmostEqual(initial_guess, expected)
def test_basic_mean_predictor(self):
    """Check that the initial guess is a list for the ensemble mean predictor.

    The coefficients are not estimated from a linear model in this test.
    """
    ensemble = self.cube
    mean_predictor = ensemble.collapsed("realization", iris.analysis.MEAN)
    max_truth = ensemble.collapsed("realization", iris.analysis.MAX)
    use_linear_model = False

    estimator = Plugin("gaussian", "degreesC")
    initial_guess = estimator.compute_initial_guess(
        max_truth, mean_predictor, "mean", use_linear_model)
    self.assertIsInstance(initial_guess, list)
def test_basic(self):
    """Check the return types of the coefficient estimation.

    The optimised coefficients must be returned as a dictionary and the
    coefficient names as a list, with every dictionary entry holding one
    value per coefficient name.
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    truth_cube = self.temperature_truth_cube

    estimator = Plugin("gaussian", "degreesC")
    optimised_coeffs, coeff_names = estimator.estimate_coefficients_for_ngr(
        forecast_cube, historic_cube, truth_cube)

    self.assertIsInstance(optimised_coeffs, dict)
    self.assertIsInstance(coeff_names, list)
    for coefficients in optimised_coeffs.values():
        self.assertEqual(len(coefficients), len(coeff_names))
def test_coefficient_values_truncated_gaussian_distribution_realization(
        self):
    """Check coefficients for a truncated Gaussian with realization predictors.

    Ensure that the values for the optimised coefficients match the
    expected values, and that the coefficient names also match the expected
    values, for a truncated Gaussian distribution where the realizations
    are used as the predictor of the mean. The expected values depend on
    whether statsmodels is installed.
    """
    # The "imp" module is deprecated and was removed in Python 3.12, so
    # importlib is used to detect whether statsmodels is available.
    import importlib.util

    statsmodels_found = importlib.util.find_spec('statsmodels') is not None
    if statsmodels_found:
        data = [
            0.11821805, -0.00474737, 0.17631301,
            0.17178835, 0.66749225, 0.72287342
        ]
    else:
        data = [
            2.05550997, 0.10577237, 0.00028531,
            0.53208837, 0.67233013, 0.53704241
        ]

    distribution = "truncated gaussian"
    current_cycle = "20171110T0000Z"
    predictor_of_mean_flag = "realizations"
    expected_coeff_names = [
        'gamma', 'delta', 'alpha', 'beta0', 'beta1', 'beta2'
    ]

    plugin = Plugin(distribution, current_cycle,
                    predictor_of_mean_flag=predictor_of_mean_flag)
    result = plugin.estimate_coefficients_for_ngr(
        self.historic_wind_speed_forecast_cube, self.wind_speed_truth_cube)

    self.assertArrayAlmostEqual(result.data, data)
    self.assertArrayEqual(
        result.coord("coefficient_name").points, expected_coeff_names)
def test_basic_realizations_predictor(self):
    """Check the initial guess type for the realization predictors.

    When the individual ensemble realizations are used as the predictors,
    the initial guess for the calibration coefficients is expected to be a
    numpy array. The coefficients are not estimated from a linear model.
    """
    realization_predictor = self.cube.copy()
    max_truth = self.cube.collapsed("realization", iris.analysis.MAX)
    realization_count = 3
    use_linear_model = False

    estimator = Plugin("gaussian", "degreesC")
    initial_guess = estimator.compute_initial_guess(
        max_truth,
        realization_predictor,
        "realizations",
        use_linear_model,
        no_of_realizations=realization_count,
    )
    self.assertIsInstance(initial_guess, np.ndarray)
def test_basic_mean_predictor_value_check(self):
    """Check the default initial guess values for the ensemble mean predictor.

    The coefficients are not estimated using a linear model, so the default
    values for the initial guess are expected.
    """
    expected = [1, 1, 0, 1]
    mean_predictor = self.cube.collapsed("realization", iris.analysis.MEAN)
    max_truth = self.cube.collapsed("realization", iris.analysis.MAX)
    use_linear_model = False

    estimator = Plugin("gaussian", "degreesC")
    initial_guess = estimator.compute_initial_guess(
        max_truth, mean_predictor, "mean", use_linear_model)
    self.assertArrayAlmostEqual(initial_guess, expected)
def test_truth_data_is_fake_catch_warning(self, warning_list=None):
    """Check that a single UserWarning is issued for a non-cube truth.

    The truth is supplied as a string rather than a Cube or CubeList.
    warning_list is expected to hold the recorded warnings (presumably
    injected by a decorator that is not visible here - TODO confirm).
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    fake_truth = "fake"

    estimator = Plugin("gaussian", "degreesC")
    estimator.estimate_coefficients_for_ngr(
        forecast_cube, historic_cube, fake_truth)

    self.assertTrue(len(warning_list) == 1)
    self.assertTrue(
        any(item.category == UserWarning for item in warning_list))
    self.assertTrue("is not a Cube or CubeList" in str(warning_list[0]))
def test_coefficient_values_for_gaussian_distribution(self):
    """Check coefficient values and names for a Gaussian distribution.

    Both the optimised coefficient values and the coefficient names on the
    resulting cube must match the expected values.
    """
    expected = [
        4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00
    ]
    estimator = Plugin(
        "gaussian", "20171110T0000Z", desired_units="Celsius")
    coefficients = estimator.estimate_coefficients_for_ngr(
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube,
    )
    self.assertArrayAlmostEqual(coefficients.data, expected)
    self.assertArrayEqual(
        coefficients.coord("coefficient_name").points, self.coeff_names)
def test_truth_unit_conversion(self):
    """Check the coefficients when the truth cube has different units.

    The truth cube is converted to Fahrenheit before the coefficients are
    estimated in degreesC; the expected optimised coefficients must still
    be produced.
    """
    expected = [
        4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00
    ]
    fahrenheit_truth = self.temperature_truth_cube
    fahrenheit_truth.convert_units("Fahrenheit")

    estimator = Plugin(
        "gaussian", "20171110T0000Z", desired_units="degreesC")
    coefficients = estimator.estimate_coefficients_for_ngr(
        self.historic_temperature_forecast_cube, fahrenheit_truth)
    self.assertArrayAlmostEqual(coefficients.data, expected, decimal=5)
def test_mean_predictor_estimate_coefficients(self):
    """Check the linear-model initial guess with the ensemble mean predictor.

    The initial guess for the calibration coefficients is estimated using a
    linear model, with the ensemble mean as the predictor.
    """
    expected = np.array([1, 1, 2.66663, 1], dtype=np.float32)
    mean_predictor = self.cube.collapsed("realization", iris.analysis.MEAN)
    max_truth = self.cube.collapsed("realization", iris.analysis.MAX)
    use_linear_model = True

    estimator = Plugin("gaussian", "degreesC")
    initial_guess = estimator.compute_initial_guess(
        max_truth, mean_predictor, "mean", use_linear_model)
    self.assertArrayAlmostEqual(initial_guess, expected, decimal=5)
def test_coefficient_values_for_gaussian_distribution_realizations(self):
    """Check coefficients for a Gaussian with realization predictors.

    Ensure that the values for the optimised coefficients match the
    expected values, and that the coefficient names also match the expected
    values, for a Gaussian distribution where the realizations are used as
    the predictor of the mean. The expected values depend on whether
    statsmodels is installed.
    """
    # The "imp" module is deprecated and was removed in Python 3.12, so
    # importlib is used to detect whether statsmodels is available.
    import importlib.util

    statsmodels_found = importlib.util.find_spec('statsmodels') is not None
    if statsmodels_found:
        data = [-0.00114, -0.00006, 1.00037, -0.00196, 0.99999, -0.00315]
    else:
        data = [
            4.30804737e-02, 1.39042785e+00, 8.99047025e-04,
            2.02661310e-01, 9.27197381e-01, 3.17407626e-01
        ]

    distribution = "gaussian"
    current_cycle = "20171110T0000Z"
    desired_units = "Celsius"
    predictor_of_mean_flag = "realizations"
    expected_coeff_names = [
        'gamma', 'delta', 'alpha', 'beta0', 'beta1', 'beta2'
    ]

    plugin = Plugin(distribution, current_cycle,
                    desired_units=desired_units,
                    predictor_of_mean_flag=predictor_of_mean_flag)
    result = plugin.estimate_coefficients_for_ngr(
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)

    self.assertArrayAlmostEqual(result.data, data, decimal=5)
    self.assertArrayEqual(
        result.coord("coefficient_name").points, expected_coeff_names)
def test_truth_data_length_zero_catch_warning(self, warning_list=None):
    """Check that a UserWarning is issued when the truth is an empty CubeList.

    warning_list is expected to hold the recorded warnings (presumably
    injected by a decorator that is not visible here - TODO confirm).
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    empty_truth = iris.cube.CubeList([])

    estimator = Plugin("gaussian", "degreesC")
    estimator.estimate_coefficients_for_ngr(
        forecast_cube, historic_cube, empty_truth)

    expected_text = ("Insufficient input data present to estimate "
                     "coefficients using NGR.")
    self.assertTrue(
        any(item.category == UserWarning for item in warning_list))
    self.assertTrue(
        any(expected_text in str(item) for item in warning_list))
def test_truth_data_has_wrong_time_catch_warning(self, warning_list=None):
    """Check that a UserWarning is issued when the truth times do not match.

    The forecast reference time points of the truth cube are shifted by 10
    before the coefficients are estimated. warning_list is expected to hold
    the recorded warnings (presumably injected by a decorator that is not
    visible here - TODO confirm).
    """
    forecast_cube = self.current_temperature_forecast_cube
    historic_cube = self.historic_temperature_forecast_cube
    shifted_truth = self.temperature_truth_cube
    shifted_truth.coord("forecast_reference_time").points += 10

    estimator = Plugin("gaussian", "degreesC")
    estimator.estimate_coefficients_for_ngr(
        forecast_cube, historic_cube, shifted_truth)

    expected_text = "Unable to calibrate for the time points"
    self.assertTrue(
        any(item.category == UserWarning for item in warning_list))
    self.assertTrue(
        any(expected_text in str(item) for item in warning_list))
def test_members_predictor_estimate_coefficients(self):
    """Check the linear-model initial guess with the ensemble members.

    Test that the plugin returns the expected values for the initial guess
    for the calibration coefficients, when the ensemble members are used as
    the predictors. The coefficients are estimated using a linear model.
    The expected values depend on whether statsmodels is installed.
    """
    # The "imp" module is deprecated and was removed in Python 3.12, so
    # importlib is used to detect whether statsmodels is available.
    import importlib.util

    warnings.simplefilter("always")

    statsmodels_found = importlib.util.find_spec('statsmodels') is not None
    if statsmodels_found:
        data = [1., 1., 0.13559322, -0.11864407, 0.42372881, 0.69491525]
    else:
        data = [1, 1, 0, 1, 1, 1]

    cube = self.cube
    current_forecast_predictor = cube
    truth = cube.collapsed("realization", iris.analysis.MAX)

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "members"
    no_of_members = 3
    estimate_coefficients_from_linear_model_flag = True

    plugin = Plugin(distribution, desired_units)
    result = plugin.compute_initial_guess(
        truth, current_forecast_predictor, predictor_of_mean_flag,
        estimate_coefficients_from_linear_model_flag,
        no_of_members=no_of_members)

    self.assertArrayAlmostEqual(result, data)
def test_mismatching(self):
    """Check filtering when forecasts and truths are missing at different times.

    A historic forecast and a truth are missing at different validity
    times, so the filtered historic forecasts and truths are expected to
    contain cubes at the three matching validity times only.
    """
    matching_indices = (1, 3, 4)
    partial_truth = self.truth[1:].merge_cube()

    expected_historical_forecasts = iris.cube.CubeList(
        [self.historic_forecasts[index] for index in matching_indices]
    ).merge_cube()
    expected_truth = iris.cube.CubeList(
        [self.truth[index] for index in matching_indices]
    ).merge_cube()

    hf_result, truth_result = Plugin._filter_non_matching_cubes(
        self.partial_historic_forecasts, partial_truth)

    self.assertEqual(hf_result, expected_historical_forecasts)
    self.assertEqual(truth_result, expected_truth)
def setUp(self):
    """Create coefficients cubes and expected calibrated outputs.

    Coefficients cubes are built for the case where the ensemble mean is the
    predictor and for the cases where the ensemble realizations are the
    predictor (with and without statsmodels). All are constructed from the
    same underlying set of ensemble realizations, so applying them would be
    expected to give similar results. The coefficient values are taken from
    the SetupExpectedCoefficients class, and are the expected outputs from
    the tests that estimate the coefficients.
    """
    super().setUp()
    cycle = "20171110T0000Z"

    # Ensemble mean used as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        self.expected_mean_predictor_gaussian,
        self.current_temperature_forecast_cube)

    # Ensemble realizations used as the predictor, with the coefficients
    # estimated using statsmodels.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_statsmodels,
            self.current_temperature_forecast_cube))

    # Ensemble realizations used as the predictor, with the coefficients
    # estimated without using statsmodels.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_no_statsmodels,
            self.current_temperature_forecast_cube))

    # Expected data shared by various tests.
    self.expected_calibrated_predictor_mean = np.array([
        [273.7854, 274.6913, 275.4461],
        [276.8652, 277.6502, 278.405],
        [279.492, 280.1562, 280.9715],
    ])
    self.expected_calibrated_variance_mean = np.array([
        [0.1952, 0.1974, 0.0117],
        [0.0226, 0.0197, 0.0117],
        [0.0532, 0.0029, 0.0007],
    ])
    self.expected_calibrated_predictor_statsmodels_realizations = np.array([
        [274.1395, 275.0975, 275.258],
        [276.9771, 277.3487, 278.3144],
        [280.0085, 280.2506, 281.1632],
    ])
    self.expected_calibrated_variance_statsmodels_realizations = np.array([
        [0.8973, 0.9073, 0.0536],
        [0.1038, 0.0904, 0.0536],
        [0.2444, 0.0134, 0.0033],
    ])
    self.expected_calibrated_predictor_no_statsmodels_realizations = (
        np.array([
            [273.4695, 274.4673, 275.3034],
            [276.8648, 277.733, 278.5632],
            [279.7562, 280.4913, 281.3889],
        ]))
    self.expected_calibrated_variance_no_statsmodels_realizations = (
        np.array([
            [0.9344, 0.9448, 0.0558],
            [0.1081, 0.0941, 0.0558],
            [0.2545, 0.0139, 0.0035],
        ]))
def setUp(self):
    """Create coefficients cubes and expected location/scale parameters.

    Coefficients cubes are built for the case where the ensemble mean is the
    predictor and for the cases where the ensemble realizations are the
    predictor (with and without statsmodels). All are constructed from the
    same underlying set of ensemble realizations, so applying them would be
    expected to give similar results. The coefficient values are taken from
    the SetupExpectedCoefficients class, and are the expected outputs from
    the tests that estimate the coefficients.
    """
    super().setUp()
    cycle = "20171110T0000Z"

    # Ensemble mean used as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius")
    self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
        self.expected_mean_predictor_gaussian,
        self.current_temperature_forecast_cube)

    # Ensemble realizations used as the predictor, with the coefficients
    # estimated using statsmodels.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_statsmodels,
            self.current_temperature_forecast_cube))

    # Ensemble realizations used as the predictor, with the coefficients
    # estimated without using statsmodels.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "gaussian", cycle, desired_units="Celsius",
        predictor_of_mean_flag="realizations")
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cube(
            self.expected_realizations_gaussian_no_statsmodels,
            self.current_temperature_forecast_cube))

    # Expected data shared by various tests.
    self.expected_loc_param_mean = np.array(
        [
            [273.7854, 274.6913, 275.4461],
            [276.8652, 277.6502, 278.405],
            [279.492, 280.1562, 280.9715],
        ],
        dtype=np.float32)
    self.expected_scale_param_mean = np.array(
        [
            [0.1952, 0.1974, 0.0117],
            [0.0226, 0.0197, 0.0117],
            [0.0532, 0.0029, 0.0007],
        ],
        dtype=np.float32)
    self.expected_loc_param_statsmodels_realizations = np.array(
        [
            [274.1395, 275.0975, 275.258],
            [276.9771, 277.3487, 278.3144],
            [280.0085, 280.2506, 281.1632],
        ],
        dtype=np.float32)
    self.expected_loc_param_no_statsmodels_realizations = np.array(
        [
            [273.4695, 274.4673, 275.3034],
            [276.8648, 277.733, 278.5632],
            [279.7562, 280.4913, 281.3889],
        ],
        dtype=np.float32)

    # Output cubes holding the expected data.
    self.expected_loc_param_mean_cube = set_up_variable_cube(
        self.expected_loc_param_mean,
        name="location_parameter",
        units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
    self.expected_scale_param_mean_cube = set_up_variable_cube(
        self.expected_scale_param_mean,
        name="scale_parameter",
        units="Kelvin^2",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
def process(*cubes: cli.inputcube,
            distribution,
            truth_attribute,
            cycletime,
            units=None,
            predictor_of_mean='mean',
            tolerance: float = 0.01,
            max_iterations: int = 1000):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the same
            cube name and will be separated based on the truth attribute.
            Optionally this may also contain a single land-sea mask cube on the
            same domain as the historic forecasts and truth (where land points
            are set to one and sea points are set to zero).
        distribution (str):
            The distribution that will be used for calibration. This will be
            dependant upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes. Only the first
            "=" separates the attribute from the value, so the value itself
            may contain "=".
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated using
            the calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this cycletime.
            This cycletime is in the format YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as the predictors.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score (CRPS)
            calculated by the minimisation. Once multiple iterations result in
            a CRPS equal to the same value within the specified tolerance, the
            minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation has
            converged to a stable solution. If the maximum number of iterations
            is reached but the minimisation has not yet converged to a stable
            solution, then the available solution is used anyway, and a warning
            is raised.
            If the predictor_of_mean is "realizations", then the number of
            iterations may require increasing, as there will be more
            coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.

    Raises:
        RuntimeError:
            An unexpected number of distinct cube names were passed in.
        RuntimeError:
            More than one cube was identified as a land-sea mask.
        RuntimeError:
            Missing truth or historical forecast in input cubes.
        ValueError:
            The truth_attribute is not in the format "attribute=value".

    """
    from improver.utilities.cube_manipulation import MergeCubes
    from improver.ensemble_calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration)

    # Group the input cubes by name, preserving the order they were given in.
    grouped_cubes = {}
    for cube in cubes:
        grouped_cubes.setdefault(cube.name(), []).append(cube)

    if len(grouped_cubes) == 1:
        # Only one group - all forecast/truth cubes
        land_sea_mask = None
        diag_name = list(grouped_cubes.keys())[0]
    elif len(grouped_cubes) == 2:
        # Two groups - the one with exactly one cube matching a name should
        # be the land_sea_mask, since we require more than 2 cubes in
        # the forecast/truth group. The sort is stable, so ties keep the
        # order in which the names were first seen.
        landsea_name, diag_name = sorted(
            grouped_cubes, key=lambda name: len(grouped_cubes[name]))
        if len(grouped_cubes[landsea_name]) != 1:
            raise RuntimeError('Expected one cube for land-sea mask.')
        land_sea_mask = grouped_cubes[landsea_name][0]
    else:
        raise RuntimeError('Must have cubes with 1 or 2 distinct names.')

    # Split only on the first "=", so that attribute values that themselves
    # contain "=" are handled, and fail clearly if no "=" is present.
    truth_key, separator, truth_value = truth_attribute.partition('=')
    if not separator:
        raise ValueError(
            'The truth_attribute must be in the format "attribute=value".')

    # split non-land_sea_mask cubes on forecast vs truth
    input_cubes = grouped_cubes[diag_name]
    grouped_cubes = {'truth': [], 'historical forecast': []}
    for cube in input_cubes:
        if cube.attributes.get(truth_key) == truth_value:
            grouped_cubes['truth'].append(cube)
        else:
            grouped_cubes['historical forecast'].append(cube)

    missing_inputs = ' and '.join(k for k, v in grouped_cubes.items() if not v)
    if missing_inputs:
        raise RuntimeError('Missing ' + missing_inputs + ' input.')

    truth = MergeCubes()(grouped_cubes['truth'])
    forecast = MergeCubes()(grouped_cubes['historical forecast'])

    return EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        tolerance=tolerance, max_iterations=max_iterations).process(
            forecast, truth, landsea_mask=land_sea_mask)
def process(historic_forecast, truth, combined, historic_forecast_dict,
            truth_dict, distribution, cycletime, units=None,
            predictor_of_mean='mean', max_iterations=1000):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Handles the arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The historical forecasts and the historical truths
    used in calibration are supplied either directly, or as a combined
    cubelist together with the metadata needed to separate it.
    The estimated coefficients are output as a cube.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used for calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        combined (iris.cube.CubeList):
            A cubelist containing a combination of historic forecasts and
            associated truths.
        historic_forecast_dict (dict):
            Dictionary specifying the metadata that defines the historic
            forecast. For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_ens"
                    }
                }
        truth_dict (dict):
            Dictionary specifying the metadata that defines the truth.
            For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_det"
                    }
                }
        distribution (str):
            The distribution that will be used for calibration. This will be
            dependant upon the input phenomenon.
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated using
            the calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this cycletime.
            This cycletime is in the format YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
            Default is None.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as the predictors.
            Default is 'mean'.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation has
            converged to a stable solution. If the maximum number of iterations
            is reached but the minimisation has not yet converged to a stable
            solution, then the available solution is used anyway, and a warning
            is raised.
            If the predictor_of_mean is "realizations", then the number of
            iterations may require increasing, as there will be more
            coefficients to solve.
            Default is 1000.

    Returns:
        result (iris.cube.Cube or None):
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate. If no historic forecasts or
            truths are found then None is returned.

    Raises:
        ValueError: If the historic forecast and truth inputs are specified,
            then the combined input, historic forecast dictionary and truth
            dictionary should not be specified.
        ValueError: If one of the historic forecast or truth inputs are
            specified, then they should both be specified.
        ValueError: All of the combined_filepath, historic_forecast_identifier
            and truth_identifier arguments should be specified if one of the
            arguments are specified.

    Warns:
        UserWarning: The metadata to identify the desired historic forecast or
            truth has found nothing matching the metadata information supplied.

    """
    separate_inputs = [historic_forecast, truth]
    combined_inputs = [combined, historic_forecast_dict, truth_dict]

    if any(separate_inputs):
        # When either of the historic forecast or truth is supplied, both
        # are required.
        if not all(separate_inputs):
            msg = ("Both the historic_filepath and truth_filepath arguments "
                   "should be specified if one of these arguments are "
                   "specified.")
            raise ValueError(msg)
        # The combined input and its dictionaries are invalid alongside
        # separately supplied historic forecast and truth inputs.
        if any(combined_inputs):
            msg = ("If the historic_filepath and truth_filepath arguments "
                   "are specified then none of the the combined_filepath, "
                   "historic_forecast_identifier and truth_identifier "
                   "arguments should be specified.")
            raise ValueError(msg)

    # All three of the combined inputs are needed to separate the combined
    # input into the historic forecasts and truths.
    if any(combined_inputs) and not all(combined_inputs):
        msg = ("All of the combined_filepath, "
               "historic_forecast_identifier and truth_identifier "
               "arguments should be specified if one of the arguments are "
               "specified.")
        raise ValueError(msg)

    if combined is not None:
        try:
            historic_forecast, truth = SplitHistoricForecastAndTruth(
                historic_forecast_dict, truth_dict).process(combined)
        except ValueError as err:
            # Only the error signalling that nothing matched the supplied
            # metadata is downgraded to a warning; anything else propagates.
            if not str(err).startswith(
                    "The metadata to identify the desired"):
                raise
            warnings.warn(str(err))
            return None

    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        max_iterations=max_iterations)
    return estimator.process(historic_forecast, truth)
def main(argv=None):
    """Load in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). 2 sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are written to a netCDF file.

    Args:
        argv (list of str or None):
            Command line arguments handed to the parser's ``parse_args``.
            Defaults to None, which is passed through to the parser
            unchanged.

    The positional arguments parsed are: DISTRIBUTION, CYCLETIME,
    HISTORIC_FILEPATH, TRUTH_FILEPATH and OUTPUT_FILEPATH. The optional
    arguments are --units, --predictor_of_mean and --max_iterations.
    """
    parser = ArgParser(
        description='Estimate coefficients for Ensemble Model Output '
                    'Statistics (EMOS), otherwise known as Non-homogeneous '
                    'Gaussian Regression (NGR)')
    parser.add_argument('distribution', metavar='DISTRIBUTION',
                        choices=['gaussian', 'truncated gaussian'],
                        help='The distribution that will be used for '
                             'calibration. This will be dependent upon the '
                             'input phenomenon. This has to be supported by '
                             'the minimisation functions in '
                             'ContinuousRankedProbabilityScoreMinimisers.')
    parser.add_argument('cycletime', metavar='CYCLETIME', type=str,
                        help='This denotes the cycle at which forecasts '
                             'will be calibrated using the calculated '
                             'EMOS coefficients. The validity time in the '
                             'output coefficients cube will be calculated '
                             'relative to this cycletime. '
                             'This cycletime is in the format '
                             'YYYYMMDDTHHMMZ.')
    # Filepaths for historic and truth data.
    parser.add_argument('historic_filepath', metavar='HISTORIC_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic forecast(s) used for calibration.')
    parser.add_argument('truth_filepath', metavar='TRUTH_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic truth analyses used for calibration.')
    parser.add_argument('output_filepath', metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument('--units', metavar='UNITS',
                        help='The units that calibration should be undertaken '
                             'in. The historical forecast and truth will be '
                             'converted as required.')
    parser.add_argument('--predictor_of_mean', metavar='PREDICTOR_OF_MEAN',
                        choices=['mean', 'realizations'], default='mean',
                        help='String to specify the predictor used to '
                             'calibrate the forecast mean. Currently the '
                             'ensemble mean ("mean") and the ensemble '
                             'realizations ("realizations") are supported as '
                             'options. Default: "mean".')
    # NOTE: the literal ending "a warning is raised. " carries a trailing
    # space so that the adjacent literals do not run together in --help.
    parser.add_argument('--max_iterations', metavar='MAX_ITERATIONS',
                        type=np.int32, default=1000,
                        help='The maximum number of iterations allowed '
                             'until the minimisation has converged to a '
                             'stable solution. If the maximum number '
                             'of iterations is reached, but the '
                             'minimisation has not yet converged to a '
                             'stable solution, then the available solution '
                             'is used anyway, and a warning is raised. '
                             'This may be modified for testing purposes '
                             'but otherwise kept fixed. If the '
                             'predictor_of_mean is "realizations", '
                             'then the number of iterations may require '
                             'increasing, as there will be more coefficients '
                             'to solve for.')

    args = parser.parse_args(args=argv)

    # Load the two inputs required for the calibration.
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Estimate coefficients using Ensemble Model Output Statistics (EMOS).
    estcoeffs = EstimateCoefficientsForEnsembleCalibration(
        args.distribution, args.cycletime, desired_units=args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations)
    coefficients = estcoeffs.process(historic_forecast, truth)

    save_netcdf(coefficients, args.output_filepath)