def test_truncated_gaussian_realizations_data_check(self):
    """
    Check the calibrated predictor and variance data produced for the
    wind speed cubes with a truncated gaussian distribution, when the
    ensemble realizations are requested as the predictor.
    """
    plugin = Plugin(
        "truncated gaussian", predictor_of_mean_flag="realizations")
    predictor_cube, variance_cube = plugin.process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)

    # Primary check against the values expected for the realizations
    # predictor.
    self.assertCalibratedVariablesAlmostEqual(
        predictor_cube.data,
        self.expected_truncated_gaussian_realization_no_statsmodels)
    self.assertCalibratedVariablesAlmostEqual(
        variance_cube.data,
        self.expected_truncated_gaussian_variance_no_statsmodels)

    # Secondary, deliberately loose check (decimal=0) against the values
    # expected when the ensemble mean is the predictor: the two
    # predictors should broadly agree, but are not expected to match
    # exactly.
    self.assertArrayAlmostEqual(
        predictor_cube.data,
        self.expected_truncated_gaussian_mean_data,
        decimal=0)
    self.assertArrayAlmostEqual(
        variance_cube.data,
        self.expected_truncated_gaussian_variance_data,
        decimal=0)
def test_temperature_data_check(self):
    """
    Check the data of the first cube in each of the two items returned
    by the plugin for the temperature cubes with a gaussian
    distribution. The first item is compared to the expected predictor
    and the second to the expected variance. No predictor flag is
    passed, so the plugin's default predictor is used.
    """
    expected_predictor = np.array(
        [[231.15002892, 242.40003015, 253.65003137],
         [264.9000326, 276.15003383, 287.40003505],
         [298.65003628, 309.90003751, 321.15003874]])
    # The expected variance is the same value at every grid point.
    expected_variance = np.full((3, 3), 2.07777316e-11)

    plugin = Plugin(
        "ensemble model output_statistics", "gaussian", "degreesC")
    calibrated = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)

    self.assertArrayAlmostEqual(calibrated[0][0].data, expected_predictor)
    self.assertArrayAlmostEqual(calibrated[1][0].data, expected_variance)
def test_wind_speed_data_check(self):
    """
    Check the data of the first cube in each of the two items returned
    by the plugin for the wind speed cubes with a truncated gaussian
    distribution. The first item is compared to the expected predictor
    and the second to the expected variance. No predictor flag is
    passed, so the plugin's default predictor is used.
    """
    expected_predictor = np.array(
        [[2.9999862, 10.49998827, 17.99999034],
         [25.4999924, 32.99999447, 40.49999654],
         [47.99999861, 55.50000068, 63.00000275]])
    # The expected variance is zero at every grid point.
    expected_variance = np.zeros((3, 3))

    plugin = Plugin(
        "ensemble model output_statistics", "truncated gaussian", "m s^-1")
    calibrated = plugin.process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)

    self.assertArrayAlmostEqual(calibrated[0][0].data, expected_predictor)
    self.assertArrayAlmostEqual(calibrated[1][0].data, expected_variance)
def test_basic_truncated_gaussian(self):
    """
    Check that processing the wind speed cubes with a truncated gaussian
    distribution returns a tuple containing two items. No predictor flag
    is passed, so the plugin's default predictor is used.
    """
    outputs = Plugin("truncated gaussian").process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)
    self.assertIsInstance(outputs, tuple)
    self.assertEqual(len(outputs), 2)
def test_temperature_realizations_data_check(self):
    """
    Check the calibrated predictor and variance data produced for the
    temperature cubes with a gaussian distribution, when the ensemble
    realizations are requested as the predictor.

    Two sets of expected values are held: one used when statsmodels can
    be imported and one used when it cannot. When the import succeeds,
    the module is also stored on the test instance as ``self.sm``.
    """
    # Probe for statsmodels by importing it directly. The previous
    # implementation relied on the ``imp`` module, which is deprecated
    # and was removed in Python 3.12.
    try:
        import statsmodels.api as sm
    except ImportError:
        statsmodels_found = False
    else:
        statsmodels_found = True
        self.sm = sm

    if statsmodels_found:
        predictor_data = np.array(
            [[231.1493, 242.3992, 253.6492],
             [264.8991, 276.149, 287.399],
             [298.649, 309.8989, 321.1488]],
            dtype=np.float32)
        variance_data = np.array(
            [[0.000001, 0.000001, 0.000001],
             [0.000001, 0.000001, 0.000001],
             [0.000001, 0.000001, 0.000001]],
            dtype=np.float32)
    else:
        predictor_data = np.array(
            [[230.53659896, 241.80363361, 253.07066826],
             [264.33770292, 275.60473757, 286.87177222],
             [298.13880687, 309.40584153, 320.67287618]],
            dtype=np.float32)
        variance_data = np.array(
            [[18.04589231, 18.04589231, 18.04589231],
             [18.04589231, 18.04589231, 18.04589231],
             [18.04589231, 18.04589231, 18.04589231]],
            dtype=np.float32)

    calibration_method = "ensemble model output_statistics"
    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "realizations"
    plugin = Plugin(
        calibration_method, distribution, desired_units,
        predictor_of_mean_flag=predictor_of_mean_flag)
    calibrated_predictor, calibrated_variance = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertArrayAlmostEqual(
        calibrated_predictor.data, predictor_data, decimal=4)
    self.assertArrayAlmostEqual(
        calibrated_variance.data, variance_data, decimal=4)
def test_alternative_calibration_name(self):
    """
    Check that a CubeList is returned when the calibration method is
    requested by the name "nonhomogeneous gaussian regression". No
    predictor flag is passed, so the plugin's default predictor is used.
    """
    plugin = Plugin(
        "nonhomogeneous gaussian regression", "gaussian", "degreesC")
    calibrated = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertIsInstance(calibrated, CubeList)
def test_basic_gaussian_realizations(self):
    """
    Check that processing the temperature cubes with a gaussian
    distribution returns a tuple containing two items when the ensemble
    realizations are requested as the predictor.
    """
    plugin = Plugin("gaussian", predictor_of_mean_flag="realizations")
    outputs = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertIsInstance(outputs, tuple)
    self.assertEqual(len(outputs), 2)
def test_basic_temperature(self):
    """
    Check that processing the temperature cubes with a gaussian
    distribution returns a CubeList containing two items. No predictor
    flag is passed, so the plugin's default predictor is used.
    """
    plugin = Plugin(
        "ensemble model output statistics", "gaussian", "degreesC")
    calibrated = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertIsInstance(calibrated, CubeList)
    self.assertEqual(len(calibrated), 2)
def test_basic_wind_speed(self):
    """
    Check that processing the wind speed cubes with a truncated gaussian
    distribution returns a tuple containing two items. No predictor flag
    is passed, so the plugin's default predictor is used.
    """
    plugin = Plugin(
        "ensemble model output_statistics", "truncated gaussian", "m s^-1")
    outputs = plugin.process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)
    self.assertIsInstance(outputs, tuple)
    self.assertEqual(len(outputs), 2)
def test_unknown_calibration_method(self):
    """
    Check that processing raises a ValueError whose message matches
    "unknown" when the requested calibration method is "unknown". No
    predictor flag is passed, so the plugin's default predictor is used.
    """
    calibration_method = "unknown"
    distribution = "gaussian"
    desired_units = "degreesC"
    plugin = Plugin(calibration_method, distribution, desired_units)

    # assertRaisesRegexp is a deprecated alias that was removed in
    # Python 3.12. Prefer assertRaisesRegex, and only fall back to the
    # old name on interpreters that do not provide the new one.
    assert_raises_regex = (
        getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp)

    msg = "unknown"
    with assert_raises_regex(ValueError, msg):
        plugin.process(
            self.current_temperature_forecast_cube,
            self.historic_temperature_forecast_cube,
            self.temperature_truth_cube)
def test_alternative_calibration_name(self):
    """
    Check that two iris cubes (the calibrated predictor and the
    calibrated variance) are returned when the calibration method is
    requested by the name "nonhomogeneous gaussian regression". No
    predictor flag is passed, so the plugin's default predictor is used.
    """
    plugin = Plugin(
        "nonhomogeneous gaussian regression", "gaussian", "degreesC")
    predictor_cube, variance_cube = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertIsInstance(predictor_cube, iris.cube.Cube)
    self.assertIsInstance(variance_cube, iris.cube.Cube)
def test_wind_speed_members_data_check(self):
    """
    Check the data of the first cube in each of the two items returned
    by the plugin for the wind speed cubes with a truncated gaussian
    distribution, when the ensemble members are requested as the
    predictor. The first item is compared to the expected predictor and
    the second to the expected variance.

    Two sets of expected values are held: one used when statsmodels can
    be imported and one used when it cannot. When the import succeeds,
    the module is also stored on the test instance as ``self.sm``.
    """
    # Probe for statsmodels by importing it directly. The previous
    # implementation relied on the ``imp`` module, which is deprecated
    # and was removed in Python 3.12, and also bound the result of
    # ``imp.find_module`` to a name that was immediately overwritten.
    try:
        import statsmodels.api as sm
    except ImportError:
        statsmodels_found = False
    else:
        statsmodels_found = True
        self.sm = sm

    if statsmodels_found:
        predictor_data = np.array(
            [[3.15758874, 10.63961216, 18.12163557],
             [25.60365899, 33.08568241, 40.56770583],
             [48.04972924, 55.53175266, 63.01377608]])
        variance_data = np.array(
            [[0.01406566, 0.01406566, 0.01406566],
             [0.01406566, 0.01406566, 0.01406566],
             [0.01406566, 0.01406566, 0.01406566]])
    else:
        predictor_data = np.array(
            [[2.05799912, 9.73470204, 17.41140496],
             [25.08810788, 32.7648108, 40.44151372],
             [48.11821664, 55.79491955, 63.47162247]])
        variance_data = np.array(
            [[4.26987243, 4.26987243, 4.26987243],
             [4.26987243, 4.26987243, 4.26987243],
             [4.26987243, 4.26987243, 4.26987243]])

    calibration_method = "ensemble model output_statistics"
    distribution = "truncated gaussian"
    desired_units = "m s^-1"
    predictor_of_mean_flag = "members"
    plugin = Plugin(
        calibration_method, distribution, desired_units,
        predictor_of_mean_flag=predictor_of_mean_flag)
    result = plugin.process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)
    self.assertArrayAlmostEqual(result[0][0].data, predictor_data)
    self.assertArrayAlmostEqual(result[1][0].data, variance_data)
def test_temperature_members_data_check(self):
    """
    Check the data of the first cube in each of the two items returned
    by the plugin for the temperature cubes with a gaussian
    distribution, when the ensemble members are requested as the
    predictor. The first item is compared to the expected predictor and
    the second to the expected variance.

    Two sets of expected values are held: one used when statsmodels can
    be imported and one used when it cannot. When the import succeeds,
    the module is also stored on the test instance as ``self.sm``.
    """
    # Probe for statsmodels by importing it directly. The previous
    # implementation relied on the ``imp`` module, which is deprecated
    # and was removed in Python 3.12, and also bound the result of
    # ``imp.find_module`` to a name that was immediately overwritten.
    try:
        import statsmodels.api as sm
    except ImportError:
        statsmodels_found = False
    else:
        statsmodels_found = True
        self.sm = sm

    if statsmodels_found:
        predictor_data = np.array(
            [[230.72248097, 241.94440325, 253.16632553],
             [264.38824782, 275.6101701, 286.83209238],
             [298.05401466, 309.27593695, 320.49785923]])
        variance_data = np.array(
            [[0.05635014, 0.05635014, 0.05635014],
             [0.05635014, 0.05635014, 0.05635014],
             [0.05635014, 0.05635014, 0.05635014]])
    else:
        predictor_data = np.array(
            [[230.53659896, 241.80363361, 253.07066826],
             [264.33770292, 275.60473757, 286.87177222],
             [298.13880687, 309.40584153, 320.67287618]])
        variance_data = np.array(
            [[18.04589231, 18.04589231, 18.04589231],
             [18.04589231, 18.04589231, 18.04589231],
             [18.04589231, 18.04589231, 18.04589231]])

    calibration_method = "ensemble model output_statistics"
    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "members"
    plugin = Plugin(
        calibration_method, distribution, desired_units,
        predictor_of_mean_flag=predictor_of_mean_flag)
    result = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertArrayAlmostEqual(result[0][0].data, predictor_data)
    self.assertArrayAlmostEqual(result[1][0].data, variance_data)
def test_truncated_gaussian_data_check(self):
    """
    Check the calibrated predictor and variance data produced for the
    wind speed cubes with a truncated gaussian distribution. No
    predictor flag is passed, so the plugin's default predictor is used.
    """
    predictor_cube, variance_cube = Plugin("truncated gaussian").process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)
    self.assertCalibratedVariablesAlmostEqual(
        predictor_cube.data,
        self.expected_truncated_gaussian_mean_data)
    self.assertCalibratedVariablesAlmostEqual(
        variance_cube.data,
        self.expected_truncated_gaussian_variance_data)
def test_gaussian_data_check_max_iterations(self):
    """
    Check the calibrated predictor and variance data produced for the
    temperature cubes with a gaussian distribution when the maximum
    number of iterations is explicitly set to 10000. No predictor flag
    is passed, so the plugin's default predictor is used.
    """
    plugin = Plugin("gaussian", max_iterations=10000)
    predictor_cube, variance_cube = plugin.process(
        self.current_temperature_forecast_cube,
        self.historic_temperature_forecast_cube,
        self.temperature_truth_cube)
    self.assertCalibratedVariablesAlmostEqual(
        predictor_cube.data, self.expected_gaussian_mean_data)
    self.assertCalibratedVariablesAlmostEqual(
        variance_cube.data, self.expected_gaussian_variance_data)
def test_wind_speed_data_check(self):
    """
    Check the calibrated predictor and variance data produced for the
    wind speed cubes with a truncated gaussian distribution. No
    predictor flag is passed, so the plugin's default predictor is used.
    """
    expected_predictor = np.array(
        [[2.9999862, 10.499988, 17.999989],
         [25.49999, 32.999992, 40.499992],
         [47.999996, 55.499996, 63.]],
        dtype=np.float32)
    # The expected variance is zero at every grid point.
    expected_variance = np.zeros((3, 3), dtype=np.float32)

    plugin = Plugin(
        "ensemble model output_statistics", "truncated gaussian", "m s^-1")
    predictor_cube, variance_cube = plugin.process(
        self.current_wind_speed_forecast_cube,
        self.historic_wind_speed_forecast_cube,
        self.wind_speed_truth_cube)

    self.assertArrayAlmostEqual(predictor_cube.data, expected_predictor)
    self.assertArrayAlmostEqual(variance_cube.data, expected_variance)
def main(argv=None):
    """Do ensemble calibration using the EnsembleCalibration plugin.

    The current forecast, historic forecast and truth are loaded from the
    file paths given on the command line. The current forecast is
    classified as percentiles, probabilities or realizations; percentiles
    and probabilities are converted to realizations before calibration.
    The calibrated predictor and variance are optionally saved, and are
    then converted back to the form of the input forecast and written to
    the output file path.

    Args:
        argv (list of str or None):
            Command line arguments handed to the argument parser. The
            default of None is passed straight through to the parser.

    Raises:
        ValueError: If the current forecast is provided as percentiles or
            probabilities and --num_realizations is not set.
    """
    parser = ArgParser(
        description='Apply the requested ensemble calibration method using '
                    'the current forecast (to be calibrated) in the form of '
                    'realizations, probabilities, or percentiles, historical '
                    'forecasts in the form of realizations and historical '
                    'truth data '
                    '(to use in calibration). The mean and variance output '
                    'from the '
                    'EnsembleCalibration plugin can be written to an output '
                    'file '
                    'if required. If the current forecast is supplied in the '
                    'form of '
                    'probabilities or percentiles, these are converted to '
                    'realizations '
                    'prior to calibration. After calibration, the mean and '
                    'variance '
                    'computed in the calibration are converted to match the '
                    'format of the '
                    'current forecast i.e. if realizations are input, '
                    'realizations '
                    'are output, if probabilities are input, probabilities '
                    'are output, '
                    # Trailing space added: the adjacent literals were
                    # previously joined as "output.If realizations".
                    'and if percentiles are input, percentiles are output. '
                    'If realizations are input, realizations are regenerated '
                    'using '
                    'Ensemble Copula Coupling.')
    # Arguments for EnsembleCalibration
    parser.add_argument(
        'units', metavar='UNITS_TO_CALIBRATE_IN',
        help='The unit that calibration should be undertaken in. The current '
             'forecast, historical forecast and truth will be converted as '
             'required.')
    parser.add_argument(
        'distribution', metavar='DISTRIBUTION',
        choices=['gaussian', 'truncated gaussian'],
        help='The distribution that will be used for calibration. This will '
             'be dependent upon the input phenomenon. This has to be '
             'supported by the minimisation functions in '
             'ContinuousRankedProbabilityScoreMinimisers.')
    # Filepaths for current, historic and truth data.
    parser.add_argument(
        'input_filepath', metavar='INPUT_FILE',
        help='A path to an input NetCDF file containing the current forecast '
             'to be processed. The file provided could be in the form of '
             'realizations, probabilities or percentiles.')
    parser.add_argument(
        'historic_filepath', metavar='HISTORIC_DATA_FILE',
        help='A path to an input NetCDF file containing the historic '
             'forecast(s) used for calibration. The file provided must be in '
             'the form of realizations.')
    parser.add_argument(
        'truth_filepath', metavar='TRUTH_DATA_FILE',
        help='A path to an input NetCDF file containing the historic truth '
             'analyses used for calibration.')
    parser.add_argument('output_filepath', metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--predictor_of_mean', metavar='CALIBRATE_MEAN_FLAG',
        choices=['mean', 'realizations'], default='mean',
        help='String to specify the input to calculate the calibrated mean. '
             'Currently the ensemble mean ("mean") and the ensemble '
             'realizations ("realizations") are supported as the predictors. '
             'Default: "mean".')
    parser.add_argument(
        '--save_mean', metavar='MEAN_FILE', default=False,
        help='Option to save the mean output from EnsembleCalibration plugin. '
             'If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--save_variance', metavar='VARIANCE_FILE', default=False,
        help='Option to save the variance output from EnsembleCalibration '
             'plugin. If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--num_realizations', metavar='NUMBER_OF_REALIZATIONS',
        default=None, type=np.int32,
        help='Optional argument to specify the number of '
             'ensemble realizations to produce. '
             'If the current forecast is input as probabilities or '
             'percentiles then this argument is used to create the requested '
             'number of realizations. In addition, this argument is used to '
             'construct the requested number of realizations from the mean '
             # Trailing space added: the adjacent literals were previously
             # joined as "coefficients.Default".
             'and variance output after applying the EMOS coefficients. '
             'Default will be the number of realizations in the raw input '
             'file, if realizations are provided as input, otherwise if the '
             'input format is probabilities or percentiles, then an error '
             'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering', default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
             'set, the ordering of the raw ensemble is used. This option is '
             'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed', metavar='RANDOM_SEED', default=None,
        help='Option to specify a value for the random seed for testing '
             'purposes, otherwise, the default random seed behaviour is '
             'utilised. The random seed is used in the generation of the '
             'random numbers used for either the random_ordering option to '
             'order the input percentiles randomly, rather than use the '
             'ordering from the raw ensemble, or for splitting tied values '
             'within the raw ensemble, so that the values from the input '
             'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning', default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
             'raise a warning rather than an exception. This occurs when the '
             'current forecast is in the form of probabilities and is '
             'converted to percentiles, as part of converting the input '
             'probabilities into realizations.')
    parser.add_argument(
        '--max_iterations', metavar='MAX_ITERATIONS',
        type=np.int32, default=1000,
        help='The maximum number of iterations allowed until the minimisation '
             'has converged to a stable solution. If the maximum number of '
             'iterations is reached, but the minimisation has not yet '
             'converged to a stable solution, then the available solution is '
             'used anyway, and a warning is raised. This may be modified for '
             'testing purposes but otherwise kept fixed. If the '
             'predictor_of_mean is "realizations", then the number of '
             'iterations may require increasing, as there will be more '
             'coefficients to solve for.')

    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.input_filepath)
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Keep an untouched copy: current_forecast may be rebound to a
    # converted cube below, while the original form is needed when the
    # output is converted back to probabilities or percentiles.
    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    # Classify the input: a percentile coordinate marks percentiles,
    # otherwise realizations are assumed. A cube name starting with
    # "probability_of" takes precedence over both.
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Ensemble-Calibration to calculate the mean and variance.
    forecast_predictor, forecast_variance = EnsembleCalibration(
        args.distribution, args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations).process(
            current_forecast, historic_forecast, truth)

    # If required, save the mean and variance.
    if args.save_mean:
        save_netcdf(forecast_predictor, args.save_mean)
    if args.save_variance:
        save_netcdf(forecast_variance, args.save_variance)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance, original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles, current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)