示例#1
0
    def setUp(self):
        """Create a forecast cube of ones and two coefficients cubes.

        The forecast cube is built from float32 data of shape (3, 3, 3)
        with realizations 0, 1 and 2. The first coefficients cube comes
        from an estimator created without a predictor_of_mean_flag; the
        second from an estimator using the "realizations" predictor.
        """
        forecast = set_up_variable_cube(
            np.ones((3, 3, 3), dtype=np.float32), realizations=[0, 1, 2])
        self.current_temperature_forecast_cube = forecast

        cycle = "20171110T0000Z"

        # Four coefficients, supplied as a plain list.
        mean_coeffs = [
            4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00]
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            mean_coeffs, forecast)

        # Six coefficients, supplied as a numpy array.
        realization_coeffs = np.array([
            4.55819380e-06, -8.02401974e-09, 1.66667055e+00,
            1.00000011e+00, 1.00000011e+00, 1.00000011e+00])
        realization_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_realizations = (
            realization_estimator.create_coefficients_cube(
                realization_coeffs, forecast))
示例#2
0
    def setUp(self):
        """Create a Kelvin forecast cube and two coefficients cubes.

        The forecast data are nine evenly spaced values from -45 to 45,
        tiled to shape (3, 3, 3) and shifted by 273.15. The three leading
        slices are then offset by -2, +2 and +4 respectively before the
        data are cast to float32.
        """
        values = np.tile(np.linspace(-45.0, 45.0, 9), 3).reshape(3, 3, 3)
        values = values + 273.15
        # Apply a different offset to each leading slice so they differ.
        for index, offset in enumerate((-2, 2, 4)):
            values[index] += offset
        forecast = set_up_variable_cube(
            values.astype(np.float32), units="Kelvin", realizations=[0, 1, 2])
        self.current_temperature_forecast_cube = forecast

        cycle = "20171110T0000Z"

        # Coefficients cube from an estimator with no explicit predictor.
        mean_coeffs = [
            4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00]
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            mean_coeffs, forecast)

        # Coefficients cube from the "realizations" predictor.
        realization_coeffs = np.array([5, 1, 0, 0.57, 0.6, 0.6])
        realization_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_realizations = (
            realization_estimator.create_coefficients_cube(
                realization_coeffs, forecast))
    def setUp(self):
        """Create the three coefficients cubes used by the tests.

        One cube uses the ensemble mean as the predictor and two use the
        ensemble realizations (coefficients estimated with and without
        statsmodels). The coefficient values are the expected outputs of
        the coefficient-estimation tests, taken from the
        SetupExpectedCoefficients class, and all derive from the same
        underlying ensemble realizations, so applying them is expected to
        give similar results.
        """
        super().setUp()
        cycle = "20171110T0000Z"
        forecast = self.current_temperature_forecast_cube

        # Ensemble mean as the predictor.
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            self.expected_mean_predictor_gaussian, forecast)

        # Ensemble realizations as the predictor, coefficients estimated
        # using statsmodels.
        statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_statsmodels_realizations = (
            statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_statsmodels, forecast))

        # Ensemble realizations as the predictor, coefficients estimated
        # without using statsmodels.
        no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_no_statsmodels_realizations = (
            no_statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_no_statsmodels, forecast))
示例#4
0
def process(historic_forecast, truth, combined, historic_forecast_dict,
            truth_dict, distribution, cycletime, units=None,
            predictor_of_mean='mean', max_iterations=1000):
    """Estimate coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The historic forecasts and truths are supplied either directly
    (historic_forecast and truth) or as a combined cubelist together with
    the two metadata dictionaries used to separate it. The two ways of
    supplying the inputs are mutually exclusive.

    Args:
        historic_forecast (iris.cube.Cube):
            Historical forecasts used for calibration.
        truth (iris.cube.Cube):
            Truth used for calibration.
        combined (iris.cube.CubeList):
            Cubelist mixing historic forecasts and associated truths.
        historic_forecast_dict (dict):
            Metadata identifying the historic forecast within the
            combined input. For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_ens"
                    }
                }
        truth_dict (dict):
            Metadata identifying the truth within the combined input.
            For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_det"
                    }
                }
        distribution (str):
            Distribution used for calibration; this depends upon the
            input phenomenon.
        cycletime (str):
            Cycle at which forecasts will be calibrated using the
            calculated EMOS coefficients, in the format YYYYMMDDTHHMMZ.
            The validity time in the output coefficients cube is
            calculated relative to this cycletime.
        units (str):
            Units that calibration should be undertaken in. The
            historical forecast and truth are converted as required.
            Default is None.
        predictor_of_mean (str):
            Input used to calculate the calibrated mean. The ensemble
            mean ("mean") and the ensemble realizations ("realizations")
            are supported. Default is 'mean'.
        max_iterations (int):
            Maximum number of iterations allowed for the minimisation to
            converge to a stable solution. If this limit is reached
            without convergence, the available solution is used anyway
            and a warning is raised. When predictor_of_mean is
            "realizations" this may need increasing, as there are more
            coefficients to solve. Default is 1000.

    Returns:
        result (iris.cube.Cube or None):
            Cube containing the coefficients estimated using EMOS, with
            a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate. None is returned if
            no historic forecasts or truths are found in the combined
            input.

    Raises:
        ValueError: If the historic forecast and truth are both given
            alongside any of the combined input, historic forecast
            dictionary or truth dictionary.
        ValueError: If only one of the historic forecast and truth is
            given.
        ValueError: If some, but not all, of the combined input,
            historic forecast dictionary and truth dictionary are given.

    Warns:
        UserWarning: The metadata to identify the desired historic
            forecast or truth has found nothing matching the metadata
            information supplied.

    """
    direct_inputs = (historic_forecast, truth)
    combined_inputs = (combined, historic_forecast_dict, truth_dict)

    # Directly supplied inputs must come as a pair, and exclude every one
    # of the inputs associated with the combined route.
    if any(direct_inputs):
        if not all(direct_inputs):
            raise ValueError(
                "Both the historic_filepath and truth_filepath arguments "
                "should be specified if one of these arguments are "
                "specified.")
        if any(combined_inputs):
            raise ValueError(
                "If the historic_filepath and truth_filepath arguments "
                "are specified then none of the the combined_filepath, "
                "historic_forecast_identifier and truth_identifier "
                "arguments should be specified.")

    # The combined route needs all three inputs, as the dictionaries are
    # required to separate the combined input into forecasts and truths.
    if any(combined_inputs) and not all(combined_inputs):
        raise ValueError(
            "All of the combined_filepath, "
            "historic_forecast_identifier and truth_identifier "
            "arguments should be specified if one of the arguments are "
            "specified.")

    if combined is not None:
        splitter = SplitHistoricForecastAndTruth(
            historic_forecast_dict, truth_dict)
        try:
            historic_forecast, truth = splitter.process(combined)
        except ValueError as err:
            # Only the "nothing matched the metadata" failure is
            # downgraded to a warning; anything else propagates.
            reason = str(err)
            if not reason.startswith("The metadata to identify the desired"):
                raise
            warnings.warn(reason)
            return None

    plugin = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        max_iterations=max_iterations)
    return plugin.process(historic_forecast, truth)
def process(*cubes: cli.inputcube,
            distribution,
            truth_attribute,
            cycletime,
            units=None,
            predictor_of_mean='mean',
            tolerance: float = 0.01,
            max_iterations: int = 1000):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the same
            cube name and will be separated based on the truth attribute.
            Optionally this may also contain a single land-sea mask cube on the
            same domain as the historic forecasts and truth (where land points
            are set to one and sea points are set to zero).
        distribution (str):
            The distribution that will be used for calibration. This will be
            dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes. The string is
            split on the first "=" only, so the value may itself contain
            "=" characters.
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated using
            the calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this cycletime.
            This cycletime is in the format YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as the predictors.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score (CRPS)
            calculated by the minimisation. Once multiple iterations result in
            a CRPS equal to the same value within the specified tolerance, the
            minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation has
            converged to a stable solution. If the maximum number of iterations
            is reached but the minimisation has not yet converged to a stable
            solution, then the available solution is used anyway, and a warning
            is raised.
            If the predictor_of_mean is "realizations", then the number of
            iterations may require increasing, as there will be more
            coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.

    Raises:
        RuntimeError:
            An unexpected number of distinct cube names were passed in.
        RuntimeError:
            More than one cube was identified as a land-sea mask.
        RuntimeError:
            Missing truth or historical forecast in input cubes.
        ValueError:
            The truth_attribute does not contain an "=" separator.

    """

    from improver.utilities.cube_manipulation import MergeCubes
    from improver.ensemble_calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration)

    # Group the inputs by cube name, preserving the order in which each
    # name was first encountered.
    grouped_cubes = {}
    for cube in cubes:
        grouped_cubes.setdefault(cube.name(), []).append(cube)

    if len(grouped_cubes) == 1:
        # Only one group - all forecast/truth cubes
        land_sea_mask = None
        diag_name = next(iter(grouped_cubes))
    elif len(grouped_cubes) == 2:
        # Two groups - the one with exactly one cube matching a name should
        # be the land_sea_mask, since we require more than 2 cubes in
        # the forecast/truth group. The sort is stable, so groups of equal
        # size keep their first-seen order.
        landsea_name, diag_name = sorted(
            grouped_cubes, key=lambda name: len(grouped_cubes[name]))
        if len(grouped_cubes[landsea_name]) != 1:
            raise RuntimeError('Expected one cube for land-sea mask.')
        land_sea_mask = grouped_cubes[landsea_name][0]
    else:
        raise RuntimeError('Must have cubes with 1 or 2 distinct names.')

    # Split on the first "=" only so that attribute values containing "="
    # are handled, and fail with a descriptive message when the separator
    # is missing rather than with an opaque unpacking error.
    truth_key, separator, truth_value = truth_attribute.partition('=')
    if not separator:
        raise ValueError(
            'The truth_attribute must be in the format "attribute=value", '
            'but received: {!r}'.format(truth_attribute))

    # split non-land_sea_mask cubes on forecast vs truth
    input_cubes = grouped_cubes[diag_name]
    grouped_cubes = {'truth': [], 'historical forecast': []}
    for cube in input_cubes:
        if cube.attributes.get(truth_key) == truth_value:
            grouped_cubes['truth'].append(cube)
        else:
            grouped_cubes['historical forecast'].append(cube)

    missing_inputs = ' and '.join(k for k, v in grouped_cubes.items() if not v)
    if missing_inputs:
        raise RuntimeError('Missing ' + missing_inputs + ' input.')

    truth = MergeCubes()(grouped_cubes['truth'])
    forecast = MergeCubes()(grouped_cubes['historical forecast'])

    return EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        tolerance=tolerance, max_iterations=max_iterations).process(
            forecast, truth, landsea_mask=land_sea_mask)
示例#6
0
    def setUp(self):
        """Create coefficients cubes plus expected parameter data and cubes.

        One coefficients cube uses the ensemble mean as the predictor and
        two use the ensemble realizations (coefficients estimated with and
        without statsmodels). The coefficient values are the expected
        outputs of the coefficient-estimation tests, taken from the
        SetupExpectedCoefficients class, and all derive from the same
        underlying ensemble realizations, so applying them is expected to
        give similar results.
        """
        super().setUp()
        cycle = "20171110T0000Z"
        forecast = self.current_temperature_forecast_cube

        # Ensemble mean as the predictor.
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            self.expected_mean_predictor_gaussian, forecast)

        # Ensemble realizations as the predictor, coefficients estimated
        # using statsmodels.
        statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_statsmodels_realizations = (
            statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_statsmodels, forecast))

        # Ensemble realizations as the predictor, coefficients estimated
        # without using statsmodels.
        no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_no_statsmodels_realizations = (
            no_statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_no_statsmodels, forecast))

        # Expected float32 data shared by various tests.
        self.expected_loc_param_mean = np.array(
            [[273.7854, 274.6913, 275.4461],
             [276.8652, 277.6502, 278.405],
             [279.492, 280.1562, 280.9715]],
            dtype=np.float32)
        self.expected_scale_param_mean = np.array(
            [[0.1952, 0.1974, 0.0117],
             [0.0226, 0.0197, 0.0117],
             [0.0532, 0.0029, 0.0007]],
            dtype=np.float32)
        self.expected_loc_param_statsmodels_realizations = np.array(
            [[274.1395, 275.0975, 275.258],
             [276.9771, 277.3487, 278.3144],
             [280.0085, 280.2506, 281.1632]],
            dtype=np.float32)
        self.expected_loc_param_no_statsmodels_realizations = np.array(
            [[273.4695, 274.4673, 275.3034],
             [276.8648, 277.733, 278.5632],
             [279.7562, 280.4913, 281.3889]],
            dtype=np.float32)

        # Output cubes wrapping the expected mean-predictor data.
        self.expected_loc_param_mean_cube = set_up_variable_cube(
            self.expected_loc_param_mean, name="location_parameter",
            units="K", attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
        self.expected_scale_param_mean_cube = set_up_variable_cube(
            self.expected_scale_param_mean, name="scale_parameter",
            units="Kelvin^2", attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
示例#7
0
    def setUp(self):
        """Create coefficients cubes and expected calibrated values.

        One coefficients cube uses the ensemble mean as the predictor and
        two use the ensemble realizations (coefficients estimated with and
        without statsmodels). The coefficient values are the expected
        outputs of the coefficient-estimation tests, taken from the
        SetupExpectedCoefficients class, and all derive from the same
        underlying ensemble realizations, so applying them is expected to
        give similar results.
        """
        super().setUp()
        cycle = "20171110T0000Z"
        forecast = self.current_temperature_forecast_cube

        # Ensemble mean as the predictor.
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            self.expected_mean_predictor_gaussian, forecast)

        # Ensemble realizations as the predictor, coefficients estimated
        # using statsmodels.
        statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_statsmodels_realizations = (
            statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_statsmodels, forecast))

        # Ensemble realizations as the predictor, coefficients estimated
        # without using statsmodels.
        no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius",
            predictor_of_mean_flag="realizations")
        self.coeffs_from_no_statsmodels_realizations = (
            no_statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_no_statsmodels, forecast))

        # Expected predictor and variance data shared by various tests.
        self.expected_calibrated_predictor_mean = np.array(
            [[273.7854, 274.6913, 275.4461],
             [276.8652, 277.6502, 278.405],
             [279.492, 280.1562, 280.9715]])
        self.expected_calibrated_variance_mean = np.array(
            [[0.1952, 0.1974, 0.0117],
             [0.0226, 0.0197, 0.0117],
             [0.0532, 0.0029, 0.0007]])
        self.expected_calibrated_predictor_statsmodels_realizations = np.array(
            [[274.1395, 275.0975, 275.258],
             [276.9771, 277.3487, 278.3144],
             [280.0085, 280.2506, 281.1632]])
        self.expected_calibrated_variance_statsmodels_realizations = np.array(
            [[0.8973, 0.9073, 0.0536],
             [0.1038, 0.0904, 0.0536],
             [0.2444, 0.0134, 0.0033]])
        self.expected_calibrated_predictor_no_statsmodels_realizations = (
            np.array(
                [[273.4695, 274.4673, 275.3034],
                 [276.8648, 277.733, 278.5632],
                 [279.7562, 280.4913, 281.3889]]))
        self.expected_calibrated_variance_no_statsmodels_realizations = (
            np.array(
                [[0.9344, 0.9448, 0.0558],
                 [0.1081, 0.0941, 0.0558],
                 [0.2545, 0.0139, 0.0035]]))
示例#8
0
def main(argv=None):
    """Estimate EMOS coefficients from the command line.

    Load in arguments for estimating coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). 2 sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration). The
    estimated coefficients are written to a netCDF file.

    Args:
        argv (list of str or None):
            Command line arguments handed to the parser's parse_args.
            Default is None.
    """
    parser = ArgParser(
        description='Estimate coefficients for Ensemble Model Output '
                    'Statistics (EMOS), otherwise known as Non-homogeneous '
                    'Gaussian Regression (NGR)')
    parser.add_argument('distribution', metavar='DISTRIBUTION',
                        choices=['gaussian', 'truncated gaussian'],
                        help='The distribution that will be used for '
                             'calibration. This will be dependent upon the '
                             'input phenomenon. This has to be supported by '
                             'the minimisation functions in '
                             'ContinuousRankedProbabilityScoreMinimisers.')
    parser.add_argument('cycletime', metavar='CYCLETIME', type=str,
                        help='This denotes the cycle at which forecasts '
                             'will be calibrated using the calculated '
                             'EMOS coefficients. The validity time in the '
                             'output coefficients cube will be calculated '
                             'relative to this cycletime. '
                             'This cycletime is in the format '
                             'YYYYMMDDTHHMMZ.')
    # Filepaths for historic and truth data.
    parser.add_argument('historic_filepath', metavar='HISTORIC_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic forecast(s) used for calibration.')
    parser.add_argument('truth_filepath', metavar='TRUTH_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic truth analyses used for calibration.')
    parser.add_argument('output_filepath', metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument('--units', metavar='UNITS',
                        help='The units that calibration should be undertaken '
                             'in. The historical forecast and truth will be '
                             'converted as required.')
    parser.add_argument('--predictor_of_mean', metavar='PREDICTOR_OF_MEAN',
                        choices=['mean', 'realizations'], default='mean',
                        help='String to specify the predictor used to '
                             'calibrate the forecast mean. Currently the '
                             'ensemble mean ("mean") and the ensemble '
                             'realizations ("realizations") are supported as '
                             'options. Default: "mean".')
    # NOTE: adjacent string literals are joined with no separator, so every
    # fragment that ends a sentence needs its own trailing space.
    parser.add_argument('--max_iterations', metavar='MAX_ITERATIONS',
                        type=np.int32, default=1000,
                        help='The maximum number of iterations allowed '
                             'until the minimisation has converged to a '
                             'stable solution. If the maximum number '
                             'of iterations is reached, but the '
                             'minimisation has not yet converged to a '
                             'stable solution, then the available solution '
                             'is used anyway, and a warning is raised. '
                             'This may be modified for testing purposes '
                             'but otherwise kept fixed. If the '
                             'predictor_of_mean is "realizations", '
                             'then the number of iterations may require '
                             'increasing, as there will be more coefficients '
                             'to solve for.')
    args = parser.parse_args(args=argv)

    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Estimate coefficients using Ensemble Model Output Statistics (EMOS).
    estcoeffs = EstimateCoefficientsForEnsembleCalibration(
        args.distribution, args.cycletime, desired_units=args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations)
    coefficients = estcoeffs.process(historic_forecast, truth)

    save_netcdf(coefficients, args.output_filepath)