示例#1
0
    def process(self, current_forecast, historic_forecast, truth):
        """
        Calibrate the current forecast using coefficients estimated from a
        training period.

        Performs ensemble calibration through the following steps:
        1. Estimate optimised coefficients from training period.
        2. Apply optimised coefficients to current forecast.

        Parameters
        ----------
        current_forecast : Iris Cube or CubeList
            The Cube or CubeList that provides the input forecast for
            the current cycle.
        historic_forecast : Iris Cube or CubeList
            The Cube or CubeList that provides the input historic forecasts for
            calibration.
        truth : Iris Cube or CubeList
            The Cube or CubeList that provides the input truth for calibration
            with dates matching the historic forecasts.

        Returns
        -------
        *  calibrated_forecast_predictor_and_variance : CubeList
            CubeList containing the calibrated forecast predictor and
            calibrated forecast variance.

        Raises
        ------
        ValueError
            If self.calibration_method is neither "ensemble model output
            statistics" nor "nonhomogeneous gaussian regression" (case and
            underscores are ignored).
        ValueError
            If self.distribution is neither "gaussian" nor
            "truncated gaussian" (case and underscores are ignored).

        """
        def format_calibration_method(calibration_method):
            """Lowercase input string, and replace underscores with spaces."""
            return calibration_method.lower().replace("_", " ")

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        calibration_method = format_calibration_method(self.calibration_method)
        if calibration_method not in [
                "ensemble model output statistics",
                "nonhomogeneous gaussian regression"]:
            msg = ("Other calibration methods are not available. "
                   "{} is not available".format(calibration_method))
            raise ValueError(msg)

        # Fail explicitly for an unsupported distribution. Previously this
        # case fell through and failed later with an UnboundLocalError
        # because the coefficients had never been estimated.
        distribution = format_calibration_method(self.distribution)
        if distribution not in ["gaussian", "truncated gaussian"]:
            msg = ("The {} distribution is not supported. The supported "
                   "distributions are: gaussian, truncated gaussian".format(
                       distribution))
            raise ValueError(msg)

        ec = EstimateCoefficientsForEnsembleCalibration(
            self.distribution,
            self.desired_units,
            predictor_of_mean_flag=self.predictor_of_mean_flag)
        optimised_coeffs, coeff_names = (
            ec.estimate_coefficients_for_ngr(current_forecast,
                                             historic_forecast, truth))

        ac = ApplyCoefficientsFromEnsembleCalibration(
            current_forecast,
            optimised_coeffs,
            coeff_names,
            predictor_of_mean_flag=self.predictor_of_mean_flag)
        # The third item returned (the coefficients) is not part of the
        # output of this method.
        (calibrated_forecast_predictor, calibrated_forecast_variance,
         _) = ac.apply_params_entry()
        calibrated_forecast_predictor_and_variance = iris.cube.CubeList(
            [calibrated_forecast_predictor, calibrated_forecast_variance])
        return calibrated_forecast_predictor_and_variance
示例#2
0
    def __init__(self, distribution, current_cycle, desired_units=None,
                 predictor_of_mean_flag="mean"):
        """
        Create an ensemble calibration plugin that, for Nonhomogeneous Gaussian
        Regression, calculates coefficients based on historical forecasts and
        applies the coefficients to the current forecast.

        Args:
            distribution (str):
                Name of distribution. Assume that the current forecast can be
                represented using this distribution.
            current_cycle (str):
                The current cycle in YYYYMMDDTHHMMZ format e.g. 20171122T0100Z.
                This is used to create a forecast_reference_time coordinate
                on the resulting EMOS coefficients cube.

        Kwargs:
            desired_units (str or cf_units.Unit):
                The unit that you would like the calibration to be undertaken
                in. The current forecast, historical forecast and truth will be
                converted as required.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.

        Warns:
            ImportWarning: If statsmodels cannot be found and the
                predictor_of_mean_flag is "realizations".

        """
        self.distribution = distribution
        self.current_cycle = current_cycle
        self.desired_units = desired_units
        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)
        self.predictor_of_mean_flag = predictor_of_mean_flag
        self.minimiser = ContinuousRankedProbabilityScoreMinimisers()
        # Setting default values for coeff_names. Beta is the final
        # coefficient name in the list, as there can potentially be
        # multiple beta coefficients if the ensemble realizations, rather
        # than the ensemble mean, are provided as the predictor.
        self.coeff_names = ["gamma", "delta", "alpha", "beta"]

        # statsmodels is an optional dependency. The "imp" module that was
        # previously used for this check is deprecated and was removed in
        # Python 3.12, so importlib is used to look for the package instead.
        import importlib.util
        statsmodels_found = (
            importlib.util.find_spec("statsmodels") is not None)
        if statsmodels_found:
            import statsmodels.api as sm
            self.sm = sm
        elif predictor_of_mean_flag.lower() == "realizations":
            msg = (
                "The statsmodels can not be imported. "
                "Will not be able to calculate an initial guess from "
                "the individual ensemble realizations. "
                "A default initial guess will be used without "
                "estimating coefficients from a linear model.")
            warnings.warn(msg, ImportWarning)
        self.statsmodels_found = statsmodels_found
示例#3
0
    def test_foo(self):
        """
        Check that check_predictor_of_mean_flag raises a ValueError when
        given "foo", which is not one of the accepted values for the
        predictor_of_mean_flag.
        """
        expected_msg = "The requested value for the predictor_of_mean_flag"
        with self.assertRaisesRegex(ValueError, expected_msg):
            check_predictor_of_mean_flag("foo")
示例#4
0
    def test_realizations(self):
        """
        Test that the utility does not fail when the predictor_of_mean_flag
        is "realizations".
        """
        predictor_of_mean_flag = "realizations"

        try:
            check_predictor_of_mean_flag(predictor_of_mean_flag)
        except ValueError as err:
            # Report the name of the function that is actually called, and
            # keep the two sentences of the failure message separated.
            msg = ("check_predictor_of_mean_flag raised "
                   "ValueError unexpectedly. "
                   "Message is {}").format(err)
            self.fail(msg)
示例#5
0
    def __init__(
            self, current_forecast, coefficients_cube,
            predictor_of_mean_flag="mean"):
        """
        Set up the plugin that applies previously estimated Nonhomogeneous
        Gaussian Regression (EMOS) coefficients to the current forecast.

        The forecast_period, time and forecast_reference_time coordinates
        are compared between the two cubes. A coordinate that is missing
        from either cube is skipped.

        Args:
            current_forecast (iris.cube.Cube):
                The cube containing the current forecast.
            coefficients_cube (iris.cube.Cube):
                Cube containing the coefficients estimated using EMOS.
                The cube contains a coefficient_index dimension coordinate
                where the points of the coordinate are integer values and a
                coefficient_name auxiliary coordinate where the points of
                the coordinate are e.g. gamma, delta, alpha, beta.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.

        Raises:
            ValueError: If one of the compared coordinates is present on
                both cubes but differs between them.

        """
        self.current_forecast = current_forecast
        self.coefficients_cube = coefficients_cube

        coords_to_compare = (
            "forecast_period", "time", "forecast_reference_time")
        for coord_name in coords_to_compare:
            try:
                forecast_coord = self.current_forecast.coord(coord_name)
                coefficients_coord = self.coefficients_cube.coord(coord_name)
            except CoordinateNotFoundError:
                # Nothing to compare if either cube lacks this coordinate.
                continue
            if forecast_coord != coefficients_coord:
                template = ("The {} coordinate of the current forecast cube "
                            "and coefficients cube differs. "
                            "current forecast: {}, "
                            "coefficients cube: {}")
                raise ValueError(template.format(
                    coord_name, forecast_coord, coefficients_coord))

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)
        self.predictor_of_mean_flag = predictor_of_mean_flag
示例#6
0
    def apply_params_entry(self):
        """
        Derive the forecast predictor and the forecast variance from the
        current forecast, then hand them to self._apply_params together with
        the stored coefficients.

        The "ensemble_member_id" coordinate of the current forecast is
        renamed to "realization" and the forecast is concatenated before the
        predictor and variance are computed over "realization".

        Returns:
            (tuple) : tuple containing the three items returned by
            self._apply_params, in order:
                **calibrated_forecast_predictor**:
                    The calibrated version of the ensemble predictor.
                **calibrated_forecast_variance**:
                    The calibrated version of the ensemble variance.
                **calibrated_forecast_coefficients**:
                    The coefficients used for calibrating the ensemble.

        """
        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        rename_coordinate(
            self.current_forecast, "ensemble_member_id", "realization")
        forecast_cube = concatenate_cubes(self.current_forecast)

        predictor_choice = self.predictor_of_mean_flag.lower()
        if predictor_choice == "mean":
            predictors = forecast_cube.collapsed(
                "realization", iris.analysis.MEAN)
        elif predictor_choice == "members":
            # NOTE(review): this branch matches "members"; confirm that
            # check_predictor_of_mean_flag accepts that value.
            predictors = forecast_cube

        variances = forecast_cube.collapsed(
            "realization", iris.analysis.VARIANCE)

        # NOTE(review): self.optimised_coeffs and self.coeff_names are read
        # here but are set outside this method - confirm where.
        predictor, variance, coefficients = self._apply_params(
            predictors, variances, self.optimised_coeffs,
            self.coeff_names, self.predictor_of_mean_flag)
        return (predictor, variance, coefficients)
    def crps_minimiser_wrapper(self, initial_guess, forecast_predictor, truth,
                               forecast_var, predictor_of_mean_flag,
                               distribution):
        """
        Function to pass a given minimisation function to the scipy minimize
        function to estimate optimised values for the coefficients.

        The inputs are converted to float32 numpy arrays and the minimisation
        is run with the Nelder-Mead method for at most self.MAX_ITERATIONS
        iterations.

        Args:
            initial_guess (List):
                List of initial guesses for the coefficients.
                Order of coefficients is [c, d, a, b].
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_var (iris.cube.Cube):
                Cube containg the field containing the ensemble variance.
            predictor_of_mean_flag (String):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            distribution (String):
                String used to access the appropriate minimisation function
                within self.minimisation_dict.

        Returns:
            optimised_coeffs (numpy array):
                The optimised coefficients i.e. the "x" attribute of the
                result returned by the scipy minimize function.
                Order of coefficients is [c, d, a, b].

        Raises:
            KeyError: If the distribution is not a key of
                self.minimisation_dict.

        Warns:
            Warning: If the minimisation did not report success.
            Warning: If the percentage change of any coefficient within the
                final iteration exceeds self.TOLERATED_PERCENTAGE_CHANGE.

        """
        def calculate_percentage_change_in_last_iteration(allvecs):
            """
            Calculate the percentage change that has occurred within
            the last iteration of the minimisation. If the percentage change
            between the last iteration and the last-but-one iteration exceeds
            the threshold, a warning message is printed.

            Args:
                allvecs : List
                    List of numpy arrays containing the optimised coefficients,
                    after each iteration.
            """
            if len(allvecs) < 2:
                # There is no last-but-one iteration to compare against.
                return
            last_iteration_percentage_change = np.absolute(
                (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
            if (np.any(last_iteration_percentage_change >
                       self.TOLERATED_PERCENTAGE_CHANGE)):
                np.set_printoptions(suppress=True)
                # Report the threshold that was actually applied, rather
                # than a hard-coded value.
                msg = ("\nThe final iteration resulted in a percentage change "
                       "that is greater than the accepted threshold of {}% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           self.TOLERATED_PERCENTAGE_CHANGE,
                           last_iteration_percentage_change, allvecs[-1],
                           allvecs[-2], np.absolute(allvecs[-2] - allvecs[-1]))
                warnings.warn(msg)

        try:
            minimisation_function = self.minimisation_dict[distribution]
        except KeyError as err:
            msg = ("Distribution requested {} is not supported in {}. "
                   "Error message is {}".format(distribution,
                                                self.minimisation_dict, err))
            raise KeyError(msg) from err

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)

        # Only the predictor is prepared differently for the two flags;
        # the truth and the variance are always flattened.
        predictor_choice = predictor_of_mean_flag.lower()
        if predictor_choice == "mean":
            forecast_predictor_data = forecast_predictor.data.flatten()
        elif predictor_choice == "realizations":
            forecast_predictor = (enforce_coordinate_ordering(
                forecast_predictor, "realization"))
            forecast_predictor_data = convert_cube_data_to_2d(
                forecast_predictor)
        truth_data = truth.data.flatten()
        forecast_var_data = forecast_var.data.flatten()

        initial_guess = np.array(initial_guess, dtype=np.float32)
        forecast_predictor_data = forecast_predictor_data.astype(np.float32)
        forecast_var_data = forecast_var_data.astype(np.float32)
        truth_data = truth_data.astype(np.float32)
        sqrt_pi = np.sqrt(np.pi).astype(np.float32)

        optimised_coeffs = minimize(minimisation_function,
                                    initial_guess,
                                    args=(forecast_predictor_data, truth_data,
                                          forecast_var_data, sqrt_pi,
                                          predictor_of_mean_flag),
                                    method="Nelder-Mead",
                                    options={
                                        "maxiter": self.MAX_ITERATIONS,
                                        "return_all": True
                                    })
        if not optimised_coeffs.success:
            msg = ("Minimisation did not result in convergence after "
                   "{} iterations. \n{}".format(self.MAX_ITERATIONS,
                                                optimised_coeffs.message))
            warnings.warn(msg)
        calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
        return optimised_coeffs.x
    def estimate_coefficients_for_ngr(self, current_forecast,
                                      historic_forecast, truth):
        """
        Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
        Statistics, estimate the required coefficients from historical
        forecasts.

        The main contents of this method is:

        1. Metadata checks to ensure that the current forecast, historic
           forecast and truth exist in a form that can be processed.
        2. Loop through times within the concatenated current forecast cube:

           1. Extract the desired forecast period from the historic forecasts
              to match the current forecasts. Apply unit conversion to ensure
              that historic forecasts have the desired units for calibration.
           2. Extract the relevant truth to co-incide with the time within
              the historic forecasts. Apply unit conversion to ensure
              that the truth has the desired units for calibration.
           3. Calculate mean and variance.
           4. Calculate initial guess at coefficient values by performing a
              linear regression, if requested, otherwise default values are
              used. The optimised coefficients from the previous time are
              reused as the initial guess, unless they contain NaNs.
           5. Perform minimisation, unless the initial guess contains NaNs,
              in which case the initial guess is stored as the result for
              that time.

        Args:
            current_forecast (Iris Cube or CubeList):
                The cube containing the current forecast.
            historic_forecast (Iris Cube or CubeList):
                The cube or cubelist containing the historical forecasts used
                for calibration.
            truth (Iris Cube or CubeList):
                The cube or cubelist containing the truth used for calibration.

        Returns:
            (tuple): tuple containing:
                **optimised_coeffs** (Dictionary):
                    Dictionary containing the optimised coefficients
                    for each date. The dictionary is empty if any input is
                    not a Cube or CubeList, or if any input is an empty
                    CubeList; a warning is raised in both cases.
                **coeff_names** (List):
                    The name of each coefficient.

        """
        def convert_to_cubelist(cubes, cube_type="forecast"):
            """
            Convert cube to cubelist, if necessary.

            Args:
                cubes (Iris Cube or Iris CubeList):
                    Cube to be converted to CubeList.
                cube_type (String):
                    String to describe the cube, which is being converted to a
                    CubeList.

            Raises
            ------
            TypeError: The input cube is not an Iris cube.

            """
            if not isinstance(cubes, iris.cube.CubeList):
                cubes = iris.cube.CubeList([cubes])
            for cube in cubes:
                if not isinstance(cube, iris.cube.Cube):
                    msg = ("The input data within the {} "
                           "is not an Iris Cube.".format(cube_type))
                    raise TypeError(msg)
            return cubes

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        # Setting default values for optimised_coeffs and coeff_names.
        optimised_coeffs = {}
        coeff_names = ["gamma", "delta", "a", "beta"]

        # Return the defaults if any input is of an unexpected type.
        for var in [current_forecast, historic_forecast, truth]:
            if not isinstance(var, (iris.cube.Cube, iris.cube.CubeList)):
                msg = ("{} is not a Cube or CubeList."
                       "Returning default values for optimised_coeffs {} "
                       "and coeff_names {}.").format(var, optimised_coeffs,
                                                     coeff_names)
                warnings.warn(msg)
                return optimised_coeffs, coeff_names

        current_forecast_cubes = convert_to_cubelist(
            current_forecast, cube_type="current forecast")
        historic_forecast_cubes = convert_to_cubelist(
            historic_forecast, cube_type="historic forecast")
        truth_cubes = convert_to_cubelist(truth, cube_type="truth")

        if not (current_forecast_cubes and historic_forecast_cubes
                and truth_cubes):
            msg = ("Insufficient input data present to estimate "
                   "coefficients using NGR. "
                   "\nNumber of current_forecast_cubes: {}"
                   "\nNumber of historic_forecast_cubes: {}"
                   "\nNumber of truth_cubes: {}".format(
                       len(current_forecast_cubes),
                       len(historic_forecast_cubes), len(truth_cubes)))
            warnings.warn(msg)
            return optimised_coeffs, coeff_names

        current_forecast_cubes = concatenate_cubes(current_forecast_cubes)
        historic_forecast_cubes = concatenate_cubes(historic_forecast_cubes)
        truth_cubes = concatenate_cubes(truth_cubes)

        # No initial guess is available before the first time is processed.
        initial_guess = None
        nan_in_initial_guess = False

        for current_forecast_cube in current_forecast_cubes.slices_over(
                "time"):
            time_coord = current_forecast_cube.coord("time")
            date = unit.num2date(
                time_coord.points,
                time_coord.units.name,
                time_coord.units.calendar)[0]
            # Extract desired forecast_period from historic_forecast_cubes.
            forecast_period_constr = iris.Constraint(
                forecast_period=current_forecast_cube.coord(
                    "forecast_period").points)
            historic_forecast_cube = historic_forecast_cubes.extract(
                forecast_period_constr)

            # Extract truth matching the time of the historic forecast.
            reference_time = iris_time_to_datetime(
                historic_forecast_cube.coord("time").copy())
            truth_constr = iris.Constraint(
                forecast_reference_time=reference_time)
            truth_cube = truth_cubes.extract(truth_constr)

            if truth_cube is None:
                msg = ("Unable to calibrate for the time points {} "
                       "as no truth data is available."
                       "Moving on to try to calibrate "
                       "next time point.".format(
                           historic_forecast_cube.coord("time").points))
                warnings.warn(msg)
                continue

            # Make sure inputs have the same units.
            historic_forecast_cube.convert_units(self.desired_units)
            truth_cube.convert_units(self.desired_units)

            if self.predictor_of_mean_flag.lower() == "mean":
                no_of_realizations = None
                forecast_predictor = historic_forecast_cube.collapsed(
                    "realization", iris.analysis.MEAN)
            elif self.predictor_of_mean_flag.lower() == "realizations":
                no_of_realizations = len(
                    historic_forecast_cube.coord("realization").points)
                forecast_predictor = historic_forecast_cube

            forecast_var = historic_forecast_cube.collapsed(
                "realization", iris.analysis.VARIANCE)

            # Computing initial guess for EMOS coefficients
            # If no initial guess from a previous iteration, or if there
            # are NaNs in the initial guess, calculate an initial guess.
            if initial_guess is None or nan_in_initial_guess:
                initial_guess = self.compute_initial_guess(
                    truth_cube,
                    forecast_predictor,
                    self.predictor_of_mean_flag,
                    self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
                    no_of_realizations=no_of_realizations)

            # Re-evaluate the flag for every time, so that a NaN found for
            # one time does not disable the minimisation for all the
            # following times once a valid guess has been recomputed.
            nan_in_initial_guess = bool(np.any(np.isnan(initial_guess)))

            if nan_in_initial_guess:
                optimised_coeffs[date] = initial_guess
            else:
                # Need to access the x attribute returned by the
                # minimisation function.
                optimised_coeffs[date] = (
                    self.minimiser.crps_minimiser_wrapper(
                        initial_guess, forecast_predictor, truth_cube,
                        forecast_var, self.predictor_of_mean_flag,
                        self.distribution.lower()))
                # Reuse the optimised values as the guess for the next time.
                initial_guess = optimised_coeffs[date]

        return optimised_coeffs, coeff_names
示例#9
0
    def process(self, current_forecast, historic_forecast, truth):
        """
        Performs ensemble calibration through the following steps:
        1. Estimate optimised coefficients from training period.
        2. Apply optimised coefficients to current forecast.

        The current cycle passed to the coefficient estimation is taken from
        the first point of the forecast_reference_time coordinate of the
        current forecast.

        Args:
            current_forecast (iris.cube.Cube):
                The cube that provides the input forecast for
                the current cycle.
            historic_forecast (iris.cube.Cube):
                The cube that provides the input historic forecasts
                for calibration.
            truth (iris.cube.Cube):
                The cube that provides the input truth for
                calibration with dates matching the historic forecasts.

        Returns:
            (tuple): tuple containing:
                **calibrated_forecast_predictor** (iris.cube.Cube):
                    Cube containing the calibrated forecast predictor,
                    with its data cast to float32.
                **calibrated_forecast_variance** (iris.cube.Cube):
                    Cube containing the calibrated forecast variance,
                    with its data cast to float32.

        Raises:
            ValueError: If self.calibration_method is neither "ensemble
                model output statistics" nor "nonhomogeneous gaussian
                regression" (case and underscores are ignored).
            ValueError: If self.distribution is neither "gaussian" nor
                "truncated gaussian" (case and underscores are ignored).

        """
        def format_calibration_method(calibration_method):
            """Lowercase input string, and replace underscores with spaces."""
            return calibration_method.lower().replace("_", " ")

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        calibration_method = format_calibration_method(self.calibration_method)
        if calibration_method not in [
                "ensemble model output statistics",
                "nonhomogeneous gaussian regression"]:
            msg = ("Other calibration methods are not available. "
                   "{} is not available".format(calibration_method))
            raise ValueError(msg)

        # Fail explicitly for an unsupported distribution. Previously this
        # case fell through and failed later with an UnboundLocalError
        # because the coefficients cube had never been created.
        distribution = format_calibration_method(self.distribution)
        if distribution not in ["gaussian", "truncated gaussian"]:
            msg = ("The {} distribution is not supported. The supported "
                   "distributions are: gaussian, truncated gaussian".format(
                       distribution))
            raise ValueError(msg)

        current_cycle = datetime_to_cycletime(
            iris_time_to_datetime(
                current_forecast.coord("forecast_reference_time"))[0])
        ec = EstimateCoefficientsForEnsembleCalibration(
            self.distribution, current_cycle=current_cycle,
            desired_units=self.desired_units,
            predictor_of_mean_flag=self.predictor_of_mean_flag)
        coefficient_cube = (
            ec.estimate_coefficients_for_ngr(
                historic_forecast, truth))

        ac = ApplyCoefficientsFromEnsembleCalibration(
            current_forecast, coefficient_cube,
            predictor_of_mean_flag=self.predictor_of_mean_flag)
        (calibrated_forecast_predictor,
         calibrated_forecast_variance) = ac.apply_params_entry()

        # TODO: track down where np.float64 promotion takes place.
        calibrated_forecast_predictor.data = (
            calibrated_forecast_predictor.data.astype(np.float32))
        calibrated_forecast_variance.data = (
            calibrated_forecast_variance.data.astype(np.float32))

        return calibrated_forecast_predictor, calibrated_forecast_variance
示例#10
0
    def estimate_coefficients_for_ngr(self, historic_forecast, truth):
        """
        Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
        Statistics, estimate the required coefficients from historical
        forecasts.

        The main contents of this method is:

        1. Check that the predictor_of_mean_flag is valid.
        2. If self.desired_units is set, convert_units is called on the
           historic forecast and truth cubes that were passed in. The units
           of the two cubes must then match.
        3. Calculate the variance of the historic forecasts over the
           realization coordinate. If the chosen predictor is the mean,
           also calculate the mean of the historic forecasts.
        4. Calculate initial guess at coefficient values by performing a
           linear regression, if requested, otherwise default values are
           used.
        5. Perform minimisation, unless the initial guess contains NaNs,
           in which case the initial guess is used as the coefficients.

        Args:
            historic_forecast (iris.cube.Cube):
                The cube containing the historical forecasts used
                for calibration.
            truth (iris.cube.Cube):
                The cube containing the truth used for calibration.

        Returns:
            coefficients_cube (iris.cube.Cube):
                Cube containing the coefficients estimated using EMOS.
                The cube contains a coefficient_index dimension coordinate
                and a coefficient_name auxiliary coordinate.

        Raises:
            ValueError: If the units of the historic forecast and truth
                do not match.

        """
        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        # Make sure inputs have the same units.
        if self.desired_units:
            historic_forecast.convert_units(self.desired_units)
            truth.convert_units(self.desired_units)

        if historic_forecast.units != truth.units:
            # Fill in the placeholders, so that the offending units are
            # reported rather than literal braces.
            msg = ("The historic forecast units of {} do not match "
                   "the truth units {}. These units must match, so that "
                   "the coefficients can be estimated.".format(
                       historic_forecast.units, truth.units))
            raise ValueError(msg)

        if self.predictor_of_mean_flag.lower() == "mean":
            no_of_realizations = None
            forecast_predictor = historic_forecast.collapsed(
                "realization", iris.analysis.MEAN)
        elif self.predictor_of_mean_flag.lower() == "realizations":
            no_of_realizations = len(
                historic_forecast.coord("realization").points)
            forecast_predictor = historic_forecast

        forecast_var = historic_forecast.collapsed(
            "realization", iris.analysis.VARIANCE)

        # Computing initial guess for EMOS coefficients. There is no
        # previous iteration to take a guess from, so it is always computed.
        initial_guess = self.compute_initial_guess(
            truth, forecast_predictor, self.predictor_of_mean_flag,
            self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
            no_of_realizations=no_of_realizations)

        if np.any(np.isnan(initial_guess)):
            # The minimisation is skipped for an initial guess containing
            # NaNs, and the initial guess is used as the coefficients.
            optimised_coeffs = initial_guess
        else:
            # Need to access the x attribute returned by the
            # minimisation function.
            optimised_coeffs = (
                self.minimiser.crps_minimiser_wrapper(
                    initial_guess, forecast_predictor,
                    truth, forecast_var,
                    self.predictor_of_mean_flag,
                    self.distribution.lower()))

        coefficients_cube = (
            self.create_coefficients_cube(optimised_coeffs, historic_forecast))
        return coefficients_cube
示例#11
0
    def process(self, historic_forecast, truth, landsea_mask=None):
        """
        Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
        Statistics, estimate the required coefficients from historical
        forecasts.

        The main contents of this method is:

        1. Check that the predictor_of_mean_flag is valid.
        2. Filter the historic forecasts and truth to ensure that these
           inputs match in validity time.
        3. Apply unit conversion to ensure that the historic forecasts and
           truth have the desired units for calibration.
        4. Calculate the variance of the historic forecasts. If the chosen
           predictor is the mean, also calculate the mean of the historic
           forecasts.
        5. If a land-sea mask is provided then mask out sea points in the
           truth, the predictor and the variance from the historic forecasts.
        6. Calculate initial guess at coefficient values by performing a
           linear regression, if requested, otherwise default values are
           used.
        7. Perform minimisation. If the initial guess contains any NaN
           values the minimisation is skipped and the initial guess itself
           is used to build the coefficients cube.

        Args:
            historic_forecast (iris.cube.Cube):
                The cube containing the historical forecasts used
                for calibration.
            truth (iris.cube.Cube):
                The cube containing the truth used for calibration.
            landsea_mask (iris.cube.Cube):
                The optional cube containing a land-sea mask. If provided, only
                land points are used to calculate the coefficients. Within the
                land-sea mask cube land points should be specified as ones,
                and sea points as zeros.

        Returns:
            coefficients_cube (iris.cube.Cube):
                Cube containing the coefficients estimated using EMOS.
                The cube contains a coefficient_index dimension coordinate
                and a coefficient_name auxiliary coordinate.

        Raises:
            ValueError: If the units of the historic and truth cubes do not
                match.

        """
        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(self.predictor_of_mean_flag)

        historic_forecast, truth = (self._filter_non_matching_cubes(
            historic_forecast, truth))

        # Make sure inputs have the same units.
        if self.desired_units:
            historic_forecast.convert_units(self.desired_units)
            truth.convert_units(self.desired_units)

        if historic_forecast.units != truth.units:
            # Fill in the placeholders so that the error reports the two
            # conflicting units rather than literal "{}" markers.
            msg = ("The historic forecast units of {} do not match "
                   "the truth units {}. These units must match, so that "
                   "the coefficients can be estimated.".format(
                       historic_forecast.units, truth.units))
            raise ValueError(msg)

        # The flag has been validated above, so one of these two branches
        # is expected to be taken.
        if self.predictor_of_mean_flag.lower() == "mean":
            no_of_realizations = None
            forecast_predictor = historic_forecast.collapsed(
                "realization", iris.analysis.MEAN)
        elif self.predictor_of_mean_flag.lower() == "realizations":
            no_of_realizations = len(
                historic_forecast.coord("realization").points)
            forecast_predictor = historic_forecast

        forecast_var = historic_forecast.collapsed("realization",
                                                   iris.analysis.VARIANCE)

        # If a landsea_mask is provided mask out the sea points.
        # NOTE(review): the return value of mask_cube is not used, so it
        # presumably masks the cube in place - confirm against mask_cube.
        if landsea_mask:
            self.mask_cube(forecast_predictor, landsea_mask)
            self.mask_cube(forecast_var, landsea_mask)
            self.mask_cube(truth, landsea_mask)

        # Computing initial guess for EMOS coefficients
        initial_guess = self.compute_initial_guess(
            truth,
            forecast_predictor,
            self.predictor_of_mean_flag,
            self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
            no_of_realizations=no_of_realizations)

        # Calculate coefficients if there are no nans in the initial guess.
        if np.any(np.isnan(initial_guess)):
            optimised_coeffs = initial_guess
        else:
            optimised_coeffs = (self.minimiser.process(
                initial_guess, forecast_predictor, truth, forecast_var,
                self.predictor_of_mean_flag, self.distribution.lower()))
        coefficients_cube = (self.create_coefficients_cube(
            optimised_coeffs, historic_forecast))
        return coefficients_cube
示例#12
0
    def __init__(self,
                 distribution,
                 current_cycle,
                 desired_units=None,
                 predictor_of_mean_flag="mean",
                 max_iterations=1000):
        """
        Create an ensemble calibration plugin that, for Nonhomogeneous Gaussian
        Regression, calculates coefficients based on historical forecasts and
        applies the coefficients to the current forecast.

        Args:
            distribution (str):
                Name of distribution. Assume that the current forecast can be
                represented using this distribution.
            current_cycle (str):
                The current cycle in YYYYMMDDTHHMMZ format e.g. 20171122T0100Z.
                This is used to create a forecast_reference_time coordinate
                on the resulting EMOS coefficients cube.
            desired_units (str or cf_units.Unit):
                The unit that you would like the calibration to be undertaken
                in. The current forecast, historical forecast and truth will be
                converted as required.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            max_iterations (int):
                The maximum number of iterations allowed until the
                minimisation has converged to a stable solution. If the
                maximum number of iterations is reached, but the minimisation
                has not yet converged to a stable solution, then the available
                solution is used anyway, and a warning is raised. If the
                predictor_of_mean is "realizations", then the number of
                iterations may require increasing, as there will be
                more coefficients to solve for.

        Raises:
            ValueError: If the given distribution is not valid.

        Warns:
            ImportWarning: If the statsmodels module can't be imported and
                the predictor is the ensemble realizations.
        """
        valid_distributions = (ContinuousRankedProbabilityScoreMinimisers().
                               minimisation_dict.keys())
        if distribution not in valid_distributions:
            msg = ("Given distribution {} not available. Available "
                   "distributions are {}".format(distribution,
                                                 valid_distributions))
            raise ValueError(msg)
        self.distribution = distribution
        self.current_cycle = current_cycle
        self.desired_units = desired_units
        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)
        self.predictor_of_mean_flag = predictor_of_mean_flag
        self.max_iterations = max_iterations
        self.minimiser = ContinuousRankedProbabilityScoreMinimisers(
            max_iterations=self.max_iterations)

        # Setting default values for coeff_names. Beta is the final
        # coefficient name in the list, as there can potentially be
        # multiple beta coefficients if the ensemble realizations, rather
        # than the ensemble mean, are provided as the predictor.
        self.coeff_names = ["gamma", "delta", "alpha", "beta"]

        # statsmodels is an optional dependency. Attempt the import directly
        # rather than probing with the "imp" module, which is deprecated and
        # no longer available from Python 3.12 onwards. self.sm is only set
        # when the import succeeds.
        try:
            import statsmodels.api as sm
        except ImportError:
            statsmodels_found = False
            if predictor_of_mean_flag.lower() == "realizations":
                msg = ("The statsmodels can not be imported. "
                       "Will not be able to calculate an initial guess from "
                       "the individual ensemble realizations. "
                       "A default initial guess will be used without "
                       "estimating coefficients from a linear model.")
                warnings.warn(msg, ImportWarning)
        else:
            statsmodels_found = True
            self.sm = sm
        self.statsmodels_found = statsmodels_found
示例#13
0
    def process(self, initial_guess, forecast_predictor, truth, forecast_var,
                predictor_of_mean_flag, distribution):
        """
        Function to pass a given function to the scipy minimize
        function to estimate optimised values for the coefficients.

        If the predictor_of_mean_flag is the ensemble mean, this function
        estimates values for alpha, beta, gamma and delta based on the
        equation:
        N(alpha + beta * ensemble_mean, gamma + delta * ensemble_variance),
        where N is a chosen distribution.

        If the predictor_of_mean_flag is the ensemble realizations, this
        function estimates values for alpha, beta, gamma and delta based on the
        equation:

        .. math::
          N(alpha + beta0 * realization0 + beta1 * realization1,

          gamma + delta * ensemble\\_variance)

        where N is a chosen distribution and the number of beta terms
        depends on the number of realizations provided.

        Args:
            initial_guess (list):
                List of initial guesses for the coefficients, used as the
                starting point of the minimisation.
                Order of coefficients is [gamma, delta, alpha, beta].
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_var (iris.cube.Cube):
                Cube containing the field containing the ensemble variance.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            distribution (str):
                String used to access the appropriate function for use in the
                minimisation within self.minimisation_dict.

        Returns:
            optimised_coeffs (numpy.ndarray):
                Array of optimised coefficients, cast to float32.
                Order of coefficients is [gamma, delta, alpha, beta].

        Raises:
            KeyError: If the distribution is not supported.

        Warns:
            Warning: If the minimisation did not converge.

        """
        def calculate_percentage_change_in_last_iteration(allvecs):
            """
            Calculate the percentage change that has occurred within
            the last iteration of the minimisation. If the percentage change
            between the last iteration and the last-but-one iteration exceeds
            the threshold, a warning message is printed.

            If fewer than two sets of coefficients are available, there is
            no previous iteration to compare against and nothing is checked.

            Args:
                allvecs (list):
                    List of numpy arrays containing the optimised coefficients,
                    after each iteration.

            Warns:
                Warning: If a satisfactory minimisation has not been achieved.
            """
            if len(allvecs) < 2:
                # No last-but-one iteration exists (e.g. the minimiser
                # stopped before completing an iteration), so indexing
                # allvecs[-2] would raise an IndexError.
                return
            last_iteration_percentage_change = np.absolute(
                (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
            if (np.any(last_iteration_percentage_change >
                       self.TOLERATED_PERCENTAGE_CHANGE)):
                np.set_printoptions(suppress=True)
                msg = ("The final iteration resulted in a percentage change "
                       "that is greater than the accepted threshold of 5% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           last_iteration_percentage_change, allvecs[-1],
                           allvecs[-2], np.absolute(allvecs[-2] - allvecs[-1]))
                warnings.warn(msg)

        try:
            minimisation_function = self.minimisation_dict[distribution]
        except KeyError as err:
            msg = ("Distribution requested {} is not supported in {}. "
                   "Error message is {}".format(distribution,
                                                self.minimisation_dict, err))
            # Chain the original lookup failure so that the traceback shows
            # where the unsupported key came from.
            raise KeyError(msg) from err

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)

        # Flatten the data arrays and remove any missing data.
        truth_data = flatten_ignoring_masked_data(truth.data)
        forecast_var_data = flatten_ignoring_masked_data(forecast_var.data)
        if predictor_of_mean_flag.lower() == "mean":
            forecast_predictor_data = flatten_ignoring_masked_data(
                forecast_predictor.data)
        elif predictor_of_mean_flag.lower() == "realizations":
            forecast_predictor = (enforce_coordinate_ordering(
                forecast_predictor, "realization"))
            # Need to transpose this array so there are columns for each
            # ensemble member rather than rows.
            forecast_predictor_data = flatten_ignoring_masked_data(
                forecast_predictor.data, preserve_leading_dimension=True).T

        # Increased precision is needed for stable coefficient calculation.
        # The resulting coefficients are cast to float32 prior to output.
        initial_guess = np.array(initial_guess, dtype=np.float64)
        forecast_predictor_data = forecast_predictor_data.astype(np.float64)
        forecast_var_data = forecast_var_data.astype(np.float64)
        truth_data = truth_data.astype(np.float64)
        sqrt_pi = np.sqrt(np.pi).astype(np.float64)
        # return_all=True makes the result carry "allvecs", the coefficient
        # vectors recorded during the minimisation, which are needed for the
        # percentage change check below.
        optimised_coeffs = minimize(minimisation_function,
                                    initial_guess,
                                    args=(forecast_predictor_data, truth_data,
                                          forecast_var_data, sqrt_pi,
                                          predictor_of_mean_flag),
                                    method="Nelder-Mead",
                                    options={
                                        "maxiter": self.max_iterations,
                                        "return_all": True
                                    })

        if not optimised_coeffs.success:
            msg = ("Minimisation did not result in convergence after "
                   "{} iterations. \n{}".format(self.max_iterations,
                                                optimised_coeffs.message))
            warnings.warn(msg)
        calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
        return optimised_coeffs.x.astype(np.float32)