Exemplo n.º 1
0
def process(cube,
            coordinates=None,
            ecc_bounds_warning=False,
            percentiles=None,
            no_of_percentiles=None):
    r"""Collapses cube coordinates and calculate percentiled data.

    Calculate percentiled data over a given coordinate by collapsing that
    coordinate. Typically used to convert realization data into percentiled
    data, but may calculate over any dimension coordinate. Alternatively
    calling this with a dataset containing probabilities will convert those
    to percentiles using the ensemble coupla coupling plugin. If no particular
    percentiles are given at which to calculate values and no
    'number of percentiles' to calculate are specified, the
    following defaults will be used.
    '[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]'

    Args:
        cube (iris.cube.Cube):
            A Cube for processing.
        coordinates (str or list):
            Coordinate or coordinates over which to collapse data and
            calculate percentiles; e.g. 'realization' or 'latitude longitude'.
            This argument must be provided when collapsing a coordinate or
            coordinates to create percentiles, but is redundant when
            converting probabilities to percentiles and may be omitted. This
            coordinate(s) will be removed and replaced by a percentile
            coordinate.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC bounds
            range, raises a warning rather than an exception.
            Default is False.
        percentiles (list or None):
            Optional definition of percentiles at which to calculate data.
            Default is None.
        no_of_percentiles (int):
            Optional definition of the number of percentiles to be generated,
            these distributed regularly with the aim of dividing into blocks
            of equal probability.
            Default is None.

    Returns:
        result (iris.cube.Cube):
            The processed Cube.

    Raises:
        ValueError:
            If the cube name does not contain 'probability_of\_' and
            coordinates isn't used.

    Warns:
        Warning:
            If 'probability_of\_' is in the cube name and coordinates is used.

    """
    if no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(no_of_percentiles,
                                                sampling="quantile")
    # TODO: Correct when formal cf-standards exists
    if 'probability_of_' in cube.name():
        result = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning).process(
                cube, percentiles=percentiles)
        if coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")
    else:
        if not coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            coordinates,
            percentiles=percentiles,
            fast_percentile_method=fast_percentile_method).process(cube)
    return result
def main(argv=None):
    """Load in arguments for applying coefficients for Ensemble Model Output
       Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
       Regression (NGR). The coefficients are applied to the forecast
       that is supplied, so as to calibrate the forecast. The calibrated
       forecast is written to a netCDF file.
    """
    parser = ArgParser(
        description='Apply coefficients for Ensemble Model Output '
        'Statistics (EMOS), otherwise known as Non-homogeneous '
        'Gaussian Regression (NGR). The supported input formats '
        'are realizations, probabilities and percentiles. '
        'The forecast will be converted to realizations before '
        'applying the coefficients and then converted back to '
        'match the input format.')
    # Filepaths for the forecast, EMOS coefficients and the output.
    parser.add_argument(
        'forecast_filepath',
        metavar='FORECAST_FILEPATH',
        help='A path to an input NetCDF file containing the forecast to be '
        'calibrated. The input format could be either realizations, '
        'probabilities or percentiles.')
    parser.add_argument('coefficients_filepath',
                        metavar='COEFFICIENTS_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                        'coefficients used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients.'
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--predictor_of_mean',
        metavar='PREDICTOR_OF_MEAN',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the predictor used to calibrate the forecast '
        'mean. Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as options. '
        'Default: "mean".')

    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.forecast_filepath)
    coeffs = load_cube(args.coefficients_filepath)

    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast,
        coeffs,
        predictor_of_mean_flag=args.predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process()

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
Exemplo n.º 3
0
def main(argv=None):
    """Load in arguments and get going."""
    parser = ArgParser(
        description="Calculate percentiled data over a given coordinate by "
        "collapsing that coordinate. Typically used to convert realization "
        "data into percentiled data, but may calculate over any "
        "dimension coordinate. Alternatively, calling this CLI with a dataset"
        " containing probabilities will convert those to percentiles using "
        "the ensemble copula coupling plugin. If no particular percentiles "
        "are given at which to calculate values and no 'number of percentiles'"
        " to calculate are specified, the following defaults will be used: "
        "[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]")
    parser.add_argument("input_filepath",
                        metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath",
                        metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("--coordinates",
                        metavar="COORDINATES_TO_COLLAPSE",
                        nargs="+",
                        help="Coordinate or coordinates over which to collapse"
                        " data and calculate percentiles; e.g. "
                        "'realization' or 'latitude longitude'. This argument "
                        "must be provided when collapsing a coordinate or "
                        "coordinates to create percentiles, but is redundant "
                        "when converting probabilities to percentiles and may "
                        "be omitted. This coordinate(s) will be removed "
                        "and replaced by a percentile coordinate.")
    parser.add_argument('--ecc_bounds_warning',
                        default=False,
                        action='store_true',
                        help='If True, where calculated percentiles are '
                        'outside the ECC bounds range, raise a warning '
                        'rather than an exception.')
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--percentiles",
                       metavar="PERCENTILES",
                       nargs="+",
                       default=None,
                       type=float,
                       help="Optional definition of percentiles at which to "
                       "calculate data, e.g. --percentiles 0 33.3 66.6 100")
    group.add_argument('--no-of-percentiles',
                       default=None,
                       type=int,
                       metavar='NUMBER_OF_PERCENTILES',
                       help="Optional definition of the number of percentiles "
                       "to be generated, these distributed regularly with the "
                       "aim of dividing into blocks of equal probability.")

    args = parser.parse_args(args=argv)
    cube = load_cube(args.input_filepath)
    percentiles = args.percentiles
    if args.no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(args.no_of_percentiles,
                                                sampling="quantile")
    # TODO: Correct when formal cf-standards exists
    if 'probability_of_' in cube.name():
        if args.coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")

        result = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning).process(
                cube, percentiles=percentiles)
    else:
        if not args.coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            args.coordinates,
            percentiles=percentiles,
            fast_percentile_method=fast_percentile_method).process(cube)

    save_netcdf(result, args.output_filepath)
Exemplo n.º 4
0
def process(neighbour_cube,
            diagnostic_cube,
            lapse_rate_cube=None,
            apply_lapse_rate_correction=False,
            land_constraint=False,
            minimum_dz=False,
            extract_percentiles=None,
            ecc_bounds_warning=False,
            metadata_dict=None,
            suppress_warnings=False):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real altitude
    and that of the grid point from which the temperature data is extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        diagnostic_cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate_cube (iris.cube.Cube):
            Cube containing temperature lapse rates. If this cube is provided
            and a screen temperature cube is being processed, the lapse rates
            will be used to adjust the temperature to better represent each
            spot's site-altitude.
        apply_lapse_rate_correction (bool):
            If True, and a lapse rate cube has been provided, extracted
            screen temperature will be adjusted to better match the altitude
            of the spot site for which they have been extracted.
            Default is False.
        land_constraint (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using a land constraint. This means
            that the grid points should be land points except for sites where
            none were found within the search radius when the neighbour cube
            was created. May be used with minimum_dz.
            Default is False.
        minimum_dz (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using the minimum height
            difference constraint. These are grid points that were found to be
            the closest in altitude to the spot site within the search radius
            defined when the neighbour cube was created. May be used with
            land_constraint.
            Default is False.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values,
            data corresponding to those percentiles will be returned. For
            example [25, 50, 75] will result in the 25th, 50th and 75th
            percentiles being returned from a cube of probabilities,
            percentiles or realizations.
            Note that for percentiles inputs, the desired percentile(s) must
            exist in the input cube.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC bounds
            range, raises a warning rather than an exception.
            Default is False.
        metadata_dict (dict):
            If provided, this dictionary can be used to modify the metadata
            of the returned cube.
            Default is None.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not required.
            Default is None.

    Returns:
        result (iris.cube.Cube):
           The processed cube.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
           If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """
    neighbour_selection_method = NeighbourSelection(
        land_constraint=land_constraint,
        minimum_dz=minimum_dz).neighbour_finding_method_name()
    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if extract_percentiles is not None:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=ecc_bounds_warning).process(
                        result, percentiles=extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                fast_percentile_method = (False if np.ma.isMaskedArray(
                    result.data) else True)
                result = PercentileConverter(
                    'realization',
                    percentiles=extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(extract_percentiles))
                if not suppress_warnings:
                    warnings.warn(msg)
        else:
            constraint = [
                '{}={}'.format(perc_coordinate.name(), extract_percentiles)
            ]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)
    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if apply_lapse_rate_correction and lapse_rate_cube:
        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        if not lapse_rate_cube.name() == 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")

    elif apply_lapse_rate_correction and not lapse_rate_cube:
        if not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")

    # Modify final metadata as described by provided JSON file.
    if metadata_dict:
        result = amend_metadata(result, **metadata_dict)
    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)
    return result
Exemplo n.º 5
0
def main(argv=None):
    """Load in arguments and start spotdata extraction process."""
    parser = ArgParser(
        description="Extract diagnostic data from gridded fields for spot data"
        " sites. It is possible to apply a temperature lapse rate adjustment"
        " to temperature data that helps to account for differences between"
        " the spot sites real altitude and that of the grid point from which"
        " the temperature data is extracted.")

    # Input and output files required.
    parser.add_argument("neighbour_filepath", metavar="NEIGHBOUR_FILEPATH",
                        help="Path to a NetCDF file of spot-data neighbours. "
                        "This file also contains the spot site information.")
    parser.add_argument("diagnostic_filepath", metavar="DIAGNOSTIC_FILEPATH",
                        help="Path to a NetCDF file containing the diagnostic "
                             "data to be extracted.")
    parser.add_argument("temperature_lapse_rate_filepath",
                        metavar="LAPSE_RATE_FILEPATH", nargs='?',
                        help="(Optional) Filepath to a NetCDF file containing"
                        " temperature lapse rates. If this cube is provided,"
                        " and a screen temperature cube is being processed,"
                        " the lapse rates will be used to adjust the"
                        " temperatures to better represent each spot's"
                        " site-altitude.")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH",
                        help="The output path for the resulting NetCDF")

    parser.add_argument(
        "--apply_lapse_rate_correction",
        default=False, action="store_true",
        help="If the option is set and a lapse rate cube has been "
        "provided, extracted screen temperatures will be adjusted to "
        "better match the altitude of the spot site for which they have "
        "been extracted.")

    method_group = parser.add_argument_group(
        title="Neighbour finding method",
        description="If none of these options are set, the nearest grid point "
        "to a spot site will be used without any other constraints.")
    method_group.add_argument(
        "--land_constraint", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a land constraint. This means"
        " that the grid points should be land points except for sites where"
        " none were found within the search radius when the neighbour cube was"
        " created. May be used with minimum_dz.")
    method_group.add_argument(
        "--minimum_dz", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a minimum height difference"
        " constraint. These are grid points that were found to be the closest"
        " in altitude to the spot site within the search radius defined when"
        " the neighbour cube was created. May be used with land_constraint.")

    percentile_group = parser.add_argument_group(
        title="Extract percentiles",
        description="Extract particular percentiles from probabilistic, "
        "percentile, or realization inputs. If deterministic input is "
        "provided a warning is raised and all leading dimensions are included "
        "in the returned spot-data cube.")
    percentile_group.add_argument(
        "--extract_percentiles", default=None, nargs='+', type=int,
        help="If set to a percentile value or a list of percentile values, "
        "data corresponding to those percentiles will be returned. For "
        "example setting '--extract_percentiles 25 50 75' will result in the "
        "25th, 50th, and 75th percentiles being returned from a cube of "
        "probabilities, percentiles, or realizations. Note that for "
        "percentile inputs, the desired percentile(s) must exist in the input "
        "cube.")
    parser.add_argument(
        "--ecc_bounds_warning", default=False, action="store_true",
        help="If True, where calculated percentiles are outside the ECC "
        "bounds range, raise a warning rather than an exception.")

    meta_group = parser.add_argument_group("Metadata")
    meta_group.add_argument(
        "--metadata_json", metavar="METADATA_JSON", default=None,
        help="If provided, this JSON file can be used to modify the metadata "
        "of the returned netCDF file. Defaults to None.")

    output_group = parser.add_argument_group("Suppress Verbose output")
    # This CLI may be used to prepare data for verification without knowing the
    # form of the input, be it deterministic, realizations or probabilistic.
    # A warning is normally raised when attempting to extract a percentile from
    # deterministic data as this is not possible; the spot-extraction of the
    # entire cube is returned. When preparing data for verification we know
    # that we will produce a large number of these warnings when passing in
    # deterministic data. This option to suppress warnings is provided to
    # reduce the amount of unneeded logging information that is written out.

    output_group.add_argument(
        "--suppress_warnings", default=False, action="store_true",
        help="Suppress warning output. This option should only be used if "
        "it is known that warnings will be generated but they are not "
        "required.")

    args = parser.parse_args(args=argv)
    neighbour_cube = load_cube(args.neighbour_filepath)
    diagnostic_cube = load_cube(args.diagnostic_filepath)

    neighbour_selection_method = NeighbourSelection(
        land_constraint=args.land_constraint,
        minimum_dz=args.minimum_dz).neighbour_finding_method_name()

    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if args.extract_percentiles:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=args.ecc_bounds_warning).process(
                        result, percentiles=args.extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                fast_percentile_method = (
                    False if np.ma.isMaskedArray(result.data) else True)
                result = PercentileConverter(
                    'realization', percentiles=args.extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(
                           args.extract_percentiles))
                if not args.suppress_warnings:
                    warnings.warn(msg)
        else:
            constraint = ['{}={}'.format(perc_coordinate.name(),
                                         args.extract_percentiles)]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(args.extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)

    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if (args.temperature_lapse_rate_filepath and
            args.apply_lapse_rate_correction):

        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        lapse_rate_cube = load_cube(args.temperature_lapse_rate_filepath)
        if not lapse_rate_cube.name() == 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        else:
            msg = ("A lapse rate cube was provided, but the height of "
                   "the temperature data does not match that of the data used "
                   "to calculate the lapse rates. As such the temperatures "
                   "were not adjusted with the lapse rates.")
            if not args.suppress_warnings:
                warnings.warn(msg)
    elif (args.apply_lapse_rate_correction and
          not args.temperature_lapse_rate_filepath):
        msg = ("A lapse rate cube was not provided, but the option to "
               "apply the lapse rate correction was enabled. No lapse rate "
               "correction could be applied.")
        if not args.suppress_warnings:
            warnings.warn(msg)

    # Modify final metadata as described by provided JSON file.
    if args.metadata_json:
        with open(args.metadata_json, 'r') as input_file:
            metadata_dict = json.load(input_file)
        result = amend_metadata(result, **metadata_dict)

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)

    # Save the spot data cube.
    save_netcdf(result, args.output_filepath)
def process(cube,
            raw_forecast=None,
            no_of_realizations=None,
            reordering=False,
            rebadging=False,
            random_seed=None,
            ecc_bounds_warning=False):
    """Convert from probabilities to ensemble realizations.

    Args:
        cube (iris.cube.Cube):
            Cube to be processed.
        raw_forecast (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            This option is compulsory, if the reordering option is selected.
        no_of_realizations (int):
            Optional definition of the number of ensemble realizations to
            be generated. These are generated though an intermediate
            percentile representation. Theses percentiles will be
            distributed regularly with the aim of dividing into blocks
            of equal probability. If the reordering option is specified
            and the number of realization is not given the number
            of realizations is taken from the number of realizations
            in the raw forecast cube.
            Default is None.
        reordering (bool):
            The option used to create ensemble realizations from percentiles
            by reordering the input percentiles based on the order of the
            raw ensemble.
            Default is False.
        rebadging (bool):
            Th option used to create ensemble realizations from percentiles
            by rebadging the input percentiles.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviours is
            utilised. The random seed is used in the generation of the
            random numbers used for splitting tied values within the raw
            ensemble, so that the values from the input percentiles can
            be ordered to match the raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where percentiles (calculated as an intermediate output
            before realization) exceed to ECC bounds range, raises a warning
            rather than an exception.
            Default is False.

    Returns:
        iris.cube.Cube:
            Processed result Cube.

    Raises:
        TypeError:
            If rebadging is used with raw_forecast.
        TypeError:
            If rebadging is used with random_seed.
        ValueError:
            If raw_forecast isn't supplied when using reordering.
    """
    if rebadging:
        if raw_forecast is not None:
            raise TypeError('rebadging cannot be used with raw_forecast.')
        if random_seed is not None:
            raise TypeError('rebadging cannot be used with random_seed.')

    if reordering:
        no_of_realizations = no_of_realizations
        # If no_of_realizations is not given, take the number from the raw
        # ensemble cube.
        if no_of_realizations is None:
            no_of_realizations = len(raw_forecast.coord("realization").points)
            if raw_forecast is None:
                message = ("You must supply a raw forecast cube if using the "
                           "reordering option.")
                raise ValueError(message)

        cube = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning).process(
                cube, no_of_percentiles=no_of_realizations)
        result = EnsembleReordering().process(cube,
                                              raw_forecast,
                                              random_ordering=False,
                                              random_seed=random_seed)
    elif rebadging:
        cube = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning).process(
                cube, no_of_percentiles=no_of_realizations)
        result = RebadgePercentilesAsRealizations().process(cube)
    return result
Exemplo n.º 7
0
def main(argv=None):
    """Convert from probabilities to ensemble realizations via a CLI."""

    cli_specific_arguments = [(['--no_of_realizations'], {
        'metavar':
        'NUMBER_OF_REALIZATIONS',
        'default':
        None,
        'type':
        int,
        'help':
        ("Optional definition of the number of ensemble realizations to "
         "be generated. These are generated through an intermediate "
         "percentile representation. These percentiles will be "
         "distributed regularly with the aim of dividing into blocks of "
         "equal probability. If the reordering option is specified and "
         "the number of realizations is not given then the number of "
         "realizations is taken from the number of realizations in the "
         "raw forecast NetCDF file.")
    })]

    cli_definition = {
        'central_arguments': ('input_file', 'output_file'),
        'specific_arguments':
        cli_specific_arguments,
        'description': ('Convert a dataset containing '
                        'probabilities into one containing '
                        'ensemble realizations.')
    }
    parser = ArgParser(**cli_definition)
    # add mutually exculsive options rebadge and reorder.
    # If reordering add option for raw ensemble - raise error if
    # raw ens missing.
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument('--reordering',
                       default=False,
                       action='store_true',
                       help='The option used to create ensemble realizations '
                       'from percentiles by reordering the input '
                       'percentiles based on the order of the '
                       'raw ensemble forecast.')
    group.add_argument('--rebadging',
                       default=False,
                       action='store_true',
                       help='The option used to create ensemble realizations '
                       'from percentiles by rebadging the input '
                       'percentiles.')

    # If reordering, we need a raw ensemble forecast.
    reordering = parser.add_argument_group(
        'Reordering options', 'Options for reordering the input percentiles '
        'using the raw ensemble forecast as required to create ensemble '
        'realizations.')
    reordering.add_argument('--raw_forecast_filepath',
                            metavar='RAW_FORECAST_FILE',
                            help='A path to an raw forecast NetCDF file to be '
                            'processed. This option is compulsory, if the '
                            'reordering option is selected.')
    reordering.add_argument(
        '--random_seed',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    reordering.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where percentiles (calculated as an intermediate '
        'output before realizations) exceed the ECC bounds range, raise '
        'a warning rather than an exception.')

    args = parser.parse_args(args=argv)

    # CLI argument checking:
    # Can only do one of reordering or rebadging: if options are passed which
    # correspond to the opposite method, raise an exception.
    # Note: Shouldn't need to check that both/none are set, since they are
    # defined as mandatory, but mutually exclusive, options.
    if args.rebadging:
        if ((args.raw_forecast_filepath is not None)
                or (args.random_seed is not None)):
            parser.wrong_args_error('raw_forecast_filepath, random_seed',
                                    'rebadging')

    # Process the data
    cube = load_cube(args.input_filepath)

    if args.reordering:
        if args.raw_forecast_filepath is None:
            message = ("You must supply a raw forecast filepath if using the "
                       "reordering option.")
            raise ValueError(message)
        else:
            raw_forecast = load_cube(args.raw_forecast_filepath)
            try:
                raw_forecast.coord("realization")
            except CoordinateNotFoundError:
                message = ("The netCDF file from the raw_forecast_filepath "
                           "must have a realization coordinate.")
                raise ValueError(message)

        no_of_realizations = args.no_of_realizations
        # If no_of_realizations is not given, take the number from the raw
        # ensemble cube.
        if args.no_of_realizations is None:
            no_of_realizations = len(raw_forecast.coord("realization").points)

        cube = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning).process(
                cube, no_of_percentiles=no_of_realizations)
        cube = EnsembleReordering().process(cube,
                                            raw_forecast,
                                            random_ordering=False,
                                            random_seed=args.random_seed)
    elif args.rebadging:
        cube = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning).process(
                cube, no_of_percentiles=args.no_of_realizations)
        cube = RebadgePercentilesAsRealizations().process(cube)

    save_netcdf(cube, args.output_filepath)
Exemplo n.º 8
0
def process(current_forecast, coeffs, landsea_mask, num_realizations=None,
            random_ordering=False, random_seed=None,
            ecc_bounds_warning=False, predictor_of_mean='mean'):
    """Applying coefficients for Ensemble Model Output Statistics.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a cube. If no coefficients are provided the input
    forecast is returned unchanged.

    Args:
        current_forecast (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coeffs (iris.cube.Cube or None):
            A cube containing the coefficients used for calibration or None.
            If none then then current_forecast is returned unchanged.
        landsea_mask (iris.cube.Cube or None):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are "
            "specified by ones and sea points are specified by zeros. "
            "If not None this argument will enable land-only calibration, in "
            "which sea points are returned without the application of "
            "calibration."
        num_realizations (numpy.int32):
            Optional argument to specify the number of ensemble realizations
            to produce. If the current forecast is input as probabilities or
            percentiles then this argument is used to create the requested
            number of realizations. In addition, this argument is used to
            construct the requested number of realizations from the mean and
            variance output after applying the EMOS coefficients.
            Default is None.
        random_ordering (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seen behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the random_ordering option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecasts is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
            Default is False.
        predictor_of_mean (str):
            String to specify the predictor used to calibrate the forecast
            mean. Currently the ensemble mean "mean" as the ensemble
            realization "realization" are supported as options.
            Default is 'mean'

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while no
            num_realizations are given.

    """
    if coeffs is None:
        msg = ("There are no coefficients provided for calibration. The "
               "uncalibrated forecast will be returned.")
        warnings.warn(msg)
        return current_forecast

    elif coeffs.name() != 'emos_coefficients':
        msg = ("The current coefficients cube does not have the "
               "name 'emos_coefficients'")
        raise ValueError(msg)

    if current_forecast.name() == 'emos_coefficients':
        msg = "The current forecast cube has the name 'emos_coefficients'"
        raise ValueError(msg)

    original_current_forecast = current_forecast.copy()
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=ecc_bounds_warning)

    # If percentiles, re-sample percentiles and then re-badge.
    # If probabilities, generate percentiles and then re-badge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not num_realizations:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The num_realizations "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "num_realizations must be defined.".format(
                    input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not num_realizations:
        num_realizations = len(
            current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        predictor_of_mean_flag=predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process(
        current_forecast, coeffs, landsea_mask=landsea_mask)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            no_of_percentiles=num_realizations)
        result = EnsembleReordering().process(
            percentiles, current_forecast,
            random_ordering=random_ordering, random_seed=random_seed)
    return result
Exemplo n.º 9
0
def process(cube: cli.inputcube,
            raw_cube: cli.inputcube = None,
            *,
            realizations_count: int = None,
            random_seed: int = None,
            ignore_ecc_bounds=False):
    """Convert probabilities to ensemble realizations using Ensemble Coupla
    Coupling.

    Probabilities are first converted to percentiles, which are then either
    rebadged as realizations or reordered if the raw_cube argument is given.

    Args:
        cube (iris.cube.Cube):
            Cube to be processed.
        raw_cube (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            If this argument is given ensemble realizations will be created
            from percentiles by reshuffling them in correspondance to the rank
            order of the raw ensemble. Otherwise, the percentiles are rebadged
            as realizations.
        realizations_count (int):
            Optional definition of the number of ensemble realizations to
            be generated. These are generated though an intermediate
            percentile representation. Theses percentiles will be
            distributed regularly with the aim of dividing into blocks
            of equal probability. If the raw_cube is given
            and the number of realization is not given the number
            of realizations is taken from the number of realizations
            in the raw_cube.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviours is
            utilised. The random seed is used in the generation of the
            random numbers used for splitting tied values within the raw
            ensemble, so that the values from the input percentiles can
            be ordered to match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where percentiles (calculated as an intermediate output
            before realization) exceed to ECC bounds range, raises a warning
            rather than an exception.

    Returns:
        iris.cube.Cube:
            Processed result Cube.
    """
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        GeneratePercentilesFromProbabilities, RebadgePercentilesAsRealizations,
        EnsembleReordering)

    if realizations_count is None and raw_cube:
        # If realizations_count is not given, take the number from the raw
        # ensemble cube.
        realizations_count = len(raw_cube.coord("realization").points)

    result = GeneratePercentilesFromProbabilities(
        ecc_bounds_warning=ignore_ecc_bounds).process(
            cube, no_of_percentiles=realizations_count)

    if raw_cube:
        result = EnsembleReordering().process(result,
                                              raw_cube,
                                              random_ordering=False,
                                              random_seed=random_seed)
    else:
        result = RebadgePercentilesAsRealizations().process(result)

    return result
Exemplo n.º 10
0
def process(neighbour_cube: cli.inputcube,
            cube: cli.inputcube,
            lapse_rate: cli.inputcube = None,
            *,
            apply_lapse_rate_correction=False,
            land_constraint=False,
            similar_altitude=False,
            extract_percentiles: cli.comma_separated_list = None,
            ignore_ecc_bounds=False,
            new_title: str = None,
            suppress_warnings=False):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real altitude
    and that of the grid point from which the temperature data is extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate (iris.cube.Cube):
            Optional cube containing temperature lapse rates. If this cube is
            provided and a screen temperature cube is being processed, the
            lapse rates will be used to adjust the temperature to better
            represent each spot's site-altitude.
        apply_lapse_rate_correction (bool):
            Use to apply a lapse-rate correction to screen temperature data so
            that the data are a better match the altitude of the spot site for
            which they have been extracted.
        land_constraint (bool):
            Use to select the nearest-with-land-constraint neighbour-selection
            method from the neighbour_cube. This means that the grid points
            should be land points except for sites where none were found within
            the search radius when the neighbour cube was created. May be used
            with similar_altitude.
        similar_altitude (bool):
            Use to select the nearest-with-height-constraint
            neighbour-selection method from the neighbour_cube. These are grid
            points that were found to be the closest in altitude to the spot
            site within the search radius defined when the neighbour cube was
            created. May be used with land_constraint.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values,
            data corresponding to those percentiles will be returned. For
            example "25, 50, 75" will result in the 25th, 50th and 75th
            percentiles being returned from a cube of probabilities,
            percentiles or realizations. Deterministic input data will raise
            a warning message.
            Note that for percentiles inputs, the desired percentile(s) must
            exist in the input cube.
        ignore_ecc_bounds (bool):
            Demotes exceptions where calculated percentiles are outside the ECC
            bounds range to warnings.
        new_title (str):
            New title for the spot-extracted data.  If None, this attribute is
            removed from the output cube since it has no prescribed standard
            and may therefore contain grid information that is no longer
            correct after spot-extraction.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not required.

    Returns:
        iris.cube.Cube:
           Cube of spot data.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
           If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """

    import warnings

    import iris
    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.ensemble_copula_coupling.ensemble_copula_coupling import \
        GeneratePercentilesFromProbabilities
    from improver.metadata.probabilistic import find_percentile_coordinate
    from improver.percentile import PercentileConverter
    from improver.spotdata.apply_lapse_rate import SpotLapseRateAdjust
    from improver.spotdata.neighbour_finding import NeighbourSelection
    from improver.spotdata.spot_extraction import SpotExtraction
    from improver.utilities.cube_extraction import extract_subcube

    neighbour_selection_method = NeighbourSelection(
        land_constraint=land_constraint,
        minimum_dz=similar_altitude).neighbour_finding_method_name()
    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, cube, new_title=new_title)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if extract_percentiles:
        extract_percentiles = [np.float32(x) for x in extract_percentiles]
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=ignore_ecc_bounds).process(
                        result, percentiles=extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                fast_percentile_method = (False if np.ma.isMaskedArray(
                    result.data) else True)
                result = PercentileConverter(
                    'realization',
                    percentiles=extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(extract_percentiles))
                if not suppress_warnings:
                    warnings.warn(msg)
        else:
            constraint = [
                '{}={}'.format(perc_coordinate.name(), extract_percentiles)
            ]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)
    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if apply_lapse_rate_correction and lapse_rate:
        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        if not lapse_rate.name() == 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")

    elif apply_lapse_rate_correction and not lapse_rate:
        if not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)
    return result
def main(argv=None):
    """Do ensemble calibration using the EnsembleCalibration plugin.
    """
    parser = ArgParser(
        description='Apply the requested ensemble calibration method using '
        'the current forecast (to be calibrated) in the form of '
        'realizations, probabilities, or percentiles, historical '
        'forecasts in the form of realizations and historical truth data '
        '(to use in calibration). The mean and variance output from the '
        'EnsembleCalibration plugin can be written to an output file '
        'if required. If the current forecast is supplied in the form of '
        'probabilities or percentiles, these are converted to realizations '
        'prior to calibration. After calibration, the mean and variance '
        'computed in the calibration are converted to match the format of the '
        'current forecast i.e. if realizations are input, realizations '
        'are output, if probabilities are input, probabilities are output, '
        'and if percentiles are input, percentiles are output.'
        'If realizations are input, realizations are regenerated using '
        'Ensemble Copula Coupling.')
    # Arguments for EnsembleCalibration
    parser.add_argument(
        'units',
        metavar='UNITS_TO_CALIBRATE_IN',
        help='The unit that calibration should be undertaken in. The current '
        'forecast, historical forecast and truth will be converted as '
        'required.')
    parser.add_argument(
        'distribution',
        metavar='DISTRIBUTION',
        choices=['gaussian', 'truncated gaussian'],
        help='The distribution that will be used for calibration. This will '
        'be dependent upon the input phenomenon. This has to be '
        'supported by the minimisation functions in '
        'ContinuousRankedProbabilityScoreMinimisers.')
    # Filepaths for current, historic and truth data.
    parser.add_argument(
        'input_filepath',
        metavar='INPUT_FILE',
        help='A path to an input NetCDF file containing the current forecast '
        'to be processed. The file provided could be in the form of '
        'realizations, probabilities or percentiles.')
    parser.add_argument(
        'historic_filepath',
        metavar='HISTORIC_DATA_FILE',
        help='A path to an input NetCDF file containing the historic '
        'forecast(s) used for calibration. The file provided must be in '
        'the form of realizations.')
    parser.add_argument(
        'truth_filepath',
        metavar='TRUTH_DATA_FILE',
        help='A path to an input NetCDF file containing the historic truth '
        'analyses used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--predictor_of_mean',
        metavar='CALIBRATE_MEAN_FLAG',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the input to calculate the calibrated mean. '
        'Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as the predictors. '
        'Default: "mean".')
    parser.add_argument(
        '--save_mean',
        metavar='MEAN_FILE',
        default=False,
        help='Option to save the mean output from EnsembleCalibration plugin. '
        'If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--save_variance',
        metavar='VARIANCE_FILE',
        default=False,
        help='Option to save the variance output from EnsembleCalibration '
        'plugin. If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients.'
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--max_iterations',
        metavar='MAX_ITERATIONS',
        type=np.int32,
        default=1000,
        help='The maximum number of iterations allowed until the minimisation '
        'has converged to a stable solution. If the maximum number of '
        'iterations is reached, but the minimisation has not yet '
        'converged to a stable solution, then the available solution is '
        'used anyway, and a warning is raised. This may be modified for '
        'testing purposes but otherwise kept fixed. If the '
        'predictor_of_mean is "realizations", then the number of '
        'iterations may require increasing, as there will be more '
        'coefficients to solve for.')
    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.input_filepath)
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Ensemble-Calibration to calculate the mean and variance.
    forecast_predictor, forecast_variance = EnsembleCalibration(
        args.distribution,
        args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations).process(current_forecast,
                                                    historic_forecast, truth)

    # If required, save the mean and variance.
    if args.save_mean:
        save_netcdf(forecast_predictor, args.save_mean)
    if args.save_variance:
        save_netcdf(forecast_variance, args.save_variance)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance, original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
Exemplo n.º 12
0
def process(cube: cli.inputcube,
            coefficients: cli.inputcube = None,
            land_sea_mask: cli.inputcube = None,
            *,
            distribution,
            realizations_count: int = None,
            randomise=False,
            random_seed: int = None,
            ignore_ecc_bounds=False,
            predictor_of_mean='mean',
            shape_parameters: cli.comma_separated_list = None):
    """Applying coefficients for Ensemble Model Output Statistics.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a cube. If no coefficients are provided the input
    forecast is returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or None.
            If none then then input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are "
            "specified by ones and sea points are specified by zeros. "
            "If not None this argument will enable land-only calibration, in "
            "which sea points are returned without the application of "
            "calibration."
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution used
            for minimising the Continuous Ranked Probability Score when
            estimating the EMOS coefficients. The distributions available are
            those supported by :data:`scipy.stats`.
        realizations_count (int):
            Optional argument to specify the number of ensemble realizations
            to produce. If the current forecast is input as probabilities or
            percentiles then this argument is used to create the requested
            number of realizations. In addition, this argument is used to
            construct the requested number of realizations from the mean and
            variance output after applying the EMOS coefficients.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seen behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the randomise option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecasts is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor_of_mean (str):
            String to specify the predictor used to calibrate the forecast
            mean. Currently the ensemble mean "mean" as the ensemble
            realization "realization" are supported as options.
        shape_parameters ():
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity. Further details about appropriate shape parameters
            are available in scipy.stats. For the truncated normal
            distribution with a lower bound of zero, as available when
            estimating EMOS coefficients, the appropriate shape parameters
            are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while no
            realizations_count are given.

    """
    import warnings

    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.ensemble_calibration.ensemble_calibration import (
        ApplyCoefficientsFromEnsembleCalibration)
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering, GeneratePercentilesFromMeanAndVariance,
        GeneratePercentilesFromProbabilities,
        GenerateProbabilitiesFromMeanAndVariance,
        RebadgePercentilesAsRealizations, ResamplePercentiles)
    from improver.metadata.probabilistic import find_percentile_coordinate

    current_forecast = cube

    if coefficients is None:
        msg = ("There are no coefficients provided for calibration. The "
               "uncalibrated forecast will be returned.")
        warnings.warn(msg)
        return current_forecast

    elif coefficients.name() != 'emos_coefficients':
        msg = ("The current coefficients cube does not have the "
               "name 'emos_coefficients'")
        raise ValueError(msg)

    if current_forecast.name() == 'emos_coefficients':
        msg = "The current forecast cube has the name 'emos_coefficients'"
        raise ValueError(msg)

    original_current_forecast = current_forecast.copy()
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ignore_ecc_bounds)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)

    # If percentiles, re-sample percentiles and then re-badge.
    # If probabilities, generate percentiles and then re-badge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not realizations_count:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The realizations_count "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "realizations_count must be defined.".format(
                    input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=realizations_count)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not realizations_count:
        realizations_count = len(current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(predictor=predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process(
        current_forecast, coefficients, landsea_mask=land_sea_mask)

    if shape_parameters:
        shape_parameters = [np.float32(x) for x in shape_parameters]

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                calibrated_predictor, calibrated_variance,
                original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                calibrated_predictor,
                calibrated_variance,
                original_current_forecast,
                percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                calibrated_predictor,
                calibrated_variance,
                original_current_forecast,
                no_of_percentiles=realizations_count)
        result = EnsembleReordering().process(percentiles,
                                              current_forecast,
                                              random_ordering=randomise,
                                              random_seed=random_seed)
    return result