def test_valid_single_coord_string_for_time(self):
    """Test that the plugin handles time being the collapse_coord that is
    passed in as a string."""
    # Build a second cube at a later validity time so that merging with
    # self.cube yields a cube with a time dimension to collapse.
    extra_data = np.array([[list(range(1, 12, 1))] * 11] * 3).astype(np.float32)
    extra_data.resize((3, 11, 11))
    later_cube = set_up_variable_cube(
        extra_data,
        time=datetime(2017, 11, 11, 4, 0),
        frt=datetime(2017, 11, 11, 0, 0),
        realizations=[0, 1, 2],
    )
    merged_cube = iris.cube.CubeList([self.cube, later_cube]).merge_cube()

    result = PercentileConverter("time").process(merged_cube)

    # Percentile values at the first grid point.
    self.assertArrayAlmostEqual(
        result.data[:, 0, 0, 0], self.default_percentiles * 0.01
    )
    # Leading coordinate is the percentile coordinate, expressed in %.
    leading_coord = result.coords()[0]
    self.assertEqual(leading_coord.name(), "percentile")
    self.assertEqual(leading_coord.units, "%")
    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord("percentile").points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100],
    )
    # The collapsed time dimension is replaced by 15 percentiles.
    self.assertEqual(result.data.shape, (15, 3, 11, 11))
def test_use_with_masked_data(self):
    """Test that the plugin handles masked data, this requiring the option
    fast_percentile_method=False."""
    # Mask alternating interior longitude columns so the fast numpy
    # percentile path cannot be used.
    mask = np.zeros((3, 11, 11))
    mask[:, :, 1:-1:2] = 1
    cube = self.cube.copy(data=np.ma.array(self.cube.data, mask=mask))

    plugin = PercentileConverter("longitude", fast_percentile_method=False)
    result = plugin.process(cube)

    # Percentile values at the first grid point.
    self.assertArrayAlmostEqual(
        result.data[:, 0, 0], self.default_percentiles * 0.1
    )
    # Leading coordinate is the percentile coordinate, expressed in %.
    leading_coord = result.coords()[0]
    self.assertEqual(leading_coord.name(), "percentile")
    self.assertEqual(leading_coord.units, "%")
    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord("percentile").points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100],
    )
    # The collapsed longitude dimension is replaced by 15 percentiles.
    self.assertEqual(result.data.shape, (15, 3, 11))
def test_valid_single_coord_string(self):
    """
    Test that the plugin handles a valid collapse_coord passed in as a
    string.
    """
    result = PercentileConverter('longitude').process(self.cube)

    # Percentile values at the first grid point.
    self.assertArrayAlmostEqual(
        result.data[:, 0, 0, 0], self.default_percentiles * 0.1)

    # Leading coordinate is named after the collapsed coordinate.
    self.assertEqual(
        result.coords()[0].name(), 'percentile_over_longitude')

    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord('percentile_over_longitude').points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100])

    # Longitude collapses to length 1; 15 percentiles lead the shape.
    self.assertEqual(result.data.shape, (15, 3, 1, 11))

    # Demoted longitude coordinate exists as scalar with bounds.
    self.assertArrayEqual(
        result.coord('longitude').bounds, [[-180., 180.]])
def test_valid_multi_coord_string_list(self):
    """
    Test that the plugin handles a valid list of collapse_coords passed in
    as a list of strings.
    """
    result = PercentileConverter(
        ['longitude', 'latitude']).process(self.cube)

    # Percentile values over the collapsed horizontal domain.
    self.assertArrayAlmostEqual(
        result.data[:, 0, 0],
        [0., 0., 1., 2., 2., 3., 4., 5., 6., 7., 8., 8., 9., 10., 10.])

    # Leading coordinate is named after both collapsed coordinates.
    self.assertEqual(
        result.coords()[0].name(), 'percentile_over_latitude_longitude')

    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord('percentile_over_latitude_longitude').points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100])

    # Both horizontal dimensions collapse to a single point.
    self.assertEqual(result.data.shape, (15, 3, 1))

    # Demoted dimension coordinates exist as scalars with bounds.
    self.assertArrayEqual(
        result.coord('longitude').bounds, [[-180., 180.]])
    self.assertArrayEqual(
        result.coord('latitude').bounds, [[-90., 90.]])
def test_unavailable_collapse_coord(self):
    """Test that the plugin handles a collapse_coord that is not
    available in the cube."""
    plugin = PercentileConverter("not_a_coordinate")
    # Collapsing a coordinate absent from the cube must raise.
    with self.assertRaisesRegex(CoordinateNotFoundError, "Coordinate "):
        plugin.process(self.cube)
def test_single_percentile(self):
    """Test dimensions of output at median only"""
    plugin = PercentileConverter(["realization"], percentiles=[50])
    result = plugin.process(self.cube)

    coord_names = get_coord_names(result)
    # Realization is collapsed away and replaced by a percentile
    # coordinate, which for a single percentile is scalar (non-dimension).
    self.assertNotIn("realization", coord_names)
    self.assertIn("percentile", coord_names)
    self.assertNotIn("percentile", get_dim_coord_names(result))
def test_invalid_collapse_coord_type(self):
    """Test that the plugin handles invalid collapse_coord type."""
    # A cube is not a valid coordinate specifier; construction must raise.
    with self.assertRaisesRegex(TypeError, "collapse_coord is "):
        PercentileConverter(self.cube)
def test_valid_multi_coord_string_list(self):
    """Test that the plugin handles a valid list of collapse_coords passed
    in as a list of strings."""
    plugin = PercentileConverter(["longitude", "latitude"])
    result = plugin.process(self.cube)

    # Percentile values over the collapsed horizontal domain.
    expected_values = [
        0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0,
        6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 10.0,
    ]
    self.assertArrayAlmostEqual(result.data[:, 0], expected_values)

    # Leading coordinate is the percentile coordinate, expressed in %.
    leading_coord = result.coords()[0]
    self.assertEqual(leading_coord.name(), "percentile")
    self.assertEqual(leading_coord.units, "%")
    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord("percentile").points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100],
    )

    # Both horizontal dimensions collapse away entirely.
    self.assertEqual(result.data.shape, (15, 3))
def test_valid_single_coord_string(self):
    """Test that the plugin handles a valid collapse_coord passed in as a
    string."""
    result = PercentileConverter("longitude").process(self.cube)

    # Percentile values at the first grid point.
    self.assertArrayAlmostEqual(
        result.data[:, 0, 0], self.default_percentiles * 0.1
    )
    # Leading coordinate is the percentile coordinate, expressed in %.
    leading_coord = result.coords()[0]
    self.assertEqual(leading_coord.name(), "percentile")
    self.assertEqual(leading_coord.units, "%")
    # Percentile coordinate carries the default percentile set.
    self.assertArrayEqual(
        result.coord("percentile").points,
        [0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100],
    )
    # The collapsed longitude dimension is replaced by 15 percentiles.
    self.assertEqual(result.data.shape, (15, 3, 11))
def process(cube, coordinates=None, ecc_bounds_warning=False,
            percentiles=None, no_of_percentiles=None):
    r"""Collapses cube coordinates and calculate percentiled data.

    Calculate percentiled data over a given coordinate by collapsing that
    coordinate. Typically used to convert realization data into percentiled
    data, but may calculate over any dimension coordinate. Alternatively
    calling this with a dataset containing probabilities will convert those
    to percentiles using the ensemble copula coupling plugin. If no
    particular percentiles are given at which to calculate values and no
    'number of percentiles' to calculate are specified, the following
    defaults will be used.
    '[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]'

    Args:
        cube (iris.cube.Cube):
            A Cube for processing.
        coordinates (str or list):
            Coordinate or coordinates over which to collapse data and
            calculate percentiles; e.g. 'realization' or 'latitude
            longitude'. This argument must be provided when collapsing a
            coordinate or coordinates to create percentiles, but is
            redundant when converting probabilities to percentiles and may
            be omitted. This coordinate(s) will be removed and replaced by
            a percentile coordinate.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC
            bounds range, raises a warning rather than an exception.
            Default is False.
        percentiles (list or None):
            Optional definition of percentiles at which to calculate data.
            Default is None.
        no_of_percentiles (int):
            Optional definition of the number of percentiles to be
            generated, these distributed regularly with the aim of dividing
            into blocks of equal probability.
            Default is None.

    Returns:
        result (iris.cube.Cube):
            The processed Cube.

    Raises:
        ValueError: If the cube name does not contain 'probability_of\_'
            and coordinates isn't used.

    Warns:
        Warning: If 'probability_of\_' is in the cube name and coordinates
            is used.
    """
    # An explicit number of percentiles overrides any explicit percentile
    # list: the set is regenerated to divide probability space evenly.
    if no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(no_of_percentiles,
                                                sampling="quantile")
    # TODO: Correct when formal cf-standards exists
    # Probability cubes are identified by naming convention and routed
    # through ECC rather than coordinate collapse.
    if 'probability_of_' in cube.name():
        result = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning).process(
                cube, percentiles=percentiles)
        if coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")
    else:
        if not coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            coordinates, percentiles=percentiles,
            fast_percentile_method=fast_percentile_method).process(cube)
    return result
def process(
    neighbour_cube: cli.inputcube,
    cube: cli.inputcube,
    lapse_rate: cli.inputcube = None,
    *,
    apply_lapse_rate_correction=False,
    land_constraint=False,
    similar_altitude=False,
    extract_percentiles: cli.comma_separated_list = None,
    ignore_ecc_bounds=False,
    new_title: str = None,
    suppress_warnings=False,
):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real
    altitude and that of the grid point from which the temperature data is
    extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate (iris.cube.Cube):
            Optional cube containing temperature lapse rates. If this cube is
            provided and a screen temperature cube is being processed, the
            lapse rates will be used to adjust the temperature to better
            represent each spot's site-altitude.
        apply_lapse_rate_correction (bool):
            Use to apply a lapse-rate correction to screen temperature data so
            that the data are a better match the altitude of the spot site for
            which they have been extracted.
        land_constraint (bool):
            Use to select the nearest-with-land-constraint neighbour-selection
            method from the neighbour_cube. This means that the grid points
            should be land points except for sites where none were found
            within the search radius when the neighbour cube was created. May
            be used with similar_altitude.
        similar_altitude (bool):
            Use to select the nearest-with-height-constraint
            neighbour-selection method from the neighbour_cube. These are grid
            points that were found to be the closest in altitude to the spot
            site within the search radius defined when the neighbour cube was
            created. May be used with land_constraint.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values, data
            corresponding to those percentiles will be returned. For example
            "25, 50, 75" will result in the 25th, 50th and 75th percentiles
            being returned from a cube of probabilities, percentiles or
            realizations. Deterministic input data will raise a warning
            message. Note that for percentiles inputs, the desired
            percentile(s) must exist in the input cube.
        ignore_ecc_bounds (bool):
            Demotes exceptions where calculated percentiles are outside the
            ECC bounds range to warnings.
        new_title (str):
            New title for the spot-extracted data. If None, this attribute is
            removed from the output cube since it has no prescribed standard
            and may therefore contain grid information that is no longer
            correct after spot-extraction.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not
            required.

    Returns:
        iris.cube.Cube:
            Cube of spot data.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
            If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.
    """
    # Imports are deferred to function scope, following the CLI convention
    # of keeping module import cheap.
    import warnings

    import iris
    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles,
    )
    from improver.metadata.probabilistic import find_percentile_coordinate
    from improver.percentile import PercentileConverter
    from improver.spotdata.apply_lapse_rate import SpotLapseRateAdjust
    from improver.spotdata.neighbour_finding import NeighbourSelection
    from improver.spotdata.spot_extraction import SpotExtraction
    from improver.utilities.cube_extraction import extract_subcube

    # Resolve the neighbour-selection method name from the chosen
    # constraints, then extract the spot values from the gridded cube.
    neighbour_selection_method = NeighbourSelection(
        land_constraint=land_constraint,
        minimum_dz=similar_altitude).neighbour_finding_method_name()
    result = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)(
            neighbour_cube, cube, new_title=new_title)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if extract_percentiles:
        extract_percentiles = [np.float32(x) for x in extract_percentiles]
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            # No percentile coordinate: convert probabilities, collapse
            # realizations, or warn for deterministic input.
            if "probability_of_" in result.name():
                result = ConvertProbabilitiesToPercentiles(
                    ecc_bounds_warning=ignore_ecc_bounds)(
                        result, percentiles=extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords("realization", dim_coords=True):
                # The fast numpy percentile method cannot handle masked data.
                fast_percentile_method = not np.ma.isMaskedArray(result.data)
                result = PercentileConverter(
                    "realization",
                    percentiles=extract_percentiles,
                    fast_percentile_method=fast_percentile_method,
                )(result)
            else:
                msg = ("Diagnostic cube is not a known probabilistic type. "
                       "The {} percentile could not be extracted. Extracting "
                       "data from the cube including any leading "
                       "dimensions.".format(extract_percentiles))
                if not suppress_warnings:
                    warnings.warn(msg)
        else:
            # A percentile coordinate already exists; subset the requested
            # percentile value(s) from it.
            constraint = [
                "{}={}".format(perc_coordinate.name(), extract_percentiles)
            ]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ("The percentile diagnostic cube does not contain the "
                       "requested percentile value. Requested {}, available "
                       "{}".format(extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)

    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if apply_lapse_rate_correction and lapse_rate:
        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        if not lapse_rate.name() == "air_temperature_lapse_rate":
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin(result, neighbour_cube, lapse_rate)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")
    elif apply_lapse_rate_correction and not lapse_rate:
        if not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop("model_grid_hash", None)
    return result
def process(
    cube: cli.inputcube,
    *,
    coordinates: cli.comma_separated_list = None,
    percentiles: cli.comma_separated_list = None,
    ignore_ecc_bounds=False,
):
    r"""Collapses cube coordinates and calculate percentiled data.

    Calculate percentiled data over a given coordinate by collapsing that
    coordinate. Typically used to convert realization data into percentiled
    data, but may calculate over any dimension coordinate. Alternatively
    calling this with a dataset containing probabilities will convert those
    to percentiles using the ensemble copula coupling plugin. If no
    particular percentiles are given at which to calculate values and no
    'number of percentiles' to calculate are specified, the following
    defaults will be used.
    '[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]'

    Args:
        cube (iris.cube.Cube):
            A Cube for processing.
        coordinates (str or list):
            Coordinate or coordinates over which to collapse data and
            calculate percentiles; e.g. 'realization' or 'latitude,longitude'.
            This argument must be provided when collapsing a coordinate or
            coordinates to create percentiles, but is redundant when
            converting probabilities to percentiles and may be omitted. This
            coordinate(s) will be removed and replaced by a percentile
            coordinate.
        percentiles (list):
            Optional definition of percentiles at which to calculate data.
        ignore_ecc_bounds (bool):
            If True, where calculated percentiles are outside the ECC bounds
            range, raises a warning rather than an exception.

    Returns:
        iris.cube.Cube:
            The processed Cube.

    Raises:
        ValueError:
            If the cube name does not contain 'probability_of\_' and
            coordinates isn't used.

    Warns:
        Warning:
            If 'probability_of\_' is in the cube name and coordinates is
            used.
    """
    # Imports are deferred to function scope, following the CLI convention
    # of keeping module import cheap.
    import warnings

    import numpy as np

    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles,
    )
    from improver.metadata.probabilistic import is_probability
    from improver.percentile import PercentileConverter

    # CLI list arguments arrive as strings; coerce to float for the plugins.
    if percentiles is not None:
        percentiles = [float(p) for p in percentiles]

    if is_probability(cube):
        # Probability inputs are routed through ECC rather than collapse.
        result = ConvertProbabilitiesToPercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)(cube,
                                                  percentiles=percentiles)
        if coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")
    else:
        if not coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            coordinates,
            percentiles=percentiles,
            fast_percentile_method=fast_percentile_method,
        )(cube)
    return result
def main(argv=None):
    """Load in arguments and get going.

    Command-line entry point: parses arguments, loads the input cube,
    converts it to percentiled data and saves the result to NetCDF.
    """
    parser = ArgParser(
        description="Calculate percentiled data over a given coordinate by "
        "collapsing that coordinate. Typically used to convert realization "
        "data into percentiled data, but may calculate over any "
        "dimension coordinate. Alternatively, calling this CLI with a dataset"
        " containing probabilities will convert those to percentiles using "
        "the ensemble copula coupling plugin. If no particular percentiles "
        "are given at which to calculate values and no 'number of percentiles'"
        " to calculate are specified, the following defaults will be used: "
        "[0, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 100]")
    parser.add_argument("input_filepath", metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("--coordinates", metavar="COORDINATES_TO_COLLAPSE",
                        nargs="+",
                        help="Coordinate or coordinates over which to collapse"
                        " data and calculate percentiles; e.g. "
                        "'realization' or 'latitude longitude'. This argument "
                        "must be provided when collapsing a coordinate or "
                        "coordinates to create percentiles, but is redundant "
                        "when converting probabilities to percentiles and may "
                        "be omitted. This coordinate(s) will be removed "
                        "and replaced by a percentile coordinate.")
    parser.add_argument('--ecc_bounds_warning', default=False,
                        action='store_true',
                        help='If True, where calculated percentiles are '
                        'outside the ECC bounds range, raise a warning '
                        'rather than an exception.')
    # --percentiles and --no-of-percentiles are mutually exclusive ways of
    # specifying the output percentile set.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--percentiles", metavar="PERCENTILES", nargs="+",
                       default=None, type=float,
                       help="Optional definition of percentiles at which to "
                       "calculate data, e.g. --percentiles 0 33.3 66.6 100")
    group.add_argument('--no-of-percentiles', default=None, type=int,
                       metavar='NUMBER_OF_PERCENTILES',
                       help="Optional definition of the number of percentiles "
                       "to be generated, these distributed regularly with the "
                       "aim of dividing into blocks of equal probability.")

    args = parser.parse_args(args=argv)

    cube = load_cube(args.input_filepath)
    percentiles = args.percentiles
    # A requested number of percentiles overrides an explicit list; the set
    # is generated to divide probability space evenly.
    if args.no_of_percentiles is not None:
        percentiles = choose_set_of_percentiles(args.no_of_percentiles,
                                                sampling="quantile")
    # TODO: Correct when formal cf-standards exists
    # Probability cubes are identified by naming convention and routed
    # through ECC rather than coordinate collapse.
    if 'probability_of_' in cube.name():
        if args.coordinates:
            warnings.warn("Converting probabilities to percentiles. The "
                          "provided COORDINATES_TO_COLLAPSE variable will "
                          "not be used.")
        result = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning).process(
                cube, percentiles=percentiles)
    else:
        if not args.coordinates:
            raise ValueError("To collapse a coordinate to calculate "
                             "percentiles, a coordinate or list of "
                             "coordinates must be provided.")

        # Switch back to use the slow scipy method if the cube contains masked
        # data which the numpy method cannot handle.
        fast_percentile_method = True

        if np.ma.is_masked(cube.data):
            # Check for masked points:
            fast_percentile_method = False
        elif np.ma.isMaskedArray(cube.data):
            # Check if we have a masked array with an empty mask. If so,
            # replace it with a non-masked array:
            cube.data = cube.data.data

        result = PercentileConverter(
            args.coordinates, percentiles=percentiles,
            fast_percentile_method=fast_percentile_method).process(cube)

    save_netcdf(result, args.output_filepath)
def main(argv=None):
    """Load in arguments and start spotdata extraction process.

    Command-line entry point: parses arguments, extracts spot data from a
    gridded diagnostic, optionally extracts percentiles and applies a
    lapse-rate correction, then saves the result to NetCDF.
    """
    parser = ArgParser(
        description="Extract diagnostic data from gridded fields for spot data"
        " sites. It is possible to apply a temperature lapse rate adjustment"
        " to temperature data that helps to account for differences between"
        " the spot sites real altitude and that of the grid point from which"
        " the temperature data is extracted.")

    # Input and output files required.
    parser.add_argument("neighbour_filepath", metavar="NEIGHBOUR_FILEPATH",
                        help="Path to a NetCDF file of spot-data neighbours. "
                        "This file also contains the spot site information.")
    parser.add_argument("diagnostic_filepath", metavar="DIAGNOSTIC_FILEPATH",
                        help="Path to a NetCDF file containing the diagnostic "
                        "data to be extracted.")
    parser.add_argument("temperature_lapse_rate_filepath",
                        metavar="LAPSE_RATE_FILEPATH", nargs='?',
                        help="(Optional) Filepath to a NetCDF file containing"
                        " temperature lapse rates. If this cube is provided,"
                        " and a screen temperature cube is being processed,"
                        " the lapse rates will be used to adjust the"
                        " temperatures to better represent each spot's"
                        " site-altitude.")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH",
                        help="The output path for the resulting NetCDF")

    parser.add_argument(
        "--apply_lapse_rate_correction", default=False, action="store_true",
        help="If the option is set and a lapse rate cube has been "
        "provided, extracted screen temperatures will be adjusted to "
        "better match the altitude of the spot site for which they have "
        "been extracted.")

    method_group = parser.add_argument_group(
        title="Neighbour finding method",
        description="If none of these options are set, the nearest grid point "
        "to a spot site will be used without any other constraints.")
    method_group.add_argument(
        "--land_constraint", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a land constraint. This means"
        " that the grid points should be land points except for sites where"
        " none were found within the search radius when the neighbour cube was"
        " created. May be used with minimum_dz.")
    method_group.add_argument(
        "--minimum_dz", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a minimum height difference"
        " constraint. These are grid points that were found to be the closest"
        " in altitude to the spot site within the search radius defined when"
        " the neighbour cube was created. May be used with land_constraint.")

    percentile_group = parser.add_argument_group(
        title="Extract percentiles",
        description="Extract particular percentiles from probabilistic, "
        "percentile, or realization inputs. If deterministic input is "
        "provided a warning is raised and all leading dimensions are included "
        "in the returned spot-data cube.")
    percentile_group.add_argument(
        "--extract_percentiles", default=None, nargs='+', type=int,
        help="If set to a percentile value or a list of percentile values, "
        "data corresponding to those percentiles will be returned. For "
        "example setting '--extract_percentiles 25 50 75' will result in the "
        "25th, 50th, and 75th percentiles being returned from a cube of "
        "probabilities, percentiles, or realizations. Note that for "
        "percentile inputs, the desired percentile(s) must exist in the input "
        "cube.")
    parser.add_argument(
        "--ecc_bounds_warning", default=False, action="store_true",
        help="If True, where calculated percentiles are outside the ECC "
        "bounds range, raise a warning rather than an exception.")

    meta_group = parser.add_argument_group("Metadata")
    meta_group.add_argument(
        "--metadata_json", metavar="METADATA_JSON", default=None,
        help="If provided, this JSON file can be used to modify the metadata "
        "of the returned netCDF file. Defaults to None.")

    output_group = parser.add_argument_group("Suppress Verbose output")
    # This CLI may be used to prepare data for verification without knowing the
    # form of the input, be it deterministic, realizations or probabilistic.
    # A warning is normally raised when attempting to extract a percentile from
    # deterministic data as this is not possible; the spot-extraction of the
    # entire cube is returned. When preparing data for verification we know
    # that we will produce a large number of these warnings when passing in
    # deterministic data. This option to suppress warnings is provided to
    # reduce the amount of unneeded logging information that is written out.
    output_group.add_argument(
        "--suppress_warnings", default=False, action="store_true",
        help="Suppress warning output. This option should only be used if "
        "it is known that warnings will be generated but they are not "
        "required.")

    args = parser.parse_args(args=argv)
    neighbour_cube = load_cube(args.neighbour_filepath)
    diagnostic_cube = load_cube(args.diagnostic_filepath)

    neighbour_selection_method = NeighbourSelection(
        land_constraint=args.land_constraint,
        minimum_dz=args.minimum_dz).neighbour_finding_method_name()

    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if args.extract_percentiles:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            # No percentile coordinate exists: convert probabilities,
            # collapse realizations, or warn for deterministic input.
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=args.ecc_bounds_warning).process(
                        result, percentiles=args.extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                # The fast numpy percentile method cannot handle masked data;
                # fall back to the slow method for masked arrays.
                fast_percentile_method = not np.ma.isMaskedArray(result.data)
                result = PercentileConverter(
                    'realization', percentiles=args.extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(
                           args.extract_percentiles))
                if not args.suppress_warnings:
                    warnings.warn(msg)
        else:
            # A percentile coordinate already exists; subset the requested
            # percentile value(s) from it.
            constraint = ['{}={}'.format(perc_coordinate.name(),
                                         args.extract_percentiles)]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(args.extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)

    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if (args.temperature_lapse_rate_filepath and
            args.apply_lapse_rate_correction):
        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        lapse_rate_cube = load_cube(args.temperature_lapse_rate_filepath)
        if not lapse_rate_cube.name() == 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        else:
            msg = ("A lapse rate cube was provided, but the height of "
                   "the temperature data does not match that of the data used "
                   "to calculate the lapse rates. As such the temperatures "
                   "were not adjusted with the lapse rates.")
            if not args.suppress_warnings:
                warnings.warn(msg)
    elif (args.apply_lapse_rate_correction and
          not args.temperature_lapse_rate_filepath):
        msg = ("A lapse rate cube was not provided, but the option to "
               "apply the lapse rate correction was enabled. No lapse rate "
               "correction could be applied.")
        if not args.suppress_warnings:
            warnings.warn(msg)

    # Modify final metadata as described by provided JSON file.
    if args.metadata_json:
        with open(args.metadata_json, 'r') as input_file:
            metadata_dict = json.load(input_file)
        result = amend_metadata(result, **metadata_dict)
    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)

    # Save the spot data cube.
    save_netcdf(result, args.output_filepath)
def process(neighbour_cube, diagnostic_cube, lapse_rate_cube=None,
            apply_lapse_rate_correction=False, land_constraint=False,
            minimum_dz=False, extract_percentiles=None,
            ecc_bounds_warning=False, metadata_dict=None,
            suppress_warnings=False):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real altitude
    and that of the grid point from which the temperature data is extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        diagnostic_cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate_cube (iris.cube.Cube):
            Cube containing temperature lapse rates. If this cube is provided
            and a screen temperature cube is being processed, the lapse rates
            will be used to adjust the temperature to better represent each
            spot's site-altitude.
        apply_lapse_rate_correction (bool):
            If True, and a lapse rate cube has been provided, extracted screen
            temperature will be adjusted to better match the altitude of the
            spot site for which they have been extracted.
            Default is False.
        land_constraint (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using a land constraint. This
            means that the grid points should be land points except for sites
            where none were found within the search radius when the neighbour
            cube was created. May be used with minimum_dz.
            Default is False.
        minimum_dz (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using the minimum height
            difference constraint. These are grid points that were found to
            be the closest in altitude to the spot site within the search
            radius defined when the neighbour cube was created.
            May be used with land_constraint.
            Default is False.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values, data
            corresponding to those percentiles will be returned. For example
            [25, 50, 75] will result in the 25th, 50th and 75th percentiles
            being returned from a cube of probabilities, percentiles or
            realizations. Note that for percentiles inputs, the desired
            percentile(s) must exist in the input cube.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC bounds
            range, raises a warning rather than an exception.
            Default is False.
        metadata_dict (dict):
            If provided, this dictionary can be used to modify the metadata
            of the returned cube.
            Default is None.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not
            required.
            Default is False.

    Returns:
        result (iris.cube.Cube):
            The processed cube.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
            If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """
    neighbour_selection_method = NeighbourSelection(
        land_constraint=land_constraint,
        minimum_dz=minimum_dz).neighbour_finding_method_name()
    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the
    # spot-extraction to minimise processing time; usually there are far
    # fewer spot sites than grid points.
    if extract_percentiles is not None:
        result = _extract_percentiles_from_cube(
            result, extract_percentiles, ecc_bounds_warning,
            suppress_warnings)

    result = _adjust_for_lapse_rate(
        result, diagnostic_cube, neighbour_cube, lapse_rate_cube,
        apply_lapse_rate_correction, neighbour_selection_method,
        suppress_warnings)

    # Modify final metadata as described by provided JSON file.
    if metadata_dict:
        result = amend_metadata(result, **metadata_dict)

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)
    return result


def _extract_percentiles_from_cube(result, extract_percentiles,
                                   ecc_bounds_warning, suppress_warnings):
    """Extract the requested percentile(s) from the spot-extracted cube.

    Handles cubes that already carry a percentile coordinate, probability
    cubes (converted via ECC), and realization cubes (collapsed to
    percentiles). Non-probabilistic cubes are returned unchanged with a
    warning (unless suppressed).

    Raises:
        ValueError: If a percentile cube does not contain the requested
            percentile value.
    """
    try:
        perc_coordinate = find_percentile_coordinate(result)
    except CoordinateNotFoundError:
        if 'probability_of_' in result.name():
            result = GeneratePercentilesFromProbabilities(
                ecc_bounds_warning=ecc_bounds_warning).process(
                    result, percentiles=extract_percentiles)
            result = iris.util.squeeze(result)
        elif result.coords('realization', dim_coords=True):
            # The fast percentile method cannot handle masked data.
            fast_percentile_method = not np.ma.isMaskedArray(result.data)
            result = PercentileConverter(
                'realization', percentiles=extract_percentiles,
                fast_percentile_method=fast_percentile_method).process(
                    result)
        else:
            msg = ('Diagnostic cube is not a known probabilistic type. '
                   'The {} percentile could not be extracted. Extracting '
                   'data from the cube including any leading '
                   'dimensions.'.format(extract_percentiles))
            if not suppress_warnings:
                warnings.warn(msg)
    else:
        constraint = [
            '{}={}'.format(perc_coordinate.name(), extract_percentiles)]
        perc_result = extract_subcube(result, constraint)
        if perc_result is not None:
            result = perc_result
        else:
            msg = ('The percentile diagnostic cube does not contain the '
                   'requested percentile value. Requested {}, available '
                   '{}'.format(extract_percentiles,
                               perc_coordinate.points))
            raise ValueError(msg)
    return result


def _adjust_for_lapse_rate(result, diagnostic_cube, neighbour_cube,
                           lapse_rate_cube, apply_lapse_rate_correction,
                           neighbour_selection_method, suppress_warnings):
    """Apply a lapse rate adjustment to spot-extracted temperature data.

    Only acts when apply_lapse_rate_correction is True and a lapse rate cube
    has been supplied; otherwise the input cube is returned unchanged (with a
    warning if the correction was requested but no cube was provided).

    Raises:
        ValueError: If the diagnostic is not air temperature, the lapse rate
            cube is misnamed, or it lacks a height coordinate.
    """
    if apply_lapse_rate_correction and lapse_rate_cube is not None:
        if result.name() != "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        if lapse_rate_cube.name() != 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the
        # lapse rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")
    elif apply_lapse_rate_correction and lapse_rate_cube is None:
        if not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")
    return result