def _calculate_shower_condition(self, shape: Tuple) -> ndarray:
    """Build the deterministic "precipitation is showery" field.

    Starts from an array of ones and multiplies it, cube by cube, with a
    0/1 array marking where that cube's probability condition holds.

    Args:
        shape: Shape of the array of ones the calculation starts from.

    Returns:
        The product of all per-cube 0/1 condition arrays, cast to
        FLOAT_DTYPE.

    Raises:
        ValueError: If a cube has no slice at the diagnostic threshold
            configured for it in ``self.tree``.
    """
    showery = np.ones(shape, dtype=FLOAT_DTYPE)

    for prob_cube in self.cubes:
        name = get_diagnostic_cube_name_from_probability_name(prob_cube.name())
        node = self.tree[name]
        required = node["diagnostic_threshold"]

        # Select the single threshold slice this diagnostic is tested at;
        # isclose avoids exact floating point comparison of threshold values.
        at_threshold = iris.Constraint(
            coord_values={name: lambda cell: np.isclose(cell.point, required)}
        )
        selected = prob_cube.extract(at_threshold)
        if selected is None:
            raise ValueError(
                "Cube {} does not contain required threshold {}".format(
                    prob_cube.name(), required
                )
            )

        limit = node["probability_threshold"]
        if node["operator"] == "above":
            satisfied = selected.data >= limit
        else:
            satisfied = selected.data < limit

        # Any cube whose condition fails at a point zeroes that point.
        showery = showery * np.where(satisfied, 1, 0)

    return showery.astype(FLOAT_DTYPE)
def check_probability_cube_metadata(self, cube: Cube) -> None:
    """Check the metadata that is specific to probability cubes.

    Problems are recorded by appending messages to ``self.errors``. When
    the cube name can be parsed, the diagnostic name is stored on
    ``self.diagnostic``.

    Args:
        cube: The probability cube to check.
    """
    if cube.units != "1":
        self.errors.append(
            f"Expected units of 1 on probability data, got {cube.units}"
        )

    cube_name = cube.name()
    try:
        self.diagnostic = get_diagnostic_cube_name_from_probability_name(cube_name)
    except ValueError as cause:
        # The probability name is not valid; record why and carry on.
        self.errors.append(str(cause))

    # NOTE(review): this lookup is not guarded like the one above - confirm
    # whether it can also raise for an invalid probability name.
    expected = get_threshold_coord_name_from_probability_name(cube_name)

    if cube.coords(expected):
        # Threshold coordinate is present under the expected name.
        self.check_threshold_coordinate_properties(cube_name, cube.coord(expected))
        return

    prefix = f"Cube does not have expected threshold coord '{expected}'; "
    try:
        found_name = find_threshold_coordinate(cube).name()
    except CoordinateNotFoundError:
        all_names = [coord.name() for coord in cube.coords()]
        self.errors.append(
            prefix
            + f"no coord with var_name='threshold' found in all coords: {all_names}"
        )
        return

    # A threshold coordinate exists but is misnamed: report it, then still
    # check its properties.
    self.errors.append(prefix + f"threshold coord has incorrect name '{found_name}'")
    self.check_threshold_coordinate_properties(cube_name, cube.coord(found_name))
def test_in_vicinity(self):
    """Test a vicinity probability name yields the diagnostic name with
    its "_in_vicinity" suffix retained"""
    expected = "precipitation_rate" + "_in_vicinity"
    probability_name = f"probability_of_{expected}_above_threshold"
    self.assertEqual(
        get_diagnostic_cube_name_from_probability_name(probability_name),
        expected,
    )
def process(
    self, cube_list, new_diagnostic_name, broadcast_to_threshold=False,
):
    """
    Combine a list of cubes into a single cube by multiplying their data.
    Metadata for the combined cube is taken from the first cube in the
    list.

    Args:
        cube_list (iris.cube.CubeList or list):
            List of cubes to combine.
        new_diagnostic_name (str):
            New name for the combined diagnostic.  This should be the
            diagnostic name, eg rainfall_rate or
            rainfall_rate_in_vicinity, rather than the name of the
            probabilistic output cube.
        broadcast_to_threshold (bool):
            True if the first cube has a threshold coordinate to which the
            following cube(s) need(s) to be broadcast prior to combining
            data.

    Returns:
        iris.cube.Cube:
            Cube containing the combined data.

    Raises:
        ValueError: If the cube_list contains fewer than two cubes.
        TypeError: If combining data results in float64 data.
    """
    if len(cube_list) < 2:
        raise ValueError("Expecting 2 or more cubes in cube_list")

    if broadcast_to_threshold:
        cube_list = self._setup_coords_for_broadcast(cube_list)

    self._check_dimensions_match(
        cube_list, comparators=[eq, self._coords_are_broadcastable]
    )
    result = self._combine_cube_data(cube_list)

    output_name = new_diagnostic_name
    if broadcast_to_threshold:
        source_name = cube_list[0].name()
        source_diagnostic = get_diagnostic_cube_name_from_probability_name(
            source_name
        )

        # The threshold coordinate takes the new diagnostic's name without
        # any vicinity suffix; var_name is set back to "threshold" after
        # the rename.
        threshold_name = new_diagnostic_name.replace("_in_vicinity", "")
        result.coord(var_name="threshold").rename(threshold_name)
        result.coord(threshold_name).var_name = "threshold"

        # Swap the diagnostic part of the probabilistic name for the new
        # diagnostic name, leaving the rest of the name as it was.
        output_name = source_name.replace(source_diagnostic, new_diagnostic_name)

    result.rename(output_name)
    return result
def process(
    self, cube_list: Union[List[Cube], CubeList], new_diagnostic_name: str
) -> Cube:
    """
    Combine a list of cubes into a single cube by multiplying their data.
    Metadata for the combined cube is taken from the first cube in the
    list.

    Args:
        cube_list:
            List of cubes to combine.
        new_diagnostic_name:
            New name for the combined diagnostic.  This should be the
            diagnostic name, eg rainfall_rate or
            rainfall_rate_in_vicinity, rather than the name of the
            probabilistic output cube.

    Returns:
        Cube containing the combined data.

    Raises:
        ValueError: If the cube_list contains fewer than two cubes.
        TypeError: If combining data results in float64 data.
    """
    if len(cube_list) < 2:
        raise ValueError("Expecting 2 or more cubes in cube_list")

    if self.broadcast_to_threshold:
        cube_list = self._setup_coords_for_broadcast(cube_list)

    self._check_dimensions_match(
        cube_list, comparators=[eq, self._coords_are_broadcastable]
    )
    result = self._combine_cube_data(cube_list)

    first_cube = cube_list[0]
    # Name without the in_vicinity component; used for the threshold
    # coordinate and when updating cell methods.
    base_name = new_diagnostic_name.replace("_in_vicinity", "")
    source_name = first_cube.name()
    output_name = new_diagnostic_name

    if self.broadcast_to_threshold:
        source_diagnostic = get_diagnostic_cube_name_from_probability_name(
            source_name
        )
        # The threshold coordinate takes the combined diagnostic's base
        # name; var_name is set back to "threshold" after the rename.
        result.coord(var_name="threshold").rename(base_name)
        result.coord(base_name).var_name = "threshold"
        output_name = source_name.replace(source_diagnostic, new_diagnostic_name)

    # Cell methods that mention the variable name must follow the rename.
    source_methods = first_cube.cell_methods
    if source_methods:
        result.cell_methods = self._update_cell_methods(
            source_methods, source_name, base_name
        )

    result.rename(output_name)
    return result
def split_forecasts_and_truth(cubes, truth_attribute):
    """
    A common utility for splitting the various inputs cubes required for
    calibration CLIs. These are generally the forecast cubes, historic
    truths, and in some instances a land-sea mask is also required.

    Args:
        cubes (list):
            A list of input cubes which will be split into relevant groups.
            These include the historical forecasts, in the format supported
            by the calibration CLIs, and the truth cubes.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes. The string is split at
            the first "=" only, so the value may itself contain "=".

    Returns:
        (tuple): tuple containing:
            **forecast** (iris.cube.Cube):
                A cube containing all the historic forecasts.
            **truth** (iris.cube.Cube):
                A cube containing all the truth data.
            **land_sea_mask** (iris.cube.Cube or None):
                If found within the input cubes list a land-sea mask will
                be returned, else None is returned.

    Raises:
        ValueError:
            An unexpected number of distinct cube names were passed in.
        ValueError:
            truth_attribute does not contain "=".
        IOError:
            More than one cube was identified as a land-sea mask.
        IOError:
            Missing truth or historical forecast in input cubes.
    """
    # Group the inputs by diagnostic name; cubes whose name is not a valid
    # probability name are grouped under their own name.
    cubes_by_name = {}
    for cube in cubes:
        try:
            cube_name = get_diagnostic_cube_name_from_probability_name(cube.name())
        except ValueError:
            cube_name = cube.name()
        cubes_by_name.setdefault(cube_name, []).append(cube)

    if len(cubes_by_name) == 1:
        # Only one group - all forecast/truth cubes
        land_sea_mask = None
        (diag_name,) = cubes_by_name
    elif len(cubes_by_name) == 2:
        # Two groups - the smaller one should be the land-sea mask and must
        # hold exactly one cube, since the forecast/truth group needs more
        # than that. sorted() is stable, so ties keep insertion order.
        landsea_name, diag_name = sorted(
            cubes_by_name, key=lambda name: len(cubes_by_name[name])
        )
        if len(cubes_by_name[landsea_name]) != 1:
            raise IOError("Expected one cube for land-sea mask.")
        land_sea_mask = cubes_by_name[landsea_name][0]
    else:
        raise ValueError("Must have cubes with 1 or 2 distinct names.")

    # Split at the first "=" only so attribute values containing "=" work;
    # a string with no "=" still raises ValueError on unpacking.
    truth_key, truth_value = truth_attribute.split("=", 1)

    # split non-land_sea_mask cubes on forecast vs truth
    inputs = {"truth": [], "historical forecast": []}
    for cube in cubes_by_name[diag_name]:
        is_truth = cube.attributes.get(truth_key) == truth_value
        inputs["truth" if is_truth else "historical forecast"].append(cube)

    missing_inputs = " and ".join(k for k, v in inputs.items() if not v)
    if missing_inputs:
        raise IOError(f"Missing {missing_inputs} input.")

    truth = MergeCubes()(inputs["truth"])
    forecast = MergeCubes()(inputs["historical forecast"])
    return forecast, truth, land_sea_mask
def test_error_not_probability(self):
    """Test a ValueError is raised when the name passed in is not a
    probability cube name"""
    self.assertRaises(
        ValueError,
        get_diagnostic_cube_name_from_probability_name,
        "lwe_precipitation_rate",
    )
def test_basic(self):
    """Test the diagnostic name is recovered from a point probability
    field name"""
    expected = "air_temperature"
    probability_name = "probability_of_" + expected + "_above_threshold"
    actual = get_diagnostic_cube_name_from_probability_name(probability_name)
    self.assertEqual(actual, expected)