def process(self, cubes, coordinates=None):
        """
        Aggregate the input reliability calibration table cubes and return the
        result.

        Args:
            cubes (list or iris.cube.CubeList):
                The cube or cubes containing the reliability calibration tables
                to aggregate.
            coordinates (list or None):
                A list of coordinates over which to aggregate the reliability
                calibration table using summation. If the argument is None and
                a single cube is provided, this cube will be returned
                unchanged.

        Returns:
            iris.cube.Cube:
                The aggregated reliability calibration table cube.
        """
        # Copy to avoid mutating the caller's list when appending below.
        coordinates = [] if coordinates is None else list(coordinates)

        try:
            (cube, ) = cubes
        except ValueError:
            cubes = iris.cube.CubeList(cubes)
            self._check_frt_coord(cubes)
            cube = cubes.merge_cube()
            coordinates.append("forecast_reference_time")
        else:
            if not coordinates:
                return cube

        result = collapsed(cube, coordinates, iris.analysis.SUM)
        frt = create_unified_frt_coord(cube.coord("forecast_reference_time"))
        result.replace_coord(frt)
        return result
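The try/except around the tuple unpacking is what routes between the single-cube and multi-cube paths: unpacking raises ValueError unless the iterable holds exactly one element. A minimal, standalone sketch of the pattern (the inputs are placeholder strings, not cubes):

def describe(cubes):
    """Distinguish one input from many via tuple unpacking."""
    try:
        (cube,) = cubes  # raises ValueError unless len(cubes) == 1
    except ValueError:
        return "merge-and-aggregate path for {} cubes".format(len(cubes))
    return "single-cube path for {}".format(cube)

print(describe(["table_a"]))             # single-cube path for table_a
print(describe(["table_a", "table_b"]))  # merge-and-aggregate path for 2 cubes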
Example no. 2
    def _calculate_location_parameter_from_mean(self, optimised_coeffs):
        """
        Function to calculate the location parameter when the ensemble mean at
        each grid point is the predictor.

        Further information is available in the :mod:`module level docstring \
<improver.calibration.ensemble_calibration>`.

        Args:
            optimised_coeffs (dict):
                A dictionary containing the calibration coefficient names as
                keys with their corresponding values.

        Returns:
            numpy.ndarray:
                Location parameter calculated using the ensemble mean as the
                predictor.

        """
        forecast_predictor = collapsed(self.current_forecast,
                                       "realization",
                                       iris.analysis.MEAN)

        # Calculate location parameter = a + b*X, where X is the
        # raw ensemble mean. In this case, b = beta.
        location_parameter = (
            optimised_coeffs["alpha"] +
            optimised_coeffs["beta"] * forecast_predictor.data).astype(
                np.float32)

        return location_parameter
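The location parameter calculation is a plain linear transform of the ensemble mean. A numpy-only sketch of the same arithmetic, with invented coefficient values and a 2x2 grid of ensemble means:

import numpy as np

# Hypothetical calibration coefficients and ensemble means.
optimised_coeffs = {"alpha": np.float32(0.5), "beta": np.float32(0.9)}
ensemble_mean = np.array([[280.0, 281.0], [282.0, 283.0]], dtype=np.float32)

# location parameter = a + b*X, where X is the raw ensemble mean.
location_parameter = (
    optimised_coeffs["alpha"]
    + optimised_coeffs["beta"] * ensemble_mean
).astype(np.float32)

print(location_parameter)  # [[252.5 253.4] [254.3 255.2]]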
Example no. 3
    def collapse_mask_coord(self, cube: Cube) -> Cube:
        """
        Collapse the chosen coordinate with the available weights. The result
        of the neighbourhood processing is taken into account to renormalize
        any weights corresponding to a NaN in the result from neighbourhooding.
        In this case the weights are re-normalized so that we do not lose
        probability.

        Args:
            cube:
                Cube containing the array to which the square neighbourhood
                with a mask has been applied.
                Dimensions self.coord_for_masking, y and x.

        Returns:
            Cube containing the weighted mean from neighbourhood after
            collapsing the chosen coordinate.
        """
        # Mask out any NaNs in the neighbourhood data so that Iris ignores
        # them when calculating the weighted mean.
        cube.data = ma.masked_invalid(cube.data, copy=False)
        # Collapse the coord_for_masking. Renormalization of the weights
        # happens inside the numpy function underlying the Iris method.
        result = collapsed(
            cube,
            self.coord_for_masking,
            iris.analysis.MEAN,
            weights=self.collapse_weights.data,
        )
        # Set masked invalid data points back to np.nans
        if np.ma.is_masked(result.data):
            result.data.data[result.data.mask] = np.nan
        # Remove the collapsed self.coord_for_masking from the result cube.
        result.remove_coord(self.coord_for_masking)
        return result
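Masking invalid points before the weighted mean is what keeps NaNs from contaminating the collapse. The same effect can be seen with numpy.ma alone (the values and weights here are invented):

import numpy as np
import numpy.ma as ma

data = np.array([1.0, np.nan, 3.0], dtype=np.float32)
weights = np.array([0.25, 0.5, 0.25], dtype=np.float32)

masked = ma.masked_invalid(data, copy=False)
# ma.average ignores masked entries and renormalises the remaining weights.
mean = ma.average(masked, weights=weights)
print(mean)  # 2.0: (0.25*1 + 0.25*3) / (0.25 + 0.25)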
Example no. 4
    def weighted_mean(self, cube, weights):
        """
        Blend data using a weighted mean using the weights provided.

        Args:
            cube (iris.cube.Cube):
                The cube which is being blended over self.blend_coord.
            weights (iris.cube.Cube or None):
                Cube of blending weights or None.

        Returns:
            iris.cube.Cube:
                The cube with values blended over self.blend_coord, with
                suitable weightings applied.
        """
        weights_array = self.non_percentile_weights(cube, weights)

        # Calculate the weighted average.
        cube_new = collapsed(cube,
                             self.blend_coord,
                             iris.analysis.MEAN,
                             weights=weights_array)
        cube_new.data = cube_new.data.astype(np.float32)

        return cube_new
Example no. 5
    def percentile_weighted_mean(self, cube, weights):
        """
        Blend percentile data using the weights provided.

        Args:
            cube (iris.cube.Cube):
                The cube which is being blended over self.blend_coord. Assumes
                self.blend_coord and percentile are leading coordinates (enforced
                in process).
            weights (iris.cube.Cube):
                Cube of blending weights.
        Returns:
            iris.cube.Cube:
                The cube with percentile values blended over self.blend_coord,
                with suitable weightings applied.
        """
        non_perc_slice = next(cube.slices_over(PERC_COORD))
        weights_array = self.get_weights_array(non_perc_slice, weights)
        weights_array = self._normalise_weights(weights_array)

        # Set up aggregator
        PERCENTILE_BLEND = Aggregator(
            "mean",  # Use CF-compliant cell method.
            PercentileBlendingAggregator.aggregate,
        )

        cube_new = collapsed(
            cube,
            self.blend_coord,
            PERCENTILE_BLEND,
            percentiles=cube.coord(PERC_COORD).points,
            arr_weights=weights_array,
        )

        return cube_new
Example no. 6
 def test_two_methods(self):
     """Test that a cube keeps its original cell method but another
     isn't added.
     """
     cube = self.cube
     method = iris.coords.CellMethod("test")
     cube.add_cell_method(method)
     result = collapsed(cube, "realization", iris.analysis.MEAN)
     self.assertTupleEqual(result.cell_methods, (method,))
     self.assertTrue((result.data == self.expected_data).all())
Example no. 7
 def _calculate_time_average(wind_cubes, time_coord):
     """Average input cubelist over time"""
     cube = wind_cubes.merge_cube()
     try:
         mean = collapsed(cube, "time", iris.analysis.MEAN)
     except CoordinateCollapseError:
         # collapse will fail if there is only one time point
         return cube
     mean.coord("time").points = time_coord.points
     mean.coord("time").units = time_coord.units
     return mean
Example no. 8
 def test_two_coords(self):
     """Test behaviour collapsing over 2 coordinates, including not escalating
     precision when collapsing a float coordinate (latitude)"""
     expected_data = self.cube.collapsed(
         ["realization", "latitude"], iris.analysis.MEAN
     ).data
     result = collapsed(self.cube, ["realization", "latitude"], iris.analysis.MEAN)
     self.assertTrue((result.data == expected_data).all())
     self.assertEqual(
         result.coord("latitude").dtype, self.cube.coord("latitude").dtype
     )
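Taken together, test_two_methods above and test_two_coords here pin down the wrapper's contract: the collapse must not append Iris's usual cell method, and must not promote coordinate dtypes. A minimal sketch of a wrapper satisfying both tests, assuming it simply delegates to Cube.collapsed (the real improver implementation may differ):

import warnings

def collapsed_sketch(cube, coords_to_collapse, *args, **kwargs):
    """Collapse a cube without adding a cell method or changing coord dtypes."""
    with warnings.catch_warnings():
        # Cube.collapsed warns when collapsing non-contiguous coordinates.
        warnings.simplefilter("ignore")
        new_cube = cube.collapsed(coords_to_collapse, *args, **kwargs)
    # Discard the cell method Iris appends for the collapse.
    new_cube.cell_methods = cube.cell_methods
    # Restore any coordinate dtypes that the collapse promoted.
    for coord in new_cube.coords():
        source_dtype = cube.coord(coord.name()).points.dtype
        if coord.points.dtype != source_dtype:
            coord.points = coord.points.astype(source_dtype)
    return new_cube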
Example no. 9
    def percentile_weighted_mean(self, cube, weights, perc_coord):
        """
        Blend percentile data using the weights provided.

        Args:
            cube (iris.cube.Cube):
                The cube which is being blended over self.blend_coord.
            weights (iris.cube.Cube):
                Cube of blending weights.
            perc_coord (iris.coords.DimCoord):
                The percentile coordinate for this cube.
        Returns:
            iris.cube.Cube:
                The cube with percentile values blended over self.blend_coord,
                with suitable weightings applied.
        """
        percentiles = np.array(perc_coord.points, dtype=np.float32)
        (perc_dim, ) = cube.coord_dims(perc_coord.name())

        # The iris.analysis.Aggregator moves the coordinate being
        # collapsed to index=-1 in initialisation, before the
        # aggregation method is called. This reduces by 1 the index
        # of all coordinates with an initial index higher than the
        # collapsing coordinate. As we need to know the index of
        # the percentile coordinate at a later step, if it will be
        # changed by this process, we adjust our record (perc_dim)
        # here.
        if cube.coord_dims(self.blend_coord)[0] < perc_dim:
            perc_dim -= 1

        weights_array = self.percentile_weights(cube, weights, perc_coord)

        # Set up aggregator
        PERCENTILE_BLEND = Aggregator(
            "mean",  # Use CF-compliant cell method.
            PercentileBlendingAggregator.aggregate,
        )

        cube_new = collapsed(
            cube,
            self.blend_coord,
            PERCENTILE_BLEND,
            arr_percent=percentiles,
            arr_weights=weights_array,
            perc_dim=perc_dim,
        )

        cube_new.data = cube_new.data.astype(np.float32)
        # Ensure collapsed coordinates do not promote themselves
        # to float64.
        for coord in cube_new.coords():
            if coord.points.dtype == np.float64:
                coord.points = coord.points.astype(np.float32)
        return cube_new
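The perc_dim adjustment mirrors how moving the collapsed axis to the end shifts the indices of the axes behind it. A numpy-only illustration of that index shift (the shapes are invented):

import numpy as np

# Axes: (blend=0, percentile=1, y=2, x=3).
data = np.zeros((4, 19, 5, 5))
blend_dim, perc_dim = 0, 1

# The aggregator moves the collapsed (blend) axis to index -1 ...
moved = np.moveaxis(data, blend_dim, -1)
# ... so every axis that sat after it slides down by one.
if blend_dim < perc_dim:
    perc_dim -= 1

print(moved.shape)  # (19, 5, 5, 4)
print(perc_dim)     # 1 -> 0: percentile is now the leading axis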
Example no. 10
    def process(self, cube):
        """
        Collapse the chosen coordinates with the available weights. The result
        of the neighbourhood processing is taken into account to renormalize
        any weights corresponding to a NaN in the result from neighbourhooding.
        In this case the weights are re-normalized so that we do not lose
        probability.

        Args:
            cube (iris.cube.Cube):
                Cube containing the array to which the square neighbourhood
                with a mask has been applied.

        Returns:
            iris.cube.Cube:
                Cube containing the weighted mean from neighbourhood after
                collapsing the chosen coordinate.

        """
        # Mask out any NaNs in the neighbourhood data so that Iris ignores
        # them when calculating the weighted mean.
        cube.data = ma.masked_invalid(cube.data, copy=False)
        yname = cube.coord(axis="y").name()
        xname = cube.coord(axis="x").name()

        renormalize = True
        if self.weights.shape == cube.shape:
            weights = self.renormalize_weights(cube)
            renormalize = False

        # Loop over any extra dimensions
        cubelist = iris.cube.CubeList([])
        for slice_3d in cube.slices([self.coord_masked, yname, xname]):
            if renormalize:
                weights = self.renormalize_weights(slice_3d)
                renormalize = False
            collapsed_slice = collapsed(slice_3d,
                                        self.coord_masked,
                                        iris.analysis.MEAN,
                                        weights=weights)
            cubelist.append(collapsed_slice)

        result = cubelist.merge_cube()
        # Promote any scalar coordinates with one point back to dimension
        # coordinates if they were dimensions in the input cube.
        # Take a slice over the coordinate we are collapsing as we do not
        # expect this in the output cube.
        first_slice = next(cube.slices_over([self.coord_masked]))
        result = check_cube_coordinates(first_slice, result)
        # Remove references to self.coord_masked in the result cube.
        result.remove_coord(self.coord_masked)
        return result
Example no. 11
    def _group_timezones(self, timezone_mask):
        """
        If the ancillary will be used with data that is not available at hourly
        intervals, the masks can be grouped to match the intervals of the data.
        For example, 3-hourly interval data might group UTC offsets:

            {12: [-12, -11], 9: [-10, -9, -8], 6: [-7, -6, -5], etc.}

        The dictionary specifying the groupings has a key, which provides the
        UTC offset to be used for the group. The list contains the UTC offsets
        that should be grouped together.

        The grouped UTC_offset cubes are collapsed together over the UTC_offset
        coordinate using iris.analysis.MIN. This means all the unmasked (0)
        points in each cube are preserved as the dimension is collapsed,
        enlarging the unmasked region to include all unmasked points from all
        the cubes.

        Args:
            timezone_mask (iris.cube.CubeList):
                A cube list containing a mask cube for each UTC offset that
                has been found necessary.
        Returns:
            iris.cube.CubeList:
                A cube list containing cubes created by blending together
                different UTC offset cubes to create larger masked regions.
        """
        grouped_timezone_masks = iris.cube.CubeList()
        for offset, group in self.groupings.items():

            # If the offset key comes from a JSON file it will be a string.
            offset = int(offset)

            constraint = iris.Constraint(
                UTC_offset=lambda cell: group[0] <= cell <= group[-1]
            )
            subset = timezone_mask.extract(constraint)
            if not subset:
                continue
            subset = subset.merge_cube()
            if subset.coord("UTC_offset").shape[0] > 1:
                subset = collapsed(subset, "UTC_offset", iris.analysis.MIN)
                subset.coord("UTC_offset").points = [offset]
            else:
                (point,) = subset.coord("UTC_offset").points
                subset.coord("UTC_offset").points = [offset]
                subset.coord("UTC_offset").bounds = [
                    min(point, offset),
                    max(point, offset),
                ]
            grouped_timezone_masks.append(subset)
        return grouped_timezone_masks
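Collapsing the grouped mask cubes with MIN is effectively a union of the unmasked regions: a 0 (unmasked) in any member of the group wins. A small numpy sketch of the same idea with two invented masks:

import numpy as np

# 0 = unmasked, 1 = masked; one row per UTC offset in the group.
masks = np.array([
    [1, 0, 0, 1],
    [1, 1, 0, 0],
])

# Taking the minimum over the UTC_offset axis keeps a point unmasked
# if it is unmasked in any member of the group.
grouped = masks.min(axis=0)
print(grouped)  # [1 0 0 0]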
Example no. 12
    def process(self, cube):
        """
        Create a cube containing the percentiles as a new dimension.

        What's generated by default is:
            * 15 percentiles - (0%, 5%, 10%, 20%, 25%, 30%, 40%, 50%, 60%,
              70%, 75%, 80%, 90%, 95%, 100%)

        Args:
            cube (iris.cube.Cube):
                Given the collapse coordinate, convert the set of values
                along that coordinate into a PDF and extract percentiles.

        Returns:
            iris.cube.Cube:
                A single merged cube of all the cubes produced by each
                percentile collapse.

        """
        # Store data type and enforce the same type on return.
        data_type = cube.dtype
        # Test that collapse coords are present in cube before proceeding.
        n_collapse_coords = len(self.collapse_coord)
        n_valid_coords = sum([
            test_coord == coord.name() for coord in cube.coords()
            for test_coord in self.collapse_coord
        ])
        if n_valid_coords == n_collapse_coords:
            result = collapsed(
                cube,
                self.collapse_coord,
                iris.analysis.PERCENTILE,
                percent=self.percentiles,
                fast_percentile_method=self.fast_percentile_method,
            )

            result.data = result.data.astype(data_type)
            for coord in self.collapse_coord:
                result.remove_coord(coord)
            # Rename the percentile coordinate to "percentile" and make
            # sure that the associated unit is %.
            percentile_coord = find_percentile_coordinate(result)
            result.coord(percentile_coord).rename("percentile")
            result.coord(percentile_coord).units = "%"
            return result

        raise CoordinateNotFoundError(
            "Coordinate '{}' not found in cube passed to {}.".format(
                self.collapse_coord, self.__class__.__name__))
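The percentile collapse is conceptually np.percentile along the collapse axis, with the requested percentiles becoming a new dimension and the input dtype enforced on return. A numpy-only sketch (the realization values are invented):

import numpy as np

# Five realizations at a single point.
realizations = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
percentiles = [0, 25, 50, 75, 100]

# Enforce the input dtype on the result, as the plugin does.
result = np.percentile(realizations, percentiles).astype(np.float32)
print(result)  # [1. 2. 3. 4. 5.]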
Example no. 13
    def normalised_masked_weights(weights_cube, blend_coord):
        """
        Normalise spatial weights along dimension associated with the
        blend_coord. If for a given point the sum of the weights along
        the blend_coord is zero then the returned normalised weight for
        that point will also be zero. This correspsonds to the case where
        there is missing data for that point for all slices along the
        blend_coord.

        Args:
            weights_cube (iris.cube.Cube):
                A cube with spatial weights and any other leading dimension.
                This cube must have a coordinate matching the name given by
                blend_coord which corresponds to the dimension along
                which the normalisation is needed.
            blend_coord (str):
                The name of a coordinate in the input cube.
                This coordinate corresponds to the dimension along which the
                normalisation is needed.

        Returns:
            iris.cube.Cube:
                A cube with the same dimensions as the input cube, but with
                the weights normalised along the blend_coord dimension.
                The blend_coord will be the leading dimension on the
                output cube.
        """
        summed_weights = collapsed(weights_cube, blend_coord,
                                   iris.analysis.SUM)

        result = iris.cube.CubeList()
        # Slice over blend_coord so the dimensions match.
        for weight_slice in weights_cube.slices_over(blend_coord):
            # Only divide where the sum of the weights is positive. The
            # out keyword argument supplies the default value used where
            # the sum of the weights is zero.
            normalised_data = np.divide(
                weight_slice.data,
                summed_weights.data,
                out=np.zeros_like(weight_slice.data),
                where=(summed_weights.data > 0),
            )
            result.append(weight_slice.copy(data=normalised_data))
        return result.merge_cube()
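The guarded np.divide is what implements the zero-weight rule from the docstring: points whose weights sum to zero come out as zero rather than NaN. A standalone numpy demonstration with invented weights:

import numpy as np

weights = np.array([0.2, 0.0, 0.6], dtype=np.float32)
summed = np.array([0.8, 0.0, 1.2], dtype=np.float32)

# out= supplies the value used wherever the where= condition is False,
# so a zero weight-sum yields 0 instead of a divide-by-zero NaN.
normalised = np.divide(
    weights, summed, out=np.zeros_like(weights), where=(summed > 0)
)
print(normalised)  # [0.25 0.   0.5 ]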
Example no. 14
    def weighted_mean(self, cube, weights):
        """
        Blend data using a weighted mean using the weights provided.

        Args:
            cube (iris.cube.Cube):
                The cube which is being blended over self.blend_coord.
            weights (iris.cube.Cube or None):
                Cube of blending weights or None.

        Returns:
            iris.cube.Cube:
                The cube with values blended over self.blend_coord, with
                suitable weightings applied.
        """
        weights_array = self.non_percentile_weights(cube, weights)

        (collapse_dim,) = cube.coord_dims(self.blend_coord)
        if collapse_dim == 0:
            slice_dim = 1
        else:
            slice_dim = 0

        allow_slicing = cube.ndim > 3

        if allow_slicing:
            cube_slices = cube.slices_over(slice_dim)
        else:
            cube_slices = [cube]

        weights_slices = (
            np.moveaxis(weights_array, slice_dim, 0)
            if allow_slicing
            else [weights_array]
        )

        result_slices = iris.cube.CubeList(
            collapsed(c_slice, self.blend_coord, iris.analysis.MEAN, weights=w_slice)
            for c_slice, w_slice in zip(cube_slices, weights_slices)
        )

        result = result_slices.merge_cube() if allow_slicing else result_slices[0]

        return result
Example no. 15
    def weighted_mean(self, cube: Cube, weights: Optional[Cube]) -> Cube:
        """
        Blend data using a weighted mean using the weights provided.

        Args:
            cube:
                The cube which is being blended over self.blend_coord.
                Assumes leading blend dimension (enforced in process)
            weights:
                Cube of blending weights or None.

        Returns:
            The cube with values blended over self.blend_coord, with
            suitable weightings applied.
        """
        weights_array = self.get_weights_array(cube, weights)

        slice_dim = 1
        allow_slicing = cube.ndim > 3

        if allow_slicing:
            cube_slices = cube.slices_over(slice_dim)
        else:
            cube_slices = [cube]

        weights_slices = (
            np.moveaxis(weights_array, slice_dim, 0)
            if allow_slicing
            else [weights_array]
        )

        result_slices = iris.cube.CubeList(
            collapsed(c_slice, self.blend_coord, iris.analysis.MEAN, weights=w_slice)
            for c_slice, w_slice in zip(cube_slices, weights_slices)
        )

        result = result_slices.merge_cube() if allow_slicing else result_slices[0]

        return result
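Slicing over a non-blend dimension trades one large collapse for several smaller ones when the cube has more than three dimensions. The weights must be re-ordered so that iterating over them stays aligned with the cube slices; a numpy sketch of that alignment (the shapes are invented):

import numpy as np

# Axes: (blend=0, threshold=1, y=2, x=3); weights share the cube's shape.
weights_array = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)
slice_dim = 1

# Iterating over the moved axis yields one (blend, y, x) weights block
# per threshold slice, matching cube.slices_over(slice_dim).
weights_slices = np.moveaxis(weights_array, slice_dim, 0)
for w_slice in weights_slices:
    print(w_slice.shape)  # (2, 4, 5), three times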
Example no. 16
    def process(self, historic_forecast, truth, landsea_mask=None):
        """
        Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
        Statistics, estimate the required coefficients from historical
        forecasts.

        The main steps of this method are:

        1. Check that the predictor is valid.
        2. Filter the historic forecasts and truth to ensure that these
           inputs match in validity time.
        3. Apply unit conversion to ensure that the historic forecasts and
           truth have the desired units for calibration.
        4. Calculate the variance of the historic forecasts. If the chosen
           predictor is the mean, also calculate the mean of the historic
           forecasts.
        5. If a land-sea mask is provided then mask out sea points in the truth
           and predictor from the historic forecasts.
        6. Calculate initial guess at coefficient values by performing a
           linear regression, if requested, otherwise default values are
           used.
        7. Perform minimisation.

        Args:
            historic_forecast (iris.cube.Cube):
                The cube containing the historical forecasts used
                for calibration.
            truth (iris.cube.Cube):
                The cube containing the truth used for calibration.
            landsea_mask (iris.cube.Cube):
                The optional cube containing a land-sea mask. If provided, only
                land points are used to calculate the coefficients. Within the
                land-sea mask cube land points should be specified as ones,
                and sea points as zeros.

        Returns:
            iris.cube.Cube:
                Cube containing the coefficients estimated using EMOS.
                The cube contains a coefficient_index dimension coordinate
                and a coefficient_name auxiliary coordinate.

        Raises:
            ValueError: If either the historic_forecast or truth cubes were not
                passed in.
            ValueError: If the units of the historic and truth cubes do not
                match.

        """
        if not (historic_forecast and truth):
            raise ValueError("historic_forecast and truth cubes must be "
                             "provided.")

        # Ensure predictor is valid.
        check_predictor(self.predictor)

        historic_forecast, truth = (
            filter_non_matching_cubes(historic_forecast, truth))

        # Make sure inputs have the same units.
        if self.desired_units:
            historic_forecast.convert_units(self.desired_units)
            truth.convert_units(self.desired_units)

        if historic_forecast.units != truth.units:
            msg = ("The historic forecast units of {} do not match "
                   "the truth units {}. These units must match, so that "
                   "the coefficients can be estimated.").format(
                       historic_forecast.units, truth.units)
            raise ValueError(msg)

        if self.predictor.lower() == "mean":
            no_of_realizations = None
            forecast_predictor = collapsed(
                historic_forecast, "realization", iris.analysis.MEAN)
        elif self.predictor.lower() == "realizations":
            no_of_realizations = len(
                historic_forecast.coord("realization").points)
            forecast_predictor = historic_forecast

        forecast_var = collapsed(
            historic_forecast, "realization", iris.analysis.VARIANCE)

        # If a landsea_mask is provided mask out the sea points
        if landsea_mask:
            self.mask_cube(forecast_predictor, landsea_mask)
            self.mask_cube(forecast_var, landsea_mask)
            self.mask_cube(truth, landsea_mask)

        # Computing initial guess for EMOS coefficients
        initial_guess = self.compute_initial_guess(
            truth, forecast_predictor, self.predictor,
            self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
            no_of_realizations=no_of_realizations)

        # Calculate coefficients if there are no nans in the initial guess.
        if np.any(np.isnan(initial_guess)):
            optimised_coeffs = initial_guess
        else:
            optimised_coeffs = (
                self.minimiser(
                    initial_guess, forecast_predictor,
                    truth, forecast_var,
                    self.predictor,
                    self.distribution.lower()))
        coefficients_cube = (
            self.create_coefficients_cube(optimised_coeffs, historic_forecast))
        return coefficients_cube
Example no. 17
 def test_single_method(self):
     """Test that a collapsed cube is returned with no cell method added"""
     result = collapsed(self.cube, "realization", iris.analysis.MEAN)
     self.assertTupleEqual(result.cell_methods, ())
     self.assertTrue((result.data == self.expected_data).all())