class Test_process_spatial_weights(IrisTest):
    """Test the process method with spatial weights options"""
    def setUp(self):
        """Set up a masked nowcast and unmasked UKV cube"""
        self.cubelist = set_up_masked_cubes()
        self.plugin = WeightAndBlend(
            "model_id",
            "dict",
            weighting_coord="forecast_period",
            wts_dict=MODEL_WEIGHTS,
        )

    @ManageWarnings(ignored_messages=[
        "Collapsing a non-contiguous coordinate",
        "Deleting unmatched attribute",
    ])
    def test_default(self):
        """Test plugin returns a cube with expected values where default fuzzy
        length is less than grid length (no smoothing)"""
        # data is 50:50 where radar is valid, 100% UKV where radar is masked
        expected_data = np.array(
            [
                np.broadcast_to([0.95, 0.95, 0.95, 0.9, 0.9], (5, 5)),
                np.broadcast_to([0.55, 0.55, 0.55, 0.5, 0.5], (5, 5)),
                np.broadcast_to([0.1, 0.1, 0.1, 0.0, 0.0], (5, 5)),
            ],
            dtype=np.float32,
        )
        result = self.plugin.process(
            self.cubelist,
            model_id_attr="mosg__model_configuration",
            spatial_weights=True,
        )
        self.assertIsInstance(result, iris.cube.Cube)
        self.assertArrayAlmostEqual(result.data, expected_data)

    @ManageWarnings(ignored_messages=[
        "Collapsing a non-contiguous coordinate",
        "Deleting unmatched attribute",
    ])
    def test_fuzzy_length(self):
        """Test values where fuzzy length is equal to 2 grid lengths"""
        # proportion of radar data is reduced at edge of valid region; still
        # 100% UKV where radar data is masked
        expected_data = np.array(
            [
                np.broadcast_to([0.95, 0.95, 0.9333333, 0.9, 0.9], (5, 5)),
                np.broadcast_to([0.55, 0.55, 0.5333333, 0.5, 0.5], (5, 5)),
                np.broadcast_to([0.1, 0.1, 0.0666666, 0.0, 0.0], (5, 5)),
            ],
            dtype=np.float32,
        )
        result = self.plugin.process(
            self.cubelist,
            model_id_attr="mosg__model_configuration",
            spatial_weights=True,
            fuzzy_length=400000,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
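For reference, the fuzzy_length=400000 above corresponds to two grid lengths, which implies a 200 km grid spacing in set_up_masked_cubes. A minimal sketch of the length-to-grid-squares conversion the plugin docstring describes (the variable names are illustrative, not part of the plugin API):

# Illustrative conversion of a fuzzy length in metres to a (possibly
# non-integer) number of grid squares, as described in the plugin docs.
# The 200 km spacing is inferred from test_fuzzy_length above, where a
# 400000 m fuzzy length equals two grid lengths.
grid_spacing_m = 200000.0   # assumed equal in x and y directions
fuzzy_length_m = 400000.0   # value passed to plugin.process above
n_grid_squares = fuzzy_length_m / grid_spacing_m  # -> 2.0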
Example #2
    def test_blend_with_zero_weight_one_model_valid(self):
        """Test plugin can cope with only one remaining model in the list to blend"""
        plugin = WeightAndBlend(
            "model_id",
            "dict",
            weighting_coord="forecast_period",
            wts_dict=MODEL_WEIGHTS_WITH_ZERO,
        )
        expected_data = self.nowcast_cube.data.copy()
        result = plugin.process(
            [self.ukv_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertEqual(result.attributes["mosg__model_configuration"],
                         "nc_det")
Example #3
    def test_blend_with_zero_weight_one_model_input(self):
        """Test plugin returns data unchanged from a single model, even if that
        model had zero weight"""
        plugin = WeightAndBlend(
            "model_id",
            "dict",
            weighting_coord="forecast_period",
            wts_dict=MODEL_WEIGHTS_WITH_ZERO,
        )
        expected_data = self.ukv_cube.data.copy()
        result = plugin.process(
            self.ukv_cube,
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertEqual(result.attributes["mosg__model_configuration"],
                         "uk_det")
Example #4
    def test_blend_with_zero_weight(self):
        """Test plugin produces correct values and attributes when some models read
        into the plugin have zero weighting"""
        plugin = WeightAndBlend(
            "model_id",
            "dict",
            weighting_coord="forecast_period",
            wts_dict=MODEL_WEIGHTS_WITH_ZERO,
        )
        expected_data = np.array([[[0.85]], [[0.45]], [[0.1]]],
                                 dtype=np.float32)
        result = plugin.process(
            [self.ukv_cube, self.enukx_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertEqual(result.attributes["mosg__model_configuration"],
                         "nc_det uk_ens")
Example #5
    def test_error_blend_coord_absent(self):
        """Test error is raised if blend coord is not present on input cubes"""
        plugin = WeightAndBlend("kittens", "linear", y0val=1, ynval=1)
        msg = "kittens coordinate is not present on all input cubes"
        with self.assertRaisesRegex(ValueError, msg):
            plugin.process([self.ukv_cube, self.ukv_cube_latest])
Example #6
class Test_process(IrisTest):
    """Test the process method"""
    def setUp(self):
        """Set up test cubes (each with a single point and 3 thresholds)"""
        thresholds = np.array([0.5, 1, 2], dtype=np.float32)
        units = "mm h-1"
        name = "lwe_precipitation_rate"
        datatime = dt(2018, 9, 10, 7)
        cycletime = dt(2018, 9, 10, 3)

        # a UKV cube with some rain and a 4 hr forecast period
        rain_data = np.array([[[0.9]], [[0.5]], [[0]]], dtype=np.float32)
        self.ukv_cube = set_up_probability_cube(
            rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=cycletime,
            standard_grid_metadata="uk_det")

        # a UKV cube from a more recent cycle with more rain
        more_rain_data = np.array([[[1]], [[0.6]], [[0.2]]], dtype=np.float32)
        self.ukv_cube_latest = set_up_probability_cube(
            more_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 4),
            standard_grid_metadata="uk_det")

        # a nowcast cube with more rain and a 2 hr forecast period
        self.nowcast_cube = set_up_probability_cube(
            more_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 5),
            attributes={"mosg__model_configuration": "nc_det"})

        # a MOGREPS-UK cube with less rain and a 4 hr forecast period
        less_rain_data = np.array([[[0.7]], [[0.3]], [[0]]], dtype=np.float32)
        self.enukx_cube = set_up_probability_cube(
            less_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=cycletime,
            standard_grid_metadata="uk_ens")

        self.plugin_cycle = WeightAndBlend("forecast_reference_time",
                                           "linear",
                                           y0val=1,
                                           ynval=1)
        self.plugin_model = WeightAndBlend("model_id",
                                           "dict",
                                           weighting_coord="forecast_period",
                                           wts_dict=MODEL_WEIGHTS)

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_basic(self):
        """Test output is a cube"""
        result = self.plugin_cycle.process(
            [self.ukv_cube, self.ukv_cube_latest])
        self.assertIsInstance(result, iris.cube.Cube)

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_cycle_blend_linear(self):
        """Test plugin produces correct cycle blended output with equal
        linear weightings"""
        expected_data = np.array([[[0.95]], [[0.55]], [[0.1]]],
                                 dtype=np.float32)
        result = self.plugin_cycle.process(
            [self.ukv_cube, self.ukv_cube_latest])
        self.assertArrayAlmostEqual(result.data, expected_data)
        # make sure output cube has the forecast reference time and period
        # from the most recent contributing cycle
        for coord in ["time", "forecast_reference_time", "forecast_period"]:
            self.assertEqual(result.coord(coord),
                             self.ukv_cube_latest.coord(coord))

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_model_blend(self):
        """Test plugin produces correct output for UKV-ENUKX model blend
        with 50-50 weightings defined by dictionary"""
        expected_data = np.array([[[0.8]], [[0.4]], [[0]]], dtype=np.float32)
        result = self.plugin_model.process(
            [self.ukv_cube, self.enukx_cube],
            model_id_attr="mosg__model_configuration")
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertEqual(result.attributes["mosg__model_configuration"],
                         "blend")
        result_coords = [coord.name() for coord in result.coords()]
        self.assertNotIn("model_id", result_coords)
        self.assertNotIn("model_configuration", result_coords)

    @ManageWarnings(ignored_messages=[
        "Collapsing a non-contiguous coordinate",
        "Deleting unmatched attribute"
    ])
    def test_blend_three_models(self):
        """Test plugin produces correct output for 3-model blend when all
        models have (equal) non-zero weights. Each model in WEIGHTS_DICT has
        a weight of 0.5 at 4 hours lead time, and the total weights are
        re-normalised during the process, so the final blend contains 1/3
        contribution from each of the three models."""
        expected_data = np.array([[[0.8666667]], [[0.4666667]], [[0.0666667]]],
                                 dtype=np.float32)
        result = self.plugin_model.process(
            [self.ukv_cube, self.enukx_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration")
        self.assertArrayAlmostEqual(result.data, expected_data)
        # make sure output cube has the forecast reference time and period
        # from the most recent contributing model
        for coord in ["time", "forecast_period", "forecast_reference_time"]:
            self.assertEqual(result.coord(coord),
                             self.nowcast_cube.coord(coord))

    def test_one_cube(self):
        """Test the plugin returns a single input cube with identical data and
        suitably updated metadata"""
        result = self.plugin_model.process(
            [self.enukx_cube], model_id_attr="mosg__model_configuration")
        self.assertArrayAlmostEqual(result.data, self.enukx_cube.data)
        self.assertEqual(result.attributes['mosg__model_configuration'],
                         'blend')
        self.assertEqual(result.attributes['title'], 'IMPROVER Model Forecast')

    def test_one_cube_with_cycletime(self):
        """Test the plugin returns a single input cube with an updated forecast
        reference time and period if given the "cycletime" option."""
        expected_frt = (
            self.enukx_cube.coord("forecast_reference_time").points[0] + 3600)
        expected_fp = self.enukx_cube.coord("forecast_period").points[0] - 3600
        result = self.plugin_model.process(
            [self.enukx_cube],
            model_id_attr="mosg__model_configuration",
            cycletime='20180910T0400Z')
        self.assertEqual(
            result.coord("forecast_reference_time").points[0], expected_frt)
        self.assertEqual(
            result.coord("forecast_period").points[0], expected_fp)

    def test_error_blend_coord_absent(self):
        """Test error is raised if blend coord is not present on input cubes"""
        plugin = WeightAndBlend("kittens", "linear", y0val=1, ynval=1)
        msg = "kittens coordinate is not present on all input cubes"
        with self.assertRaisesRegex(ValueError, msg):
            plugin.process([self.ukv_cube, self.ukv_cube_latest])
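The MODEL_WEIGHTS dictionary used throughout these tests is defined elsewhere in the test module. A plausible minimal sketch, assuming the ChooseWeightsLinear dictionary format and the equal 0.5 weights at 4 hours lead time described in test_blend_three_models (all values below are illustrative, not the module's actual constants):

# Hypothetical sketch of the MODEL_WEIGHTS dictionary referenced above,
# assuming the ChooseWeightsLinear format (per-model weights interpolated
# linearly over the weighting coordinate). The tests only require each
# model to have weight 0.5 at a forecast_period of 4 hours.
MODEL_WEIGHTS = {
    "uk_det": {"forecast_period": [0, 4, 8], "weights": [1, 0.5, 0],
               "units": "hours"},
    "uk_ens": {"forecast_period": [0, 4, 8], "weights": [0, 0.5, 1],
               "units": "hours"},
    "nc_det": {"forecast_period": [0, 4, 8], "weights": [1, 0.5, 0],
               "units": "hours"},
}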
Example #7
def process(cubelist,
            wts_calc_method,
            coordinate,
            cycletime,
            weighting_coord,
            weights_dict=None,
            y0val=None,
            ynval=None,
            cval=None,
            model_id_attr='mosg__model_configuration',
            spatial_weights_from_mask=False,
            fuzzy_length=20000.0):
    """Module to run weighted blending.

    Load in arguments and ensure they are set correctly.
    Then load in the data to blend and calculate weights
    using the method chosen before carrying out the blending.

    Args:
        cubelist (iris.cube.CubeList):
            Cubelist of cubes to be blended.
        wts_calc_method (str):
            Method to use to calculate weights used in blending.
            "linear" (default): calculate linearly varying blending weights.
            "nonlinear": calculate blending weights that decrease
            exponentially with increasing blending coordinates.
            "dicts": calculate weights using a dictionary passed in.
        coordinate (str):
            The coordinate over which the blending will be applied.
        cycletime (str):
            The forecast reference time to be used after blending has been
            applied, in the format YYYYMMDDTHHMMZ. If not provided, the
            blended file will take the latest available forecast reference
            time from the input cubes supplied.
        weighting_coord (str):
            Name of coordinate over which linear weights should be scaled.
            This coordinate must be available in the weights dictionary.
        weights_dict (dict):
            Dictionary from which to calculate blending weights. Dictionary
            format is as specified in the
            improver.blending.weights.ChooseWeightsLinear plugin.
        y0val (float):
            The relative value of the weighting start point (lowest value of
            blend coord) for choosing default linear weights.
            If used this must be a positive float or 0.
        ynval (float):
            The relative value of the weighting end point (highest value of
            blend coord) for choosing default linear weights. This must be a
            positive float or 0.
            Note that if blending over forecast reference time, ynval >= y0val
            would normally be expected (to give greater weight to the more
            recent forecast).
        cval (float):
            Factor used to determine how skewed the non-linear weights will be.
            A value of 1 implies equal weighting.
        model_id_attr (str):
            The name of the cube attribute to be used to identify the source
            model for multi-model blends. Default assumes Met Office model
            metadata. Must be present on all input cubes if blending over
            models. Default is 'mosg__model_configuration'.
        spatial_weights_from_mask (bool):
            If True, this option will result in the generation of spatially
            varying weights based on the masks of the data we are blending.
            The one dimensional weights are first calculated using the chosen
            weights calculation method, but the weights will then be adjusted
            spatially based on where there is masked data in the data we are
            blending. The spatial weights are calculated using the
            SpatiallyVaryingWeightsFromMask plugin.
            Default is False.
        fuzzy_length (float):
            When calculating spatially varying weights we can smooth the
            weights so that areas close to areas that are masked have lower
            weights than those further away. This fuzzy length controls the
            scale over which the weights are smoothed. The fuzzy length is in
            terms of m, the default is 20km. This distance is then converted
            into a number of grid squares, which does not have to be an
            integer. Assumes the grid spacing is the same in the x and y
            directions and raises an error if this is not true. See
            SpatiallyVaryingWeightsFromMask for more details.
            Default is 20000.0.

    Returns:
        iris.cube.Cube:
            Merged and blended Cube.

    Raises:
        RuntimeError:
            If wts_calc_method is "linear" and cval is not None.
        RuntimeError:
            If wts_calc_method is "nonlinear" and either y0val or ynval is
            not None.
        RuntimeError:
            If wts_calc_method is "dict" and weights_dict is None.
    """

    if (wts_calc_method == "linear") and cval:
        raise RuntimeError('Method: linear does not accept arguments: cval')
    elif (wts_calc_method == "nonlinear") and np.any([y0val, ynval]):
        raise RuntimeError('Method: non-linear does not accept arguments:'
                           ' y0val, ynval')
    elif (wts_calc_method == "dict") and weights_dict is None:
        raise RuntimeError('Dictionary is required if wts_calc_method="dict"')

    plugin = WeightAndBlend(coordinate,
                            wts_calc_method,
                            weighting_coord=weighting_coord,
                            wts_dict=weights_dict,
                            y0val=y0val,
                            ynval=ynval,
                            cval=cval)
    result = plugin.process(cubelist,
                            cycletime=cycletime,
                            model_id_attr=model_id_attr,
                            spatial_weights=spatial_weights_from_mask,
                            fuzzy_length=fuzzy_length)
    return result
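Because the argument checks run before any cube is touched, an invalid method/argument combination fails fast. A small usage sketch against the wrapper above (the empty cubelist is a stand-in; no data is needed to trigger the check):

# Usage sketch: the validation above runs before WeightAndBlend is built,
# so an invalid combination raises without inspecting the cubelist.
try:
    process([], "linear", "model_id", None, "forecast_period", cval=0.85)
except RuntimeError as err:
    print(err)  # Method: linear does not accept arguments: cval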
Example #8
def main(argv=None):
    """Load in arguments and ensure they are set correctly.
       Then load in the data to blend and calculate default weights
       using the method chosen before carrying out the blending."""
    parser = ArgParser(
        description='Calculate the default weights to apply in weighted '
        'blending plugins using the ChooseDefaultWeightsLinear or '
        'ChooseDefaultWeightsNonLinear plugins. Then apply these '
        'weights to the dataset using the BasicWeightedAverage plugin.'
        ' Required for ChooseDefaultWeightsLinear: y0val and ynval.'
        ' Required for ChooseDefaultWeightsNonLinear: cval.'
        ' Required for ChooseWeightsLinear with dict: wts_dict.')

    parser.add_argument('--wts_calc_method',
                        metavar='WEIGHTS_CALCULATION_METHOD',
                        choices=['linear', 'nonlinear', 'dict'],
                        default='linear', help='Method to use to calculate '
                        'weights used in blending. "linear" (default): '
                        'calculate linearly varying blending weights. '
                        '"nonlinear": calculate blending weights that decrease'
                        ' exponentially with increasing blending coordinate. '
                        '"dict": calculate weights using a dictionary passed '
                        'in as a command line argument.')

    parser.add_argument('coordinate', type=str,
                        metavar='COORDINATE_TO_AVERAGE_OVER',
                        help='The coordinate over which the blending '
                             'will be applied.')
    parser.add_argument('--cycletime', metavar='CYCLETIME', type=str,
                        help='The forecast reference time to be used after '
                        'blending has been applied, in the format '
                        'YYYYMMDDTHHMMZ. If not provided, the blended file '
                        'will take the latest available forecast reference '
                        'time from the input cubes supplied.')
    parser.add_argument('--model_id_attr', metavar='MODEL_ID_ATTR', type=str,
                        default="mosg__model_configuration",
                        help='The name of the netCDF file attribute to be '
                             'used to identify the source model for '
                             'multi-model blends. Default assumes Met Office '
                             'model metadata. Must be present on all input '
                             'files if blending over models.')
    parser.add_argument('--spatial_weights_from_mask',
                        action='store_true', default=False,
                        help='If set this option will result in the generation'
                             ' of spatially varying weights based on the'
                             ' masks of the data we are blending. The'
                             ' one dimensional weights are first calculated '
                             ' using the chosen weights calculation method,'
                             ' but the weights will then be adjusted spatially'
                             ' based on where there is masked data in the data'
                             ' we are blending. The spatial weights are'
                             ' calculated using the'
                             ' SpatiallyVaryingWeightsFromMask plugin.')

    parser.add_argument('input_filepaths', metavar='INPUT_FILES',
                        nargs="+",
                        help='Paths to input files to be blended.')
    parser.add_argument('output_filepath', metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF.')

    spatial = parser.add_argument_group(
        'Spatial weights from mask options',
        'Options for calculating the spatial weights using the '
        'SpatiallyVaryingWeightsFromMask plugin.')
    spatial.add_argument('--fuzzy_length', metavar='FUZZY_LENGTH', type=float,
                         default=20000,
                         help='When calculating spatially varying weights we'
                              ' can smooth the weights so that areas close to'
                              ' areas that are masked have lower weights than'
                              ' those further away. This fuzzy length controls'
                              ' the scale over which the weights are smoothed.'
                              ' The fuzzy length is in terms of m, the'
                              ' default is 20km. This distance is then'
                              ' converted into a number of grid squares,'
                              ' which does not have to be an integer. Assumes'
                              ' the grid spacing is the same in the x and y'
                              ' directions, and raises an error if this is not'
                              ' true. See SpatiallyVaryingWeightsFromMask for'
                              ' more detail.')

    linear = parser.add_argument_group('linear weights options',
                                       'Options for the linear weights '
                                       'calculation in '
                                       'ChooseDefaultWeightsLinear')
    linear.add_argument('--y0val', metavar='LINEAR_STARTING_POINT', type=float,
                        help='The relative value of the weighting start point '
                        '(lowest value of blend coord) for choosing default '
                        'linear weights. This must be a positive float or 0.')
    linear.add_argument('--ynval', metavar='LINEAR_END_POINT',
                        type=float, help='The relative value of the weighting '
                        'end point (highest value of blend coord) for choosing'
                        ' default linear weights. This must be a positive '
                        'float or 0.  Note that if blending over forecast '
                        'reference time, ynval >= y0val would normally be '
                        'expected (to give greater weight to the more recent '
                        'forecast).')

    nonlinear = parser.add_argument_group('nonlinear weights options',
                                          'Options for the non-linear '
                                          'weights calculation in '
                                          'ChooseDefaultWeightsNonLinear')
    nonlinear.add_argument('--cval', metavar='NON_LINEAR_FACTOR', type=float,
                           help='Factor used to determine how skewed the '
                                'non linear weights will be. '
                                'A value of 1 implies equal weighting. If not '
                                'set, a default value of cval=0.85 is set.')

    wts_dict = parser.add_argument_group('dict weights options',
                                         'Options for linear weights to be '
                                         'calculated based on parameters '
                                         'read from a json file dict')
    wts_dict.add_argument('--wts_dict', metavar='WEIGHTS_DICTIONARY',
                          help='Path to json file containing dictionary from '
                          'which to calculate blending weights. Dictionary '
                          'format is as specified in the improver.blending.'
                          'weights.ChooseWeightsLinear plugin.')
    wts_dict.add_argument('--weighting_coord', metavar='WEIGHTING_COORD',
                          default='forecast_period', help='Name of '
                          'coordinate over which linear weights should be '
                          'scaled. This coordinate must be available in the '
                          'weights dictionary.')

    args = parser.parse_args(args=argv)

    # reject incorrect argument combinations
    if (args.wts_calc_method == "linear") and args.cval:
        parser.wrong_args_error('cval', 'linear')
    if ((args.wts_calc_method == "nonlinear") and np.any([args.y0val,
                                                          args.ynval])):
        parser.wrong_args_error('y0val, ynval', 'non-linear')
    if (args.wts_calc_method == "dict") and not args.wts_dict:
        parser.error('Dictionary is required if --wts_calc_method="dict"')

    # load cubes to be blended
    cubelist = load_cubelist(args.input_filepaths)

    if args.wts_calc_method == "dict":
        with open(args.wts_dict, 'r') as wts:
            weights_dict = json.load(wts)
    else:
        weights_dict = None

    plugin = WeightAndBlend(
        args.coordinate, args.wts_calc_method,
        weighting_coord=args.weighting_coord, wts_dict=weights_dict,
        y0val=args.y0val, ynval=args.ynval, cval=args.cval)
    result = plugin.process(
        cubelist, cycletime=args.cycletime,
        model_id_attr=args.model_id_attr,
        spatial_weights=args.spatial_weights_from_mask,
        fuzzy_length=args.fuzzy_length)

    save_netcdf(result, args.output_filepath)
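A hypothetical invocation of this CLI through its argv hook; the file paths and weights JSON are placeholders, and the JSON must follow the ChooseWeightsLinear format referenced above:

# Hypothetical invocation via the argv hook; paths are placeholders.
main(argv=[
    "model_id",                          # COORDINATE_TO_AVERAGE_OVER
    "ukv.nc", "enukx.nc",                # INPUT_FILES
    "blend.nc",                          # OUTPUT_FILE
    "--wts_calc_method", "dict",
    "--wts_dict", "model_weights.json",
])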
Example #9
def process(cube,
            threshold_values=None,
            threshold_dict=None,
            threshold_units=None,
            comparison_operator='>',
            fuzzy_factor=None,
            collapse_coord="None",
            vicinity=None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. By default data are tested to be above the
    threshold, though the comparison_operator argument enables testing
    below thresholds.
    A fuzzy factor or fuzzy bounds may be provided to capture data that is
    close to the threshold.

    Args:
        cube (iris.cube.Cube):
             A cube to be processed.
        threshold_values (float or list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270 300. Must be omitted if 'threshold_dict'
            is used.
            Default is None.
        threshold_dict (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure
            "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Repeated thresholds with different bounds are not
            handled well. Only the last duplicate will be used.
            Default is None.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Default is >. Valid choices: > >= < <= gt ge lt le.
        fuzzy_factor (float):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            N.B. A fuzzy factor cannot be used with a zero threshold or a
            threshold_dict.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. The default is the string "None", meaning no collapse is
            applied.
        vicinity (float):
            If set, the distance in metres used to define the vicinity
            within which to search for an occurrence.

    Returns:
        iris.cube.Cube:
            processed Cube.

    Raises:
        RuntimeError:
            If threshold_dict and threshold_values are both used.

    Warns:
        warning:
            If collapsing coordinates with a masked array.

    """
    if threshold_dict and threshold_values:
        raise RuntimeError('threshold_dict cannot be used '
                           'with threshold_values')
    if threshold_dict:
        try:
            thresholds = []
            fuzzy_bounds = []
            is_fuzzy = True
            for key in threshold_dict.keys():
                thresholds.append(float(key))
                if is_fuzzy:
                    # If the first threshold has no bounds, fuzzy_bounds is
                    # set to None and subsequent bounds checks are skipped
                    if threshold_dict[key] == "None":
                        is_fuzzy = False
                        fuzzy_bounds = None
                    else:
                        fuzzy_bounds.append(tuple(threshold_dict[key]))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            raise type(err)(
                "{} in threshold dictionary file. \nHINT: Try adding a zero "
                "after the decimal point.".format(err))
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)("{} in dictionary file.".format(err))
    else:
        thresholds = threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord == "None":
        result = result_no_collapse_coord
    else:
        # Raise warning if result_no_collapse_coord is masked array
        if np.ma.isMaskedArray(result_no_collapse_coord.data):
            warnings.warn("Collapse-coord option not fully tested with "
                          "masked data.")
        # Take a weighted mean across realizations with equal weights
        plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)
        result_collapse_coord = plugin.process(result_no_collapse_coord)
        result = result_collapse_coord
    return result
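For clarity, the threshold_dict branch above accepts two shapes; the values below trace what the parsing loop produces for each (these literals mirror the docstring examples):

# What the threshold_dict parsing above yields for the two accepted shapes.
fuzzy_config = {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]}
# -> thresholds = [280.0, 290.0]
# -> fuzzy_bounds = [(278.0, 282.0), (288.0, 292.0)]

crisp_config = {"280.0": "None", "290.0": "None"}
# -> thresholds = [280.0, 290.0]
# -> fuzzy_bounds = None  (the first "None" disables all bounds checks)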
Example #10
class Test_process(IrisTest):
    """Test the process method"""
    def setUp(self):
        """Set up test cubes (each with a single point and 3 thresholds)"""
        thresholds = np.array([0.5, 1, 2], dtype=np.float32)
        units = "mm h-1"
        name = "lwe_precipitation_rate"
        datatime = dt(2018, 9, 10, 7)

        # a UKV cube with some rain and a 4 hr forecast period
        rain_data = np.array([[[0.9]], [[0.5]], [[0]]], dtype=np.float32)
        self.ukv_cube = set_up_probability_cube(
            rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 3),
            standard_grid_metadata="uk_det",
        )

        # a UKV cube from a more recent cycle with more rain
        more_rain_data = np.array([[[1]], [[0.6]], [[0.2]]], dtype=np.float32)
        self.ukv_cube_latest = set_up_probability_cube(
            more_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 4),
            standard_grid_metadata="uk_det",
        )

        # a nowcast cube with more rain and a 2 hr forecast period
        self.nowcast_cube = set_up_probability_cube(
            more_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 5),
            attributes={"mosg__model_configuration": "nc_det"},
        )

        # a MOGREPS-UK cube with less rain and a 4 hr forecast period
        less_rain_data = np.array([[[0.7]], [[0.3]], [[0]]], dtype=np.float32)
        self.enukx_cube = set_up_probability_cube(
            less_rain_data,
            thresholds,
            variable_name=name,
            threshold_units=units,
            time=datatime,
            frt=dt(2018, 9, 10, 3),
            standard_grid_metadata="uk_ens",
        )

        # cycletime from the most recent forecast (simulates current cycle)
        self.cycletime = "20180910T0500Z"
        self.expected_frt = self.nowcast_cube.coord(
            "forecast_reference_time").copy()
        self.expected_fp = self.nowcast_cube.coord("forecast_period").copy()

        self.plugin_cycle = WeightAndBlend("forecast_reference_time",
                                           "linear",
                                           y0val=1,
                                           ynval=1)
        self.plugin_model = WeightAndBlend(
            "model_id",
            "dict",
            weighting_coord="forecast_period",
            wts_dict=MODEL_WEIGHTS,
        )

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_basic(self):
        """Test output is a cube"""
        result = self.plugin_cycle.process(
            [self.ukv_cube, self.ukv_cube_latest],
            cycletime=self.cycletime,
        )
        self.assertIsInstance(result, iris.cube.Cube)

    @ManageWarnings(record=True)
    def test_masked_blending_warning(self, warning_list=None):
        """Test a warning is raised if blending masked data with non-spatial
        weights."""
        ukv_cube = self.ukv_cube.copy(data=np.ma.masked_where(
            self.ukv_cube.data < 0.5, self.ukv_cube.data))
        self.plugin_cycle.process(
            [ukv_cube, self.ukv_cube_latest],
            cycletime=self.cycletime,
        )
        message = "Blending masked data without spatial weights"
        self.assertTrue(any(message in str(item) for item in warning_list))

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_cycle_blend_linear(self):
        """Test plugin produces correct cycle blended output with equal
        linear weightings"""
        expected_data = np.array([[[0.95]], [[0.55]], [[0.1]]],
                                 dtype=np.float32)
        result = self.plugin_cycle.process(
            [self.ukv_cube, self.ukv_cube_latest],
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertArrayEqual(
            result.coord("time").points,
            self.ukv_cube_latest.coord("time").points)
        self.assertArrayEqual(
            result.coord("forecast_reference_time").points,
            self.expected_frt.points)
        self.assertArrayEqual(
            result.coord("forecast_period").points, self.expected_fp.points)
        for coord in ["forecast_reference_time", "forecast_period"]:
            self.assertIn("deprecation_message",
                          result.coord(coord).attributes)

    @ManageWarnings(
        ignored_messages=["Collapsing a non-contiguous coordinate"])
    def test_model_blend(self):
        """Test plugin produces correct output for UKV-ENUKX model blend
        with 50-50 weightings defined by dictionary"""
        expected_data = np.array([[[0.8]], [[0.4]], [[0]]], dtype=np.float32)
        result = self.plugin_model.process(
            [self.ukv_cube, self.enukx_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        self.assertEqual(result.attributes["mosg__model_configuration"],
                         "uk_det uk_ens")
        result_coords = [coord.name() for coord in result.coords()]
        self.assertNotIn("model_id", result_coords)
        self.assertNotIn("model_configuration", result_coords)
        for coord in ["forecast_reference_time", "forecast_period"]:
            self.assertIn("deprecation_message",
                          result.coord(coord).attributes)
            self.assertIn(
                "will be removed",
                result.coord(coord).attributes["deprecation_message"])

    @ManageWarnings(ignored_messages=[
        "Collapsing a non-contiguous coordinate",
        "Deleting unmatched attribute",
    ])
    def test_attributes_dict(self):
        """Test output attributes can be updated through argument"""
        attribute_changes = {
            "mosg__model_configuration": "remove",
            "source": "IMPROVER",
            "title": "IMPROVER Post-Processed Multi-Model Blend",
        }
        expected_attributes = {
            "source": "IMPROVER",
            "title": "IMPROVER Post-Processed Multi-Model Blend",
            "institution": MANDATORY_ATTRIBUTE_DEFAULTS["institution"],
        }
        result = self.plugin_model.process(
            [self.ukv_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration",
            attributes_dict=attribute_changes,
            cycletime=self.cycletime,
        )
        self.assertDictEqual(result.attributes, expected_attributes)

    @ManageWarnings(ignored_messages=[
        "Collapsing a non-contiguous coordinate",
        "Deleting unmatched attribute",
    ])
    def test_blend_three_models(self):
        """Test plugin produces correct output for 3-model blend when all
        models have (equal) non-zero weights. Each model in WEIGHTS_DICT has
        a weight of 0.5 at 4 hours lead time, and the total weights are
        re-normalised during the process, so the final blend contains 1/3
        contribution from each of the three models."""
        expected_data = np.array([[[0.8666667]], [[0.4666667]], [[0.0666667]]],
                                 dtype=np.float32)
        result = self.plugin_model.process(
            [self.ukv_cube, self.enukx_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        self.assertArrayAlmostEqual(result.data, expected_data)
        # make sure output cube has the forecast reference time and period
        # from the most recent contributing model
        for coord in ["time", "forecast_period", "forecast_reference_time"]:
            self.assertArrayEqual(
                result.coord(coord).points,
                self.nowcast_cube.coord(coord).points)

    def test_forecast_coord_deprecation(self):
        """Test model blending works if some (but not all) inputs have previously
        been cycle blended"""
        for cube in [self.ukv_cube, self.enukx_cube]:
            for coord in ["forecast_period", "forecast_reference_time"]:
                cube.coord(coord).attributes.update(
                    {"deprecation_message": "blah"})
        result = self.plugin_model.process(
            [self.ukv_cube, self.enukx_cube, self.nowcast_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
        )
        for coord in ["forecast_reference_time", "forecast_period"]:
            self.assertIn("deprecation_message",
                          result.coord(coord).attributes)
            self.assertIn(
                "will be removed",
                result.coord(coord).attributes["deprecation_message"])

    def test_one_cube(self):
        """Test the plugin returns a single input cube with updated attributes
        and time coordinates"""
        expected_coords = {coord.name() for coord in self.enukx_cube.coords()}
        expected_coords.update({"blend_time"})
        result = self.plugin_model.process(
            [self.enukx_cube],
            model_id_attr="mosg__model_configuration",
            cycletime=self.cycletime,
            attributes_dict={"source": "IMPROVER"},
        )
        self.assertArrayAlmostEqual(result.data, self.enukx_cube.data)
        self.assertSetEqual({coord.name()
                             for coord in result.coords()}, expected_coords)
        self.assertEqual(result.attributes["source"], "IMPROVER")

    def test_one_cube_error_no_cycletime(self):
        """Test an error is raised if no cycletime is provided for model blending"""
        msg = "Current cycle time is required"
        with self.assertRaisesRegex(ValueError, msg):
            self.plugin_model.process(
                [self.enukx_cube], model_id_attr="mosg__model_configuration")

    def test_one_cube_with_cycletime_model_blending(self):
        """Test the plugin returns a single input cube with an updated forecast
        reference time and period if given the "cycletime" option."""
        expected_frt = self.enukx_cube.coord(
            "forecast_reference_time").points[0] + 3600
        expected_fp = self.enukx_cube.coord("forecast_period").points[0] - 3600
        result = self.plugin_model.process(
            [self.enukx_cube],
            model_id_attr="mosg__model_configuration",
            cycletime="20180910T0400Z",
        )
        self.assertEqual(
            result.coord("forecast_reference_time").points[0], expected_frt)
        self.assertEqual(
            result.coord("forecast_period").points[0], expected_fp)

    def test_one_cube_with_cycletime_cycle_blending(self):
        """Test the plugin returns a single input cube with an updated forecast
        reference time and period if given the "cycletime" option."""
        expected_frt = self.enukx_cube.coord(
            "forecast_reference_time").points[0] + 3600
        expected_fp = self.enukx_cube.coord("forecast_period").points[0] - 3600
        result = self.plugin_cycle.process([self.enukx_cube],
                                           cycletime="20180910T0400Z")
        self.assertEqual(
            result.coord("forecast_reference_time").points[0], expected_frt)
        self.assertEqual(
            result.coord("forecast_period").points[0], expected_fp)

    def test_error_blend_coord_absent(self):
        """Test error is raised if blend coord is not present on input cubes"""
        plugin = WeightAndBlend("kittens", "linear", y0val=1, ynval=1)
        msg = "kittens coordinate is not present on all input cubes"
        with self.assertRaisesRegex(ValueError, msg):
            plugin.process([self.ukv_cube, self.ukv_cube_latest])
Example #11
def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Repeated thresholds with different bounds are not handled well;
            only the last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence.

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds.

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding

    Warns:
        UserWarning: If collapsing coordinates with a masked array

    """
    import warnings
    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key in threshold_config.keys():
            thresholds.append(np.float32(key))
            # If the first threshold has no bounds, fuzzy_bounds is
            # set to None and subsequent bounds checks are skipped
            if threshold_config[key] == "None":
                fuzzy_bounds = None
                continue
            fuzzy_bounds.append(tuple(threshold_config[key]))
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)
    return plugin.process(result_no_collapse_coord)
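The docstring above describes fuzzy_factor but not the bounds it implies. One plausible construction, consistent with the 0-1 range, the "higher factor means sharper threshold" behaviour, and the zero-threshold restriction (a hedged sketch of how BasicThreshold might derive bounds, not its documented internals):

# Hedged sketch: one plausible way a fuzzy factor defines bounds about a
# threshold, consistent with the constraints in the docstring above. This
# is an assumption about BasicThreshold internals, not a documented API.
def fuzzy_bounds_from_factor(threshold, fuzzy_factor):
    """Return symmetric multiplicative bounds about a threshold."""
    return (threshold * fuzzy_factor, threshold * (2.0 - fuzzy_factor))

# fuzzy_bounds_from_factor(2.0, 0.85) -> (1.7, 2.3); a zero threshold
# collapses the bounds to (0.0, 0.0), which is why it is disallowed.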
Example #12
def main(argv=None):
    """Load in arguments and get going."""
    parser = ArgParser(
        description="Calculate the threshold truth value of input data "
        "relative to the provided threshold value. By default data are "
        "tested to be above the thresholds, though the --below_threshold "
        "flag enables testing below thresholds. A fuzzy factor or fuzzy "
        "bounds may be provided to capture data that is close to the "
        "threshold.")
    parser.add_argument("input_filepath",
                        metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath",
                        metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("threshold_values",
                        metavar="THRESHOLD_VALUES",
                        nargs="*",
                        type=float,
                        help="Threshold value or values about which to "
                        "calculate the truth values; e.g. 270 300. "
                        "Must be omitted if --threshold_config is used.")
    parser.add_argument("--threshold_config",
                        metavar="THRESHOLD_CONFIG",
                        type=str,
                        help="Threshold configuration JSON file containing "
                        "thresholds and (optionally) fuzzy bounds. Best used "
                        "in combination  with --threshold_units. "
                        "It should contain a dictionary of strings that can "
                        "be interpreted as floats with the structure: "
                        " \"THRESHOLD_VALUE\": [LOWER_BOUND, UPPER_BOUND] "
                        "e.g: {\"280.0\": [278.0, 282.0], "
                        "\"290.0\": [288.0, 292.0]}, or with structure "
                        " \"THRESHOLD_VALUE\": \"None\" (no fuzzy bounds). "
                        "Repeated thresholds with different bounds are not "
                        "handled well. Only the last duplicate will be used.")
    parser.add_argument("--threshold_units",
                        metavar="THRESHOLD_UNITS",
                        default=None,
                        type=str,
                        help="Units of the threshold values. If not provided "
                        "the units are assumed to be the same as those of the "
                        "input dataset. Specifying the units here will allow "
                        "a suitable conversion to match the input units if "
                        "possible.")
    parser.add_argument("--below_threshold",
                        default=False,
                        action='store_true',
                        help="By default truth values of 1 are returned for "
                        "data ABOVE the threshold value(s). Using this flag "
                        "changes this behaviour to return 1 for data below "
                        "the threshold values.")
    parser.add_argument("--fuzzy_factor",
                        metavar="FUZZY_FACTOR",
                        default=None,
                        type=float,
                        help="A decimal fraction defining the factor about "
                        "the threshold value(s) which should be treated as "
                        "fuzzy. Data which fail a test against the hard "
                        "threshold value may return a fractional truth value "
                        "if they fall within this fuzzy factor region. Fuzzy "
                        "factor must be in the range 0-1, with higher values "
                        "indicating a narrower fuzzy factor region / sharper "
                        "threshold. NB A fuzzy factor cannot be used with a "
                        "zero threshold or a threshold_config file.")
    parser.add_argument("--collapse-coord",
                        type=str,
                        metavar="COLLAPSE-COORD",
                        default="None",
                        help="An optional ability to set which coordinate "
                        "we want to collapse over. The default is set "
                        "to None.")
    parser.add_argument("--vicinity",
                        type=float,
                        default=None,
                        help="If set,"
                        " distance in metres used to define the vicinity "
                        "within which to search for an occurrence.")

    args = parser.parse_args(args=argv)

    # Deal with mutual-exclusions that ArgumentParser can't handle:
    if args.threshold_values and args.threshold_config:
        parser.error("--threshold_config option is not compatible "
                     "with THRESHOLD_VALUES list.")
    if args.fuzzy_factor and args.threshold_config:
        parser.error("--threshold_config option is not compatible "
                     "with --fuzzy_factor option.")

    cube = load_cube(args.input_filepath)

    if args.threshold_config:
        try:
            # Read in threshold configuration from JSON file.
            with open(args.threshold_config, 'r') as input_file:
                thresholds_from_file = json.load(input_file)
            thresholds = []
            fuzzy_bounds = []
            is_fuzzy = True
            for key in thresholds_from_file.keys():
                thresholds.append(float(key))
                if is_fuzzy:
                    # If the first threshold has no bounds, fuzzy_bounds is
                    # set to None and subsequent bounds checks are skipped
                    if thresholds_from_file[key] == "None":
                        is_fuzzy = False
                        fuzzy_bounds = None
                    else:
                        fuzzy_bounds.append(tuple(thresholds_from_file[key]))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            raise type(err)(
                "{} in JSON file {}. \nHINT: Try adding a zero after the "
                "decimal point.".format(err, args.threshold_config))
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)(
                "{} in JSON file {}".format(err, args.threshold_config))
    else:
        thresholds = args.threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=args.fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=args.threshold_units,
        below_thresh_ok=args.below_threshold).process(cube)

    if args.vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(
            args.vicinity).process(result_no_collapse_coord)

        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())

        result_no_collapse_coord.rename(new_cube_name)

    if args.collapse_coord == "None":
        save_netcdf(result_no_collapse_coord, args.output_filepath)
    else:
        # Raise warning if result_no_collapse_coord is masked array
        if np.ma.isMaskedArray(result_no_collapse_coord.data):
            warnings.warn("Collapse-coord option not fully tested with "
                          "masked data.")
        # Take a weighted mean across realizations with equal weights
        plugin = WeightAndBlend(args.collapse_coord,
                                "linear",
                                y0val=1.0,
                                ynval=1.0)
        result_collapse_coord = plugin.process(result_no_collapse_coord)
        save_netcdf(result_collapse_coord, args.output_filepath)
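A hypothetical invocation of this CLI through its argv hook, mirroring the arguments defined above (paths and values are placeholders):

# Hypothetical invocation via the argv hook; paths are placeholders.
main(argv=[
    "precip_rate.nc",            # INPUT_FILE
    "precip_probs.nc",           # OUTPUT_FILE
    "0.5", "1.0", "2.0",         # THRESHOLD_VALUES
    "--threshold_units", "mm h-1",
    "--fuzzy_factor", "0.85",
])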