def test_process_deterministic(deterministic_forecast, deterministic_features,
                               dummy_lightgbm_models):
    """Test process routine using lightgbm booster."""
    plugin = ApplyRainForestsCalibrationLightGBM(model_config_dict={})
    plugin.tree_models, plugin.error_thresholds = dummy_lightgbm_models

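    # output_realizations_count=None is expected to retain one realization per
    # error percentile (error_percentiles_count=4 here); an integer value is
    # expected to resample the output to that many realizations.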
    for output_realization_count in (None, 10):
        result = plugin.process(
            deterministic_forecast,
            deterministic_features,
            error_percentiles_count=4,
            output_realizations_count=output_realization_count,
        )

        assert result.standard_name == deterministic_forecast.standard_name
        assert result.long_name == deterministic_forecast.long_name
        assert result.var_name == deterministic_forecast.var_name
        assert result.units == deterministic_forecast.units

        # Check that all non-realization dimension coordinates are equal.
        assert (result.coords(dim_coords=True)[1:] ==
                deterministic_forecast.coords(dim_coords=True)[1:])
        if output_realization_count is None:
            assert result.coord("realization").points.size == 4
        else:
            assert (result.coord("realization").points.size ==
                    output_realization_count)
        assert result.coords(dim_coords=False) == deterministic_forecast.coords(
            dim_coords=False)
        assert result.attributes == deterministic_forecast.attributes


def test__align_feature_variables_misaligned_dim_coords(ensemble_features):
    """Check that a ValueError is raised when feature/forecast cubes have
    differing dimension coordinates."""
    # Test case where the non-realization dimensions differ.
    misaligned_forecast_cube = set_up_variable_cube(
        np.maximum(0, np.random.normal(0.002, 0.001,
                                       (5, 10, 15))).astype(np.float32),
        name="lwe_thickness_of_precipitation_amount",
        units="m",
        realizations=np.arange(5),
    )
    with pytest.raises(ValueError):
        ApplyRainForestsCalibrationLightGBM(
            model_config_dict={})._align_feature_variables(
                ensemble_features, misaligned_forecast_cube)
    # Test case where the realization dimension differs.
    misaligned_forecast_cube = set_up_variable_cube(
        np.maximum(0, np.random.normal(0.002, 0.001,
                                       (10, 10, 10))).astype(np.float32),
        name="lwe_thickness_of_precipitation_amount",
        units="m",
        realizations=np.arange(10),
    )
    with pytest.raises(ValueError):
        ApplyRainForestsCalibrationLightGBM(
            model_config_dict={})._align_feature_variables(
                ensemble_features, misaligned_forecast_cube)


def test__new__(model_config, monkeypatch):
    """Test that the expected subclass is returned from the model config, and
    that a ValueError is raised when a lightgbm model file path is missing."""
    monkeypatch.setattr(lightgbm, "Booster", MockBooster)
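    # The model_config fixture (defined elsewhere in this module) is assumed to
    # map error-threshold strings to per-threshold model descriptors, roughly:
    #     {"0.0000": {"lightgbm_model": "<path to booster file>"}, ...}
    # This sketch is illustrative only; the authoritative structure lives in the fixture.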
    # Check that we get the expected subclass
    result = ApplyRainForestsCalibrationLightGBM(model_config)
    assert type(result).__name__ == "ApplyRainForestsCalibrationLightGBM"
    # Test exception raised when file path is missing.
    model_config["0.0000"].pop("lightgbm_model", None)
    with pytest.raises(ValueError):
        ApplyRainForestsCalibrationLightGBM(model_config)


def test__prepare_error_probability_cube(ensemble_forecast, error_thresholds,
                                         error_threshold_cube):
    """Test the preparation of error probability cube from input
    forecast cube."""
    plugin = ApplyRainForestsCalibrationLightGBM(model_config_dict={})
    plugin.error_thresholds = error_thresholds
    result = plugin._prepare_error_probability_cube(ensemble_forecast)

    assert result.long_name == error_threshold_cube.long_name
    assert result.units == error_threshold_cube.units
    assert result.coords() == error_threshold_cube.coords()
    assert result.attributes == error_threshold_cube.attributes


def test__check_num_features(ensemble_features, dummy_lightgbm_models):
    """Test number of features expected by tree_models matches features passed in."""
    plugin = ApplyRainForestsCalibrationLightGBM(model_config_dict={})
    plugin.tree_models, _ = dummy_lightgbm_models
    plugin._check_num_features(ensemble_features)
    with pytest.raises(ValueError):
        plugin._check_num_features(ensemble_features[:-1])


def test__stack_subensembles(error_percentile_cube):
    """Test the stacking of realization-percentile dimensions into a single
    realization dimension."""
    column = np.arange(20, dtype=np.float32)

    error_percentile_cube.data = np.broadcast_to(
        column.reshape(5, 4)[:, :, np.newaxis, np.newaxis], (5, 4, 10, 10))
    expected_data = np.broadcast_to(column[:, np.newaxis, np.newaxis],
                                    (20, 10, 10))
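    # Each (realization, percentile) pair is filled with a distinct value 0..19,
    # so after stacking the data should run 0..19 along the new realization
    # dimension, with percentile varying fastest within each source realization.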
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._stack_subensembles(error_percentile_cube)
    # Result should not contain percentile coordinate
    assert result.coords("percentile") == []
    # Result should have a realization coordinate of length
    # realization.points.size * percentile.points.size.
    assert (result.coord("realization").points.size ==
            error_percentile_cube.coord("realization").points.size *
            error_percentile_cube.coord("percentile").points.size)
    # All remaining coords should be consistent
    assert (result.coords(dim_coords=True)[1:] == error_percentile_cube.coords(
        dim_coords=True)[2:])
    assert result.coords(dim_coords=False) == error_percentile_cube.coords(
        dim_coords=False)
    assert result.attributes == error_percentile_cube.attributes
    # Check data ordered as expected.
    np.testing.assert_equal(result.data, expected_data)

    # Test the case where cubes are not in expected order.
    error_percentile_cube.transpose([1, 0, 2, 3])
    with pytest.raises(ValueError):
        ApplyRainForestsCalibrationLightGBM(
            model_config_dict={})._stack_subensembles(error_percentile_cube)


def test__apply_error_to_forecast(ensemble_forecast, error_percentile_cube):
    """Test the application of forecast error (percentile) values to the forecast cube."""
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._apply_error_to_forecast(ensemble_forecast,
                                                       error_percentile_cube)

    assert result.standard_name == ensemble_forecast.standard_name
    assert result.long_name == ensemble_forecast.long_name
    assert result.var_name == ensemble_forecast.var_name
    assert result.units == ensemble_forecast.units
    # Aux coords should be consistent with forecast
    assert result.coords(dim_coords=False) == ensemble_forecast.coords(
        dim_coords=False)
    # Dim coords should be consistent with forecast error.
    assert result.coords(dim_coords=True) == error_percentile_cube.coords(
        dim_coords=True)
    assert result.attributes == ensemble_forecast.attributes
    assert np.all(result.data >= 0.0)


def test__calculate_error_probabilities(ensemble_features, ensemble_forecast,
                                        dummy_lightgbm_models):
    """Test calculation of error probability cube when using lightgbm Boosters."""
    plugin = ApplyRainForestsCalibrationLightGBM(model_config_dict={})
    plugin.tree_models, plugin.error_thresholds = dummy_lightgbm_models
    result = plugin._calculate_error_probabilities(ensemble_forecast,
                                                   ensemble_features)

    # Check that data has sensible probability values.
    # Note: here we are NOT checking the returned value against an expected value
    # as this will be sensitive to changes in associated GBDT libraries, given that
    # the tree models are created dynamically within fixtures. Here we implicitly trust
    # the output from the tree models are correct based on the specified inputs, and so
    # only test to ensure that the dataset overall conforms to the bounds for probability
    # data.
    assert np.all(result.data >= 0.0)
    assert np.all(result.data <= 1.0)
    assert np.all(np.isfinite(result.data))
    # Check that data is monotonically decreasing
    assert np.all(np.diff(result.data, axis=0) <= 0.0)


def test__prepare_features_dataframe(ensemble_features):
    """Test dataframe preparation given set of feature cubes."""
    feature_names = [cube.name() for cube in ensemble_features]
    expected_size = ensemble_features.extract_cube(
        "lwe_thickness_of_precipitation_amount").data.size
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._prepare_features_dataframe(ensemble_features)

    assert list(result.columns) == list(sorted(feature_names))
    assert len(result) == expected_size

    # Drop realization coordinate from one of the ensemble features, to produce
    # cubes of differing length.
    cube_lacking_realization = ensemble_features.pop(-1).extract(
        Constraint(realization=0))
    cube_lacking_realization.remove_coord("realization")
    ensemble_features.append(cube_lacking_realization)
    with pytest.raises(ValueError):
        ApplyRainForestsCalibrationLightGBM(
            model_config_dict={})._prepare_features_dataframe(
                ensemble_features)


def test__extract_error_percentiles(error_threshold_cube,
                                    error_percentile_cube):
    """Test the extraction of error percentiles from error-probability cube."""
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._extract_error_percentiles(
            error_threshold_cube, 4)

    assert result.long_name == error_percentile_cube.long_name
    assert result.units == error_percentile_cube.units
    assert result.coords() == error_percentile_cube.coords()
    assert result.attributes == error_percentile_cube.attributes

    # Test the case where error_threshold_cube has a unit-length realization dimension.
    error_threshold_cube = error_threshold_cube.extract(
        Constraint(realization=0))
    error_threshold_cube = new_axis(error_threshold_cube, "realization")
    error_threshold_cube.transpose([1, 0, 2, 3])
    error_percentile_cube = error_percentile_cube.extract(
        Constraint(realization=0))
    error_percentile_cube = new_axis(error_percentile_cube, "realization")
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._extract_error_percentiles(
            error_threshold_cube, 4)

    assert result.long_name == error_percentile_cube.long_name
    assert result.units == error_percentile_cube.units
    assert result.coords() == error_percentile_cube.coords()
    assert result.attributes == error_percentile_cube.attributes


def test__init__(model_config, ordered_inputs, default_threads,
                 error_thresholds, monkeypatch):
    monkeypatch.setattr(lightgbm, "Booster", MockBooster)

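    # When ordered_inputs is False, move the "0.0000" entry to the end of the
    # config dict to check that thresholds and model files are still paired up
    # consistently, regardless of key ordering in model_config.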
    if not ordered_inputs:
        tmp_value = model_config.pop("0.0000", None)
        model_config["0.0000"] = tmp_value

    if default_threads:
        expected_threads = 1
        result = ApplyRainForestsCalibrationLightGBM(model_config)
    else:
        expected_threads = 8
        result = ApplyRainForestsCalibrationLightGBM(model_config,
                                                     threads=expected_threads)
    # Check thresholds and model types match
    assert np.all(result.error_thresholds == error_thresholds)
    for model in result.tree_models:
        assert model.model_class == "lightgbm-Booster"
        assert model.threads == expected_threads
    # Ensure thresholds and model files match.
    for threshold, model in zip(result.error_thresholds, result.tree_models):
        assert f"{threshold:06.4f}" in model.model_file


def test__make_decreasing():
    """Test that _make_decreasing returns an array that is non-increasing
    in the first dimension."""
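    # The expected values below imply behaviour akin to pool adjacent violators:
    # runs of values that break monotonic decrease along the first dimension are
    # replaced by their mean, independently for each column.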
    # Test on standard use case.
    input_array = np.array([[5, 5], [2, 3], [3, 4], [4, 2], [1, 1]]) / 5.0
    expected = np.array([[5, 5], [3, 3.5], [3, 3.5], [3, 2], [1, 1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test on case where all data is already monotonically decreasing.
    input_array = np.array([[5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]) / 5.0
    expected = np.array([[5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test on case where some data is monotonically increasing.
    input_array = np.array([[1, 5], [2, 3], [3, 4], [4, 2], [5, 1]]) / 5.0
    expected = np.array([[3, 5], [3, 3.5], [3, 3.5], [3, 2], [3, 1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test case where the data is increasing along the second dimension; this
    # should be preserved in the output, while the leading dimension is made
    # monotonically decreasing.
    input_array = np.array([[4, 5], [2, 3], [3, 4], [1, 2], [1, 1]]) / 5.0
    expected = np.array([[4, 5], [2.5, 3.5], [2.5, 3.5], [1, 2], [1, 1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test on case where data has shape (n, 1).
    input_array = np.array([[5], [3], [4], [2], [1]]) / 5.0
    expected = np.array([[5], [3.5], [3.5], [2], [1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test on case where data has shape (1, n).
    input_array = np.array([[5, 3, 4, 2, 1]]) / 5.0
    expected = np.array([[5, 3, 4, 2, 1]]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)
    # Test on case where data has shape (n).
    input_array = np.array([5, 3, 4, 2, 1]) / 5.0
    expected = np.array([5, 3.5, 3.5, 2, 1]) / 5.0
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._make_decreasing(input_array)
    np.testing.assert_almost_equal(expected, result)


def test__align_feature_variables_ensemble(ensemble_features,
                                           ensemble_forecast):
    """Check alignment of feature and forecast cubes when the realization
    coordinate is present in only some of the feature cubes."""
    expected_features = ensemble_features.copy()
    # Drop realization coordinate from one of the ensemble features
    derived_field_cube = ensemble_features.pop(-1).extract(
        Constraint(realization=0))
    derived_field_cube.remove_coord("realization")
    ensemble_features.append(derived_field_cube)

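    # _align_feature_variables is expected to restore the realization dimension
    # on the modified feature cube, so the aligned features should match the
    # original, unmodified ensemble features.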
    (
        aligned_features,
        aligned_forecast,
    ) = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._align_feature_variables(ensemble_features,
                                                       ensemble_forecast)

    assert aligned_features == expected_features
    assert aligned_forecast == ensemble_forecast


def test__combine_subensembles(error_percentile_cube):
    """Test extraction of realization values from full superensemble."""
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._combine_subensembles(
            error_percentile_cube, output_realizations_count=None)

    assert (result.coord("realization").points.size ==
            error_percentile_cube.coord("realization").points.size *
            error_percentile_cube.coord("percentile").points.size)

    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._combine_subensembles(
            error_percentile_cube, output_realizations_count=10)

    assert result.coord("realization").points.size == 10


def test__align_feature_variables_deterministic(deterministic_features,
                                                deterministic_forecast):
    """Check alignment of feature and forecast cubes when no realization
    coordinate is present in any of the cubes."""
    expected_features = deterministic_features.copy()
    expected_forecast = deterministic_forecast.copy()
    # Drop realization from all features.
    deterministic_features = deterministic_features.extract(
        Constraint(realization=0))
    for feature in deterministic_features:
        feature.remove_coord("realization")
    # Drop realization from forecast.
    deterministic_forecast = deterministic_forecast.extract(
        Constraint(realization=0))
    deterministic_forecast.remove_coord("realization")
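    # Alignment is expected to reinstate the realization coordinate, so the
    # aligned cubes should match the original fixture cubes.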

    (
        aligned_features,
        aligned_forecast,
    ) = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={})._align_feature_variables(deterministic_features,
                                                       deterministic_forecast)

    assert aligned_features == expected_features
    assert aligned_forecast == expected_forecast