def test_emcam_lowest_bin_non_monotonic(reliability_table_slice):
    """Check the output when the lowest bin breaks observation monotonicity.

    The input has an observation count of 1000 in the first bin and 0 in the
    second, with every bin holding a forecast count of 1000. The expected
    output has four bins, the lowest of which spans the bounds [0.0, 0.4].
    """
    reliability_table_slice.data = np.array(
        [
            [1000, 0, 250, 500, 750],  # Observation count
            [0, 250, 500, 750, 1000],  # Sum of forecast probability
            [1000, 1000, 1000, 1000, 1000],  # Forecast count
        ]
    )

    want_data = np.array(
        [
            [1000, 500, 500, 750],
            [250, 500, 750, 1000],
            [2000, 1000, 1000, 1000],
        ]
    )
    want_points = np.array([0.2, 0.5, 0.7, 0.9], dtype=np.float32)
    want_bounds = np.array(
        [[0.0, 0.4], [0.4, 0.6], [0.6, 0.8], [0.8, 1.0]], dtype=np.float32
    )

    # The plugin is handed a copy of the fixture rather than the fixture.
    table_copy = reliability_table_slice.copy()
    outcome = Plugin()._enforce_min_count_and_montonicity(table_copy)

    assert_array_equal(outcome.data, want_data)
    assert_allclose(outcome.coord("probability_bin").points, want_points)
    assert_allclose(outcome.coord("probability_bin").bounds, want_bounds)
def test_emcam_combine_undersampled_bins_non_monotonic(reliability_table_slice):
    """Check the output for a low forecast count with non-monotonic frequency.

    The third input bin holds a forecast count of only 100, and the ratio of
    observation count to forecast count is not monotonic across the bins.
    The expected output has three bins, the middle one spanning the bounds
    [0.4, 0.8].
    """
    reliability_table_slice.data = np.array(
        [
            [750, 250, 50, 375, 1000],  # Observation count
            [750, 250, 50, 375, 1000],  # Sum of forecast probability
            [1000, 1000, 100, 500, 1000],  # Forecast count
        ],
        dtype=np.float32,
    )

    want_data = np.array(
        [
            [1000, 425, 1000],
            [1000, 425, 1000],
            [2000, 600, 1000],
        ]
    )
    want_points = np.array([0.2, 0.6, 0.9], dtype=np.float32)
    want_bounds = np.array(
        [[0.0, 0.4], [0.4, 0.8], [0.8, 1.0]], dtype=np.float32
    )

    # The plugin is handed a copy of the fixture rather than the fixture.
    table_copy = reliability_table_slice.copy()
    outcome = Plugin()._enforce_min_count_and_montonicity(table_copy)

    assert_array_equal(outcome.data, want_data)
    assert_allclose(outcome.coord("probability_bin").points, want_points)
    assert_allclose(outcome.coord("probability_bin").bounds, want_bounds)
def setUp(self):
    """Create default monotonically increasing bin values and a plugin.

    After the parent set-up has run, stores float32 arrays for the
    observation count and the sum of forecast probability (both rising
    from 0 to 1000 in steps of 250) along with a plugin instance.
    """
    super().setUp()
    rising_values = [0, 250, 500, 750, 1000]
    self.plugin = Plugin()
    # Two independent arrays are created so neither attribute aliases the other.
    self.obs_count = np.array(rising_values, dtype=np.float32)
    self.forecast_probability_sum = np.array(rising_values, dtype=np.float32)
def process(
    reliability_table: cli.inputcube, *, minimum_forecast_count: int = 200,
):
    """Combine sparsely sampled reliability table bins and enforce monotonicity.

    Bins with low sample counts are combined so that as many bins as
    possible hold a sufficient sample count. A monotonic observation
    frequency is enforced as well.

    Args:
        reliability_table (iris.cube.Cube):
            The reliability table to manipulate, after its spatial
            dimensions have been aggregated.
        minimum_forecast_count (int):
            The smallest number of forecast counts that a forecast
            probability bin may hold and still be used in calibration.
            The default value of 200 is that used in Flowerdew 2014.

    Returns:
        iris.cube.CubeList:
            The manipulated reliability table, with sufficient sample counts
            in each probability bin and a monotonic observation frequency.
            The cubelist holds one cube for each threshold in the original
            reliability table.
    """
    # Imported at call time rather than at module import time.
    from improver.calibration.reliability_calibration import (
        ManipulateReliabilityTable,
    )

    return ManipulateReliabilityTable(
        minimum_forecast_count=minimum_forecast_count
    )(reliability_table)
class Test__combine_undersampled_bins(Test_setup):
    """Tests for the plugin's _combine_undersampled_bins method."""

    def setUp(self):
        """Create default monotonically increasing bin values and a plugin."""
        super().setUp()
        rising_values = [0, 250, 500, 750, 1000]
        self.plugin = Plugin()
        # Two independent arrays so neither attribute aliases the other.
        self.obs_count = np.array(rising_values, dtype=np.float32)
        self.forecast_probability_sum = np.array(
            rising_values, dtype=np.float32
        )

    def _run_combine(self, obs_count, forecast_probability_sum, forecast_count):
        """Call the method under test with the shared probability bin coord."""
        # self.probability_bin_coord is not assigned in this class; presumably
        # it is provided by the parent set-up -- confirm against Test_setup.
        return self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

    @staticmethod
    def _check_combined(result, want_data, want_points, want_bounds):
        """Compare the three data rows and the bin coordinate of a result.

        Args:
            result: Value returned by _combine_undersampled_bins; its first
                three items are compared as data and the fourth is treated
                as the probability bin coordinate.
            want_data: Nested list of expected observation count, sum of
                forecast probability and forecast count rows.
            want_points: Expected coordinate points.
            want_bounds: Expected coordinate bounds.
        """
        assert_array_equal(result[:3], np.array(want_data))
        # The expected values are deliberately passed first to assert_allclose.
        assert_allclose(
            np.array(want_points, dtype=np.float32), result[3].points
        )
        assert_allclose(
            np.array(want_bounds, dtype=np.float32), result[3].bounds
        )

    def test_no_undersampled_bins(self):
        """Inputs are returned unchanged when every forecast count is 1000."""
        forecast_count = np.array([1000] * 5, dtype=np.float32)

        result = self._run_combine(
            self.obs_count, self.forecast_probability_sum, forecast_count
        )

        unchanged = [
            self.obs_count,
            self.forecast_probability_sum,
            forecast_count,
        ]
        assert_array_equal(result[:3], unchanged)
        self.assertEqual(result[3], self.probability_bin_coord)

    def test_poorly_sampled_bins(self):
        """All bins have a forecast count of 10; a single bin is expected.

        The expected single bin spans the bounds [0.0, 1.0] and holds the
        totals of every input row.
        """
        obs_count = np.array([0, 2, 5, 8, 10], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array([10, 10, 10, 10, 10], dtype=np.float32)

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [25],  # Observation count
                [25],  # Sum of forecast probability
                [50],  # Forecast count
            ],
            [0.5],
            [[0.0, 1.0]],
        )

    def test_one_undersampled_bin_at_top(self):
        """The highest bin has a forecast count of 100.

        The expected top bin spans the bounds [0.6, 1.0].
        """
        obs_count = np.array([0, 250, 500, 750, 100], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array(
            [1000, 1000, 1000, 1000, 100], dtype=np.float32
        )

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 250, 500, 850],  # Observation count
                [0, 250, 500, 850],  # Sum of forecast probability
                [1000, 1000, 1000, 1100],  # Forecast count
            ],
            [0.1, 0.3, 0.5, 0.8],
            [[0.0, 0.2], [0.2, 0.4], [0.4, 0.6], [0.6, 1.0]],
        )

    def test_one_undersampled_bin_at_bottom(self):
        """The lowest bin has a forecast count of 100.

        The expected bottom bin spans the bounds [0.0, 0.4].
        """
        forecast_count = np.array(
            [100, 1000, 1000, 1000, 1000], dtype=np.float32
        )

        result = self._run_combine(
            self.obs_count, self.forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [250, 500, 750, 1000],  # Observation count
                [250, 500, 750, 1000],  # Sum of forecast probability
                [1100, 1000, 1000, 1000],  # Forecast count
            ],
            [0.2, 0.5, 0.7, 0.9],
            [[0.0, 0.4], [0.4, 0.6], [0.6, 0.8], [0.8, 1.0]],
        )

    def test_one_undersampled_bin_lower_neighbour(self):
        """The middle bin (forecast count 100) joins its lower neighbour.

        The lower neighbour has a forecast count of 1000 and the upper
        neighbour 2000; the expected merged bin spans [0.2, 0.6].
        """
        obs_count = np.array([0, 250, 50, 1500, 1000], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array(
            [1000, 1000, 100, 2000, 1000], dtype=np.float32
        )

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 300, 1500, 1000],  # Observation count
                [0, 300, 1500, 1000],  # Sum of forecast probability
                [1000, 1100, 2000, 1000],  # Forecast count
            ],
            [0.1, 0.4, 0.7, 0.9],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 0.8], [0.8, 1.0]],
        )

    def test_one_undersampled_bin_upper_neighbour(self):
        """The middle bin (forecast count 100) joins its upper neighbour.

        The lower neighbour has a forecast count of 2000 and the upper
        neighbour 1000; the expected merged bin spans [0.4, 0.8].
        """
        obs_count = np.array([0, 500, 50, 750, 1000], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array(
            [1000, 2000, 100, 1000, 1000], dtype=np.float32
        )

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 500, 800, 1000],  # Observation count
                [0, 500, 800, 1000],  # Sum of forecast probability
                [1000, 2000, 1100, 1000],  # Forecast count
            ],
            [0.1, 0.3, 0.6, 0.9],
            [[0.0, 0.2], [0.2, 0.4], [0.4, 0.8], [0.8, 1.0]],
        )

    def test_two_undersampled_bins(self):
        """Two separated bins have low forecast counts (50 and 100).

        Three bins are expected, with bounds [0.0, 0.2], [0.2, 0.6] and
        [0.6, 1.0].
        """
        obs_count = np.array([0, 12, 250, 75, 250], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array([1000, 50, 500, 100, 250], dtype=np.float32)

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 262, 325],  # Observation count
                [0, 262, 325],  # Sum of forecast probability
                [1000, 550, 350],  # Forecast count
            ],
            [0.1, 0.4, 0.8],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 1.0]],
        )

    def test_two_equal_undersampled_bins(self):
        """Two separated bins share the same low forecast count of 100.

        Three bins are expected, with bounds [0.0, 0.2], [0.2, 0.6] and
        [0.6, 1.0].
        """
        obs_count = np.array([0, 25, 250, 75, 250], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array([1000, 100, 500, 100, 250], dtype=np.float32)

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 275, 325],  # Observation count
                [0, 275, 325],  # Sum of forecast probability
                [1000, 600, 350],  # Forecast count
            ],
            [0.1, 0.4, 0.8],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 1.0]],
        )

    def test_three_equal_undersampled_bin_neighbours(self):
        """Three adjacent bins each have a forecast count of 100.

        The expected middle bin spans the bounds [0.2, 0.8].
        """
        obs_count = np.array([0, 25, 50, 75, 250], dtype=np.float32)
        forecast_probability_sum = obs_count.copy()
        forecast_count = np.array([1000, 100, 100, 100, 250], dtype=np.float32)

        result = self._run_combine(
            obs_count, forecast_probability_sum, forecast_count
        )

        self._check_combined(
            result,
            [
                [0, 150, 250],  # Observation count
                [0, 150, 250],  # Sum of forecast probability
                [1000, 300, 250],  # Forecast count
            ],
            [0.1, 0.5, 0.9],
            [[0.0, 0.2], [0.2, 0.8], [0.8, 1.0]],
        )