def _create_windows(self, temp, orog):
    """Pad both input fields and expose them as rolling neighbourhood
    windows, using the configured square neighbourhood size.

    Args:
        temp (numpy.ndarray):
            2D array (single realization) of temperature data, in Kelvin
        orog (numpy.ndarray):
            2D array of orographies, in metres

    Returns:
        (tuple): tuple containing:
            **views of temp** (numpy.ndarray):
                Rolling windows of the padded temperature dataset.
            **views of orog** (numpy.ndarray):
                Rolling windows of the padded orography dataset.
    """
    nbhood_shape = (self.nbhood_size, self.nbhood_size)
    # Pad with NaN so border windows are the same shape as interior ones.
    temp_views, orog_views = (
        neighbourhood_tools.pad_and_roll(
            field, nbhood_shape, mode="constant", constant_values=np.nan
        )
        for field in (temp, orog)
    )
    return temp_views, orog_views
def _create_windows(self, temp: ndarray, orog: ndarray) -> Tuple[ndarray, ndarray]:
    """Pad the temperature and orography fields and generate rolling
    neighbourhood windows over each.

    Args:
        temp: 2D array (single realization) of temperature data, in Kelvin
        orog: 2D array of orographies, in metres

    Returns:
        - Rolling windows of the padded temperature dataset.
        - Rolling windows of the padded orography dataset.
    """
    nbhood_shape = (self.nbhood_size, self.nbhood_size)

    def _windows(field: ndarray) -> ndarray:
        # NaN padding keeps edge windows full-sized without inventing data.
        return neighbourhood_tools.pad_and_roll(
            field, nbhood_shape, mode="constant", constant_values=np.nan
        )

    return _windows(temp), _windows(orog)
def _find_and_interpolate_speckle(self, cube: Cube) -> None:
    """Identify and interpolate "speckle" points, where "speckle" is defined as
    areas of "no data" that are small enough to fill by interpolation without
    affecting data integrity. We would not wish to interpolate large areas as
    this may give false confidence in "no precipitation", where in fact
    precipitation exists in a "no data" region. Masked pixels near the borders
    of the input data array are not considered for interpolation.

    Args:
        cube:
            Cube containing rainrates (mm/h). Data modified in place.
    """
    # Roll a window over the mask and the data. Padding the mask with 1
    # (masked) means border pixels always look surrounded by "no data",
    # which is what excludes them from interpolation.
    mask_windows = neighbourhood_tools.pad_and_roll(
        cube.data.mask, self.window_shape, mode="constant", constant_values=1)
    data_windows = neighbourhood_tools.pad_and_roll(
        cube.data, self.window_shape, mode="constant", constant_values=np.nan)
    # find indices of "speckle" pixels: the centre pixel is masked AND the
    # surrounding window contains fewer masked cells than the threshold
    indices = np.where(
        (mask_windows[..., self.r_speckle, self.r_speckle] == 1)
        & (np.sum(mask_windows, axis=(-2, -1)) < self.max_masked_values))
    # average data from the 5x5 nbhood around each "speckle" point
    # (r_interp cells either side of the window centre — presumably
    # r_interp == 2 gives the 5x5; confirm against the class constructor)
    bounds = slice(self.r_speckle - self.r_interp,
                   self.r_speckle + self.r_interp + 1)
    data = data_windows[indices][..., bounds, bounds]
    mask = mask_windows[indices][..., bounds, bounds]
    for row_ind, col_ind, data_win, mask_win in zip(*indices, data, mask):
        # Average unmasked neighbours in log space; values at or below the
        # minimum rain rate become NaN and so are excluded from the mean.
        valid_points = data_win[mask_win == 0]
        mean = np.mean(
            np.where(valid_points > self.MIN_RR_MMH,
                     np.log10(valid_points), np.nan))
        # when data value is set, mask is removed at that point
        # (assigning into a masked array unmasks the assigned element)
        if np.isnan(mean):
            cube.data[row_ind, col_ind] = 0
        else:
            # Convert the log-space mean back to a rain rate in mm/h.
            cube.data[row_ind, col_ind] = np.power(10, mean)
def test_padding_non_zero(array_size_5):
    """Test padding with a number other than the default of 0."""
    padded = pad_and_roll(array_size_5, (2, 2), mode="constant", constant_values=1)
    # Enumerate every window cell that overlaps the padded border:
    # top row, bottom row, left column, right column of window positions.
    edge_cells = (
        [(0, i, 0, j) for i in range(5) for j in (0, 1)]
        + [(5, i, 1, j) for i in range(5) for j in (0, 1)]
        + [(i, 0, j, 0) for i in range(5) for j in (0, 1)]
        + [(i, 5, j, 1) for i in range(5) for j in (0, 1)]
    )
    border_values = padded[tuple(np.array(edge_cells).T)]
    # All 40 border-overlapping cells must hold the pad value of 1.
    np.testing.assert_array_equal(border_values, np.ones(40, dtype=np.int32))
def test_padding_neighbourhood_size_2(array_size_5):
    """Test that result is same as result of rolling_window with a border of
    zeros."""
    padded = pad_and_roll(array_size_5, (2, 2), mode="constant")
    # Interior window positions must reproduce the unpadded rolling windows.
    np.testing.assert_array_equal(
        padded[1:-1, 1:-1, ::], rolling_window(array_size_5, (2, 2))
    )
    # Every window cell overlapping the border must be the pad value (0).
    edge_cells = (
        [(0, i, 0, j) for i in range(5) for j in (0, 1)]
        + [(5, i, 1, j) for i in range(5) for j in (0, 1)]
        + [(i, 0, j, 0) for i in range(5) for j in (0, 1)]
        + [(i, 5, j, 1) for i in range(5) for j in (0, 1)]
    )
    border_values = padded[tuple(np.array(edge_cells).T)]
    np.testing.assert_array_equal(border_values, np.zeros(40, dtype=np.int32))
def pad_and_unpad_cube(self, slice_2d: Cube, kernel: ndarray) -> Cube:
    """
    Method to pad and unpad a two dimensional cube. The input array is
    padded and percentiles are calculated using a neighbourhood around
    each point. The resulting percentile data are unpadded and put into a
    cube.

    Args:
        slice_2d:
            2d cube to be padded with a halo.
        kernel:
            Kernel used to specify the neighbourhood to consider when
            calculating the percentiles within a neighbourhood.

    Returns:
        A cube containing percentiles generated from a neighbourhood.

    Examples:

        1. Take the input slice_2d cube with the data, where 1 is an
           occurrence and 0 is a non-occurrence::

                [[1., 1., 1.,],
                 [1., 0., 1.],
                 [1., 1., 1.]]

        2. Define a kernel. This kernel is effectively placed over each point
           within the input data. Note that the input data is padded prior to
           placing the kernel over each point, so that the kernel does not
           exceed the bounds of the padded data::

                [[ 0.,  0.,  1.,  0.,  0.],
                 [ 0.,  1.,  1.,  1.,  0.],
                 [ 1.,  1.,  1.,  1.,  1.],
                 [ 0.,  1.,  1.,  1.,  0.],
                 [ 0.,  0.,  1.,  0.,  0.]]

        3. Pad the input data. The extent of the padding is given by the
           shape of the kernel. The number of values included within the
           calculation of the mean is determined by the size of the
           kernel::

                [[ 0.75, 0.75, 1.  , 0.5 , 1.  , 0.75, 0.75],
                 [ 0.75, 0.75, 1.  , 0.5 , 1.  , 0.75, 0.75],
                 [ 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ],
                 [ 0.5 , 0.5 , 1.  , 0.  , 1.  , 0.5 , 0.5 ],
                 [ 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ],
                 [ 0.75, 0.75, 1.  , 0.5 , 1.  , 0.75, 0.75],
                 [ 0.75, 0.75, 1.  , 0.5 , 1.  , 0.75, 0.75]]

        4. Calculate the values at the percentiles: [10].
           For the point in the upper right corner within the original
           input data e.g. ::

                [[->1.<-, 1., 1.,],
                 [  1.,   0., 1.],
                 [  1.,   1., 1.]]

           When the kernel is placed over this point within the padded
           data, then the following points are included::

                [[   0.75,   0.75, ->1.<-,   0.5 ,    1.  , 0.75, 0.75],
                 [   0.75, ->0.75,   1.  ,   0.5<-,   1.  , 0.75, 0.75],
                 [ ->1.  ,   1.  ,   1.  ,   1.  ,   1.<-, 1.  , 1.  ],
                 [   0.5 , ->0.5 ,   1.  ,   0.<-,    1.  , 0.5 , 0.5 ],
                 [   1.  ,   1.  , ->1.<-,   1.  ,   1.  , 1.  , 1.  ],
                 [   0.75,   0.75,   1.  ,   0.5 ,   1.  , 0.75, 0.75],
                 [   0.75,   0.75,   1.  ,   0.5 ,   1.  , 0.75, 0.75]]

           This gives::

                [0, 0.5, 0.5, 0.75, 1., 1., 1., 1., 1., 1., 1., 1., 1.]

           As there are 13 points within the kernel, this gives the
           following relationship between percentiles and values.

               ======  ==========
               Values  Percentile
               ======  ==========
               0.      0
               0.5     8.33
               0.5     16.67
               0.75    25.0
               1.      33.33
               1.      41.67
               1.      50.0
               1.      58.33
               1.      66.67
               1.      75.0
               1.      83.33
               1.      91.67
               1.      100.
               ======  ==========

           Therefore, for the 10th percentile, the value returned for the
           point in the upper right corner of the original input data is
           0.5.

           When this process is applied to every point within the original
           input data, the result is::

                [[[ 0.75, 0.75, 0.5 , 0.5 , 0.5 , 0.75, 0.75],
                  [ 0.75, 0.55, 0.55, 0.5 , 0.55, 0.55, 0.55],
                  [ 0.55, 0.55, 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ],
                  [ 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ],
                  [ 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.55, 0.55],
                  [ 0.55, 0.55, 0.55, 0.5 , 0.55, 0.55, 0.75],
                  [ 0.75, 0.75, 0.5 , 0.5 , 0.5 , 0.75, 0.75]]],

        5. The padding is then removed to give::

               [[[ 0.5, 0.5, 0.5],
                 [ 0.5, 0.5, 0.5],
                 [ 0.5, 0.5, 0.5]]]
    """
    # Boolean mask selecting the active cells of the kernel footprint.
    kernel_mask = kernel > 0
    # Rolling windows over the padded data; "mean" padding mirrors the
    # edge statistics rather than injecting a constant.
    nb_slices = pad_and_roll(
        slice_2d.data, kernel.shape, mode="mean", stat_length=max(kernel.shape) // 2)
    percentiles = np.array(self.percentiles, dtype=np.float32)
    # Create cube for output percentile data.
    pctcube = self.make_percentile_cube(slice_2d)
    # Collapse neighbourhood windows into percentiles.
    # (Loop over outer dimension to reduce memory footprint.)
    # swapaxes(0, 1) yields a view, so np.percentile's `out=` writes
    # straight into pctcube.data with no copy-back step.
    for nb_chunk, perc_chunk in zip(nb_slices, pctcube.data.swapaxes(0, 1)):
        np.percentile(
            nb_chunk[..., kernel_mask],
            percentiles,
            axis=-1,
            out=perc_chunk,
            # Windows are views of scratch padded data, so they may be
            # reordered in place for speed.
            overwrite_input=True,
        )
    return pctcube