def test_2d_convolution_gpu(convolve_2d_data,
                            kernel_circle_1_1_1,
                            convolution_kernel_circle_1_1_1,
                            kernel_annulus_2_2_2_1,
                            convolution_kernel_annulus_2_2_1):
    """Exercise ``convolve_2d`` on cupy-backed input.

    Each kernel is applied to a cupy copy of ``convolve_2d_data``; the result
    must be a ``cupy.ndarray`` whose host copy matches the expected fixture.
    Finally, a dask array wrapping cupy data must raise
    ``NotImplementedError``.
    """
    import cupy

    gpu_data = cupy.asarray(convolve_2d_data)
    identity_kernel = np.ones((1, 1))

    cases = (
        # kernel is [[1]], thus the result equals input data
        (identity_kernel, convolve_2d_data),
        (kernel_circle_1_1_1, convolution_kernel_circle_1_1_1),
        (kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1),
    )
    for kernel, expected in cases:
        gpu_result = convolve_2d(gpu_data, kernel)
        assert isinstance(gpu_result, cupy.ndarray)
        np.testing.assert_allclose(gpu_result.get(), expected, equal_nan=True)

    # dask + cupy case not implemented
    dask_cupy_agg = xr.DataArray(
        da.from_array(cupy.asarray(convolve_2d_data), chunks=(3, 3))
    )
    with pytest.raises(NotImplementedError) as e_info:
        convolve_2d(dask_cupy_agg.data, identity_kernel)
    assert e_info
def _hotspots_dask_numpy(raster, kernel):
    """Compute hotspot classes for a dask-backed raster.

    The raster data is convolved with ``kernel`` normalised by its sum,
    converted to z-scores using the global ``nanmean``/``nanstd`` of the
    data, and then passed chunk-wise to ``_calc_hotspots_numpy`` through
    ``map_overlap``.

    Returns the resulting (lazy) array from ``map_overlap``.
    """
    values = raster.data

    # apply kernel to raster values
    normalised_kernel = kernel / kernel.sum()
    mean_array = convolve_2d(values, normalised_kernel)

    # calculate z-scores
    global_mean = da.nanmean(values)
    global_std = da.nanstd(values)

    # commented out to avoid early compute to check if global_std is zero
    # if global_std == 0:
    #     raise ZeroDivisionError(
    #         "Standard deviation of the input raster values is 0."
    #     )
    z_array = (mean_array - global_mean) / global_std

    _func = partial(_calc_hotspots_numpy)

    # overlap depth per axis is half the kernel extent along that axis
    overlap_depth = (kernel.shape[0] // 2, kernel.shape[1] // 2)
    return z_array.map_overlap(
        _func,
        depth=overlap_depth,
        boundary=np.nan,
        meta=np.array(()),
    )
def test_convolution_numpy(convolve_2d_data,
                           kernel_circle_1_1_1,
                           convolution_kernel_circle_1_1_1,
                           kernel_annulus_2_2_2_1,
                           convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on numpy input against the expected fixtures.

    For every kernel the result must be an ``np.ndarray`` that matches the
    expected array, with NaNs compared as equal.
    """
    cases = (
        # kernel is [[1]], thus the result equals input data
        (np.ones((1, 1)), convolve_2d_data),
        (kernel_circle_1_1_1, convolution_kernel_circle_1_1_1),
        (kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1),
    )
    for kernel, expected in cases:
        result = convolve_2d(convolve_2d_data, kernel)
        assert isinstance(result, np.ndarray)
        np.testing.assert_allclose(result, expected, equal_nan=True)
def test_convolution_dask_numpy(convolve_2d_data,
                                kernel_circle_1_1_1,
                                convolution_kernel_circle_1_1_1,
                                kernel_annulus_2_2_2_1,
                                convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on a dask array built with (3, 3) chunks.

    For every kernel the result must stay a ``da.Array`` and, once computed,
    match the expected array with NaNs compared as equal.
    """
    chunked_data = da.from_array(convolve_2d_data, chunks=(3, 3))

    cases = (
        # kernel is [[1]], thus the result equals input data
        (np.ones((1, 1)), convolve_2d_data),
        (kernel_circle_1_1_1, convolution_kernel_circle_1_1_1),
        (kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1),
    )
    for kernel, expected in cases:
        lazy_result = convolve_2d(chunked_data, kernel)
        assert isinstance(lazy_result, da.Array)
        np.testing.assert_allclose(
            lazy_result.compute(), expected, equal_nan=True
        )
def test_convolution_numpy(convolve_2d_data,
                           convolution_custom_kernel,
                           kernel_circle_1_1_1,
                           convolution_kernel_circle_1_1_1,
                           kernel_annulus_2_2_2_1,
                           convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on numpy input for custom, circle and annulus kernels.

    ``convolution_custom_kernel`` is unpacked into a (kernel, expected result)
    pair. For every kernel the result must be an ``np.ndarray`` matching the
    expected array, with NaNs compared as equal.
    """
    custom_kernel, custom_expected = convolution_custom_kernel

    cases = (
        (custom_kernel, custom_expected),
        (kernel_circle_1_1_1, convolution_kernel_circle_1_1_1),
        (kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1),
    )
    for kernel, expected in cases:
        result = convolve_2d(convolve_2d_data, kernel)
        assert isinstance(result, np.ndarray)
        np.testing.assert_allclose(result, expected, equal_nan=True)
def _hotspots_cupy(raster, kernel):
    """Compute hotspot classes for a cupy-backed raster.

    Raises
    ------
    ValueError
        If the raster dtype is neither an integer nor a floating type.
    ZeroDivisionError
        If the global standard deviation of the raster values is 0.
    """
    scalar_type = raster.data.dtype.type
    if not issubclass(scalar_type, (cupy.integer, cupy.floating)):
        raise ValueError("data type must be integer or float")

    # apply kernel to raster values
    normalised_kernel = kernel / kernel.sum()
    mean_array = convolve_2d(raster.data, normalised_kernel)

    # calculate z-scores
    global_mean = cupy.nanmean(raster.data)
    global_std = cupy.nanstd(raster.data)
    if global_std == 0:
        raise ZeroDivisionError(
            "Standard deviation of the input raster values is 0.")

    z_array = (mean_array - global_mean) / global_std
    return _calc_hotspots_cupy(z_array)
def test_2d_convolution_gpu_equals_cpu():
    """Check that ``convolve_2d`` gives matching results on numpy and cupy input.

    Three kernels are applied to both a numpy-backed and a cupy-backed
    ``DataArray``; the cupy output must be a ``cupy.ndarray`` and agree with
    the numpy output. A dask array wrapping cupy data must raise
    ``NotImplementedError``.
    """
    import cupy

    data = convolve_2d_data
    numpy_agg = xr.DataArray(data)
    cupy_agg = xr.DataArray(cupy.asarray(data))

    kernels = (
        np.ones((1, 1)),
        np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
        np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]),
    )
    for kernel in kernels:
        cpu_output = convolve_2d(numpy_agg.data, kernel)
        gpu_output = convolve_2d(cupy_agg.data, kernel)
        assert isinstance(gpu_output, cupy.ndarray)
        np.testing.assert_allclose(
            cpu_output, gpu_output.get(), equal_nan=True
        )

    # dask + cupy case not implemented
    dask_cupy_agg = xr.DataArray(
        da.from_array(cupy.asarray(data), chunks=(3, 3))
    )
    with pytest.raises(NotImplementedError) as e_info:
        convolve_2d(dask_cupy_agg.data, kernels[-1])
    assert e_info
def _hotspots_cupy(raster, kernel):
    """Compute hotspot classes for a cupy-backed raster with a CUDA kernel.

    The data is cast to ``float32``, convolved with ``kernel`` normalised by
    its sum and converted to z-scores; ``_run_gpu_hotspots`` then fills an
    ``int8`` output array of the same shape.

    Raises
    ------
    ValueError
        If the raster dtype is neither an integer nor a floating type.
    ZeroDivisionError
        If the global standard deviation of the raster values is 0.
    """
    scalar_type = raster.data.dtype.type
    if not issubclass(scalar_type, (cupy.integer, cupy.floating)):
        raise ValueError("data type must be integer or float")

    data = raster.data.astype(cupy.float32)

    # apply kernel to raster values
    normalised_kernel = kernel / kernel.sum()
    mean_array = convolve_2d(data, normalised_kernel)

    # calculate z-scores
    global_mean = cupy.nanmean(data)
    global_std = cupy.nanstd(data)
    if global_std == 0:
        raise ZeroDivisionError(
            "Standard deviation of the input raster values is 0.")
    z_array = (mean_array - global_mean) / global_std

    # launch the classification kernel over the whole z-score grid
    out = cupy.zeros_like(z_array, dtype=cupy.int8)
    griddim, blockdim = cuda_args(z_array.shape)
    _run_gpu_hotspots[griddim, blockdim](z_array, out)
    return out
def test_convolution():
    """Check ``convolve_2d`` on numpy and dask input against literal expectations.

    Each kernel is applied to ``convolve_2d_data`` both as-is and wrapped in
    a dask array with (3, 3) chunks; both results must have the matching
    array type and agree with the expected values (NaNs compared as equal).
    """
    data = convolve_2d_data
    dask_data = da.from_array(data, chunks=(3, 3))
    nan = np.nan

    cases = (
        (
            np.ones((1, 1)),
            np.array([[0., 1., 1., 1., 1., 1.],
                      [1., 0., 1., 1., 1., 1.],
                      [1., 1., 0., 1., 1., 1.],
                      [1., 1., 1., nan, 1., 1.],
                      [1., 1., 1., 1., 0., 1.],
                      [1., 1., 1., 1., 1., 0.]]),
        ),
        # The 3x3 kernels below have a border edge that is 1 cell long.
        # currently, ignoring border edge (i.e values in edges are all nans)
        (
            np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
            np.array([[nan, nan, nan, nan, nan, nan],
                      [nan, 4., 3., 5., 5., nan],
                      [nan, 3., nan, nan, nan, nan],
                      [nan, 5., nan, nan, nan, nan],
                      [nan, 5., nan, nan, nan, nan],
                      [nan, nan, nan, nan, nan, nan]]),
        ),
        (
            np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]),
            np.array([[nan, nan, nan, nan, nan, nan],
                      [nan, 4., 2., 4., 4., nan],
                      [nan, 2., nan, nan, nan, nan],
                      [nan, 4., nan, nan, nan, nan],
                      [nan, 4., nan, nan, nan, nan],
                      [nan, nan, nan, nan, nan, nan]]),
        ),
    )

    for kernel, expected in cases:
        numpy_output = convolve_2d(data, kernel)
        assert isinstance(numpy_output, np.ndarray)
        assert np.isclose(numpy_output, expected, equal_nan=True).all()

        dask_output = convolve_2d(dask_data, kernel)
        assert isinstance(dask_output, da.Array)
        assert np.isclose(
            dask_output.compute(), expected, equal_nan=True
        ).all()
def test_convolution():
    """Check ``convolve_2d`` sums and means on a 6x6 raster of ones with a NaN diagonal.

    Covers a [[1]] kernel, a kernel from ``circle_kernel`` and a kernel from
    ``annulus_kernel``, with both ``pad=False`` and ``pad=True`` calls.
    """
    n, m = 6, 6
    raster = xr.DataArray(np.ones((n, m)), dims=['y', 'x'])
    raster['x'] = np.linspace(0, n, n)
    raster['y'] = np.linspace(0, m, m)
    cellsize_x, cellsize_y = calc_cellsize(raster)

    # add some nan pixels
    nan_cells = [(i, i) for i in range(n)]
    for row, col in nan_cells:
        raster[row, col] = np.nan

    # kernel array = [[1]]
    kernel = np.ones((1, 1))

    # np.nansum(np.array([np.nan])) = 0.0
    expected_out_sum_1 = np.array([[0., 1., 1., 1., 1., 1.],
                                   [1., 0., 1., 1., 1., 1.],
                                   [1., 1., 0., 1., 1., 1.],
                                   [1., 1., 1., 0., 1., 1.],
                                   [1., 1., 1., 1., 0., 1.],
                                   [1., 1., 1., 1., 1., 0.]])
    # Convolution will return np.nan, so convert nan to 0
    assert np.all(np.nan_to_num(expected_out_sum_1) == expected_out_sum_1)

    # np.nanmean(np.array([np.nan])) = nan
    mean_output_1 = convolve_2d(raster.values, kernel / kernel.sum())
    for row, col in nan_cells:
        assert np.isnan(mean_output_1[row, col])
    # remaining cells are 1s
    assert all(
        mean_output_1[i, j] == 1
        for i in range(n)
        for j in range(m)
        if i != j
    )

    zero_filled = np.nan_to_num(raster.values)

    # kernel array: [[0, 1, 0],
    #                [1, 1, 1],
    #                [0, 1, 0]]
    kernel = circle_kernel(cellsize_x, cellsize_y, 2)
    sum_output_2 = convolve_2d(zero_filled, kernel, pad=False)
    expected_out_sum_2 = np.array([[2., 2., 4., 4., 4., 3.],
                                   [2., 4., 3., 5., 5., 4.],
                                   [4., 3., 4., 3., 5., 4.],
                                   [4., 5., 3., 4., 3., 4.],
                                   [4., 5., 5., 3., 4., 2.],
                                   [3., 4., 4., 4., 2., 2.]])
    assert np.all(sum_output_2 == expected_out_sum_2)

    mean_output_2 = convolve_2d(np.ones((n, m)),
                                kernel / kernel.sum(),
                                pad=True)
    expected_mean_output_2 = np.ones((n, m))
    assert np.all(mean_output_2 == expected_mean_output_2)

    # kernel array: [[0, 1, 0],
    #                [1, 0, 1],
    #                [0, 1, 0]]
    kernel = annulus_kernel(cellsize_x, cellsize_y, 2.0, 0.5)
    sum_output_3 = convolve_2d(np.nan_to_num(raster.values),
                               kernel,
                               pad=False)
    expected_out_sum_3 = np.array([[2., 1., 3., 3., 3., 2.],
                                   [1., 4., 2., 4., 4., 3.],
                                   [3., 2., 4., 2., 4., 3.],
                                   [3., 4., 2., 4., 2., 3.],
                                   [3., 4., 4., 2., 4., 1.],
                                   [2., 3., 3., 3., 1., 2.]])
    assert np.all(sum_output_3 == expected_out_sum_3)

    mean_output_3 = convolve_2d(np.ones((n, m)),
                                kernel / kernel.sum(),
                                pad=True)
    expected_mean_output_3 = np.ones((n, m))
    assert np.all(mean_output_3 == expected_mean_output_3)
def _focal_sum_cupy(data, kernel):
    """Return ``convolve_2d(data, kernel)`` with the kernel used as given."""
    return convolve_2d(data, kernel)
def _focal_mean_cupy(data, kernel):
    """Return ``convolve_2d`` of ``data`` with ``kernel`` divided by its sum."""
    normalised_kernel = kernel / kernel.sum()
    return convolve_2d(data, normalised_kernel)
def hotspots(raster: xr.DataArray,
             kernel: xr.DataArray,
             x: Optional[str] = 'x',
             y: Optional[str] = 'y') -> xr.DataArray:
    """
    Identify statistically significant hot spots and cold spots in an
    input raster.

    To be a statistically significant hot spot, a feature will have a high
    value and be surrounded by other features with high values as well.
    Neighborhood of a feature defined by the input kernel, which currently
    support a shape of circle, annulus, or custom kernel.

    The result should be a raster with the following 7 values:
         90 for 90% confidence high value cluster
         95 for 95% confidence high value cluster
         99 for 99% confidence high value cluster
        -90 for 90% confidence low value cluster
        -95 for 95% confidence low value cluster
        -99 for 99% confidence low value cluster
          0 for no significance

    Parameters
    ----------
    raster: xarray.DataArray
        2D Input raster image with shape = (height, width).
    kernel: Numpy Array
        2D array where values of 1 indicate the kernel.
    x: str, default 'x'
        Name of the second raster dimension; ``raster.dims`` must equal
        ``(y, x)``.
    y: str, default 'y'
        Name of the first raster dimension; ``raster.dims`` must equal
        ``(y, x)``.

    Returns
    -------
    xarray.DataArray
        2D array of hotspots with values indicating confidence level,
        carrying the coords, dims and attrs of ``raster``.

    Raises
    ------
    TypeError
        If ``raster`` is not a DataArray.
    ValueError
        If ``raster`` is not 2D, is not of integer or float dtype, or its
        dims are not ``(y, x)``.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.

    Examples
    --------
    Imports

    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial import focal

    Create Data Array

    >>> agg = xr.DataArray(np.array([[0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0]]),
    ...                    dims = ["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lon = np.linspace(0, width - 1, width)
    >>> _lat = np.linspace(0, height - 1, height)
    >>> agg["lon"] = _lon
    >>> agg["lat"] = _lat

    Create Kernel

    >>> kernel = focal.circle_kernel(1, 1, 1)

    Create Hotspot Data Array

    >>> focal.hotspots(agg, kernel, x = 'lon', y = 'lat')
    <xarray.DataArray (lat: 7, lon: 7)>
    array([[ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0, 95,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0]], dtype=int8)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0
    """

    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    if not (issubclass(raster.values.dtype.type, np.integer) or
            issubclass(raster.values.dtype.type, np.floating)):
        raise ValueError("`raster` must be an array of integers or float")

    raster_dims = raster.dims
    if raster_dims != (y, x):
        # `{}` placeholders so str.format actually interpolates the expected
        # dimension names; the previous `%s` placeholders were left verbatim.
        raise ValueError("raster.coords should be named as coordinates:"
                         "({}, {})".format(y, x))

    # apply kernel to raster values
    mean_array = convolve_2d(raster.values, kernel / kernel.sum(), pad=True)

    # calculate z-scores
    global_mean = np.nanmean(raster.values)
    global_std = np.nanstd(raster.values)
    if global_std == 0:
        raise ZeroDivisionError("Standard deviation "
                                "of the input raster values is 0.")
    z_array = (mean_array - global_mean) / global_std

    out = _hotspots(z_array)

    result = DataArray(out,
                       coords=raster.coords,
                       dims=raster.dims,
                       attrs=raster.attrs)

    return result
import matplotlib.pyplot as plt
from xrspatial import convolution

# load datasets
hh_path = '/home/[email protected]/Documents/sm_paper/smapvex16/insitu_handheld/SV16M_PSM_SoilMoistureHandheld_Vers3_w_coords.csv'
s1_path = '/home/[email protected]/Documents/sm_paper/smapvex16/s1sm/SMCS1_20160719_001513_063_A.tif'

hh_data = pd.read_csv(hh_path, index_col=[1, 2], parse_dates=[1])
# hh_data['SITE_ID'] = [x.split("-")[0] for x in hh_data['SITE_ID']]
# hh_data = hh_data.groupby('SITE_ID').mean()
# hh_data = hh_data.groupby(level=0).mean()
# keep only the 'Top' entries of the LOCATION index level, then the
# handheld rows dated 2016-07-19
hh_data = hh_data.xs('Top', level='LOCATION')
hh_data = hh_data.loc[hh_data.index.date == dt.date(year=2016, month=7, day=19)]

s1_data = xr.open_rasterio(s1_path)
# NOTE(review): the nine 1/3 weights sum to 3, not 1 -- confirm that a mean
# filter (1/9 per cell) was not intended here.
s1_data = convolution.convolve_2d(s1_data, np.full((3, 3), 1 / 3))

# extract values: interpolate the raster at each handheld measurement
# location; rows that cannot be interpolated are recorded as NaN
smlist = []
for irow in range(hh_data.shape[0]):
    try:
        tmp = s1_data.interp(x=hh_data['Lon'].iloc[irow],
                             y=hh_data['Lat'].iloc[irow],
                             method='linear').values[0]
        # if tmp > 40:
        #     hh_data['SOIL_MOISTURE'].iloc[irow] = hh_data['SOIL_MOISTURE'].iloc[irow] + 0.1
    except Exception:
        # Best-effort extraction: keep going on failures, but do not swallow
        # KeyboardInterrupt / SystemExit as the former bare `except:` did.
        smlist.append(np.nan)
    else:
        smlist.append(tmp)
def hotspots(raster, kernel, x='x', y='y'):
    """Identify statistically significant hot spots and cold spots in an
    input raster.

    To be a statistically significant hot spot, a feature will have a high
    value and be surrounded by other features with high values as well.
    Neighborhood of a feature defined by the input kernel, which currently
    support a shape of circle, annulus, or custom kernel.

    The result should be a raster with the following 7 values:
         90 for 90% confidence high value cluster
         95 for 95% confidence high value cluster
         99 for 99% confidence high value cluster
        -90 for 90% confidence low value cluster
        -95 for 95% confidence low value cluster
        -99 for 99% confidence low value cluster
          0 for no significance

    Parameters
    ----------
    raster: xarray.DataArray
        Input raster image with shape=(height, width)
    kernel: Kernel
    x: str, default 'x'
        Name of the second raster dimension; ``raster.dims`` must equal
        ``(y, x)``.
    y: str, default 'y'
        Name of the first raster dimension; ``raster.dims`` must equal
        ``(y, x)``.

    Returns
    -------
    hotspots: xarray.DataArray
        Carries the coords, dims and attrs of ``raster``.

    Raises
    ------
    TypeError
        If ``raster`` is not a DataArray.
    ValueError
        If ``raster`` is not 2D, is not of integer or float dtype, or its
        dims are not ``(y, x)``.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.
    """

    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    if not (issubclass(raster.values.dtype.type, np.integer) or
            issubclass(raster.values.dtype.type, np.floating)):
        raise ValueError("`raster` must be an array of integers or float")

    raster_dims = raster.dims
    if raster_dims != (y, x):
        # `{}` placeholders so str.format actually interpolates the expected
        # dimension names; the previous `%s` placeholders were left verbatim.
        raise ValueError("raster.coords should be named as coordinates:"
                         "({}, {})".format(y, x))

    # apply kernel to raster values
    mean_array = convolve_2d(raster.values, kernel / kernel.sum(), pad=True)

    # calculate z-scores
    global_mean = np.nanmean(raster.values)
    global_std = np.nanstd(raster.values)
    if global_std == 0:
        raise ZeroDivisionError("Standard deviation "
                                "of the input raster values is 0.")
    z_array = (mean_array - global_mean) / global_std

    out = _hotspots(z_array)

    result = DataArray(out,
                       coords=raster.coords,
                       dims=raster.dims,
                       attrs=raster.attrs)

    return result