def test_cog_mem_dask(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, dask=True)

    # write to memory 1
    bb = write_cog(xx, ":mem:")
    assert isinstance(bb, Delayed)
    bb = bb.compute()
    assert isinstance(bb, bytes)
    path = pp / "cog1.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)
    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 2
    bb = to_cog(xx)
    assert isinstance(bb, Delayed)
    bb = bb.compute()
    assert isinstance(bb, bytes)
    path = pp / "cog2.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)
    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
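# Illustrative sketch (not part of the original test suite): the write_cog ->
# rio_slurp_xarray round-trip the tests here exercise, for an eagerly computed
# (non-dask) DataArray `xx`. Assumes the same imports as the tests above.
def _roundtrip_cog_sketch(xx, path):
    cog_bytes = write_cog(xx, ":mem:")  # bytes when xx is not dask-backed
    with open(str(path), "wb") as f:
        f.write(cog_bytes)
    # reading back restores pixels together with geobox and nodata
    return rio_slurp_xarray(path)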
def add_chirps(
    urls: Dict[Any, Any],
    ds: xr.Dataset,
    era: str,
    training: bool = True,
    dask_chunks: Dict[Any, Any] = {"x": "auto", "y": "auto"},
) -> Optional[xr.Dataset]:
    # load rainfall climatology
    if era == "_S1":
        chirps = rio_slurp_xarray(urls["chirps"][0])
    if era == "_S2":
        chirps = rio_slurp_xarray(urls["chirps"][1])

    if chirps.size >= 2:
        if training:
            chirps = xr_reproject(chirps, ds.geobox, "bilinear")
            ds["rain"] = chirps
        else:
            # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
            xmin, xmax = ds.x.values[0], ds.x.values[-1]
            ymin, ymax = ds.y.values[0], ds.y.values[-1]
            inProj = Proj("epsg:6933")
            outProj = Proj("epsg:4326")
            xmin, ymin = transform(inProj, outProj, xmin, ymin)
            xmax, ymax = transform(inProj, outProj, xmax, ymax)

            # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
            if (xmin < 0) & (xmax < 0):
                x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
            else:
                x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

            y_slice = list(np.arange(ymin - 0.05, ymax + 0.1, 0.05))

            # index global chirps using buffered s2 tile bbox
            chirps = assign_crs(
                chirps.sel(longitude=y_slice, latitude=x_slice, method="nearest")
            )

            # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
            chirps = chirps.fillna(chirps.mean())
            chirps = xr_reproject(chirps, ds.geobox, "bilinear")
            chirps = chirps.chunk(dask_chunks)
            ds["rain"] = chirps

        # rename bands to include era
        for band in ds.data_vars:
            ds = ds.rename({band: band + era})

        return ds

    return None
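# Hypothetical shape of the `urls` mapping consumed by add_chirps above, shown
# for illustration only: the indexing urls["chirps"][0] / urls["chirps"][1]
# implies a two-element list (Jan-Jun, Jul-Dec); the S3 paths are taken from
# the hard-coded variant of add_chirps that follows.
urls_example = {
    "chirps": [
        "s3://deafrica-input-datasets/rainfall/CHPclim_jan_jun_cumulative_rainfall.tif",
        "s3://deafrica-input-datasets/rainfall/CHPclim_jul_dec_cumulative_rainfall.tif",
    ],
}
# ds_with_rain = add_chirps(urls_example, ds, era="_S1", training=False)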
def add_chirps(ds, era, training=True, dask_chunks={'x': 'auto', 'y': 'auto'}):
    # load rainfall climatology
    if era == "_S1":
        chirps = rio_slurp_xarray(
            "s3://deafrica-input-datasets/rainfall/CHPclim_jan_jun_cumulative_rainfall.tif"
        )
    if era == "_S2":
        chirps = rio_slurp_xarray(
            "s3://deafrica-input-datasets/rainfall/CHPclim_jul_dec_cumulative_rainfall.tif"
        )

    if training:
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        ds["rain"] = chirps
    else:
        # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
        xmin, xmax = ds.x.values[0], ds.x.values[-1]
        ymin, ymax = ds.y.values[0], ds.y.values[-1]
        inProj = Proj("epsg:6933")
        outProj = Proj("epsg:4326")
        xmin, ymin = transform(inProj, outProj, xmin, ymin)
        xmax, ymax = transform(inProj, outProj, xmax, ymax)

        # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
        if (xmin < 0) & (xmax < 0):
            x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
        else:
            x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

        if (ymin < 0) & (ymax < 0):
            y_slice = list(np.arange(ymin + 0.05, ymax - 0.05, -0.05))
        else:
            y_slice = list(np.arange(ymin - 0.05, ymax + 0.05, 0.05))

        # index global chirps using buffered s2 tile bbox
        chirps = assign_crs(
            chirps.sel(longitude=y_slice, latitude=x_slice, method="nearest")
        )

        # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
        chirps = chirps.fillna(chirps.mean())
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk(dask_chunks)
        ds["rain"] = chirps

    # rename bands to include era
    for band in ds.data_vars:
        ds = ds.rename({band: band + era})

    return ds
def post_processing(
    predicted: xr.Dataset,
) -> xr.DataArray:
    """
    filter prediction results with post processing filters.
    Simplified from production code to skip segmentation,
    probability, and mode calcs
    """
    dc = Datacube(app='whatever')

    predict = predicted.Predictions

    # --Post process masking---------------------------------------------------
    # print("  masking with AEZ,WDPA,WOfS,slope & elevation")

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Southern.shp').to_crs('epsg:6933')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    url_wdpa = "s3://deafrica-input-datasets/protected_areas/WDPA_southern.tif"
    wdpa = rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    wdpa = wdpa.astype(bool)
    predict = predict.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(product='wofs_ls_summary_annual', like=predicted.geobox, time=('2019'))
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 50
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    # set dtype
    predict = predict.astype(np.int8)

    return predict
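# Minimal sketch of the masking pattern repeated throughout post_processing:
# load an ancillary raster onto the prediction's geobox, threshold it into a
# boolean mask, and zero the prediction wherever the mask is True. The helper
# name and arguments are illustrative, not from the original module.
def _mask_where_above(predict, url, threshold):
    layer = rio_slurp_xarray(url, gbox=predict.geobox)  # warp onto prediction grid
    too_high = layer > threshold
    return predict.where(~too_high, 0)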
def test_cog_file(tmpdir, opts):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)

    # write to file
    ff = write_cog(xx, pp / "cog.tif", **opts)
    assert isinstance(ff, Path)
    assert ff == pp / "cog.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    _write_cog(
        np.stack([xx.values, xx.values]),
        xx.geobox,
        pp / "cog-2-bands.tif",
        overview_levels=[],
        **opts,
    )

    yy, mm = rio_slurp(pp / "cog-2-bands.tif")
    assert mm.gbox == xx.geobox
    assert yy.shape == (2, *xx.shape)
    np.testing.assert_array_equal(yy[0], xx.values)
    np.testing.assert_array_equal(yy[1], xx.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(xx.values.ravel(), xx.geobox, pp / "wontwrite.tif")

    # sizes that are not multiples of 16
    # also check that supplying `nodata=` doesn't break things
    xx_odd = xx[:23, :63]
    ff = write_cog(xx_odd, pp / "cog_odd.tif", nodata=xx_odd.attrs["nodata"], **opts)
    assert isinstance(ff, Path)
    assert ff == pp / "cog_odd.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog_odd.tif")
    np.testing.assert_array_equal(yy.values, xx_odd.values)
    assert yy.geobox == xx_odd.geobox
    assert yy.nodata == xx_odd.nodata

    with pytest.warns(UserWarning):
        write_cog(xx, pp / "cog_badblocksize.tif", blocksize=50)
def post_processing(
    predicted: xr.Dataset,
) -> xr.DataArray:
    """
    filter prediction results with post processing filters.
    :param predicted: The prediction results
    """
    dc = Datacube(app='whatever')

    # grab predictions and proba for post process filtering
    predict = predicted.Predictions

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Western.geojson')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    url_wdpa = "s3://deafrica-input-datasets/protected_areas/WDPA_western.tif"
    wdpa = rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    wdpa = wdpa.astype(bool)
    predict = predict.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(product='ga_ls8c_wofs_2_summary', like=predicted.geobox)
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 35
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    # set dtype
    predict = predict.astype(np.int8)

    return predict
def test_cog_file(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)

    # write to file
    ff = write_cog(xx, pp / "cog.tif")
    assert isinstance(ff, Path)
    assert ff == pp / "cog.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    _write_cog(
        np.stack([xx.values, xx.values]),
        xx.geobox,
        pp / "cog-2-bands.tif",
        overview_levels=[],
    )

    yy, mm = rio_slurp(pp / "cog-2-bands.tif")
    assert mm.gbox == xx.geobox
    assert yy.shape == (2, *xx.shape)
    np.testing.assert_array_equal(yy[0], xx.values)
    np.testing.assert_array_equal(yy[1], xx.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(xx.values.ravel(), xx.geobox, pp / "wontwrite.tif")
def test_cog_rgba(tmpdir, use_windowed_writes):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)
    pix = np.dstack([xx.values] * 4)
    rgba = xr.DataArray(pix, attrs=xx.attrs, dims=("y", "x", "band"), coords=xx.coords)
    assert rgba.geobox == xx.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    ff = write_cog(rgba, pp / "cog.tif", use_windowed_writes=use_windowed_writes)
    yy = rio_slurp_xarray(ff)

    assert yy.geobox == rgba.geobox
    assert yy.shape == rgba.shape
    np.testing.assert_array_equal(yy.values, rgba.values)

    with pytest.raises(ValueError):
        _write_cog(
            rgba.values[1:, :, :],
            rgba.geobox,
            ":mem:",
            use_windowed_writes=use_windowed_writes,
        )
def gm_mads_two_seasons_production(ds1, ds2):
    """
    Feature layer function for production run of eastern crop-mask
    """

    def fun(ds, era):
        # normalise SR and edev bands
        for band in ds.data_vars:
            if band not in ["sdev", "bcdev"]:
                ds[band] = ds[band] / 10000

        gm_mads = calculate_indices(
            ds,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        if era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )

        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(ds1, era="_S1")
    epoch2 = fun(ds2, era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds1.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")
    result = result.astype(np.float32)

    return result.squeeze()
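# Sketch of the CHIRPS-loading pattern used in the feature functions above:
# open the rainfall raster with rasterio-backed xarray, tag it with a CRS,
# then reproject onto the target geobox. The helper name is illustrative.
def _load_rainfall(path, geobox):
    rain = assign_crs(xr.open_rasterio(path), crs="epsg:4326")
    return xr_reproject(rain, geobox, "bilinear")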
def gm_mads_two_seasons_training(query):
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    # load S2 geomedian
    ds = dc.load(product='gm_s2_semiannual', **query)

    # load the data
    dss = {"S1": ds.isel(time=0), "S2": ds.isel(time=1)}

    # create features
    epoch1 = common_ops(dss["S1"], era="_S1")
    epoch1 = add_chirps(epoch1, era='_S1')
    epoch2 = common_ops(dss["S2"], era="_S2")
    epoch2 = add_chirps(epoch2, era='_S2')

    # add slope
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.astype(np.float32).squeeze()
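# Hypothetical query for the training feature function above; the keys follow
# the standard datacube.load signature, and every value here is illustrative.
query_example = {
    "x": (35.0, 35.2),          # illustrative longitude range
    "y": (-0.2, 0.0),           # illustrative latitude range
    "time": "2019",
    "resolution": (-10, 10),
    "output_crs": "epsg:6933",
}
# training_features = gm_mads_two_seasons_training(query_example)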
def gm_mads_two_seasons_predict(ds):
    dc = datacube.Datacube(app="training")
    ds = ds / 10000
    ds1 = ds.sel(time=slice("2019-01", "2019-06"))
    ds2 = ds.sel(time=slice("2019-07", "2019-12"))

    def fun(ds, era):
        # geomedian and tmads
        # gm_mads = xr_geomedian_tmad(ds)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(
            gm_mads,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])
        gm_mads = gm_mads.chunk({"x": 2000, "y": 2000})

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        if era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )

        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk({"x": 2000, "y": 2000})
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(ds1, era="_S1")
    epoch2 = fun(ds2, era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk({"x": 2000, "y": 2000})

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.squeeze()
def gm_mads_two_seasons_prediction(
    datasets,
    geobox,
    measurements: List[str],
    urls: Dict[Any, Any],
    dask_chunks: Dict[str, Any] = {"x": -1, "y": -1},
) -> Optional[xr.Dataset]:
    """
    Feature layer function for production run of eastern crop-mask.
    Similar to the training function but data is loaded internally,
    CHIRPS is reprojected differently, and dask chunks are used.
    """
    ds = load_with_native_transform(
        datasets,
        geobox=geobox,
        native_transform=lambda x: drop_nan_nodata(x),
        bands=measurements,
        chunks=dask_chunks,
        resampling="bilinear",
    )

    dss = {
        "S1": ds.isel(spec=0).drop(["spatial_ref", "spec"]),
        "S2": ds.isel(spec=1).drop(["spatial_ref", "spec"]),
    }

    # create features
    epoch1 = common_ops(dss["S1"], era="_S1")
    epoch1 = add_chirps(urls, epoch1, era="_S1", training=False, dask_chunks=dask_chunks)
    epoch2 = common_ops(dss["S2"], era="_S2")
    epoch2 = add_chirps(urls, epoch2, era="_S2", training=False, dask_chunks=dask_chunks)

    if (not epoch1) or (not epoch2):
        return None

    # add slope
    url_slope = urls["slope"]
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk(dask_chunks)

    result = xr.merge([epoch1, epoch2, slope], compat="override")
    result = result.astype(np.float32)

    return result.squeeze()
def gm_mads_two_seasons_prediction(geobox, dask_chunks):
    """
    Feature layer function for production run of eastern crop-mask.
    Similar to the training function but data is loaded internally,
    CHIRPS is reprojected differently, and dask chunks are used.
    """
    dc = datacube.Datacube(app="prediction")

    # load the data
    measurements = [
        "blue",
        "green",
        "red",
        "nir",
        "swir_1",
        "swir_2",
        "red_edge_1",
        "red_edge_2",
        "red_edge_3",
        "bcdev",
        "edev",
        "sdev",
    ]

    ds = dc.load(
        product="gm_s2_semiannual",
        time="2019",
        measurements=measurements,
        like=geobox,
        dask_chunks=dask_chunks,
        resampling='bilinear',
    )

    dss = {"S1": ds.isel(time=0), "S2": ds.isel(time=1)}

    # create features
    epoch1 = common_ops(dss["S1"], era="_S1")
    epoch1 = add_chirps(epoch1, era='_S1', training=False, dask_chunks=dask_chunks)
    epoch2 = common_ops(dss["S2"], era="_S2")
    epoch2 = add_chirps(epoch2, era='_S2', training=False, dask_chunks=dask_chunks)

    # add slope
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk(dask_chunks)

    result = xr.merge([epoch1, epoch2, slope], compat="override")
    result = result.astype(np.float32)

    return result.squeeze()
def annual_gm_mads_evi_training(ds):
    dc = datacube.Datacube(app='training')

    # grab gm+tmads
    gm_mads = dc.load(product='ga_s2_gm', time='2019', like=ds.geobox,
                      measurements=['red', 'blue', 'green', 'nir', 'swir_1',
                                    'swir_2', 'red_edge_1', 'red_edge_2',
                                    'red_edge_3', 'SMAD', 'BCMAD', 'EMAD'])

    gm_mads['SMAD'] = -np.log(gm_mads['SMAD'])
    gm_mads['BCMAD'] = -np.log(gm_mads['BCMAD'])
    gm_mads['EMAD'] = -np.log(gm_mads['EMAD'] / 10000)

    # calculate band indices on gm
    gm_mads = calculate_indices(gm_mads,
                                index=['EVI', 'LAI', 'MNDWI'],
                                drop=False,
                                collection='s2')

    # normalise spectral GM bands 0-1
    for band in gm_mads.data_vars:
        if band not in ['SMAD', 'BCMAD', 'EMAD', 'EVI', 'LAI', 'MNDWI']:
            gm_mads[band] = gm_mads[band] / 10000

    # calculate EVI on annual timeseries
    evi = calculate_indices(ds, index=['EVI'], drop=True, normalise=True,
                            collection='s2')

    # EVI stats
    gm_mads['evi_std'] = evi.EVI.std(dim='time')
    gm_mads['evi_10'] = evi.EVI.quantile(0.1, dim='time')
    gm_mads['evi_25'] = evi.EVI.quantile(0.25, dim='time')
    gm_mads['evi_75'] = evi.EVI.quantile(0.75, dim='time')
    gm_mads['evi_90'] = evi.EVI.quantile(0.9, dim='time')
    gm_mads['evi_range'] = gm_mads['evi_90'] - gm_mads['evi_10']

    # rainfall climatology
    chirps_S1 = xr_reproject(
        assign_crs(
            xr.open_rasterio('/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc'),
            crs='epsg:4326'),
        ds.geobox, "bilinear")
    chirps_S2 = xr_reproject(
        assign_crs(
            xr.open_rasterio('/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc'),
            crs='epsg:4326'),
        ds.geobox, "bilinear")

    gm_mads['rain_S1'] = chirps_S1
    gm_mads['rain_S2'] = chirps_S2

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name='slope')  # .chunk({'x':2000,'y':2000})

    result = xr.merge([gm_mads, slope], compat='override')

    return result.squeeze()
def gm_mads_two_seasons_predict(ds):
    dc = datacube.Datacube(app='training')
    ds = ds / 10_000
    ds1 = ds.sel(time=slice('2019-01', '2019-06'))
    ds2 = ds.sel(time=slice('2019-07', '2019-12'))

    def fun(ds, era):
        # geomedian and tmads
        # gm_mads = xr_geomedian_tmad(ds)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(gm_mads,
                                    index=['NDVI', 'LAI', 'MNDWI'],
                                    drop=False,
                                    normalise=False,
                                    collection='s2')

        gm_mads['sdev'] = -np.log(gm_mads['sdev'])
        gm_mads['bcdev'] = -np.log(gm_mads['bcdev'])
        gm_mads['edev'] = -np.log(gm_mads['edev'])
        gm_mads = gm_mads.chunk({'x': 2000, 'y': 2000})

        # rainfall climatology
        if era == '_S1':
            chirps = assign_crs(xr.open_rasterio(
                '/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc'
            ), crs='epsg:4326')
        if era == '_S2':
            chirps = assign_crs(xr.open_rasterio(
                '/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc'
            ), crs='epsg:4326')

        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk({'x': 2000, 'y': 2000})
        gm_mads['rain'] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(ds1, era='_S1')
    epoch2 = fun(ds2, era='_S2')

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name='slope').chunk({'x': 2000, 'y': 2000})

    result = xr.merge([epoch1, epoch2, slope], compat='override')

    return result.squeeze()
def test_cog_mem(tmpdir, shape):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, shape=shape)

    # write to memory 1
    bb = write_cog(xx, ":mem:")
    assert isinstance(bb, bytes)
    path = pp / "cog1.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)
    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 2
    bb = to_cog(xx)
    assert isinstance(bb, bytes)
    path = pp / "cog2.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)
    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 3 -- no overviews
    bb = to_cog(xx, overview_levels=[])
    assert isinstance(bb, bytes)
    path = pp / "cog3.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)
    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
def merge_tile_ds(
    x: int,
    y: int,
    config: FeaturePathConfig,
    geobox_dict: Dict[Tuple, GeoBox],
    gm_ds: Optional[xr.Dataset] = None,
) -> Tuple[str, GeoBox, xr.Dataset]:
    """
    Merge all tile tifs and additional features into one xarray dataset:
    three extra indices, plus rainfall and slope integrated with gm_ds.
    :param gm_ds: optional pre-loaded geomedian dataset
    :param x: tile index x
    :param y: tile index y
    :param config: FeaturePathConfig containing the model path and product info et al.
    :param geobox_dict: geobox will calculate the tile geometry from the tile index
    :return: subfolder path and the xarray dataset of the features
    """
    # this folder naming x, y will change
    subfld = "x{x:+04d}/y{y:+04d}".format(x=x, y=y)
    P6M_tifs: Dict = get_tifs_paths(config.TIF_path, subfld)
    geobox = geobox_dict[(x, y)]
    seasoned_ds = {}
    for k, tifs in P6M_tifs.items():
        era = "_S1" if "2019-01--P6M" in k else "_S2"
        if not gm_ds:
            # no pre-prepared base ds, so build one from the tile tifs
            base_ds = merge_tifs_into_ds(k, tifs, rename_dict=config.rename_dict)
        else:
            base_ds = gm_ds
        # TODO: validate that the 6-month geomedian is already down-scaled.
        base_ds = down_scale_gm_band(base_ds)
        seasoned_ds[era] = complete_gm_mads(base_ds, geobox, era)

    slope = (
        rio_slurp_xarray(config.url_slope, gbox=geobox)
        .drop("spatial_ref")
        .to_dataset(name="slope")
    )

    return (
        subfld,
        geobox,
        xr.merge(
            [seasoned_ds["_S1"], seasoned_ds["_S2"], slope], compat="override"
        ).chunk({"x": -1, "y": -1}),
    )
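# Small, runnable illustration of the tile subfolder naming used in
# merge_tile_ds above: signed, zero-padded tile indices.
def _tile_subfolder(x: int, y: int) -> str:
    return "x{x:+04d}/y{y:+04d}".format(x=x, y=y)

assert _tile_subfolder(10, -12) == "x+010/y-012"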
def test_cog_file_dask(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, dask=True)
    assert dask.is_dask_collection(xx)

    path = pp / "cog.tif"
    ff = write_cog(xx, path, overview_levels=[2, 4])
    assert isinstance(ff, Delayed)
    assert path.exists() is False
    assert ff.compute() == path
    assert path.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
def test_cog_rgba(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)
    pix = np.dstack([xx.values] * 4)
    rgba = xr.DataArray(pix,
                        attrs=xx.attrs,
                        dims=('y', 'x', 'band'),
                        coords=xx.coords)
    assert rgba.geobox == xx.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    ff = write_cog(rgba, pp / "cog.tif")
    yy = rio_slurp_xarray(ff)

    assert yy.geobox == rgba.geobox
    assert yy.shape == rgba.shape
    np.testing.assert_array_equal(yy.values, rgba.values)

    with pytest.raises(ValueError):
        _write_cog(rgba.values[1:, :, :], rgba.geobox, ':mem:')
def gm_mads_evi_rainfall(ds):
    """
    6 monthly and annual
    gm + mads
    evi stats (10, 50, 90 percentile, range, std)
    rainfall actual stats (min, mean, max, range, std) from monthly data
    rainfall clim stats (min, mean, max, range, std) from monthly data
    """
    dc = datacube.Datacube(app='training')
    ds = ds / 10000
    ds = ds.rename({'nir_1': 'nir_wide', 'nir_2': 'nir'})
    ds1 = ds.sel(time=slice('2019-01', '2019-06'))
    ds2 = ds.sel(time=slice('2019-07', '2019-12'))

    chirps = []
    chpclim = []
    for m in range(1, 13):
        chirps.append(
            xr_reproject(
                assign_crs(
                    xr.open_rasterio(f'/g/data/CHIRPS/monthly_2019/chirps-v2.0.2019.{m:02d}.tif')
                    .squeeze()
                    .expand_dims({'time': [m]}),
                    crs='epsg:4326'),
                ds.geobox, "bilinear"))
        chpclim.append(
            rio_slurp_xarray(
                f'https://deafrica-data-dev.s3.amazonaws.com/product-dev/deafrica_chpclim_50n_50s_{m:02d}.tif',
                gbox=ds.geobox,
                resampling='bilinear').expand_dims({'time': [m]}))

    chirps = xr.concat(chirps, dim='time')
    chpclim = xr.concat(chpclim, dim='time')

    def fun(ds, chirps, chpclim, era):
        ds = calculate_indices(ds,
                               index=['EVI'],
                               drop=False,
                               normalise=False,
                               collection='s2')
        # geomedian and tmads
        gm_mads = xr_geomedian_tmad(ds)
        gm_mads = calculate_indices(gm_mads,
                                    index=['EVI', 'NDVI', 'LAI', 'MNDWI'],
                                    drop=False,
                                    normalise=False,
                                    collection='s2')

        gm_mads['sdev'] = -np.log(gm_mads['sdev'])
        gm_mads['bcdev'] = -np.log(gm_mads['bcdev'])
        gm_mads['edev'] = -np.log(gm_mads['edev'])

        # EVI stats
        gm_mads['evi_10'] = ds.EVI.quantile(0.1, dim='time')
        gm_mads['evi_50'] = ds.EVI.quantile(0.5, dim='time')
        gm_mads['evi_90'] = ds.EVI.quantile(0.9, dim='time')
        gm_mads['evi_range'] = gm_mads['evi_90'] - gm_mads['evi_10']
        gm_mads['evi_std'] = ds.EVI.std(dim='time')

        # rainfall actual
        gm_mads['rain_min'] = chirps.min(dim='time')
        gm_mads['rain_mean'] = chirps.mean(dim='time')
        gm_mads['rain_max'] = chirps.max(dim='time')
        gm_mads['rain_range'] = gm_mads['rain_max'] - gm_mads['rain_min']
        gm_mads['rain_std'] = chirps.std(dim='time')

        # rainfall climatology
        gm_mads['rainclim_min'] = chpclim.min(dim='time')
        gm_mads['rainclim_mean'] = chpclim.mean(dim='time')
        gm_mads['rainclim_max'] = chpclim.max(dim='time')
        gm_mads['rainclim_range'] = gm_mads['rainclim_max'] - gm_mads['rainclim_min']
        gm_mads['rainclim_std'] = chpclim.std(dim='time')

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch0 = fun(ds, chirps, chpclim, era='_S0')
    time, month = slice('2019-01', '2019-06'), slice(1, 6)
    epoch1 = fun(ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era='_S1')
    time, month = slice('2019-07', '2019-12'), slice(7, 12)
    epoch2 = fun(ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era='_S2')

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name='slope')

    result = xr.merge([epoch0, epoch1, epoch2, slope], compat='override')

    return result.squeeze()
def test_rio_slurp(tmpdir):
    w, h, dtype, nodata, ndw = 96, 64, 'int16', -999, 7

    pp = Path(str(tmpdir))

    aa = mk_test_image(w, h, dtype, nodata, nodata_width=ndw)
    assert aa.shape == (h, w)
    assert aa.dtype.name == dtype
    assert aa[10, 30] == (30 << 8) | 10
    assert aa[10, 11] == nodata

    aa0 = aa.copy()
    mm0 = write_gtiff(pp / "rio-slurp-aa.tif", aa, nodata=-999, overwrite=True)
    mm00 = write_gtiff(pp / "rio-slurp-aa-missing-nodata.tif", aa, nodata=None, overwrite=True)

    aa, mm = rio_slurp(mm0.path)
    np.testing.assert_array_equal(aa, aa0)
    assert mm.gbox == mm0.gbox
    assert aa.shape == mm.gbox.shape

    xx = rio_slurp_xarray(mm0.path)
    assert mm.gbox == xx.geobox
    np.testing.assert_array_equal(xx.values, aa0)

    aa, mm = rio_slurp(mm0.path, aa0.shape)
    np.testing.assert_array_equal(aa, aa0)
    assert aa.shape == mm.gbox.shape
    assert mm.gbox is mm.src_gbox

    xx = rio_slurp_xarray(mm0.path, aa0.shape)
    assert mm.gbox == xx.geobox
    np.testing.assert_array_equal(xx.values, aa0)

    aa, mm = rio_slurp(mm0.path, (3, 7))
    assert aa.shape == (3, 7)
    assert aa.shape == mm.gbox.shape
    assert mm.gbox != mm.src_gbox
    assert mm.src_gbox == mm0.gbox
    assert mm.gbox.extent == mm0.gbox.extent

    aa, mm = rio_slurp(mm0.path, aa0.shape)
    np.testing.assert_array_equal(aa, aa0)
    assert aa.shape == mm.gbox.shape

    aa, mm = rio_slurp(mm0.path, mm0.gbox, resampling='nearest')
    np.testing.assert_array_equal(aa, aa0)

    xx = rio_slurp_xarray(mm0.path, mm0.gbox)
    assert mm.gbox == xx.geobox
    np.testing.assert_array_equal(xx.values, aa0)

    aa, mm = rio_slurp(mm0.path, gbox=mm0.gbox, dtype='float32')
    assert aa.dtype == 'float32'
    np.testing.assert_array_equal(aa, aa0.astype('float32'))

    xx = rio_slurp_xarray(mm0.path, gbox=mm0.gbox)
    assert mm.gbox == xx.geobox
    assert mm.nodata == xx.nodata
    np.testing.assert_array_equal(xx.values, aa0)

    aa, mm = rio_slurp(mm0.path, mm0.gbox, dst_nodata=-33)
    np.testing.assert_array_equal(aa == -33, aa0 == -999)

    aa, mm = rio_slurp(mm00.path, mm00.gbox, dst_nodata=None)
    np.testing.assert_array_equal(aa, aa0)
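# Usage sketch distilled from the test above: the rio_slurp call patterns it
# exercises, collected in one place. `path` and `gbox` are assumed inputs.
def _rio_slurp_patterns(path, gbox):
    pix, mm = rio_slurp(path)            # native grid; mm carries .gbox/.src_gbox
    pix, mm = rio_slurp(path, (3, 7))    # resample to an explicit output shape
    pix, mm = rio_slurp(path, gbox, resampling="nearest")  # resample onto a geobox
    pix, mm = rio_slurp(path, gbox=gbox, dtype="float32")  # cast on read
    pix, mm = rio_slurp(path, gbox, dst_nodata=-33)        # remap nodata
    xx = rio_slurp_xarray(path, gbox)    # DataArray with .geobox and .nodata set
    return xx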
def post_processing(predicted):
    """
    filter prediction results with post processing filters.
    :param predicted: The prediction results
    """
    dc = Datacube(app='whatever')

    # grab predictions and proba for post process filtering
    predict = predicted.Predictions
    # proba = predicted.Probabilities
    # proba = proba.where(predict == 1, 100 - proba)  # crop proba only

    # #------image seg and filtering -------------
    # # write out ndvi for image seg
    # ndvi = assign_crs(predicted[["NDVI_S1", "NDVI_S2"]],
    #                   crs=predicted.geobox.crs)

    # # call function with dask delayed
    # filtered = image_segmentation(ndvi, predict)

    # # convert delayed object to dask array
    # filtered = dask.array.from_delayed(filtered.squeeze(),
    #                                    shape=predict.shape,
    #                                    dtype=np.int8)

    # # convert dask array to xr.Datarray
    # filtered = xr.DataArray(filtered,
    #                         coords=predict.coords,
    #                         attrs=predict.attrs)

    # --Post process masking------------------------------------------------
    # merge back together for masking
    ds = xr.Dataset({"mask": predict})  # , "prob": proba, "filtered": filtered})

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file(
        'https://github.com/digitalearthafrica/crop-mask/blob/main/testing/eastern_cropmask/data/Eastern.geojson?raw=true'
    )
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    mask = mask.chunk({})
    ds = ds.where(mask, 0)

    # mask with WDPA
    wdpa = rio_slurp_xarray(
        "s3://deafrica-input-datasets/protected_areas/WDPA_eastern.tif",
        gbox=predicted.geobox)
    wdpa = wdpa.chunk({})
    wdpa = wdpa.astype(bool)
    ds = ds.where(~wdpa, 0)

    # mask with WOFS
    wofs = dc.load(product="ga_ls8c_wofs_2_summary",
                   like=predicted.geobox,
                   dask_chunks={})
    wofs = wofs.frequency > 0.2  # threshold
    ds = ds.where(~wofs, 0)

    # mask steep slopes
    slope = rio_slurp_xarray(
        'https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif',
        gbox=predicted.geobox)
    slope = slope.chunk({})
    slope = slope > 35
    ds = ds.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product="dem_srtm",
                        like=predicted.geobox,
                        dask_chunks={})
    elevation = elevation.elevation > 3600  # threshold
    ds = ds.where(~elevation.squeeze(), 0)

    return ds.squeeze()
def gm_mads_two_seasons_production(x, y):
    """
    Feature layer function for production run of eastern crop-mask
    """
    rename_dict = {
        "B02": "blue",
        "B03": "green",
        "B04": "red",
        "B05": "red_edge_1",
        "B06": "red_edge_2",
        "B07": "red_edge_3",
        "B08": "nir",
        "B8A": "nir_narrow",
        "B11": "swir_1",
        "B12": "swir_2",
        "BCMAD": "bcdev",
        "EMAD": "edev",
        "SMAD": "sdev",
    }

    training_features = [
        "red_S1", "blue_S1", "green_S1", "nir_S1",
        "swir_1_S1", "swir_2_S1", "red_edge_1_S1",
        "red_edge_2_S1", "red_edge_3_S1", "edev_S1",
        "sdev_S1", "bcdev_S1", "NDVI_S1", "LAI_S1",
        "MNDWI_S1", "rain_S1",
        "red_S2", "blue_S2", "green_S2", "nir_S2",
        "swir_1_S2", "swir_2_S2", "red_edge_1_S2",
        "red_edge_2_S2", "red_edge_3_S2", "edev_S2",
        "sdev_S2", "bcdev_S2", "NDVI_S2", "LAI_S2",
        "MNDWI_S2", "rain_S2",
        "slope",
    ]

    DATA_PATH = "/g/data/u23/data/"
    TIF_path = osp.join(DATA_PATH, "tifs20")
    subfld = "x{x:+04d}/y{y:+04d}/".format(x=x, y=y)
    P6M_tifs = get_tifs_paths(TIF_path, subfld)

    seasoned_ds = {}
    for k, tifs in P6M_tifs.items():
        era = "_S1" if "2019-01--P6M" in k else "_S2"
        base_ds = merge_tifs_into_ds(k, tifs, rename_dict=rename_dict)
        seasoned_ds[era] = base_ds

    # convert from bands to features
    epoch1 = features(seasoned_ds['_S1'], era='_S1')
    epoch2 = features(seasoned_ds['_S2'], era='_S2')

    # append slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, epoch2.geobox)
    slope = slope.to_dataset(name='slope')

    # merge everything
    result = xr.merge([epoch1, epoch2, slope], compat='override')

    # order the features correctly
    result = result[training_features]
    result = result.astype(np.float32)

    return result.squeeze()
def gm_mads_two_seasons(geobox):
    """
    Feature layer function for production run of eastern crop-mask
    """
    dc = datacube.Datacube(app="prediction")

    # load the data
    measurements = [
        "blue",
        "green",
        "red",
        "nir",
        "swir_1",
        "swir_2",
        "red_edge_1",
        "red_edge_2",
        "red_edge_3",
        "bcdev",
        "edev",
        "sdev",
    ]

    ds1 = dc.load(
        product="ga_s2_gm", time="2019", measurements=measurements, like=geobox
    )
    ds2 = dc.load(
        product="ga_s2_gm", time="2019", measurements=measurements, like=geobox
    )

    dss = {"S1": ds1, "S2": ds2}

    def fun(ds, era):
        # normalise SR and edev bands
        for band in ds.data_vars:
            if band not in ["sdev", "bcdev"]:
                ds[band] = ds[band] / 10000

        gm_mads = calculate_indices(
            ds,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        if era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )

        # Clip CHIRPS to ~ S2 tile boundaries so we can handle NaNs local to S2 tile
        xmin, xmax = ds.x.values[0], ds.x.values[-1]
        ymin, ymax = ds.y.values[0], ds.y.values[-1]
        inProj = Proj("epsg:6933")
        outProj = Proj("epsg:4326")
        xmin, ymin = transform(inProj, outProj, xmin, ymin)
        xmax, ymax = transform(inProj, outProj, xmax, ymax)

        # create lat/lon indexing slices - buffer S2 bbox by 0.05deg
        if (xmin < 0) & (xmax < 0):
            x_slice = list(np.arange(xmin + 0.05, xmax - 0.05, -0.05))
        else:
            x_slice = list(np.arange(xmax - 0.05, xmin + 0.05, 0.05))

        if (ymin < 0) & (ymax < 0):
            y_slice = list(np.arange(ymin + 0.05, ymax - 0.05, -0.05))
        else:
            y_slice = list(np.arange(ymin - 0.05, ymax + 0.05, 0.05))

        # index global chirps using buffered s2 tile bbox
        chirps = assign_crs(chirps.sel(x=y_slice, y=x_slice, method="nearest"))

        # fill any NaNs in CHIRPS with local (s2-tile bbox) mean
        chirps = chirps.fillna(chirps.mean())
        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(dss["S1"], era="_S1")
    epoch2 = fun(dss["S2"], era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds1.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")
    result = result.astype(np.float32)

    return result.squeeze()
def annual_gm_mads_evi_training(ds):
    dc = datacube.Datacube(app="training")

    # grab gm+tmads
    gm_mads = dc.load(
        product="ga_s2_gm",
        time="2019",
        like=ds.geobox,
        measurements=[
            "red",
            "blue",
            "green",
            "nir",
            "swir_1",
            "swir_2",
            "red_edge_1",
            "red_edge_2",
            "red_edge_3",
            "SMAD",
            "BCMAD",
            "EMAD",
        ],
    )

    gm_mads["SMAD"] = -np.log(gm_mads["SMAD"])
    gm_mads["BCMAD"] = -np.log(gm_mads["BCMAD"])
    gm_mads["EMAD"] = -np.log(gm_mads["EMAD"] / 10000)

    # calculate band indices on gm
    gm_mads = calculate_indices(
        gm_mads, index=["EVI", "LAI", "MNDWI"], drop=False, collection="s2"
    )

    # normalise spectral GM bands 0-1
    for band in gm_mads.data_vars:
        if band not in ["SMAD", "BCMAD", "EMAD", "EVI", "LAI", "MNDWI"]:
            gm_mads[band] = gm_mads[band] / 10000

    # calculate EVI on annual timeseries
    evi = calculate_indices(
        ds, index=["EVI"], drop=True, normalise=True, collection="s2"
    )

    # EVI stats
    gm_mads["evi_std"] = evi.EVI.std(dim="time")
    gm_mads["evi_10"] = evi.EVI.quantile(0.1, dim="time")
    gm_mads["evi_25"] = evi.EVI.quantile(0.25, dim="time")
    gm_mads["evi_75"] = evi.EVI.quantile(0.75, dim="time")
    gm_mads["evi_90"] = evi.EVI.quantile(0.9, dim="time")
    gm_mads["evi_range"] = gm_mads["evi_90"] - gm_mads["evi_10"]

    # rainfall climatology
    chirps_S1 = xr_reproject(
        assign_crs(
            xr.open_rasterio(
                "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
            ),
            crs="epsg:4326",
        ),
        ds.geobox,
        "bilinear",
    )
    chirps_S2 = xr_reproject(
        assign_crs(
            xr.open_rasterio(
                "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
            ),
            crs="epsg:4326",
        ),
        ds.geobox,
        "bilinear",
    )

    gm_mads["rain_S1"] = chirps_S1
    gm_mads["rain_S2"] = chirps_S2

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")  # .chunk({'x':2000,'y':2000})

    result = xr.merge([gm_mads, slope], compat="override")

    return result.squeeze()
def gm_mads_evi_rainfall(ds):
    """
    6 monthly and annual
    gm + mads
    evi stats (10, 50, 90 percentile, range, std)
    rainfall actual stats (min, mean, max, range, std) from monthly data
    rainfall clim stats (min, mean, max, range, std) from monthly data
    """
    dc = datacube.Datacube(app="training")
    ds = ds / 10000
    ds = ds.rename({"nir_1": "nir_wide", "nir_2": "nir"})
    ds1 = ds.sel(time=slice("2019-01", "2019-06"))
    ds2 = ds.sel(time=slice("2019-07", "2019-12"))

    chirps = []
    chpclim = []
    for m in range(1, 13):
        chirps.append(
            xr_reproject(
                assign_crs(
                    xr.open_rasterio(
                        f"/g/data/CHIRPS/monthly_2019/chirps-v2.0.2019.{m:02d}.tif"
                    )
                    .squeeze()
                    .expand_dims({"time": [m]}),
                    crs="epsg:4326",
                ),
                ds.geobox,
                "bilinear",
            )
        )
        chpclim.append(
            rio_slurp_xarray(
                f"https://deafrica-data-dev.s3.amazonaws.com/product-dev/deafrica_chpclim_50n_50s_{m:02d}.tif",
                gbox=ds.geobox,
                resampling="bilinear",
            ).expand_dims({"time": [m]})
        )

    chirps = xr.concat(chirps, dim="time")
    chpclim = xr.concat(chpclim, dim="time")

    def fun(ds, chirps, chpclim, era):
        ds = calculate_indices(
            ds, index=["EVI"], drop=False, normalise=False, collection="s2"
        )
        # geomedian and tmads
        gm_mads = xr_geomedian_tmad(ds)
        gm_mads = calculate_indices(
            gm_mads,
            index=["EVI", "NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])

        # EVI stats
        gm_mads["evi_10"] = ds.EVI.quantile(0.1, dim="time")
        gm_mads["evi_50"] = ds.EVI.quantile(0.5, dim="time")
        gm_mads["evi_90"] = ds.EVI.quantile(0.9, dim="time")
        gm_mads["evi_range"] = gm_mads["evi_90"] - gm_mads["evi_10"]
        gm_mads["evi_std"] = ds.EVI.std(dim="time")

        # rainfall actual
        gm_mads["rain_min"] = chirps.min(dim="time")
        gm_mads["rain_mean"] = chirps.mean(dim="time")
        gm_mads["rain_max"] = chirps.max(dim="time")
        gm_mads["rain_range"] = gm_mads["rain_max"] - gm_mads["rain_min"]
        gm_mads["rain_std"] = chirps.std(dim="time")

        # rainfall climatology
        gm_mads["rainclim_min"] = chpclim.min(dim="time")
        gm_mads["rainclim_mean"] = chpclim.mean(dim="time")
        gm_mads["rainclim_max"] = chpclim.max(dim="time")
        gm_mads["rainclim_range"] = gm_mads["rainclim_max"] - gm_mads["rainclim_min"]
        gm_mads["rainclim_std"] = chpclim.std(dim="time")

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch0 = fun(ds, chirps, chpclim, era="_S0")
    time, month = slice("2019-01", "2019-06"), slice(1, 6)
    epoch1 = fun(
        ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era="_S1"
    )
    time, month = slice("2019-07", "2019-12"), slice(7, 12)
    epoch2 = fun(
        ds.sel(time=time), chirps.sel(time=month), chpclim.sel(time=month), era="_S2"
    )

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch0, epoch1, epoch2, slope], compat="override")

    return result.squeeze()
def post_processing(
    predicted: xr.Dataset, urls: Dict[str, Any]
) -> xr.Dataset:
    """
    Run the delayed post_processing functions, then create a lazy
    xr.Dataset to satisfy odc-stats
    """
    dc = Datacube(app="whatever")

    # grab predictions and proba for post process filtering
    predict = predicted.Predictions
    proba = predicted.Probabilities
    proba = proba.where(predict == 1, 100 - proba)  # crop proba only

    # ------image seg and filtering -------------
    # write out ndvi for image seg
    ndvi = assign_crs(predicted[["NDVI_S1", "NDVI_S2"]], crs=predicted.geobox.crs)

    # call function with dask delayed
    filtered = image_segmentation(ndvi, predict)

    # convert delayed object to dask array
    filtered = dask.array.from_delayed(
        filtered.squeeze(), shape=predict.shape, dtype=np.uint8
    )

    # convert dask array to xr.Datarray
    filtered = xr.DataArray(filtered, coords=predict.coords, attrs=predict.attrs)

    # --Post process masking----------------------------------------
    # merge back together for masking
    ds = xr.Dataset({"mask": predict, "prob": proba, "filtered": filtered})

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file(urls["aez"])
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    mask = mask.chunk({})
    ds = ds.where(mask, 0)

    # mask with WDPA
    wdpa = rio_slurp_xarray(urls["wdpa"], gbox=predicted.geobox)
    wdpa = wdpa.chunk({})
    wdpa = wdpa.astype(bool)
    ds = ds.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(
        product='wofs_ls_summary_annual',
        like=predicted.geobox,
        dask_chunks={},
        time=('2019'),
    )
    wofs = wofs.frequency > 0.20  # threshold
    ds = ds.where(~wofs, 0)

    # mask steep slopes
    slope = rio_slurp_xarray(urls["slope"], gbox=predicted.geobox)
    slope = slope.chunk({})
    slope = slope > 50
    ds = ds.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product="dem_srtm", like=predicted.geobox, dask_chunks={})
    elevation = elevation.elevation > 3600  # threshold
    ds = ds.where(~elevation.squeeze(), 0)

    return ds.squeeze()
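# Sketch of the delayed-to-xarray pattern used in post_processing above: wrap
# a dask.delayed result as a dask array with known shape/dtype, then re-attach
# the template's coordinates. The helper name is illustrative.
def _delayed_to_dataarray(delayed_result, template, dtype=np.uint8):
    arr = dask.array.from_delayed(delayed_result, shape=template.shape, dtype=dtype)
    return xr.DataArray(arr, coords=template.coords, attrs=template.attrs)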
# from sklearn.impute import SimpleImputer
# dc = datacube.Datacube(config='/home/547/sc0554/datacube.conf', env='lccs_dev')
# query = {'time': ('2015-01-01', '2015-12-31')}
# query['crs'] = 'EPSG:3577'
# data = dc.load(product='fc_percentile_albers_annual', measurements='PV_PC_90', **query)
data = xr.open_rasterio(
    '/g/data/r78/LCCS_Aberystwyth/urban_tests/test_sites_peter/perth_2015_gm.tif'
)
data = assign_crs(data, crs='epsg:3577')

# quickshift expects multiband images with bands in the last dimension
data = data.transpose()

fname = '/g/data/r78/LCCS_Aberystwyth/continental_run_april2020/2015/lccs_2015_L4_0.5.0.tif'
LCCS = rio_slurp_xarray(fname, gbox=data.geobox)
LCCS = LCCS.isel(band=0)
print("LCCS shape", LCCS.shape)

meta_d = LCCS.copy()  # .squeeze().drop('time')

seg = felzenszwalb(LCCS.data.transpose())
# seg = quickshift(LCCS.data.transpose(), kernel_size=3, convert2lab=False, max_dist=10, ratio=0.5)
print('seg shape', seg.shape)

data_seg_med = scipy.ndimage.median(input=LCCS.data.transpose(), labels=seg, index=seg)
# data_seg_med = data_seg_med.squeeze("time").drop("time")
print("seg_med shape", data_seg_med.shape)

out = xr.DataArray(data=data_seg_med.transpose(),
                   dims=meta_d.dims,
                   coords=meta_d.coords,
                   attrs=meta_d.attrs)
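# Generic sketch of the per-segment median smoothing performed above: label a
# 2-D image with felzenszwalb, then replace each pixel with the median of its
# segment (passing index=seg broadcasts the per-label medians back onto the
# pixel grid). `img2d` is an assumed 2-D array input.
def _segment_median(img2d):
    seg = felzenszwalb(img2d)
    return scipy.ndimage.median(input=img2d, labels=seg, index=seg)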