def test_band_nodim(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array()[:, 0].drop('bands'))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_band_nodim.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
def test_xy_nolabels(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array().drop('x').drop('y'))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_xy_nolabels.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
def test_typing_float(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array().astype(numpy.float64))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_typing_float.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
        self.assertEqual(res.get_array().dtype, ref.get_array().dtype)
def test_coordinateOrderChanged(self):
    inpcube = DataCube(self.inpcube.get_array().transpose())
    refcube = DataCube(self.refcube.get_array().transpose())
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a rolling window median composite to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal
    dimension 't' as input.
    """
    array: xarray.DataArray = cube.get_array()
    import pandas as pd
    import numpy as np

    # This computes dekads (10-day periods) and can be used to resample data
    # to the desired frequency: map every date to the start of its dekad.
    time_dimension_index = array.get_index('t')
    d = time_dimension_index.day - np.clip(
        (time_dimension_index.day - 1) // 10, 0, 2) * 10 - 1
    date = time_dimension_index.values - np.array(d, dtype="timedelta64[D]")

    # Replace each value with the median of a 30-sample rolling window along
    # 't' (a 30-day window at daily frequency) to fill gaps on all dates,
    composited = array.rolling(t=30, min_periods=1, center=True).median().dropna("t")
    # then resample the rolling window medians to dekads.
    ten_daily_composite = composited.groupby_bins("t", date).median()
    return DataCube(ten_daily_composite)
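# A minimal usage sketch for the dekad compositing UDF above, assuming the
# DataCube wrapper used elsewhere in this repo; shapes and dates are
# illustrative only. Observations are placed on dekad starts (day 1, 11, 21)
# so the computed bin edges are unique.
import numpy
import pandas
import xarray

times = pandas.to_datetime(
    [f"2021-{m:02d}-{d:02d}" for m in (1, 2, 3) for d in (1, 11, 21)])
arr = xarray.DataArray(
    numpy.random.rand(9, 1, 2, 2),
    dims=['t', 'bands', 'y', 'x'],
    coords={'t': times, 'bands': ['ndvi']})
composited = apply_datacube(DataCube(arr), {})
print(composited.get_array().dims)  # 't' is replaced by a 't_bins' dimension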
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
def test_missingCoordinates(self):
    inpcube = DataCube(self.inpcube.get_array()[:, :, 0, :])
    refcube = DataCube(self.refcube.get_array()[:, :, 0, :])
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def test_hasNoDataTimeSeries(self):
    inpcube = DataCube(self.inpcube.get_array().where(
        self.inpcube.get_array().x != 3, numpy.nan, drop=False))
    refcube = DataCube(self.refcube.get_array().where(
        self.refcube.get_array().x != 3, 0., drop=False))
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies Savitzky-Golay smoothing to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal
    dimension 't' as input.
    """
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    # Fill gaps along time before filtering, then smooth with a length-5,
    # order-2 Savitzky-Golay window along the time axis.
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(
        xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
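# A minimal sketch of the Savitzky-Golay UDF on synthetic data with a gap;
# the cube shape and the NaN position are illustrative only.
import numpy
import pandas
import xarray

times = pandas.date_range("2021-01-01", periods=10, freq="5D")
values = numpy.linspace(0., 1., 10).reshape(10, 1, 1, 1)
values[4] = numpy.nan  # gap filled by interpolate_na before smoothing
arr = xarray.DataArray(values, dims=['t', 'bands', 'y', 'x'], coords={'t': times})
smoothed = apply_datacube(DataCube(arr), {})
assert not numpy.isnan(smoothed.get_array()).any()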
def test_multiBand(self):
    inparr1 = self.inpcube.get_array()
    inparr2 = self.inpcube.get_array().assign_coords(bands=['extraband'])
    refarr1 = self.refcube.get_array()
    refarr2 = self.refcube.get_array().assign_coords(bands=['extraband'])
    inpcube = DataCube(xarray.concat([inparr1, inparr2], dim='bands'))
    refcube = DataCube(xarray.concat([refarr1, refarr2], dim='bands'))
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=True, do_phenology=False))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    """Compute the NDVI based on Sentinel-2 tiles.

    Bands "TOC-B04_10M" (red) and "TOC-B08_10M" (near-infrared) are required.
    The NDVI computation is applied to all time-stamped 2D raster tiles that
    have equal time stamps.
    """
    array: xarray.DataArray = cube.get_array()
    red = array.sel(bands="TOC-B04_10M")
    nir = array.sel(bands="TOC-B08_10M")
    ndvi = (nir - red) / (nir + red)
    return DataCube(ndvi)
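# A quick numeric check of the NDVI formula used above; the band values and
# the tiny cube shape are made up for illustration.
import numpy
import xarray

arr = xarray.DataArray(
    numpy.array([[0.1], [0.5]]),  # red = 0.1, nir = 0.5
    dims=['bands', 'x'],
    coords={'bands': ['TOC-B04_10M', 'TOC-B08_10M']})
ndvi = apply_datacube(DataCube(arr), {}).get_array()
assert abs(float(ndvi[0]) - (0.5 - 0.1) / (0.5 + 0.1)) < 1e-12  # approx. 0.667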
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    # Access the underlying xarray
    inarr = cube.get_array()

    # Compute NDVI
    B4 = inarr.loc[:, 'TOC-B04_10M']
    B8 = inarr.loc[:, 'TOC-B08_10M']
    ndvi = (B8 - B4) / (B8 + B4)

    # Re-introduce the bands dimension
    ndvi = ndvi.expand_dims(dim='bands', axis=-3).assign_coords(bands=['ndvi'])

    # Wrap back into a datacube and return
    return DataCube(ndvi)
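# Unlike the previous NDVI UDF, this variant restores the 'bands' dimension,
# so the output keeps the usual (t, bands, y, x) layout. A minimal sketch with
# made-up values:
import numpy
import xarray

arr = xarray.DataArray(
    numpy.full((2, 2, 3, 3), 0.5),
    dims=['t', 'bands', 'y', 'x'],
    coords={'bands': ['TOC-B04_10M', 'TOC-B08_10M']})
out = apply_datacube(DataCube(arr), {}).get_array()
assert out.dims == ('t', 'bands', 'y', 'x')
assert list(out.bands.values) == ['ndvi']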
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of the
    time-series of the datacube. This UDF reduces the time dimension of the
    input datacube.
    :param udf_cube: the openEO virtual DataCube object
    :return: DataCube with the detected breaks
    """
    from datetime import datetime

    # Convert the openEO datacube into an xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    # Select a single band; this removes the band dimension
    my_xarray = my_xarray.sel(bands='VV')

    # Define the history and monitoring periods
    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)

    # Get the dates from the data cube
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in my_xarray.coords['t'].values
    ]

    # Pre-processing: crop the input data cube to the history and monitoring periods
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist, end_monitor)
    # Note that data has shape 91, and not 92, for our dataset. The reason is
    # the definition in the bfast utils.py script, where start_hist is compared
    # with < dates, and not <= dates.

    # Specify the BFASTmonitor parameters
    model = BFASTMonitor(start_monitor,
                         freq=31,
                         k=3,
                         verbose=1,
                         hfrac=0.25,
                         trend=True,
                         level=0.05,
                         backend='python')

    # Run the monitoring:
    # model.fit(data, dates, nan_value=udf_data.nodatavals[0])
    model.fit(data, dates)

    # Get the detected breaks as an xarray DataArray
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[my_xarray.coords['x'].values, my_xarray.coords['y'].values],
        dims=['x', 'y'])

    # Return the breaks as an openEO DataCube
    return DataCube(breaks_xr)
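# The UDF above relies on module-level imports that are not shown in this
# snippet; a plausible header, based on the names it uses (xr, pd,
# BFASTMonitor, and the crop_data_dates helper from the bfast package
# referenced in the comments), would be:
import xarray as xr
import pandas as pd
from bfast import BFASTMonitor
from bfast.utils import crop_data_dates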
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    """Reduce the time dimension of the datacube: minimum, maximum, sum and
    mean are computed for each pixel over time and stacked along 'bands'.

    Args:
        cube (DataCube): The data cube with a temporal dimension 't'
        context (dict): The UDF context

    Returns:
        DataCube: A cube with the four per-pixel statistics along 'bands'
    """
    array: xarray.DataArray = cube.get_array()
    result = xarray.concat(
        [array.min(dim='t'), array.max(dim='t'), array.sum(dim='t'), array.mean(dim='t')],
        dim='bands'
    )
    return DataCube(result)
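# A minimal check of the reducer above; the 2x1x1x1 cube values are
# illustrative only.
import numpy
import xarray

arr = xarray.DataArray(
    numpy.array([1., 3.]).reshape(2, 1, 1, 1),
    dims=['t', 'bands', 'y', 'x'])
stats = apply_hypercube(DataCube(arr), {}).get_array()
# The band order follows the concat order: min, max, sum, mean
assert list(stats[:, 0, 0].values) == [1., 3., 4., 2.]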
def apply_datacube(cube: DataCube, context) -> DataCube:
    import xarray
    import numpy as np

    # Get the xarray DataArray containing the time series
    array: xarray.DataArray = cube.get_array()

    # Bucket edges at 85%..115% of the median, in 10% steps, padded with 0 and 255
    vmin = 0.85
    vmax = 1.15
    step = 0.1
    median = array.median(skipna=True)
    bins = np.arange(vmin, vmax + step, step) * median.values.tolist()
    bins = np.concatenate([[0], bins, [255]])
    buckets = np.digitize(array.values, bins=bins).astype(float)
    return DataCube(
        xarray.DataArray(buckets,
                         coords={
                             't': array.t.values,
                             'bands': array.bands.values,
                             'y': array.y.values,
                             'x': array.x.values,
                         },
                         dims=['t', 'bands', 'y', 'x']))
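# A small worked example of the bucketing, with made-up values. With a median
# of 1.0 the inner bucket edges sit at 85%..115% of the median, padded with 0
# and 255, so 0.5 lands in the lowest bucket and 1.0 in the bucket around the
# median.
import numpy
import xarray

arr = xarray.DataArray(
    numpy.array([0.5, 1.0, 2.0]).reshape(3, 1, 1, 1),
    dims=['t', 'bands', 'y', 'x'],
    coords={'t': range(3), 'bands': ['b0'], 'y': [0], 'x': [0]})
out = apply_datacube(DataCube(arr), None).get_array()
print(out.values.ravel())  # bucket indices: 0.5 -> 1, 1.0 -> 3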
def test_LoadSave(self):
    cube1 = DataCube(self.build_array(32, 16))
    save_DataCube('/tmp/test_LoadSave.json', cube1)
    cube2 = load_DataCube('/tmp/test_LoadSave.json')
    xarray.testing.assert_allclose(cube1.get_array(), cube2.get_array())
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    import pandas
    import xarray
    import numpy

    class PhenologyParams:
        """
        sStart: First date of the interval for getting the season start
        sEnd: Last date of the interval for getting the season start
        mStart: First date of the interval for getting the maximum greenness
        mEnd: Last date of the interval for getting the maximum greenness
        eStart: First date of the interval for getting the season end
        eEnd: Last date of the interval for getting the season end
        tSos: The offset (%) to add to the start date minimum to set the start of the season
        tEos: The offset (%) to subtract from the end date minimum to set the end of the season
        """

        def __init__(self, year):
            self.year = year  # year of the season, int
            self.sStart = pandas.DateOffset(months=4, days=2)   # Start of interval for start of season
            self.sEnd = pandas.DateOffset(months=6, days=10)    # End of interval for start of season
            self.mStart = pandas.DateOffset(months=6, days=10)  # Start of interval for mid of season
            self.mEnd = pandas.DateOffset(months=9, days=1)     # End of interval for mid of season
            self.eStart = pandas.DateOffset(months=9, days=1)   # Start of interval for end of season
            self.eEnd = pandas.DateOffset(months=12, days=31)   # End of interval for end of season
            self.tSos = 10.  # Threshold for start of season
            self.tEos = 10.  # Threshold for end of season

    class CropPhenology:

        def extractSeasonDates(self, timeseries, args):
            if timeseries is None:
                return None
            # Get the local maximum greenness
            mMax = self.getLocalMax(
                timeseries,
                pandas.Timestamp(args.year, args.mStart.months, args.mStart.days),
                pandas.Timestamp(args.year, args.mEnd.months, args.mEnd.days))
            dmMax = mMax['Times']
            ymMax = mMax['Greenness']
            # Get the start of season date
            sos = self.getStartOfSeason(
                timeseries,
                pandas.Timestamp(args.year, args.sStart.months, args.sStart.days),
                pandas.Timestamp(args.year, args.sEnd.months, args.sEnd.days),
                float(args.tSos), float(ymMax))
            # Get the end of season date
            eos = self.getEndOfSeason(
                timeseries,
                pandas.Timestamp(args.year, args.eStart.months, args.eStart.days),
                pandas.Timestamp(args.year, args.eEnd.months, args.eEnd.days),
                float(args.tEos), float(ymMax))
            # Return the result
            return [sos[3], eos[3]]

        def getLocalMax(self, df, start, end):
            df_range = df.loc[df['Times'].between(start, end)]
            return df_range.loc[df_range['Greenness'].idxmax()]

        def getStartOfSeason(self, df, start, end, offset, yMax):
            """
            Calculate the start of the season based on the selected interval
            [start, end] and a greenness curve (df). Within this interval we first
            look for the local minimum greenness, marked by (dsMin, ysMin). In the
            second step we use the offset (%) to calculate the greenness offset
            that needs to be added to the minimum value to get the start of the
            season. This offset is calculated as a percentage of the difference
            between the maximum greenness and the local minimum.
            """
            # Get the local minimum greenness in the start-of-season interval
            df_sRange = df.loc[df['Times'].between(start, end)]
            sMin = df_sRange.loc[df_sRange['Greenness'].idxmin()]
            dsMin = sMin['Times']
            ysMin = sMin['Greenness']
            # Calculate the greenness value corresponding to the start of the season
            ySos = ysMin + ((yMax - ysMin) * (offset / 100.0))
            # Get the date with the greenness closest to this value, at or after the minimum
            df_sRange = df_sRange.loc[df_sRange['Times'] >= dsMin]
            sos = df_sRange.iloc[(df_sRange['Greenness'] - ySos).abs().argsort()[:1]]
            return (dsMin, ysMin, ySos, pandas.to_datetime(str(sos['Times'].values[0])))

        def getEndOfSeason(self, df, start, end, offset, yMax):
            """
            Calculate the end of the season based on the selected interval
            [start, end] and a greenness curve (df). Within this interval we first
            look for the local minimum greenness, marked by (deMin, yeMin). In the
            second step we use the offset (%) to calculate the greenness offset
            that needs to be added to the minimum value to get the end of the
            season. This offset is calculated as a percentage of the difference
            between the maximum greenness and the local minimum.
            """
            # Get the local minimum greenness in the end-of-season interval
            df_eRange = df.loc[df['Times'].between(start, end)]
            eMin = df_eRange.loc[df_eRange['Greenness'].idxmin()]
            deMin = eMin['Times']
            yeMin = eMin['Greenness']
            # Calculate the greenness value corresponding to the end of the season
            yEos = yeMin + ((yMax - yeMin) * (offset / 100.0))
            # Get the date with the greenness closest to this value, at or before the minimum
            df_eRange = df_eRange.loc[df_eRange['Times'] <= deMin]
            eos = df_eRange.iloc[(df_eRange['Greenness'] - yEos).abs().argsort()[:1]]
            return (deMin, yeMin, yEos, pandas.to_datetime(str(eos['Times'].values[0])))

    array = cube.get_array()
    cropphenology = CropPhenology()
    phenologyparams = PhenologyParams(int(array.t.dt.year[0]))
    season = xarray.DataArray(
        numpy.zeros((2, array.x.shape[0], array.y.shape[0]), dtype=numpy.datetime64),
        dims=('bands', 'x', 'y'),
        coords={'bands': ['sos', 'eos']})
    for ix in array.x.values:
        for iy in array.y.values:
            iserie = pandas.DataFrame(data={
                'Greenness': array[:, 0, ix, iy].values,
                'Times': array.t.values
            })
            iseason = cropphenology.extractSeasonDates(iserie, phenologyparams)
            # season.values[:, ix, iy] = [iseason[0].dayofyear, iseason[1].dayofyear]
            season.values[:, ix, iy] = iseason
    return DataCube(season)
def test_Reduce(self):
    cube1 = reduceXY(8, 2, DataCube(self.build_array(32, 16)))
    cube2 = DataCube(self.build_array(4, 8, mult=(8, 2)))
    xarray.testing.assert_allclose(cube1.get_array(), cube2.get_array())
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    import functools
    import xarray
    import numpy
    from xarray.core.dataarray import DataArray
    import pandas
    from tensorflow.python.keras.models import load_model

    # BUILTIN CONFIG
    #########################
    NDVI = 'ndvi'
    PVid = 'ndvi'
    S2id = 'S2ndvi'
    VHid = 'VH'
    VVid = 'VV'
    prediction_model = ""
    gan_window_half = '90D'
    gan_steps = '5D'
    gan_samples = 37  # this is 2*gan_window_half/gan_steps + 1
    acquisition_steps = '10D'
    scaler = 'default'

    # FILL FROM CONTEXT IF PRESENT
    #########################
    if context is not None:
        prediction_model = context.get('prediction_model', prediction_model)
        gan_window_half = context.get('gan_window_half', gan_window_half)
        gan_steps = context.get('gan_steps', gan_steps)
        gan_samples = context.get('gan_samples', gan_samples)
        acquisition_steps = context.get('acquisition_steps', acquisition_steps)
        scaler = context.get('scaler', scaler)

    # HELPER FUNCTIONS
    #########################
    @functools.lru_cache(maxsize=25)
    def load_datafusion_model(prediction_model):
        return load_model(prediction_model)

    class default_scaler():

        def minmaxscaler(self, data, source):
            ranges = {}
            ranges[NDVI] = [-0.08, 1]
            ranges[VVid] = [-20, -2]
            ranges[VHid] = [-33, -8]
            # Scale between -1 and 1
            datarescaled = 2 * (data - ranges[source][0]) / (ranges[source][1] - ranges[source][0]) - 1
            return datarescaled

        def minmaxunscaler(self, data, source):
            ranges = {}
            ranges[NDVI] = [-0.08, 1]
            ranges[VVid] = [-20, -2]
            ranges[VHid] = [-33, -8]
            # Unscale
            dataunscaled = 0.5 * (data + 1) * (ranges[source][1] - ranges[source][0]) + ranges[source][0]
            return dataunscaled

    class passthrough_scaler():

        def minmaxscaler(self, data, source):
            return data

        def minmaxunscaler(self, data, source):
            return data

    def process_window(inarr, model, scaler, windowsize=128, nodata=0):
        # Fill gaps and resample to the GAN time step
        inarr = inarr.ffill(dim='t').resample(t='1D').ffill().resample(t=gan_steps).ffill()

        # Older tensorflows expect an exact number of samples in every dimension
        if len(inarr.t) > gan_samples:
            trimfront = int((len(inarr.t) - gan_samples) / 2)
            trimback = trimfront + (0 if (len(inarr.t) - gan_samples) % 2 == 0 else 1)
            inarr = inarr.sel(t=inarr.t[trimfront:-trimback])
        if len(inarr.t) < gan_samples:
            trimfront = int((gan_samples - len(inarr.t)) / 2)
            trimback = trimfront + (0 if (gan_samples - len(inarr.t)) % 2 == 0 else 1)
            front = pandas.date_range(end=inarr.t.values.min() - pandas.to_timedelta(gan_steps),
                                      periods=trimfront, freq=gan_steps).values.astype(inarr.t.dtype)
            back = pandas.date_range(start=inarr.t.values.max() + pandas.to_timedelta(gan_steps),
                                     periods=trimback, freq=gan_steps).values.astype(inarr.t.dtype)
            inarr = inarr.reindex({'t': numpy.concatenate((front, inarr.t.values, back))})

        # Grow it to 5 dimensions
        inarr = inarr.expand_dims(dim=['d0', 'd5'], axis=[0, 5])

        # Select bands
        PV = inarr.sel(bands=PVid)
        S2 = inarr.sel(bands=S2id)
        VH = inarr.sel(bands=VHid)
        VV = inarr.sel(bands=VVid)

        # Scale S1
        VV = scaler.minmaxscaler(VV, VVid)
        VH = scaler.minmaxscaler(VH, VHid)
        # Concatenate S1 data
        s1_backscatter = xarray.concat((VV, VH), dim='d5')

        # Scale NDVI
        s2_ndvi = scaler.minmaxscaler(S2, NDVI)
        probav_ndvi = scaler.minmaxscaler(PV, NDVI)

        # Remove any nan values. Numpy arrays are passed in because this
        # reduces RAM usage (newer tensorflows copy out from xarray into a
        # numpy array) and keeps backwards compatibility.
        s2_ndvi = s2_ndvi.fillna(nodata).values
        s1_backscatter = s1_backscatter.fillna(nodata).values
        probav_ndvi = probav_ndvi.fillna(nodata).values

        # Run the neural network
        predictions = model.predict((s1_backscatter, s2_ndvi, probav_ndvi))

        # Unscale
        predictions = scaler.minmaxunscaler(predictions, NDVI)

        return predictions.reshape((windowsize, windowsize))

    # MAIN CODE
    #########################
    # Extract the xarray
    inarr = cube.get_array()

    # Rescale; numpy.log10 operates directly on DataArrays (xarray.ufuncs has
    # been removed from recent xarray releases)
    inarr.loc[{'bands': PVid}] = 0.004 * inarr.sel(bands=PVid) - 0.08
    inarr.loc[{'bands': VHid}] = 10. * numpy.log10(inarr.sel(bands=VHid))
    inarr.loc[{'bands': VVid}] = 10. * numpy.log10(inarr.sel(bands=VVid))

    # Compute windows
    xsize, ysize = inarr.x.shape[0], inarr.y.shape[0]
    windowlist = [((0, 128), (0, 128))]

    # Init scaler
    sc = default_scaler()
    if scaler == 'passthrough':
        sc = passthrough_scaler()

    # Load the model
    model = load_datafusion_model(prediction_model)

    # Compute acquisition dates
    acquisition_dates = pandas.date_range(
        inarr.t.values.min() + pandas.to_timedelta(gan_window_half),
        inarr.t.values.max() - pandas.to_timedelta(gan_window_half),
        freq=acquisition_steps)

    # Result buffer
    shape = [len(acquisition_dates), 1, 1, 1]
    shape[inarr.dims.index('x')] = xsize
    shape[inarr.dims.index('y')] = ysize
    predictions = DataArray(numpy.full(shape, numpy.nan, dtype=numpy.float32),
                            dims=inarr.dims,
                            coords={'bands': ['predictions'], 't': acquisition_dates})

    # Run processing
    for idate in acquisition_dates:
        for iwin in windowlist:
            data = inarr.sel({
                'x': slice(iwin[0][0], iwin[0][1]),
                'y': slice(iwin[1][0], iwin[1][1]),
                't': slice(idate - pandas.to_timedelta(gan_window_half),
                           idate + pandas.to_timedelta(gan_window_half))
            })
            ires = process_window(data, model, sc, 128, 0.).astype(numpy.float32)
            predictions.loc[{'t': idate,
                             'x': range(iwin[0][0], iwin[0][1]),
                             'y': range(iwin[1][0], iwin[1][1])}] = ires

    # Return the predictions
    return DataCube(predictions)
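# A sketch of how the context dict might configure the UDF above; the model
# path is hypothetical, and any key left out falls back to the built-in
# defaults defined at the top of the function.
context = {
    'prediction_model': '/data/models/datafusion_gan.h5',  # hypothetical path
    'scaler': 'passthrough',  # skip the min/max scaling
}
# fused = apply_datacube(cube, context)  # needs tensorflow and the model file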
def resampleXY(xskip, yskip, datacube: DataCube):
    dataarray = datacube.get_array()
    return DataCube(dataarray.coarsen({'x': xskip, 'y': yskip}).mean())
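# A minimal sketch of the coarsening behaviour, with an illustrative 4x4
# array: each 2x2 block is averaged into a single output pixel.
import numpy
import xarray

arr = xarray.DataArray(numpy.arange(16.).reshape(4, 4), dims=['y', 'x'])
small = resampleXY(2, 2, DataCube(arr)).get_array()
assert small.shape == (2, 2)
assert float(small[0, 0]) == (0 + 1 + 4 + 5) / 4  # mean of the top-left block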