def run_bfast(indice_array):
    '''
    Runs BFAST algorithm

    Input:
        indice_array: Array[] sorted 3D array with image arrays for the whole
            period (time is the first axis). Missing observations are
            expected to be NaN.
    Output:
        Breaks and mean of timeseries, as exposed by ``model.breaks`` and
        ``model.means`` after fitting.
    '''
    print("starting Bfast Process")

    # set parameters
    k = 3
    freq = 30
    trend = True
    hfrac = 0.25
    level = 0.05
    nodata = -9999  # sentinel handed to BFASTMonitor.fit as nan_value
    start_hist = datetime(2013, 8, 1)  # set start of history period
    start_monitor = datetime(2019, 7, 1)  # set start of monitoring period
    end_monitor = datetime(2020, 7, 31)  # set end of monitoring period

    # one time stamp per month start over the whole period
    month_starts = pd.date_range('2013-08-01', '2020-07-31', freq='MS')
    all_dates = [pd.to_datetime(date) for date in month_starts]

    values = numpy.asarray(indice_array)
    nan_mask = numpy.isnan(values)
    nancount = numpy.count_nonzero(nan_mask)
    print(f"Timeseries has {nancount} NANs")

    # Scale the valid values FIRST and insert the no-data sentinel afterwards.
    # Substituting before scaling would turn -9999 into -99990000, which no
    # longer matches the nan_value passed to fit() below.
    scaled = numpy.where(nan_mask, nodata, values * 10000)
    scaled = scaled.astype(int)

    # crop array and dates to the history + monitoring window
    data, dates = crop_data_dates(scaled, all_dates, start_hist, end_monitor)

    # Report the cropped dates, i.e. exactly what the model is fitted on.
    if dates:
        print("First date: {}".format(dates[0]))
        print("Last date: {}".format(dates[-1]))
    print("Shape of data array: {}".format(data.shape))
    print(f'Number of dates {len(dates)}')

    # create BFAST model
    model = BFASTMonitor(start_monitor, freq=freq, k=k, hfrac=hfrac,
                         trend=trend, level=level, backend='python',
                         verbose=1)

    # fit model to timeseries
    model.fit(data, dates, n_chunks=5, nan_value=nodata)

    # return detected breaks in timeseries and mean values of pixel
    breaks = model.breaks
    means = model.means
    return breaks, means
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of time-series of the datacube.
    This UDF reduces the time dimension of the input datacube.

    :param udf_cube: the openEO virtual DataCube object
    :param context: UDF context dictionary (not used by this function)
    :return DataCube(breaks_xr): cube holding ``model.breaks``, labelled with
        the non-temporal dimensions of the input
    """
    from datetime import datetime

    # convert the openEO datacube into the xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    # select single band, removes band dimension
    my_xarray = my_xarray.sel(bands='VV')
    # crop_data_dates slices along axis 0, so make sure 't' is the leading
    # dimension (a no-op when it already is).
    my_xarray = my_xarray.transpose(
        't', *[dim for dim in my_xarray.dims if dim != 't'])

    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)

    # get the dates from the data cube:
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in my_xarray.coords['t'].values
    ]

    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist, end_monitor)
    # !!! Note !!! that data has the shape 91, and not 92 for our dataset. The reason is the definition in
    # the bfast utils.py script where the start_hist is set < than dates, and not <= than dates.

    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor, freq=31, k=3, verbose=1, hfrac=0.25,
                         trend=True, level=0.05, backend='python')

    # run the monitoring:
    model.fit(data, dates)

    # get the detected breaks as an xarray Data Array. The breaks array covers
    # the axes that follow time in `data`, so label it with those dimensions
    # in their actual order instead of assuming a fixed (x, y) layout.
    spatial_dims = list(my_xarray.dims[1:])
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[my_xarray.coords[dim].values for dim in spatial_dims],
        dims=spatial_dims,
    )

    # return the breaks as openEO DataCube:
    return DataCube(breaks_xr)
def run_bfast(indice_array):
    """Fit a BFASTMonitor model on a 3D index stack and return its results.

    NaN entries of ``indice_array`` are replaced by -9999, the stack is
    cropped to the history/monitoring window and cast to ``int`` before
    fitting with the python backend.

    Returns the tuple ``(model.breaks, model.means)``.
    """
    # model configuration
    harmonics = 3
    season_freq = 30
    use_trend = True
    bandwidth = 0.25
    significance = 0.05

    history_start = datetime(2013, 4, 1)  # no data for the first three months of 2013
    monitor_start = datetime(2016, 7, 1)
    monitor_end = datetime(2018, 12, 31)

    # monthly time stamps (month start)
    month_starts = pd.date_range('2013-04-01', '2018-7-31', freq='MS')
    timeline = [pd.to_datetime(stamp) for stamp in month_starts]

    # replace missing values by the sentinel used as nan_value below
    filled = numpy.where(numpy.isnan(indice_array), -9999, indice_array)
    data, timeline = crop_data_dates(filled, timeline, history_start, monitor_end)
    # NOTE(review): no x10000 scaling is applied in this variant before the
    # integer cast - presumably the input is already scaled; confirm.
    data = data.astype(int)

    # keep the date list and the number of layers in step
    n_layers = data.shape[0]
    if len(timeline) > n_layers:
        del timeline[n_layers:]
    elif len(timeline) < n_layers:
        timeline.insert(0, datetime(2013, 1, 1))

    print(f"First date: {timeline[0]}")
    print(f"Last date: {timeline[-1]}")
    print(f"Shape of data array: {data.shape}")
    print(f'Number of dates {len(timeline)}')

    model = BFASTMonitor(
        monitor_start,
        freq=season_freq,
        k=harmonics,
        hfrac=bandwidth,
        trend=use_trend,
        level=significance,
        backend='python',
        verbose=1,
    )
    model.fit(data, timeline, nan_value=-9999)

    return model.breaks, model.means
def bfast_window(window, read_lock, write_lock, src, dst, segment_dir, monitor_params, crop_params, out):
    """Run the bfast model on one image window and write the result to ``dst``.

    The window is read from ``src`` under ``read_lock``, cropped to the dates
    selected by ``crop_params``, fitted with ``BFASTMonitor(**monitor_params)``
    and written back under ``write_lock`` as two float32 bands: the break
    expressed as a decimal year and the magnitude.
    """
    # read under the lock to avoid duplicate reading and corruption of the data
    with read_lock:
        # NOTE(review): open question from the original author - the int16
        # cast does not preserve NaN; a dedicated no-data value may be wanted.
        data = src.read(window=window).astype(np.int16)

    # read the local observation dates, skipping blank lines
    dates = []
    with (segment_dir / 'dates.csv').open() as f:
        for line in f.read().splitlines():
            if line.rstrip():
                dates.append(datetime.strptime(line, "%Y-%m-%d"))

    # crop the initial data to the used dates
    data, dates = crop_data_dates(data, dates, **crop_params)

    # build and fit the model
    model = BFASTMonitor(**monitor_params)
    model.fit(data, dates)

    # vectorized function formatting the results as decimal year
    # (e.g. mid 2015 will be 2015.5); the dates argument is not vectorized
    to_decimal = np.vectorize(break_to_decimal_year, excluded=[1])

    # narrow the dates to the monitoring period; +1 because the slice end is exclusive
    first = dates.index(monitor_params['start_monitor'])
    last = dates.index(crop_params['end'])
    monitoring_dates = dates[first:last + 1]

    decimal_breaks = to_decimal(model.breaks, monitoring_dates)

    # aggregate the results on 2 bands
    stacked = np.stack((decimal_breaks, model.magnitudes))
    monitoring_results = stacked.astype(np.float32)

    with write_lock:
        dst.write(monitoring_results, window=window)
        out.update_progress()

    return
def bfast4openeo(udf_data):
    """Run BFASTMonitor on an xarray cube and return the detected breaks.

    :param udf_data: xarray DataArray with a ``'time'`` dimension and two
        further dimensions (the original code labelled them ``'y'``/``'x'``)
    :return: xarray DataArray holding ``model.breaks``, labelled with the
        non-temporal dimensions of ``udf_data`` in their actual order
    """
    from bfast import BFASTMonitor
    from bfast.utils import crop_data_dates
    from datetime import datetime
    import xarray as xr
    import pandas as pd

    start_hist = datetime(2016, 12, 31)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)

    # crop_data_dates slices along axis 0, so make sure 'time' is the leading
    # dimension (a no-op when it already is).
    udf_data = udf_data.transpose(
        'time', *[dim for dim in udf_data.dims if dim != 'time'])

    # get the dates from the data cube:
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in udf_data.coords['time'].values
    ]

    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(udf_data.values, dates, start_hist, end_monitor)
    # !!! Note !!! that data has the shape 91, and not 92 for our dataset. The reason is the definition in
    # the bfast utils.py script where the start_hist is set < than dates, and not <= than dates.

    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor, freq=31, k=3, verbose=1, hfrac=0.25,
                         trend=True, level=0.05, backend='python')

    # run the monitoring:
    model.fit(data, dates)

    # get the detected breaks as an xarray Data Array. The breaks array covers
    # the axes that follow time in `data`, so take the labels from the input
    # instead of assuming a fixed (y, x) layout.
    # !!! question !!! are those breaks identical to the breaks we get from R script?
    spatial_dims = list(udf_data.dims[1:])
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[udf_data.coords[dim].values for dim in spatial_dims],
        dims=spatial_dims,
    )
    return breaks_xr
def set_bfast_parameters(self, start_monitor, end_monitor, start_hist, freq, k, hfrac, trend, level, backend='opencl', verbose=1, device_id=0):
    '''Store the BFAST parameters on the instance and build the model.

    All values are kept as attributes of the same name and a
    ``BFASTMonitor`` is created as ``self.model``.

    parameters:
    -----------

    start_monitor : datetime object
        A datetime object specifying the start of
        the monitoring phase.

    end_monitor : datetime object
        A datetime object specifying the end of
        the monitoring phase.

    start_hist : datetime object
        A datetime object specifying the start of
        the history phase.

    freq : int
        The frequency for the seasonal model.

    k : int
        The number of harmonic terms.

    hfrac : float
        Float in the interval (0,1) specifying the
        bandwidth relative to the sample size in
        the MOSUM/ME monitoring processes.

    trend : bool
        Whether a trend offset term shall be used or not.

    level : float
        Significance level of the monitoring (and ROC,
        if selected) procedure, i.e., probability of
        type I error.

    backend : string, either 'opencl' or 'python' (default 'opencl')
        Chooses what backend to use. opencl uses the GPU implementation,
        which is much faster.

    verbose : int, optional (default=1)
        The verbosity level (0=no output, 1=output)

    device_id : int, optional (default=0)
        Device index forwarded to BFASTMonitor.
    '''
    self.start_monitor = start_monitor
    self.end_monitor = end_monitor
    self.start_hist = start_hist
    self.freq = freq
    self.k = k
    self.hfrac = hfrac
    self.trend = trend
    self.level = level
    self.backend = backend
    self.verbose = verbose
    self.device_id = device_id

    # Only probe for OpenCL devices when that backend was requested, and do
    # it before building the model so the hint is visible even if model
    # construction fails afterwards.
    if backend == 'opencl':
        try:
            print("device: ", pyopencl.get_platforms()[0].get_devices())
        except Exception:
            # best-effort diagnostic only; never abort because of the probe
            print(
                "You selected openCL, but no device was found, are you sure you set up a gpu session?"
            )

    self.model = BFASTMonitor(
        self.start_monitor,
        freq=freq,
        k=k,
        hfrac=hfrac,
        trend=trend,
        level=level,
        backend=backend,
        verbose=verbose,
        device_id=device_id,
    )
def run_bfast_(backend, k=3, freq=365, trend=False, hfrac=0.25, level=0.05, start_hist=datetime(2002, 1, 1), start_monitor=datetime(2010, 1, 1), end_monitor=datetime(2018, 1, 1)):
    """Fit BFASTMonitor on the ``peru_small`` sample data with ``backend``.

    The sample files are downloaded into ``data/peru_small`` when missing.
    The data is cropped to ``[start_hist, end_monitor]`` and the fit is timed.
    With the ``"opencl"`` backend the fit is run in 5 chunks.

    Returns ``(breaks, means, magnitudes, valids)`` taken from the model.
    """
    print("Running the {} backend".format(backend))

    # download and parse input data
    ifile_meta = "data/peru_small/dates.txt"
    ifile_data = "data/peru_small/data.npy"

    os.makedirs("data/peru_small", exist_ok=True)

    sources = (
        (ifile_meta, 'https://sid.erda.dk/share_redirect/fcwjD77gUY/dates.txt'),
        (ifile_data, 'https://sid.erda.dk/share_redirect/fcwjD77gUY/data.npy'),
    )
    for local_path, url in sources:
        if not os.path.exists(local_path):
            wget.download(url, local_path)

    data_orig = np.load(ifile_data)
    with open(ifile_meta) as f:
        raw_lines = f.read().split('\n')
    dates = [datetime.strptime(line, '%Y-%m-%d') for line in raw_lines if line]

    data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)

    # fit BFASTMonitor model
    model = BFASTMonitor(
        start_monitor,
        freq=freq,
        k=k,
        hfrac=hfrac,
        trend=trend,
        level=level,
        backend=backend,
        verbose=0,
        device_id=0,
    )

    # only the opencl backend is asked to process the data in chunks
    fit_options = {"nan_value": -32768}
    if backend == "opencl":
        fit_options["n_chunks"] = 5

    tic = time.time()
    model.fit(data, dates, **fit_options)
    toc = time.time()
    print("All computations have taken {} seconds.".format(toc - tic))

    return model.breaks, model.means, model.magnitudes, model.valids
# read one ISO date (YYYY-MM-DD) per line, ignoring empty lines
with open(ifile_meta) as f:
    dates = [
        datetime.strptime(d, '%Y-%m-%d')
        for d in f.read().split('\n')
        if len(d) > 0
    ]

# restrict data and dates to the history + monitoring window
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print(f"First date: {dates[0]}")
print(f"Last date: {dates[-1]}")
print(f"Shape of data array: {data.shape}")

# fit BFASTMonitor model on the first OpenCL device
model = BFASTMonitor(
    start_monitor,
    freq=freq,
    k=k,
    hfrac=hfrac,
    trend=trend,
    level=level,
    backend='opencl',
    verbose=1,
    device_id=0,
)

# time the fit, processed in 5 chunks
start_time = time.time()
model.fit(data, dates, n_chunks=5, nan_value=-32768)
end_time = time.time()
print(f"All computations have taken {end_time - start_time} seconds.")

# results used for visualization
breaks = model.breaks
means = model.means
from bfast.monitor.utils import crop_data_dates
from bfast import BFASTMonitor

# define history and monitoring period and crop input data
start_hist = datetime(2002, 1, 1)
start_monitor = datetime(2010, 1, 1)
end_monitor = datetime(2018, 1, 1)
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print(f"First date: {dates[0]}")
print(f"Last date: {dates[-1]}")
print(f"Shape of data array: {data.shape}")

# apply BFASTMonitor using the OpenCL backend and the first device (e.g., GPU)
model = BFASTMonitor(
    start_monitor,
    freq=365,
    k=3,
    hfrac=0.25,
    trend=False,
    level=0.05,
    backend='opencl',
    device_id=0,
)
model.fit(data, dates, n_chunks=5, nan_value=-32768)

# Meaning of the values in model.breaks:
#   -2     not enough data for a pixel
#   -1     no breaks detected
#   >= 0   position (index) of the first break
print("Detected breaks")
print(model.breaks)
def _fit_first_break(model, monthly, monitor_start, n_chunks):
    """Fit ``model`` on a single monthly series and describe its first break.

    Returns ``(magnitude, break_time)`` where ``break_time`` is
    ``year + month / 12`` of the first detected break, or ``None`` when the
    model reports no break (negative break index).
    """
    data = np.reshape(monthly.values, (len(monthly), 1, 1))
    dates = monthly.index.to_pydatetime()
    model.fit(data, dates, n_chunks=n_chunks, nan_value=-32768)

    magnitude = model.magnitudes[0][0]
    break_idx = model.breaks[0][0]
    if break_idx < 0:
        return magnitude, None

    # The break index counts from the start of the monitoring period, so it
    # has to be resolved against the dates from `monitor_start` onwards.
    dt = dates[dates >= monitor_start][break_idx]
    return magnitude, dt.year + dt.month / 12


def calc_trend_breakpoints():
    """Compute BFAST breaks and piecewise-trend residuals per grid cell.

    For every cell in ``ds_lai.lut`` the LAI and quality-masked VOD series are
    averaged to monthly values and normalized. The function then

    * fits a BFASTMonitor model (monitoring from 2012-01-01) to each series
      and stores the magnitude and the time of the first break, and
    * for each candidate break year 2010..2019 fits separate Mann-Kendall
      trend lines before/after that year and stores the residual sum of
      squares.

    One row per cell is appended to the output CSV. The ``*_b2``/``*_m2``
    columns are kept in the file layout but are never filled: a single-pixel
    fit yields one break only.
    """
    fout = Path('/Users/u0116961/Documents/work/deforestation_paper/breakpoints.csv')

    ds_lai = io('LAI')
    ds_vod = io('SMOS_IC')

    date_from = '2010-01-01'
    date_to = '2019-12-31'

    x0s = np.arange(2010, 2020)

    rss_cols = [f'RSS_LAI_{x0}' for x0 in x0s] + [f'RSS_VOD_{x0}' for x0 in x0s] + \
               ['bfast_lai_b1', 'bfast_lai_b2', 'bfast_lai_m1', 'bfast_lai_m2'] + \
               ['bfast_vod_b1', 'bfast_vod_b2', 'bfast_vod_m1', 'bfast_vod_m2']

    start_date = datetime(2012, 1, 1)
    bfast = BFASTMonitor(
        start_date,
        freq=365,
        k=3,
        hfrac=0.25,
        trend=False,
        level=0.05,
        verbose=0,
        backend='python',
        device_id=0,
    )

    for i, _ in ds_lai.lut.iterrows():
        print(f'{i} / {len(ds_lai.lut)}')

        lai = ds_lai.read('LAI', i, date_from=date_from, date_to=date_to).dropna()
        vod = ds_vod.read('VOD', i, date_from=date_from, date_to=date_to)
        if len(vod) > 0:
            # mask VOD retrievals that are flagged or too uncertain
            invalid = (ds_vod.read('Flags', i, date_from=date_from, date_to=date_to) > 0) | \
                      (ds_vod.read('RMSE', i, date_from=date_from, date_to=date_to) > 8) | \
                      (ds_vod.read('VOD_StdErr', i, date_from=date_from, date_to=date_to) > 1.2)
            vod[invalid] = np.nan
            vod = vod.dropna()

        if len(lai) < 1 or len(vod) < 1:
            continue

        mlai = pd.DataFrame(normalize(lai.resample('M').mean()), columns=['LAI'])
        mvod = pd.DataFrame(normalize(vod.resample('M').mean()), columns=['VOD'])

        rss = pd.DataFrame(columns=rss_cols, index=(i,))

        # BFAST on VOD. The break index is resolved against `start_date`,
        # the monitoring start the model was built with (the previous code
        # used 2011-01-01 here, shifting the reported break time).
        magnitude, break_time = _fit_first_break(bfast, mvod, start_date, 5)
        rss.loc[i, 'bfast_vod_m1'] = magnitude
        if break_time is not None:
            rss.loc[i, 'bfast_vod_b1'] = break_time

        # BFAST on LAI
        magnitude, break_time = _fit_first_break(bfast, mlai, start_date, None)
        rss.loc[i, 'bfast_lai_m1'] = magnitude
        if break_time is not None:
            rss.loc[i, 'bfast_lai_b1'] = break_time

        for x0 in x0s:
            mlai[f'LAI_trend_{x0}'] = np.nan
            mvod[f'VOD_trend_{x0}'] = np.nan

            # split both series into the part up to x0 and the part after it
            lai_l = mlai[date_from:f'{x0}']['LAI']
            lai_u = mlai[f'{x0 + 1}':date_to]['LAI']
            vod_l = mvod[date_from:f'{x0}']['VOD']
            vod_u = mvod[f'{x0 + 1}':date_to]['VOD']

            for lai_seg, vod_seg in zip([lai_l, lai_u], [vod_l, vod_u]):
                try:
                    mk_lai = mk.original_test(lai_seg)
                    mk_vod = mk.original_test(vod_seg)
                    x = (lai_seg.index.year - lai_seg.index.year.min()) * 12 + lai_seg.index.month
                    mlai.loc[lai_seg.index, f'LAI_trend_{x0}'] = mk_lai.intercept + mk_lai.slope * x.values
                    x = (vod_seg.index.year - vod_seg.index.year.min()) * 12 + vod_seg.index.month
                    mvod.loc[vod_seg.index, f'VOD_trend_{x0}'] = mk_vod.intercept + mk_vod.slope * x.values
                except Exception:
                    # best effort: a segment that cannot be fitted keeps its
                    # NaN trend; KeyboardInterrupt/SystemExit still propagate
                    continue

            rss.loc[i, f'RSS_LAI_{x0}'] = ((mlai['LAI'] - mlai[f'LAI_trend_{x0}']) ** 2).sum()
            rss.loc[i, f'RSS_VOD_{x0}'] = ((mvod['VOD'] - mvod[f'VOD_trend_{x0}']) ** 2).sum()

        # append to an existing file, otherwise create it with a header
        if fout.exists():
            rss.to_csv(fout, float_format='%0.4f', mode='a', header=False)
        else:
            rss.to_csv(fout, float_format='%0.4f')
data_orig = numpy.load(ifile_data)

# read one ISO date (YYYY-MM-DD) per line, ignoring empty lines
with open(ifile_meta) as f:
    dates = f.read().split('\n')
    dates = [datetime.strptime(d, '%Y-%m-%d') for d in dates if len(d) > 0]

# restrict data and dates to the history + monitoring window
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print("First date: {}".format(dates[0]))
print("Last date: {}".format(dates[-1]))
print("Shape of data array: {}".format(data.shape))

# fit BFAST using the CPU implementation (single pixel)
model = BFASTMonitor(start_monitor, freq=freq, k=k, hfrac=hfrac, trend=trend,
                     level=level, backend='python', verbose=1)

# only apply on a small subset
data = data[:, :50, :50]
model.fit(data, dates, nan_value=-32768)

# visualize results
breaks = model.breaks
means = model.means

# blank the means where no break was detected (-1) and where they are positive
no_breaks_indices = (breaks == -1)
means[no_breaks_indices] = 0
means[means > 0] = 0

# `numpy.float` was only an alias of the builtin float and has been removed
# from NumPy (AttributeError since 1.24); the builtin gives the same dtype.
breaks_plot = breaks.astype(float)