def load_crophealth_data():
    """
    Loads the Sentinel-2 analysis-ready data (ARD) product for the crop
    health case-study area. The ARD product is provided for the last year.

    Last modified: January 2020

    Outputs
    -------
    ds : xarray.Dataset
        Dataset containing combined, masked data from Sentinel-2a and -2b.
        Masked values are set to 'nan'.
    """
    # Suppress warnings
    warnings.filterwarnings('ignore')

    # Initialise the data cube. The 'app' argument is used to identify this app
    dc = datacube.Datacube(app='Crophealth-app')

    # Specify latitude and longitude ranges
    latitude = (-24.974997, -24.995971)
    longitude = (152.429994, 152.395805)

    # Specify the date range: today's date minus 365 days, giving the
    # one-year window described in the docstring.
    # Dates are converted to strings as required by the loading function below
    end_date = dt.date.today()
    start_date = end_date - dt.timedelta(days=365)
    time = (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))

    # Construct the data cube query
    products = ["s2a_ard_granule", "s2b_ard_granule"]

    query = {
        'x': longitude,
        'y': latitude,
        'time': time,
        'measurements': [
            'nbar_red',
            'nbar_green',
            'nbar_blue',
            'nbar_nir_1',
            'nbar_swir_2',
            'nbar_swir_3'
        ],
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10)
    }

    # Load the data and mask out bad-quality pixels
    ds_s2 = load_ard(dc, products=products, min_gooddata=0.5, **query)

    # Calculate the normalised difference vegetation index (NDVI) across
    # all pixels for each image. The result is stored as a data variable
    # in the dataset
    ds_s2 = calculate_indices(ds_s2, index='NDVI', collection='ga_s2_1')

    # Return the data
    return ds_s2
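# --- Usage sketch (illustrative, not part of the original function) ---
# A minimal example of calling load_crophealth_data() and summarising the
# NDVI it adds, assuming the imports used above (warnings, datetime as dt,
# datacube, load_ard, calculate_indices) are available and a DEA datacube
# environment is configured.

import matplotlib.pyplot as plt

ds = load_crophealth_data()

# Average NDVI over the spatial dimensions to get a single time series
ndvi_timeseries = ds.NDVI.mean(dim=['x', 'y'])

# Plot NDVI through time for the case-study area
ndvi_timeseries.plot(marker='o')
plt.title('Mean NDVI for the crop health case-study area')
plt.show()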
prefire_end = '2020-01-06'
postfire_start = '2020-01-07'
postfire_end = '2020-05-01'

query_1 = {
    "x": (central_lon - buffer, central_lon + buffer),
    "y": (central_lat - buffer, central_lat + buffer),
    "time": (prefire_start, prefire_end),
    "output_crs": "EPSG:32755",
    "resolution": (-10, 10)
}

prefire_data = load_ard(
    dc=dc,
    products=['s2a_ard_granule', 's2b_ard_granule'],
    measurements=['nbart_nir_1', 'nbart_swir_3'],
    min_gooddata=0,
    # dask_chunks={'x': 'auto', 'y': 'auto'},
    group_by='solar_day',
    **query_1)

# Create a single pre-fire composite by taking the median through time
prefire_image = prefire_data.median(dim='time')

# Calculate the Normalised Burn Ratio (NBR) on the pre-fire composite
prefire_image = calculate_indices(prefire_image,
                                  index='NBR',
                                  collection='ga_s2_1',
                                  drop=False)

prefire_burnratio = prefire_image.NBR
prefire_burnratio.data

query_2 = {
    "x": (central_lon - buffer, central_lon + buffer),
    "y": (central_lat - buffer, central_lat + buffer),
def get_training_data_for_shp(gdf,
                              index,
                              row,
                              out_arrs,
                              out_vars,
                              products,
                              dc_query,
                              custom_func=None,
                              field=None,
                              calc_indices=None,
                              reduce_func=None,
                              drop=True,
                              zonal_stats=None):
    """
    Function to extract data from the ODC for training a machine learning
    classifier using a geopandas geodataframe of labelled geometries.
    This function provides a number of pre-defined methods for producing
    training data, including: calculating band indices, reducing time
    series using summary statistics, and/or generating zonal statistics
    across polygons. The 'custom_func' parameter provides a method for
    the user to supply a custom function for generating features rather
    than using the pre-defined methods.

    Parameters
    ----------
    gdf : geopandas geodataframe
        geometry data in the form of a geopandas geodataframe
    products : list
        a list of products to load from the datacube.
        e.g. ['ga_ls7e_ard_3', 'ga_ls8c_ard_3']
    dc_query : dictionary
        Datacube query object, should not contain lat and long (x or y)
        variables as these are supplied by the 'gdf' variable
    field : string
        A string containing the name of the column with class labels.
        Field must contain numeric values.
    out_arrs : list
        An empty list into which the training data arrays are stored.
    out_vars : list
        An empty list into which the data variable names are stored.
    custom_func : function, optional
        A custom function for generating feature layers. If this parameter
        is set, all other options (excluding 'zonal_stats') will be ignored.
        The result of the 'custom_func' must be a single xarray dataset
        containing 2D coordinates (i.e. x, y - no time dimension). The
        custom function has access to the datacube dataset extracted using
        the 'dc_query' params. Example custom function to return multiple
        products:
            `def custom_function(ds):
                dc = datacube.Datacube(app='custom_function')
                mad = dc.load(product='ls8_nbart_tmad_annual',
                              like=ds.geobox)
                output = xr.merge([ds, mad])
                return output`
    calc_indices : list, optional
        If not using a custom func, then this parameter provides a method
        for calculating a number of remote sensing indices
        (e.g. `['NDWI', 'NDVI']`).
    reduce_func : string, optional
        Function to reduce the data from multiple time steps to a single
        timestep. Options are 'mean', 'median', 'std', 'max', 'min' and
        'geomedian'. Ignored if 'custom_func' is provided.
    drop : boolean, optional
        If this variable is set to True, and 'calc_indices' are supplied,
        the spectral bands will be dropped from the dataset leaving only
        the band indices as data variables in the dataset. Default is True.
    zonal_stats : string, optional
        An optional string giving the name of the zonal statistic to
        calculate for each polygon. Default is None (all pixel values are
        returned). Supported values are 'mean', 'median', 'max', 'min' and
        'std'. Will work in conjunction with a 'custom_func'.

    Returns
    -------
    Two lists: a list of numpy.arrays containing classes and extracted data
    for each pixel or polygon, and another containing the data variable
    names.
""" # prevent function altering dictionary kwargs dc_query = deepcopy(dc_query) # remove dask chunks if supplied as using # mulitprocessing for parallelization if 'dask_chunks' in dc_query.keys(): dc_query.pop('dask_chunks', None) # connect to datacube dc = datacube.Datacube(app='training_data') # set up query based on polygon (convert to albers) geom = geometry.Geometry(gdf.geometry.values[index].__geo_interface__, geometry.CRS('epsg:3577')) q = {"geopolygon": geom} # merge polygon query with user supplied query params dc_query.update(q) # load_ard doesn't handle derivative products, so check # products aren't one of those below others = [ 'ls5_nbart_geomedian_annual', 'ls7_nbart_geomedian_annual', 'ls8_nbart_geomedian_annual', 'ls5_nbart_tmad_annual', 'ls7_nbart_tmad_annual', 'ls8_nbart_tmad_annual', 'landsat_barest_earth', 'ls8_barest_earth_albers' ] if products[0] in others: ds = dc.load(product=products[0], **dc_query) ds = ds.where(ds != 0, np.nan) else: # load data with HiddenPrints(): ds = load_ard(dc=dc, products=products, output_crs='EPSG:3577', **dc_query) # create polygon mask with HiddenPrints(): mask = xr_rasterize(gdf.iloc[[index]], ds) # Use custom function for training data if it exists if custom_func is not None: with HiddenPrints(): data = custom_func(ds) # Mask dataset data = data.where(mask) else: # Mask dataset ds = ds.where(mask) # first check enough variables are set to run functions if (len(ds.time.values) > 1) and (reduce_func == None): raise ValueError( "You're dataset has " + str(len(ds.time.values)) + " time-steps, please provide a reduction function," + " e.g. reduce_func='mean'") if calc_indices is not None: # determine which collection is being loaded if products[0] in others: collection = 'ga_ls_2' elif '3' in products[0]: collection = 'ga_ls_3' elif 's2' in products[0]: collection = 'ga_s2_1' if len(ds.time.values) > 1: if reduce_func in ['mean', 'median', 'std', 'max', 'min']: with HiddenPrints(): data = calculate_indices(ds, index=calc_indices, drop=drop, collection=collection) # getattr is equivalent to calling data.reduce_func method_to_call = getattr(data, reduce_func) data = method_to_call(dim='time') elif reduce_func == 'geomedian': data = GeoMedian().compute(ds) with HiddenPrints(): data = calculate_indices(data, index=calc_indices, drop=drop, collection=collection) else: raise Exception( reduce_func + " is not one of the supported" + " reduce functions ('mean','median','std','max','min', 'geomedian')" ) else: with HiddenPrints(): data = calculate_indices(ds, index=calc_indices, drop=drop, collection=collection) # when band indices are not required, reduce the # dataset to a 2d array through reduce function if calc_indices is None: if len(ds.time.values) > 1: if reduce_func == 'geomedian': data = GeoMedian().compute(ds) elif reduce_func in ['mean', 'median', 'std', 'max', 'min']: method_to_call = getattr(ds, reduce_func) data = method_to_call('time') else: data = ds.squeeze() if zonal_stats is None: # If no zonal stats were requested then extract all pixel values flat_train = sklearn_flatten(data) # Make a labelled array of identical size flat_val = np.repeat(row[field], flat_train.shape[0]) stacked = np.hstack((np.expand_dims(flat_val, axis=1), flat_train)) elif zonal_stats in ['mean', 'median', 'std', 'max', 'min']: method_to_call = getattr(data, zonal_stats) flat_train = method_to_call() flat_train = flat_train.to_array() stacked = np.hstack((row[field], flat_train)) else: raise Exception( zonal_stats + " is not one of the supported" + " reduce 
functions ('mean','median','std','max','min')") # Append training data and labels to list out_arrs.append(stacked) out_vars.append([field] + list(data.data_vars))
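# --- Usage sketch (illustrative, not from the original script) ---
# A minimal example of driving get_training_data_for_shp() over every
# polygon in a labelled geodataframe. The shapefile path, the 'class'
# column name (which must hold numeric labels) and the query values below
# are hypothetical placeholders; numpy (np) and the other dependencies of
# the function above are assumed to be imported in this module.

import geopandas as gpd

gdf = gpd.read_file('training_polygons.shp')   # hypothetical input file

query = {
    'time': ('2019-01-01', '2019-12-31'),
    'measurements': ['nbart_red', 'nbart_green', 'nbart_blue', 'nbart_nir'],
    'resolution': (-30, 30),
}

results, column_names = [], []

# Extract one training array per labelled polygon
for index, row in gdf.iterrows():
    get_training_data_for_shp(gdf, index, row,
                              results, column_names,
                              products=['ga_ls8c_ard_3'],
                              dc_query=query,
                              field='class',
                              calc_indices=['NDVI'],
                              reduce_func='median',
                              zonal_stats='median')

# Stack the per-polygon arrays into a single model-ready array
model_input = np.vstack(results)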
def load_s2_nbart_ts_cor_dask(
    dc,
    lat_top,
    lat_bottom,
    lon_left,
    lon_right,
    start_of_epoch,
    end_of_epoch,
    chunks,
    cor_type
):
    allbands = [
        "nbart_blue",
        "nbart_green",
        "nbart_red",
        "nbart_nir_2",
        "nbart_swir_2",
        "nbart_swir_3",
        "fmask",
    ]

    # Define spatial and temporal coverage
    if cor_type == 0:
        newquery = {
            "crs": "EPSG:3577",
            "x": (lon_left, lon_right),
            "y": (lat_top, lat_bottom),
            "time": (start_of_epoch, end_of_epoch),
            "output_crs": "EPSG:3577",
            "resolution": (-20, 20),
            "measurements": allbands,
            "dask_chunks": chunks,
            "group_by": "solar_day",
        }
    elif cor_type == 1:
        # UTM projection zone code
        outcrs = utm_code(lon_left, lon_right)
        newquery = {
            "x": (lon_left, lon_right),
            "y": (lat_top, lat_bottom),
            "time": (start_of_epoch, end_of_epoch),
            "output_crs": outcrs,
            "resolution": (-20, 20),
            "measurements": allbands,
            "dask_chunks": chunks,
            "group_by": "solar_day",
        }

    # Names of the targeted spectral bands as used within the returned dataset
    new_bandlabels = ["blue", "green", "red", "nir", "swir1", "swir2", "fmask"]

    # Load S2 data using the Datacube API
    s2_ds = ddh.load_ard(dc,
                         products=['s2a_ard_granule', 's2b_ard_granule'],
                         min_gooddata=0.0,
                         mask_pixel_quality=False,
                         **newquery)

    # Rename spectral band names to the new band labels
    rndic = dict(zip(allbands, new_bandlabels))
    s2_ds = s2_ds.rename(rndic)

    return s2_ds
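# --- Usage sketch (illustrative, not from the original script) ---
# Example of lazily loading a Sentinel-2 time series with the helper above
# and then computing NDVI from the renamed bands. The coordinates, dates and
# chunk sizes are hypothetical; `ddh` and `utm_code` are assumed to be
# defined elsewhere in this module, and the fmask masking below assumes
# DEA's convention that a value of 1 marks clear/valid pixels.

import datacube

dc = datacube.Datacube(app='s2_ts_example')

s2_ds = load_s2_nbart_ts_cor_dask(dc,
                                  lat_top=-35.30, lat_bottom=-35.35,
                                  lon_left=149.05, lon_right=149.10,
                                  start_of_epoch='2019-01-01',
                                  end_of_epoch='2019-12-31',
                                  chunks={'time': 1, 'x': 1000, 'y': 1000},
                                  cor_type=1)

# Keep only pixels flagged as clear (assumed: fmask == 1)
s2_clear = s2_ds.where(s2_ds.fmask == 1)

# NDVI from the renamed bands, built lazily then brought into memory
ndvi = (s2_clear.nir - s2_clear.red) / (s2_clear.nir + s2_clear.red)
ndvi = ndvi.compute()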
def run_filmstrip_app(output_name,
                      time_range,
                      time_step,
                      tide_range=(0.0, 1.0),
                      resolution=(-30, 30),
                      max_cloud=50,
                      ls7_slc_off=False,
                      size_limit=200):
    '''
    An interactive app that allows the user to select a region from a
    map, then load Digital Earth Australia Landsat data and combine it
    using the geometric median ("geomedian") statistic to reveal the
    median or 'typical' appearance of the landscape for a series of
    time periods.

    The results for each time period are combined into a 'filmstrip'
    plot which visualises how the landscape has changed in appearance
    across time, with a 'change heatmap' panel highlighting potential
    areas of greatest change.

    For coastal applications, the analysis can be customised to select
    only satellite images obtained during a specific tidal range
    (e.g. low, average or high tide).

    Last modified: June 2020

    Parameters
    ----------
    output_name : str
        A name that will be used to name the output filmstrip plot file.
    time_range : tuple
        A tuple giving the date range to analyse
        (e.g. `time_range = ('1988-01-01', '2017-12-31')`).
    time_step : dict
        This parameter sets the length of the time periods to compare
        (e.g. `time_step = {'years': 5}` will generate one filmstrip
        plot for every five years of data; `time_step = {'months': 18}`
        will generate one plot for each 18-month period, etc.). Time
        periods are counted from the first value given in `time_range`.
    tide_range : tuple, optional
        An optional parameter that can be used to generate filmstrip
        plots based on specific ocean tide conditions. This can be
        valuable for analysing change consistently along the coast.
        For example, `tide_range = (0.0, 0.2)` will select only
        satellite images acquired at the lowest 20% of tides;
        `tide_range = (0.8, 1.0)` will select images from the highest
        20% of tides. The default is `tide_range = (0.0, 1.0)`, which
        will select all images regardless of tide.
    resolution : tuple, optional
        The spatial resolution to load data. The default is
        `resolution = (-30, 30)`, which will load data at 30 m pixel
        resolution. Increasing this (e.g. to `resolution = (-100, 100)`)
        can be useful for loading large spatial extents.
    max_cloud : int, optional
        This parameter can be used to exclude satellite images with
        excessive cloud. The default is `50`, which will keep all images
        with less than 50% cloud.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from
        after the Landsat 7 SLC failure (i.e. SLC-off). Defaults to
        False, which removes all Landsat 7 observations acquired after
        31 May 2003.
    size_limit : int, optional
        An optional size limit for the area selection in sq km.
        Defaults to 200 sq km.

    Returns
    -------
    ds_geomedian : xarray Dataset
        An xarray dataset containing geomedian composites for each
        timestep in the analysis.
    '''

    ########################
    # Select and load data #
    ########################

    # Define centre_coords as a global variable
    global centre_coords

    # Test if centre_coords is in the global namespace;
    # use default value if it isn't
    if 'centre_coords' not in globals():
        centre_coords = (-33.9719, 151.1934)

    # Plot interactive map to select area
    basemap = basemap_to_tiles(basemaps.Esri.WorldImagery)
    geopolygon = select_on_a_map(height='600px',
                                 layers=(basemap,),
                                 center=centre_coords,
                                 zoom=12)

    # Set centre coords based on most recent selection to re-focus
    # subsequent data selections
    centre_coords = geopolygon.centroid.points[0][::-1]

    # Test size of selected area
    area = geopolygon.to_crs(crs=CRS('epsg:3577')).area / 1000000
    if area > size_limit:
        print(f'Warning: Your selected area is {area:.00f} sq km. '
              f'Please select an area of less than {size_limit} sq km.'
              f'\nTo select a smaller area, re-run the cell '
              f'above and draw a new polygon.')

    else:

        print('Starting analysis...')

        # Connect to datacube database
        dc = datacube.Datacube(app='Change_filmstrips')

        # Configure local dask cluster
        create_local_dask_cluster()

        # Obtain native CRS
        crs = mostcommon_crs(dc=dc,
                             product='ga_ls5t_ard_3',
                             query={'time': '1990',
                                    'geopolygon': geopolygon})

        # Create query based on time range, area selected, custom params
        query = {'time': time_range,
                 'geopolygon': geopolygon,
                 'output_crs': crs,
                 'gqa_iterative_mean_xy': [0, 1],
                 'cloud_cover': [0, max_cloud],
                 'resolution': resolution,
                 'dask_chunks': {'time': 1, 'x': 2000, 'y': 2000},
                 'align': (resolution[1] / 2.0, resolution[1] / 2.0)}

        # Load data from all three Landsats
        ds = load_ard(dc=dc,
                      measurements=['nbart_red', 'nbart_green', 'nbart_blue'],
                      products=['ga_ls5t_ard_3', 'ga_ls7e_ard_3',
                                'ga_ls8c_ard_3'],
                      min_gooddata=0.0,
                      ls7_slc_off=ls7_slc_off,
                      **query)

        # Optionally calculate tides for each timestep in the satellite
        # dataset and drop any observations outside this range
        if tide_range != (0.0, 1.0):
            ds = tidal_tag(ds=ds, tidepost_lat=None, tidepost_lon=None)
            min_tide, max_tide = ds.tide_height.quantile(tide_range).values
            ds = ds.sel(time=(ds.tide_height >= min_tide) &
                             (ds.tide_height <= max_tide))
            ds = ds.drop('tide_height')
            print(f' Keeping {len(ds.time)} observations with tides '
                  f'between {min_tide:.2f} and {max_tide:.2f} m')

        # Create time step ranges to generate filmstrips from
        bins_dt = pd.date_range(start=time_range[0],
                                end=time_range[1],
                                freq=pd.DateOffset(**time_step))

        # Bin all satellite observations by timestep. If some observations
        # fall outside the upper bin, label these with the highest bin
        labels = bins_dt.astype('str')
        time_steps = (pd.cut(ds.time.values, bins_dt, labels=labels[:-1])
                      .add_categories(labels[-1])
                      .fillna(labels[-1]))
        time_steps_var = xr.DataArray(time_steps,
                                      [('time', ds.time)],
                                      name='timestep')

        # Resample data temporally into time steps, and compute geomedians
        geomedian_ds = (ds.groupby(time_steps_var)
                        .apply(lambda ds_subset:
                               xr_geomedian(ds_subset,
                                            num_threads=1,
                                            eps=0.2 * (1 / 10_000),
                                            nocheck=True)))
def dNBR_processing(coordinates):

    # Load all data in the baseline period available from the
    # s2a/b_ard_granule datasets
    prefire_ard = load_ard(
        dc=dc,
        products=['s2a_ard_granule', 's2b_ard_granule'],
        x=(coordinates.x - 0.1, coordinates.x + 0.1),
        y=(coordinates.y - 0.1, coordinates.y + 0.1),
        time=(prefire_start, prefire_end),
        measurements=['nbart_nir_1', 'nbart_swir_3'],
        min_gooddata=0.1,
        output_crs='EPSG:32755',  # UTM Zone 55S
        resolution=(-10, 10),
        group_by='solar_day')

    # Calculate NBR on all pre-fire images
    prefire_ard = calculate_indices(prefire_ard,
                                    index='NBR',
                                    collection='ga_s2_1',
                                    drop=False)

    # Compute the median using all observations in the dataset along the time axis
    prefire_image = prefire_ard.median(dim='time')

    # Delete the pre-fire time series to free memory
    del prefire_ard

    # Select NBR
    prefire_NBR = prefire_image.NBR
    del prefire_image

    # Load all data in the post-fire period available from the
    # s2a/b_ard_granule datasets
    postfire_ard = load_ard(
        dc=dc,
        products=['s2a_ard_granule', 's2b_ard_granule'],
        x=(coordinates.x - 0.1, coordinates.x + 0.1),
        y=(coordinates.y - 0.1, coordinates.y + 0.1),
        time=(postfire_start, postfire_end),
        measurements=['nbart_nir_1', 'nbart_swir_3'],
        min_gooddata=0.1,
        output_crs='EPSG:32755',  # UTM Zone 55S
        resolution=(-10, 10),
        group_by='solar_day')

    # Calculate NBR on all post-fire images
    postfire_ard = calculate_indices(postfire_ard,
                                     index='NBR',
                                     collection='ga_s2_1',
                                     drop=False)

    # Calculate the median post-fire image
    postfire_image = postfire_ard.median(dim='time')
    del postfire_ard

    # Select NBR
    postfire_NBR = postfire_image.NBR
    del postfire_image

    # Calculate delta NBR (dNBR)
    delta_NBR = prefire_NBR - postfire_NBR
    del prefire_NBR
    del postfire_NBR

    x = np.round(coordinates.x, decimals=4)
    y = np.round(coordinates.y, decimals=4)

    # Turn dNBR into an xarray dataset for export to GeoTIFF
    dnbr_dataset = delta_NBR.to_dataset(name='delta_NBR')

    # cog.write_cog(dnbr_dataset, './NBR_geotiffs/{x}_{y}_dNBR.tif')
    write_geotiff(f'/scratch/wj97/ab4513/dNBR_geotiffs/{x}_{y}_dNBR.tif',
                  dnbr_dataset)

    del delta_NBR
    del dnbr_dataset
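# --- Usage sketch (illustrative, not from the original script) ---
# dNBR_processing() expects an object with `.x` and `.y` attributes (e.g. a
# simple namedtuple or a shapely Point), plus module-level `dc`,
# `prefire_start`/`prefire_end` and `postfire_start`/`postfire_end`
# variables as defined in the snippets above. The tile centres below are
# hypothetical examples within UTM Zone 55S.

from collections import namedtuple

Point = namedtuple('Point', ['x', 'y'])

# Hypothetical tile centres (longitude, latitude)
tile_centres = [
    Point(x=150.15, y=-35.70),
    Point(x=150.35, y=-35.70),
]

# Write one dNBR GeoTIFF per tile centre
for centre in tile_centres:
    dNBR_processing(centre)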