def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources, geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)

    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(*[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params, file_path)
    return datasets
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)

    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    return datasets
def test_write_dataset_to_netcdf(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(100, 100, affine, geometry.CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})

    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.values, {'units': coord.units, 'crs': geobox.crs})

    dataset['B10'] = (geobox.dimensions,
                      np.arange(10000, dtype='int16').reshape(geobox.shape),
                      {'nodata': 0, 'units': '1', 'crs': geobox.crs})

    write_dataset_to_netcdf(dataset, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
def write_your_netcdf(data, dataset_name, filename, crs):
    """Turn an xarray DataArray into a Dataset so it can be written to NetCDF,
    attaching the CRS definition from the original array.

    data: your xarray DataArray or Dataset; dataset_name: a string naming your variable.

    Last modified: May 2018
    Author: Bex Dunn
    """
    # Turn the array into a dataset so we can write the NetCDF
    if isinstance(data, xr.DataArray):
        dataset = data.to_dataset(name=dataset_name)
    elif isinstance(data, xr.Dataset):
        dataset = data
    else:
        print('your data might be the wrong type, it is: ' + str(type(data)))
    # Grab our crs attributes to write a spatially-referenced NetCDF
    dataset.attrs['crs'] = crs

    try:
        write_dataset_to_netcdf(dataset, filename)
    except RuntimeError as err:
        print("RuntimeError: {0}".format(err))
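A hypothetical call to this helper, assuming `ds` is an xarray.Dataset already returned by a `dc.load()` query (so it carries a `crs` attribute and spatial coordinates); the derived variable and output filename are illustrative only:

# Hypothetical usage sketch of write_your_netcdf; `ds` is assumed to come from
# a datacube dc.load() query, and the names 'ndvi'/'ndvi.nc' are placeholders.
ndvi = (ds.nir - ds.red) / (ds.nir + ds.red)   # derive a DataArray to save
write_your_netcdf(ndvi, 'ndvi', filename='ndvi.nc', crs=ds.crs)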
def test_useful_error_on_write_empty_dataset(tmpnetcdf_filename):
    with pytest.raises(DatacubeException) as excinfo:
        ds = xr.Dataset()
        write_dataset_to_netcdf(ds, tmpnetcdf_filename)
    assert 'empty' in str(excinfo.value)

    with pytest.raises(DatacubeException) as excinfo:
        ds = xr.Dataset(data_vars={'blue': (('time',), numpy.array([0, 1, 2]))})
        write_dataset_to_netcdf(ds, tmpnetcdf_filename)
    assert 'CRS' in str(excinfo.value)
def xarray_to_cfnetcdf(data_xarray, output_nc_file, variable_name, crs):
    # Data Cube friendly dataset; copy booleans to int8 as bool is not supported
    dcf_ds = data_xarray.astype('int8', copy=False).to_dataset(name=variable_name)

    # Set a valid crs object; the Data Cube relies on the Python object, so a WKT representation of the CRS will fail
    dcf_ds.attrs['crs'] = crs

    # Set units for the year coordinate
    dcf_ds.coords['year'].attrs['units'] = 'years since 0'

    # Set units for the data variable
    dcf_ds.data_vars[variable_name].attrs['units'] = 1

    # Write the dataset out using the datacube storage method - this is an unfortunate necessity
    # and we should expose a function like this in a nicer way
    write_dataset_to_netcdf(dcf_ds, output_nc_file)
def create_files(data_ret, odir, MY_OBS_VAR, dt_list):
    for k, data in data_ret.items():
        if len(odir) > 0:
            global_attributes = dict(
                Comment1='Data observed on ' + ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS"
            obs_filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                           + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS_OBS"
            try:
                ncfl = filename + ".nc"
                ncobs = obs_filename + ".nc"
                filename = filename + ".tif"
                obs_filename = obs_filename + ".tif"
                write_dataset_to_netcdf(data[[MY_OBS_VAR]],
                                        global_attributes=global_attributes,
                                        variable_params={MY_OBS_VAR: {'zlib': True}},
                                        filename=Path(ncobs))
                write_dataset_to_netcdf(data[['swir1', 'nir', 'green']],
                                        global_attributes=global_attributes,
                                        variable_params={'swir1': {'zlib': True},
                                                         'nir': {'zlib': True},
                                                         'green': {'zlib': True}},
                                        filename=Path(ncfl))
                write_geotiff(filename=obs_filename, dataset=data[[MY_OBS_VAR]])
                write_geotiff(filename=filename, dataset=data[['swir1', 'nir', 'green']],
                              profile_override={'photometric': 'RGB'})
            except RuntimeError as e:
                _log.info('File exists: %s', e)
                return
        else:
            # data['days_since_1970'] = day_arr
            my_data[k] = data
            print("computing finished and ready as dictionary in my_data ", str(datetime.now()))
def run(tile, gwf, center_dt):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        gwf (GridWorkflow): GridWorkflow object instantiated with the corresponding product
        center_dt (datetime): Date to be used in making the filename

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        # TODO: Need a more dynamic way to handle this filename (e.g.: global variable for the path up to datacube_ingest)
        nc_filename = os.path.expanduser('~/datacube_ingest/recipes/landsat_8_ndvi_mean/ndvi_mean_%d_%d_%s.nc'
                                         % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            raise ValueError('%s already exist' % nc_filename)
        # Load Landsat sr
        sr = gwf.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename, netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        print('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        raise
        return None
def test_write_dataset_to_netcdf(tmpnetcdf_filename, odc_style_xr_dataset):
    write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == odc_style_xr_dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
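To run this fixture-based variant standalone, a minimal sketch of what an `odc_style_xr_dataset` fixture could construct, modeled directly on the explicit GeoBox-based setup in the first test above; `GEO_PROJ` is assumed here to be a WGS84 CRS string, and the real fixture may differ.

# Assumption-based sketch of an odc_style_xr_dataset fixture, mirroring the
# explicit setup shown earlier in this listing. GEO_PROJ is a stand-in value.
import numpy as np
import pytest
import xarray
from affine import Affine
from datacube.utils import geometry

GEO_PROJ = 'EPSG:4326'  # assumed stand-in for the test suite's constant


@pytest.fixture
def odc_style_xr_dataset():
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(100, 100, affine, geometry.CRS(GEO_PROJ))

    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})
    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.values, {'units': coord.units, 'crs': geobox.crs})
    dataset['B10'] = (geobox.dimensions,
                      np.arange(10000, dtype='int16').reshape(geobox.shape),
                      {'nodata': 0, 'units': '1', 'crs': geobox.crs})
    return dataset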
def do_ndvi_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_type = config['ndvi_dataset_type']
    measurement = output_type.measurements['ndvi']
    output_dtype = np.dtype(measurement['dtype'])
    nodata_value = np.dtype(output_dtype).type(measurement['nodata'])

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    measurements = ['red', 'nir']

    nbar_tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, measurements)

    ndvi = calculate_ndvi(nbar, nodata=nodata_value, dtype=output_dtype, units=measurement['units'])

    def _make_dataset(labels, sources):
        assert len(sources)
        geobox = nbar.geobox
        source_data = union_points(*[dataset.extent.to_crs(geobox.crs).points for dataset in sources])
        valid_data = intersect_points(geobox.extent.points, source_data)
        dataset = make_dataset(product=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    ndvi['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=ndvi,
        filename=Path(file_path),
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
def write_dataset_to_storage(self, dataset, filename,
                             global_attributes=None,
                             variable_params=None,
                             storage_config=None,
                             **kwargs):
    # TODO: Currently ingestor copies chunking info from storage_config to
    #       variable_params, this logic should probably happen here.
    write_dataset_to_netcdf(dataset, filename,
                            global_attributes=global_attributes,
                            variable_params=variable_params,
                            **kwargs)
    return {}
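A rough sketch of what moving that chunking copy into the driver might look like; the `storage_config['chunking']` layout, the helper name, and the use of per-variable `chunksizes` are assumptions drawn from the TODO above, not the actual ingestor logic.

# Hypothetical helper only: copy chunking from a storage config into
# per-variable parameters before writing. The 'chunking' key layout is an
# assumption; the real ingestor/driver code may differ.
def _apply_chunking(variable_params, storage_config, dataset):
    variable_params = dict(variable_params or {})
    chunking = (storage_config or {}).get('chunking')
    if chunking:
        for name, var in dataset.data_vars.items():
            params = dict(variable_params.get(name, {}))
            # Chunk sizes must follow each variable's own dimension order
            params.setdefault('chunksizes',
                              [chunking[dim] for dim in var.dims if dim in chunking])
            variable_params[name] = params
    return variable_params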
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(path, 'ndvi_mean_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing', nc_filename)
            return nc_filename
        # Load Landsat sr
        sr = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename, netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        logger.info('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
def test_write_dataset_to_netcdf(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = GeoBox(100, 100, affine, CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})

    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.labels, {'units': coord.units})

    dataset['B10'] = (geobox.dimensions,
                      numpy.arange(10000).reshape(geobox.shape),
                      {'nodata': 0, 'units': '1'})

    write_dataset_to_netcdf(dataset, {'foo': 'bar'}, {'B10': {'attrs': {'abc': 'xyz'}}},
                            Path(tmpnetcdf_filename))

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
def do_fc_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_product = config['fc_product']

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    nbar_tile: Tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, ['green', 'red', 'nir', 'swir1', 'swir2'])

    output_measurements = config['fc_product'].measurements.values()
    fc_dataset = make_fc_tile(nbar, output_measurements, config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbar.geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon.from_sources_extents(sources, nbar.geobox))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=fc_dataset,
        filename=file_path,
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
sr_max = sr_dask.max('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_max.rename({'blue': 'blue_max',
               'green': 'green_max',
               'red': 'red_max',
               'nir': 'nir_max',
               'swir1': 'swir1_max',
               'swir2': 'swir2_max',
               'ndvi': 'ndvi_max'}, inplace=True)

sr_std = sr_dask.std('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_std.rename({'blue': 'blue_std',
               'green': 'green_std',
               'red': 'red_std',
               'nir': 'nir_std',
               'swir1': 'swir1_std',
               'swir2': 'swir2_std',
               'ndvi': 'ndvi_std'}, inplace=True)

# Merge dataarrays
combined = xr.merge([sr_mean, sr_min, sr_max, sr_std])
combined.attrs['crs'] = sr_dask_0.attrs['crs']
print(combined)

# with ProgressBar():
write_dataset_to_netcdf(combined, '/tmp/sr_reduced.nc')
print(lon_range, lat_range)
print(crs)

for platform in platform_list:
    product_name = '{}_{}_albers'.format(platform, product_type)
    print('Loading product: {}'.format(product_name))

    output_file = '/g/data/u46/users/dra547/erf_07_09_2013_' + product_name + '.cdf'
    print(output_file)

    dataset = dc.load(product=product_name,
                      x=lon_range,
                      y=lat_range,
                      time=(acq_min, acq_max),
                      group_by='solar_day',
                      crs=crs,
                      measurements=measurements_list)

    # Load PQ Mask
    mask_product = '{}_{}_albers'.format(platform, 'pq')
    sensor_pq = dc.load(product=mask_product,
                        group_by='solar_day',
                        fuse_func=ga_pq_fuser,
                        like=dataset)

    cloud_free = make_mask(sensor_pq.pixelquality, ga_good_pixel=True)
    dataset = dataset.where(cloud_free).fillna(-999).astype('int16')
    dataset.attrs['crs'] = sensor_pq.crs  # Temporarily required until xarray issue #1009 gets into a release

    print(dataset)
    write_dataset_to_netcdf(dataset, output_file)
def test_netcdf_source(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(110, 100, affine, geometry.CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})

    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.values, {'units': coord.units, 'crs': geobox.crs})

    dataset['B10'] = (geobox.dimensions,
                      numpy.arange(11000, dtype='int16').reshape(geobox.shape),
                      {'nodata': 0, 'units': '1', 'crs': geobox.crs})

    write_dataset_to_netcdf(dataset, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        source = NetCDFDataSource(nco, 'B10')
        assert source.crs == geobox.crs
        assert source.transform.almost_equals(affine)
        assert (source.read() == dataset['B10']).all()

        dest = numpy.empty((60, 50))
        source.reproject(dest, affine, geobox.crs, 0, Resampling.nearest)
        assert (dest == dataset['B10'][:60, :50]).all()

        source.reproject(dest, affine * Affine.translation(10, 10), geobox.crs, 0, Resampling.nearest)
        assert (dest == dataset['B10'][10:70, 10:60]).all()

        source.reproject(dest, affine * Affine.translation(-10, -10), geobox.crs, 0, Resampling.nearest)
        assert (dest[10:, 10:] == dataset['B10'][:50, :40]).all()

        dest = numpy.empty((200, 200))
        source.reproject(dest, affine, geobox.crs, 0, Resampling.nearest)
        assert (dest[:100, :110] == dataset['B10']).all()

        source.reproject(dest, affine * Affine.translation(10, 10), geobox.crs, 0, Resampling.nearest)
        assert (dest[:90, :100] == dataset['B10'][10:, 10:]).all()

        source.reproject(dest, affine * Affine.translation(-10, -10), geobox.crs, 0, Resampling.nearest)
        assert (dest[10:110, 10:120] == dataset['B10']).all()

        source.reproject(dest, affine * Affine.scale(2, 2), geobox.crs, 0, Resampling.nearest)
        assert (dest[:50, :55] == dataset['B10'][1::2, 1::2]).all()

        source.reproject(dest, affine * Affine.scale(2, 2) * Affine.translation(10, 10),
                         geobox.crs, 0, Resampling.nearest)
        assert (dest[:40, :45] == dataset['B10'][21::2, 21::2]).all()

        source.reproject(dest, affine * Affine.scale(2, 2) * Affine.translation(-10, -10),
                         geobox.crs, 0, Resampling.nearest)
        assert (dest[10:60, 10:65] == dataset['B10'][1::2, 1::2]).all()
import os

from datacube.index.postgres._connections import PostgresDb
from datacube.index._api import Index
from datacube.api import GridWorkflow
from datacube.storage.storage import write_dataset_to_netcdf
from pprint import pprint
import numpy

nc_filename = os.path.expanduser('~/datacube_ingest/recipes/ndvi_mean/ndvi_mean_%d_%d_%s.nc' % (12, -16, '1987'))

db = PostgresDb.from_config()
i = Index(db)
gwf = GridWorkflow(i, product='ls8_espa_mexico')
cells_list = gwf.list_cells(product='ls8_espa_mexico', x=(-106, -101), y=(19, 23))
sr = gwf.load(cells_list[(12, -16)], dask_chunks={'x': 1000, 'y': 1000})

sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])

# Run temporal reductions and rename DataArrays
ndvi_mean = ndvi.mean('time', keep_attrs=True)
ndvi_mean = ndvi_mean.astype('int16')
ndvi_mean.attrs['crs'] = sr.attrs['crs']

write_dataset_to_netcdf(ndvi_mean, nc_filename)
print(nc_filename)
def write_dataset_to_storage(self, dataset, *args, **kargs):
    """See :meth:`datacube.drivers.driver.write_dataset_to_storage`"""
    return write_dataset_to_netcdf(dataset, *args, **kargs)
               'nir': 'nir_min',
               'swir1': 'swir1_min',
               'swir2': 'swir2_min',
               'ndvi': 'ndvi_min'}, inplace=True)

sr_max = sr.max('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_max.rename({'blue': 'blue_max',
               'green': 'green_max',
               'red': 'red_max',
               'nir': 'nir_max',
               'swir1': 'swir1_max',
               'swir2': 'swir2_max',
               'ndvi': 'ndvi_max'}, inplace=True)

sr_std = sr.std('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_std.rename({'blue': 'blue_std',
               'green': 'green_std',
               'red': 'red_std',
               'nir': 'nir_std',
               'swir1': 'swir1_std',
               'swir2': 'swir2_std',
               'ndvi': 'ndvi_std'}, inplace=True)

combined = xr.merge([sr_mean, sr_min, sr_max, sr_std])
combined.attrs['crs'] = sr.attrs['crs']
print(combined)
write_dataset_to_netcdf(combined, sr_out_dc_dask)

time_end = time.time()
timing.append(time_end - time_begin)

# summary
for id, t in enumerate(timing):
    print('Test %d completed in %.1f seconds' % (id, t))
              inplace=True)

sr_max = sr_clear2.max('time', keep_attrs=True)
sr_max.rename({'blue': 'blue_max',
               'green': 'green_max',
               'red': 'red_max',
               'nir': 'nir_max',
               'swir1': 'swir1_max',
               'swir2': 'swir2_max',
               'ndvi': 'ndvi_max'}, inplace=True)

sr_std = sr_clear2.std('time', keep_attrs=True)
sr_std.rename({'blue': 'blue_std',
               'green': 'green_std',
               'red': 'red_std',
               'nir': 'nir_std',
               'swir1': 'swir1_std',
               'swir2': 'swir2_std',
               'ndvi': 'ndvi_std'}, inplace=True)

combined = xr.merge([sr_mean, sr_min, sr_max, sr_std, terrain])
combined.attrs['crs'] = sr.attrs['crs']
print(combined)

with ProgressBar():
    write_dataset_to_netcdf(combined, nc_file)
def create_latest_images(data_info, period, odir):
    for k, v in data_info.items():
        data = data_info[k]
        day_arr = np.zeros([data.blue.shape[1], data.blue.shape[2]], dtype=np.int16)
        stored_band = np.zeros((6, 4000, 4000), dtype=np.int16)
        dt_list = data_info[k].time.values.astype('M8[D]').astype('O').tolist()
        print("looking latest pixels for ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
        for index, dt in enumerate(data_info[k].time.values.astype('M8[D]').astype('O').tolist()):
            ds = data_info[k].isel(time=index)
            days = (dt - MY_REF_DATE).days
            day_arr = update_latest_pixel(index, days, ds, day_arr, stored_band)
        data = data.isel(time=0).drop('time')
        for count, band in enumerate([data.blue, data.green, data.red,
                                      data.nir, data.swir1, data.swir2]):
            band.data = stored_band[count]
            band.data[band.data == 0] = -999
        day_arr[day_arr == 0] = -999
        day_arr = xr.DataArray(day_arr, coords=data.coords, dims=['y', 'x'])
        data['days_since_1970'] = day_arr
        my_data[k] = data
        if odir:
            global_attributes = dict(
                Comment1='Data acquired on ' + ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            FILE_LOC = odir
            filename = FILE_LOC + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) + "_" + \
                       str(datetime.now().date()) + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS.nc"
            try:
                write_dataset_to_netcdf(data,
                                        global_attributes=global_attributes,
                                        variable_params={'blue': {'zlib': True},
                                                         'green': {'zlib': True},
                                                         'red': {'zlib': True},
                                                         'nir': {'zlib': True},
                                                         'swir1': {'zlib': True},
                                                         'swir2': {'zlib': True},
                                                         'days_since_1970': {'zlib': True}},
                                        filename=Path(filename))
            except RuntimeError as e:
                print(e)
                return
            print("Written onto " + filename)
            _log.info('Data written onto %s', filename)
        else:
            print("computing finished and ready as dictionary in my_data ", str(datetime.now()))
def create_latest_images(data_info, duration, odir):
    for k, v in data_info.iteritems():
        data = data_info[k]
        day_arr = np.zeros([data.swir1.shape[1], data.swir1.shape[2]], dtype=np.int16)
        stored_band = np.zeros((6, 4000, 4000), dtype=np.int16)
        dt_list = list()
        dt_tmp_list = data_info[k].time.values.astype('M8[D]').astype('O').tolist()
        print("looking latest pixels for ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_tmp_list]))
        for index, dt in enumerate(data_info[k].time.values.astype('M8[D]').astype('O').tolist()):
            ds = data_info[k].isel(time=index)
            days = (dt - MY_REF_DATE).days
            print("count of zero pixel", str(np.count_nonzero(stored_band[0] == 0)))
            if np.count_nonzero(stored_band[0] == 0) > 0:
                day_arr = update_latest_pixel(index, days, ds, day_arr, stored_band)
                dt_list.append(dt)
            else:
                break
        print("The dates added are ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
        print("time delta for cell " + str(k) + str(len(dt_tmp_list) - len(dt_list)))
        data = data.isel(time=0).drop('time')
        for count, band in enumerate([data.swir1, data.nir, data.green]):
            band.data = stored_band[count]
            band.data[band.data == 0] = -999
        day_arr[day_arr == 0] = -999
        day_arr = xr.DataArray(day_arr, coords=data.coords, dims=['y', 'x'])
        my_data[k] = data
        if len(odir) > 0:
            day_ds = day_arr.to_dataset(name='days_since_1970')
            day_ds.attrs = data.attrs
            global_attributes = dict(
                Comment1='Data observed on ' + ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS"
            obs_filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                           + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS_OBS"
            try:
                ncfl = filename + ".nc"
                ncobs = obs_filename + ".nc"
                write_dataset_to_netcdf(data,
                                        global_attributes=global_attributes,
                                        variable_params={'swir1': {'zlib': True},
                                                         'nir': {'zlib': True},
                                                         'green': {'zlib': True}},
                                        filename=Path(ncfl))
                write_dataset_to_netcdf(day_ds,
                                        global_attributes=global_attributes,
                                        variable_params={'days_since_1970': {'zlib': True}},
                                        filename=Path(ncobs))
                filename = filename + ".tif"
                obs_filename = obs_filename + ".tif"
                write_geotiff(filename=filename, dataset=data,
                              profile_override={'photometric': 'RGB'})
                write_geotiff(filename=obs_filename, dataset=day_ds)
            except RuntimeError as e:
                _log.info('File exists: %s', e)
                return
        else:
            data['days_since_1970'] = day_arr
            my_data[k] = data
            print("computing finished and ready as dictionary in my_data ", str(datetime.now()))
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with terrain metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(path, 's2_20m_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing', nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1000, 'y': 1000})
        sr_0 = sr_0.apply(func=to_float, keep_attrs=True)
        # Load terrain metrics using same spatial parameters than sr
        dc = datacube.Datacube(app='s2_20m_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico', like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={'x': 1000, 'y': 1000})
        dc.close()
        # Keep clear pixels (2: Dark features, 4: Vegetation, 5: Not vegetated,
        # 6: Water, 7: Unclassified, 11: Snow/Ice)
        sr_1 = sr_0.where(sr_0.pixel_qa.isin([2, 4, 5, 6, 7, 8, 11]))
        sr_1 = sr_1.drop('pixel_qa')
        # Compute ndvi
        sr_1['ndvi'] = ((sr_1.nir - sr_1.red) / (sr_1.nir + sr_1.red)) * 10000
        sr_1['ndvi'].attrs['nodata'] = 0
        # Compute ndmi
        sr_1['ndmi'] = ((sr_1.nir - sr_1.swir1) / (sr_1.nir + sr_1.swir1)) * 10000
        sr_1['ndmi'].attrs['nodata'] = 0
        # Run temporal reductions and rename DataArrays
        sr_mean = sr_1.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename({'blue': 'blue_mean',
                        'green': 'green_mean',
                        'red': 'red_mean',
                        're1': 're1_mean',
                        're2': 're2_mean',
                        're3': 're3_mean',
                        'nir': 'nir_mean',
                        'swir1': 'swir1_mean',
                        'swir2': 'swir2_mean',
                        'ndmi': 'ndmi_mean',
                        'ndvi': 'ndvi_mean'}, inplace=True)
        # Compute min/max/std only for vegetation indices
        ndvi_max = sr_1.ndvi.max('time', keep_attrs=True, skipna=True)
        ndvi_max = ndvi_max.rename('ndvi_max')
        ndvi_max.attrs['nodata'] = 0
        ndvi_min = sr_1.ndvi.min('time', keep_attrs=True, skipna=True)
        ndvi_min = ndvi_min.rename('ndvi_min')
        ndvi_min.attrs['nodata'] = 0
        # ndmi
        ndmi_max = sr_1.ndmi.max('time', keep_attrs=True, skipna=True)
        ndmi_max = ndmi_max.rename('ndmi_max')
        ndmi_max.attrs['nodata'] = 0
        ndmi_min = sr_1.ndmi.min('time', keep_attrs=True, skipna=True)
        ndmi_min = ndmi_min.rename('ndmi_min')
        ndmi_min.attrs['nodata'] = 0
        # Merge dataarrays
        combined = xr.merge([sr_mean.apply(to_int),
                             to_int(ndvi_max),
                             to_int(ndvi_min),
                             to_int(ndmi_max),
                             to_int(ndmi_min),
                             terrain])
        combined.attrs['crs'] = sr_0.attrs['crs']
        combined = combined.compute()
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
def write_product(data, sources, output_prod_info, global_attrs, var_params, path):
    nudata, nudatasets = generate_dataset(data, sources, output_prod_info, path.absolute().as_uri())
    write_dataset_to_netcdf(nudata, global_attrs, var_params, path)
    return nudatasets
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with terrain metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(path, 'madmex_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing', nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Load terrain metrics using same spatial parameters than sr
        dc = datacube.Datacube(app='landsat_madmex_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico', like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={'x': 1667, 'y': 1667})
        dc.close()
        # Mask clouds, shadow, water, ice,... and drop qa layer
        clear = masking.make_mask(sr_0.pixel_qa, cloud=False, cloud_shadow=False, snow=False)
        sr_1 = sr_0.where(clear)
        sr_2 = sr_1.drop('pixel_qa')
        # Convert Landsat data to float (nodata values are converted to np.Nan)
        sr_3 = sr_2.apply(func=to_float, keep_attrs=True)
        # Compute ndvi
        sr_3['ndvi'] = ((sr_3.nir - sr_3.red) / (sr_3.nir + sr_3.red)) * 10000
        sr_3['ndvi'].attrs['nodata'] = -9999
        # Run temporal reductions and rename DataArrays
        sr_mean = sr_3.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename({'blue': 'blue_mean',
                        'green': 'green_mean',
                        'red': 'red_mean',
                        'nir': 'nir_mean',
                        'swir1': 'swir1_mean',
                        'swir2': 'swir2_mean',
                        'ndvi': 'ndvi_mean'}, inplace=True)
        sr_min = sr_3.min('time', keep_attrs=True, skipna=True)
        sr_min.rename({'blue': 'blue_min',
                       'green': 'green_min',
                       'red': 'red_min',
                       'nir': 'nir_min',
                       'swir1': 'swir1_min',
                       'swir2': 'swir2_min',
                       'ndvi': 'ndvi_min'}, inplace=True)
        sr_max = sr_3.max('time', keep_attrs=True, skipna=True)
        sr_max.rename({'blue': 'blue_max',
                       'green': 'green_max',
                       'red': 'red_max',
                       'nir': 'nir_max',
                       'swir1': 'swir1_max',
                       'swir2': 'swir2_max',
                       'ndvi': 'ndvi_max'}, inplace=True)
        sr_std = sr_3.std('time', keep_attrs=True, skipna=True)
        sr_std.rename({'blue': 'blue_std',
                       'green': 'green_std',
                       'red': 'red_std',
                       'nir': 'nir_std',
                       'swir1': 'swir1_std',
                       'swir2': 'swir2_std',
                       'ndvi': 'ndvi_std'}, inplace=True)
        # Merge dataarrays
        combined = xr.merge([sr_mean.apply(to_int),
                             sr_min.apply(to_int),
                             sr_max.apply(to_int),
                             sr_std.apply(to_int),
                             terrain])
        combined.attrs['crs'] = sr_0.attrs['crs']
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None