def test_cog_no_crs(tmpdir, with_dask):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, dask=with_dask)
    del xx.attrs['crs']
    for dim in xx.dims:
        del xx[dim].attrs['crs']

    with pytest.raises(ValueError):
        write_cog(xx, ":mem:")

    with pytest.raises(ValueError):
        to_cog(xx)
def output_ds_to_cog(bandsets, outbandnames, dirc, loc_str, s2_ds):
    """
    Description:
        This function outputs a set of data arrays in a dataset as cloud-optimised GeoTIFF files

    Parameters:
        bandsets: list of strings
            the names of the data arrays in the dataset to be saved
        outbandnames: list of strings
            the output band names used in the GeoTIFF file names
        dirc: string
            the directory where the image files are saved
        loc_str: string
            the name of the location the image data are from; the string becomes part of the file names
        s2_ds: xarray Dataset object
            the dataset containing the data arrays

    Return:
        None
    """

    # Create a list of date strings from the time series of time in the dataset
    timebandnames = get_timebandnames(s2_ds)

    for bandname, outputname in zip(bandsets, outbandnames):
        banddata = s2_ds[bandname]

        for i in range(len(s2_ds.time)):
            # Date of the satellite image as part of the name of the GeoTIFF
            datestr = timebandnames[i]

            # Convert the current time step into an `xarray.DataArray`
            singletimestamp_da = banddata[i]

            # Create output filename
            filename = dirc + "/" + loc_str + "_" + outputname + "_" + datestr + ".tif"

            # Write GeoTIFF
            write_cog(geo_im=singletimestamp_da, fname=filename, nodata=255, overwrite=True)
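# Hypothetical usage sketch for output_ds_to_cog above, assuming `s2_ds` is an
# xarray Dataset loaded elsewhere (e.g. via datacube) that contains "nbart_red"
# and "nbart_nir" data arrays; the band names, output names, directory and
# location string are illustrative, not from the original source.
output_ds_to_cog(
    bandsets=["nbart_red", "nbart_nir"],   # data arrays present in s2_ds
    outbandnames=["red", "nir"],           # names embedded in the output file names
    dirc="/tmp/cogs",                      # existing output directory
    loc_str="canberra",                    # location tag used in the file names
    s2_ds=s2_ds,
)
# Writes one COG per band per time step, e.g. /tmp/cogs/canberra_red_<date>.tif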
def test_cog_file(tmpdir, opts):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)

    # write to file
    ff = write_cog(xx, pp / "cog.tif", **opts)
    assert isinstance(ff, Path)
    assert ff == pp / "cog.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    _write_cog(
        np.stack([xx.values, xx.values]),
        xx.geobox,
        pp / "cog-2-bands.tif",
        overview_levels=[],
        **opts,
    )

    yy, mm = rio_slurp(pp / "cog-2-bands.tif")
    assert mm.gbox == xx.geobox
    assert yy.shape == (2, *xx.shape)
    np.testing.assert_array_equal(yy[0], xx.values)
    np.testing.assert_array_equal(yy[1], xx.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(xx.values.ravel(), xx.geobox, pp / "wontwrite.tif")

    # sizes that are not multiples of 16
    # also check that supplying `nodata=` doesn't break things
    xx_odd = xx[:23, :63]
    ff = write_cog(xx_odd, pp / "cog_odd.tif", nodata=xx_odd.attrs["nodata"], **opts)
    assert isinstance(ff, Path)
    assert ff == pp / "cog_odd.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog_odd.tif")
    np.testing.assert_array_equal(yy.values, xx_odd.values)
    assert yy.geobox == xx_odd.geobox
    assert yy.nodata == xx_odd.nodata

    with pytest.warns(UserWarning):
        write_cog(xx, pp / "cog_badblocksize.tif", blocksize=50)
def test_cog_rgba(tmpdir, use_windowed_writes):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)
    pix = np.dstack([xx.values] * 4)
    rgba = xr.DataArray(pix, attrs=xx.attrs, dims=("y", "x", "band"), coords=xx.coords)
    assert rgba.geobox == xx.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    ff = write_cog(rgba, pp / "cog.tif", use_windowed_writes=use_windowed_writes)
    yy = rio_slurp_xarray(ff)

    assert yy.geobox == rgba.geobox
    assert yy.shape == rgba.shape
    np.testing.assert_array_equal(yy.values, rgba.values)

    with pytest.raises(ValueError):
        _write_cog(
            rgba.values[1:, :, :],
            rgba.geobox,
            ":mem:",
            use_windowed_writes=use_windowed_writes,
        )
def test_cog_mem_dask(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, dask=True)

    # write to memory 1
    bb = write_cog(xx, ":mem:")
    assert isinstance(bb, Delayed)
    bb = bb.compute()
    assert isinstance(bb, bytes)

    path = pp / "cog1.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)

    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 2
    bb = to_cog(xx)
    assert isinstance(bb, Delayed)
    bb = bb.compute()
    assert isinstance(bb, bytes)

    path = pp / "cog2.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)

    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
def test_cog_file(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)

    # write to file
    ff = write_cog(xx, pp / "cog.tif")
    assert isinstance(ff, Path)
    assert ff == pp / "cog.tif"
    assert ff.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    _write_cog(np.stack([xx.values, xx.values]),
               xx.geobox,
               pp / "cog-2-bands.tif",
               overview_levels=[])

    yy, mm = rio_slurp(pp / "cog-2-bands.tif")
    assert mm.gbox == xx.geobox
    assert yy.shape == (2, *xx.shape)
    np.testing.assert_array_equal(yy[0], xx.values)
    np.testing.assert_array_equal(yy[1], xx.values)

    with pytest.raises(ValueError, match="Need 2d or 3d ndarray on input"):
        _write_cog(xx.values.ravel(), xx.geobox, pp / "wontwrite.tif")
def test_cog_file_dask(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, dask=True)
    assert dask.is_dask_collection(xx)

    path = pp / "cog.tif"
    ff = write_cog(xx, path, overview_levels=[2, 4])
    assert isinstance(ff, Delayed)
    assert path.exists() is False
    assert ff.compute() == path
    assert path.exists()

    yy = rio_slurp_xarray(pp / "cog.tif")
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
def test_cog_rgba(tmpdir):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp)
    pix = np.dstack([xx.values] * 4)
    rgba = xr.DataArray(pix, attrs=xx.attrs, dims=('y', 'x', 'band'), coords=xx.coords)
    assert rgba.geobox == xx.geobox
    assert rgba.shape[:2] == rgba.geobox.shape

    ff = write_cog(rgba, pp / "cog.tif")
    yy = rio_slurp_xarray(ff)

    assert yy.geobox == rgba.geobox
    assert yy.shape == rgba.shape
    np.testing.assert_array_equal(yy.values, rgba.values)

    with pytest.raises(ValueError):
        _write_cog(rgba.values[1:, :, :], rgba.geobox, ':mem:')
def test_cog_mem(tmpdir, shape):
    pp = Path(str(tmpdir))
    xx, ds = gen_test_data(pp, shape=shape)

    # write to memory 1
    bb = write_cog(xx, ":mem:")
    assert isinstance(bb, bytes)
    path = pp / "cog1.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)

    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 2
    bb = to_cog(xx)
    assert isinstance(bb, bytes)
    path = pp / "cog2.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)

    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata

    # write to memory 3 -- no overviews
    bb = to_cog(xx, overview_levels=[])
    assert isinstance(bb, bytes)
    path = pp / "cog3.tiff"
    with open(str(path), "wb") as f:
        f.write(bb)

    yy = rio_slurp_xarray(path)
    np.testing.assert_array_equal(yy.values, xx.values)
    assert yy.geobox == xx.geobox
    assert yy.nodata == xx.nodata
def rgb_task2(item):
    dc = Datacube(config="datacube.conf")
    product = "ls8_level1_usgs"
    time = item["properties"]["datetime"].split("T")[0]
    x = (item["bbox"][0], item["bbox"][2])
    y = (item["bbox"][1], item["bbox"][3])
    measurements = ["B2"]

    ds = dc.load(product=product,
                 measurements=measurements,
                 time=time,
                 x=x,
                 y=y,
                 output_crs='EPSG:4326',
                 resolution=(-0.001, 0.001))

    suffix = '_'.join(measurements)
    filename = f'{item["id"]}_{suffix}.tif'

    path = write_cog(
        ds.to_array(),
        Path('/static') / filename,
    )

    return {"success": True, "url": str(path)}
meta_d = LCCS.copy()  # .squeeze().drop('time')

seg = felzenszwalb(LCCS.data.transpose())
# seg = quickshift(LCCS.data.transpose(), kernel_size=3, convert2lab=False, max_dist=10, ratio=0.5)
print('seg shape', seg.shape)

data_seg_med = scipy.ndimage.median(input=LCCS.data.transpose(), labels=seg, index=seg)
# data_seg_med = data_seg_med.squeeze("time").drop("time")
print("seg_med shape", data_seg_med.shape)

out = xr.DataArray(data=data_seg_med.transpose(),
                   dims=meta_d.dims,
                   coords=meta_d.coords,
                   attrs=meta_d.attrs)
print(out)

name = 'lccs_l4_2015_seg'
write_cog(out, fname=f'{name}.tif', overwrite=True)

# Create list of labels that do not meet the desired shape requirements
frac_dict = {}
# rect_dict = {}
# solidity_dict = {}
# form_dict = {}
# labels = []

# for region in regionprops(seg):
#     if region.area > 1:
#         fractal_dimension = 2 * np.log(region.perimeter / 4) / np.log(region.area)
#         rectangularity = region.area / (region.major_axis_length * region.minor_axis_length)
#         solidity = region.convex_area / region.area
#         form = (4 * np.pi * region.area) / np.square(region.perimeter)
def download_cci_lc(year: str, s3_dst: str, workdir: str, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()
                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series", {params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )
                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(f"File {local_file} exists, continuing without downloading")

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")

        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(
        href=str(out_cog), roles=["data"], media_type=pystac.MediaType.COG
    )

    # Write STAC document
    source_doc = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    item = create_stac_item(
        str(out_cog),
        id=str(odc_uuid("Copernicus Land Cover", cci_lc_version, [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
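# Minimal sketch of the in-memory COG pattern used in download_cci_lc above:
# write_cog with fname=":mem:" returns the COG as a bytes object (for non-dask
# inputs), which can then be pushed straight to object storage. The imports,
# DataArray `da`, bucket path and nodata value here are assumptions for
# illustration only (s3_dump is assumed to come from odc.aws, as in similar
# deafrica scripts).
from datacube.utils.cog import write_cog
from odc.aws import s3_dump

cog_bytes = write_cog(da, ":mem:", nodata=0)       # da: a CRS-tagged 2D DataArray
s3_dump(cog_bytes,
        "s3://my-bucket/example.tif",              # hypothetical destination
        ACL="bucket-owner-full-control")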
def xr_rasterize(gdf,
                 da,
                 attribute_col=False,
                 crs=None,
                 transform=None,
                 name=None,
                 x_dim='x',
                 y_dim='y',
                 export_tiff=None,
                 **rasterio_kwargs):
    """
    Rasterizes a geopandas.GeoDataFrame into an xarray.DataArray.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        A geopandas.GeoDataFrame object containing the vector/shapefile
        data you want to rasterise.
    da : xarray.DataArray or xarray.Dataset
        The shape, coordinates, dimensions, and transform of this object
        are used to build the rasterized shapefile. It effectively
        provides a template. The attributes of this object are also
        appended to the output xarray.DataArray.
    attribute_col : string, optional
        Name of the attribute column in the geodataframe that the pixels
        in the raster will contain. If set to False, output will be a
        boolean array of 1's and 0's.
    crs : str, optional
        CRS metadata to add to the output xarray, e.g. 'epsg:3577'.
        The function will attempt to get this info from the input
        GeoDataFrame first.
    transform : affine.Affine object, optional
        An affine.Affine object (e.g. `from affine import Affine;
        Affine(30.0, 0.0, 548040.0, 0.0, -30.0, 6886890.0)`) giving the
        affine transformation used to convert raster coordinates
        (e.g. [0, 0]) to geographic coordinates. If none is provided,
        the function will attempt to obtain an affine transformation
        from the xarray object (e.g. either at `da.transform` or
        `da.geobox.transform`).
    name : str, optional
        An optional name for the output xarray.DataArray.
    x_dim : str, optional
        An optional string allowing you to override the xarray dimension
        used for x coordinates. Defaults to 'x'. Useful, for example,
        if the x and y dims are instead called 'lat' and 'lon'.
    y_dim : str, optional
        An optional string allowing you to override the xarray dimension
        used for y coordinates. Defaults to 'y'. Useful, for example,
        if the x and y dims are instead called 'lat' and 'lon'.
    export_tiff : str, optional
        If a filepath is provided (e.g. 'output/output.tif'), a GeoTIFF
        file will be exported. A named array is required for this
        operation; if one is not supplied by the user, a default name,
        'data', is used.
    **rasterio_kwargs :
        A set of keyword arguments to rasterio.features.rasterize.
        Can include: 'all_touched', 'merge_alg', 'dtype'.

    Returns
    -------
    xarr : xarray.DataArray
    """

    # Check for a crs object
    try:
        crs = da.geobox.crs
    except:
        try:
            crs = da.crs
        except:
            if crs is None:
                raise Exception("Please add a `crs` attribute to the "
                                "xarray.DataArray, or provide a CRS using the "
                                "function's `crs` parameter (e.g. crs='EPSG:3577')")

    # Check if transform is provided as an xarray.DataArray method.
    # If not, require a supplied Affine
    if transform is None:
        try:
            # First, try to take transform info from geobox
            transform = da.geobox.transform
        # If no geobox
        except:
            try:
                # Try getting transform from 'transform' attribute
                transform = da.transform
            except:
                # If neither of those options work, raise an exception telling the
                # user to provide a transform
                raise Exception("Please provide an Affine transform object using the "
                                "`transform` parameter (e.g. `from affine import "
                                "Affine; Affine(30.0, 0.0, 548040.0, 0.0, -30.0, "
                                "6886890.0)`")

    # Grab the 2D dims (not time)
    try:
        dims = da.geobox.dims
    except:
        dims = y_dim, x_dim

    # Coords
    xy_coords = [da[dims[0]], da[dims[1]]]

    # Shape
    try:
        y, x = da.geobox.shape
    except:
        y, x = len(xy_coords[0]), len(xy_coords[1])

    # Reproject shapefile to match CRS of raster
    print(f'Rasterizing to match xarray.DataArray dimensions ({y}, {x})')

    try:
        gdf_reproj = gdf.to_crs(crs=crs)
    except:
        # Sometimes the crs can be a datacube utils CRS object
        # so convert to string before reprojecting
        gdf_reproj = gdf.to_crs(crs={'init': str(crs)})

    # If an attribute column is specified, rasterise using vector
    # attribute values. Otherwise, rasterise into a boolean array
    if attribute_col:
        # Use the geometry and attributes from `gdf` to create an iterable
        shapes = zip(gdf_reproj.geometry, gdf_reproj[attribute_col])
    else:
        # Use geometry directly (will produce a boolean numpy array)
        shapes = gdf_reproj.geometry

    # Rasterise shapes into an array
    arr = rasterio.features.rasterize(shapes=shapes,
                                      out_shape=(y, x),
                                      transform=transform,
                                      **rasterio_kwargs)

    # Convert result to a xarray.DataArray
    xarr = xr.DataArray(arr,
                        coords=xy_coords,
                        dims=dims,
                        attrs=da.attrs,
                        name=name if name else None)

    # Add back crs if xarr.attrs doesn't have it
    if xarr.geobox is None:
        xarr = assign_crs(xarr, str(crs))

    if export_tiff:
        print(f"Exporting GeoTIFF to {export_tiff}")
        write_cog(xarr, export_tiff, overwrite=True)

    return xarr
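# Hypothetical usage sketch for xr_rasterize above: rasterise a vector layer onto
# the grid of an existing DataArray and also export the result as a COG via the
# `export_tiff` option. The shapefile name, attribute column, output path and
# template DataArray `template_da` are illustrative, not from the original source.
import geopandas as gpd

gdf = gpd.read_file("field_boundaries.shp")            # hypothetical polygon layer
field_raster = xr_rasterize(gdf,
                            da=template_da,            # template DataArray loaded elsewhere
                            attribute_col="field_id",  # burn this attribute's values into pixels
                            export_tiff="field_ids.tif")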
def main():
    param = sys.argv
    argc = len(param)

    if argc != 5:
        print("Usage: python3 nmask_p3.py datadirc outdirc loc_str indfile")
        print("datadirc: input data directory")
        print("outdirc: output directory")
        print("loc_str: location string for the output file name")
        print("indfile: filename of the long term mean of one of the 4 indices")
        exit()

    # Input data directory
    datadirc = param[1]
    # Output directory
    outdirc = param[2]
    # Location string in the output filename
    loc_str = param[3]
    # Filename of the long term mean of one of the 4 indices
    indfile = param[4]

    comm = 'mkdir -p ' + outdirc
    os.system(comm)

    dbs = xr.open_rasterio(indfile)

    timebandsfname = datadirc + '/' + loc_str + '_timebandnames.npy'
    tsbandnames = np.load(timebandsfname)

    irow = dbs['y'].size
    icol = dbs['x'].size

    for tbname in tsbandnames:
        onetsmask = np.zeros(irow * icol, dtype=np.uint8)
        mixfname = datadirc + '/' + loc_str + '_' + tbname + '_predict.npy'
        if os.path.isfile(mixfname):
            maskfname = datadirc + '/' + loc_str + '_' + tbname + '_ipmask.npy'
            mask = np.load(maskfname)
            mixtures = np.load(mixfname)
            ss = mixtures.size
            vpnum = int(ss / 3)
            mixtures = mixtures.reshape(vpnum, 3)
            vdmask = np.argmax(mixtures, axis=1) + 1
            # print(vdmask.shape)
            print(tbname)
            onetsmask[mask == 1] = vdmask
            onetsmask = onetsmask.reshape(irow, icol)
            onetsmask = cym.spatial_filter_v2(onetsmask)
            dbs.data[0] = onetsmask
            outfname = outdirc + '/' + loc_str + '_' + tbname + '_nmask-cog.tif'
            write_cog(geo_im=dbs, fname=outfname, overwrite=True)
def main():
    dirc = '/home/jovyan/nmask_testdata/cbr_dask_run/indices'
    loc_str = 'canberra'
    ncpu = 6

    y1, y2 = -32.53284301899998, -33.52310232399998
    x1, x2 = 121.934694247, 123.105109264
    crs = 'EPSG:4326'
    out_crs = 'UTM'

    start_of_epoch = '2017-01-01'
    end_of_epoch = '2020-12-31'

    if out_crs == 'UTM':
        out_crs = tsf.utm_code(x1, x2)

    # create_local_dask_cluster(spare_mem='4GB')

    outdirc = '/home/jovyan/nmask_testdata/cbr_dask_run/maskfiles'
    modeldirc = '/home/jovyan/nmask_dask/models'
    modelname = 'combine_trdata_tsmask_model'

    # Load normalisation parameters for the input data
    parafilename = modeldirc + '/' + modelname + '_standarise_parameters.npy'
    norm_paras = np.load(parafilename)

    # Load neural network model
    modelfilename = modeldirc + '/' + modelname
    model = models.load_model(modelfilename)

    # Start a local cluster
    client = Client(n_workers=ncpu, threads_per_worker=1, processes=True)
    client

    chy, chx = 500, 500

    dc = datacube.Datacube(app='load_clearsentinel')

    tg_ds = tsf.load_s2_nbart_dask(dc, y1, y2, x1, x2, start_of_epoch, end_of_epoch,
                                   {"time": 1, "y": chy, "x": chx}, crs, out_crs)

    indices_list = ['s6m', 'mndwi', 'msavi', 'whi']
    indstr = start_of_epoch + "_" + end_of_epoch

    # Load background indices and add the indices data to the xarray dataset
    tg_ds = load_bgids(indices_list, dirc, indstr, loc_str, tg_ds, chy, chx)

    indfile = dirc + '/' + loc_str + '_msavi_' + indstr + '.tif'
    dbs = xr.open_rasterio(indfile)

    # Number of rows
    irow = tg_ds['y'].size
    # Number of columns
    icol = tg_ds['x'].size
    # Number of time steps
    tn = tg_ds['time'].size

    tbnamelist = tsf.get_timebandnames(tg_ds)

    # Classify each scene in the time series and output the cloud mask as a COG file
    for i in range(tn):
        # Load data for one scene
        nmask = np.zeros(irow * icol, dtype=np.uint8)
        blue = tg_ds.blue[i, :, :].persist()
        green = tg_ds.green[i, :, :].persist()
        red = tg_ds.red[i, :, :].persist()
        nir = tg_ds.nir[i, :, :].persist()
        swir1 = tg_ds.swir1[i, :, :].persist()
        swir2 = tg_ds.swir2[i, :, :].persist()
        s6m = tg_ds.s6m.persist()
        mndwi = tg_ds.mndwi.persist()
        msavi = tg_ds.msavi.persist()
        whi = tg_ds.whi.persist()

        # Prepare the input data for the neural network model; each row of ipdata represents a pixel
        ipdata = tf_data(blue, green, red, nir, swir1, swir2, s6m, mndwi, msavi, whi).compute()

        # Last column of ipdata indicates if a pixel contains invalid input values
        ipmask = ipdata[:, :, 12].data
        tfdata = ipdata[:, :, 0:12].data

        # Prepare the input data for the neural network model, filtering out invalid pixels
        ipmask = ipmask.flatten().astype(int)
        tfdata = tfdata.reshape(irow * icol, 12)
        tfdata = tfdata[ipmask == 1]
        tfdata = std_by_paramters(tfdata, 2, norm_paras)

        tbname = tbnamelist[i]
        print("Begin classifying scene ", tbname)

        mixtures = model.predict(tfdata)
        vdmask = np.argmax(mixtures, axis=1) + 1

        # Reconstruct the cloud mask image; invalid pixels have a cloud mask value of zero
        nmask[ipmask == 1] = vdmask
        nmask = nmask.reshape(irow, icol)

        # Apply a spatial filter to the cloud mask, eliminating cloud masks with fewer than 2 neighbours
        nmask = cym.spatial_filter_v2(nmask)

        # Output the cloud mask as a COG file
        dbs.data[0] = nmask
        outfname = outdirc + '/' + loc_str + '_' + tbname + '_nmask-cog.tif'
        write_cog(geo_im=dbs, fname=outfname, overwrite=True)

        print("Finish writing mask file ", outfname)

    dbs.close()
def main(year, crs='EPSG:6933', res=10):
    crs_code = crs.split(':')[1]
    dea_filename = f"deafrica_gmw_{year}_{crs_code}_{res}m.tif"
    if os.path.exists(dea_filename):
        print(f"{dea_filename} already exists")
        return

    # Download extents if needed
    gmw_shp = f'GMW_001_GlobalMangroveWatch_{year}/01_Data/GMW_{year}_v2.shp'
    if not os.path.exists(gmw_shp):
        gmw_shp = download_and_unzip_gmw(year=year)

    # Extract extents over Africa
    gmw = gpd.read_file(gmw_shp)
    deafrica_extent = gpd.read_file(
        'https://github.com/digitalearthafrica/deafrica-extent/raw/master/africa-extent.json')
    deafrica_extent = deafrica_extent.to_crs(gmw.crs)

    # Find everything within deafrica_extent
    gmw_africa = gpd.sjoin(gmw, deafrica_extent, op='intersects')

    # Include additional features within the square bounding box
    bound = box(*gmw_africa.total_bounds).buffer(0.001)
    deafrica_extent_square = gpd.GeoDataFrame(gpd.GeoSeries(bound),
                                              columns=['geometry'],
                                              crs=gmw_africa.crs)
    gmw_africa = gpd.sjoin(gmw, deafrica_extent_square, op='intersects')

    # Output raster settings
    gmw_africa = gmw_africa.to_crs(crs)
    bounds = gmw_africa.total_bounds
    bounds = np.hstack([np.floor(bounds[:2] / 10) * 10, np.ceil(bounds[2:] / 10) * 10])
    # transform = Affine(res, 0.0, bounds[0], 0.0, -1 * res, bounds[3])
    out_shape = int((bounds[3] - bounds[1]) / res), int((bounds[2] - bounds[0]) / res)

    # Rasterize in tiles
    tile_size = 50000
    ny = np.ceil(out_shape[0] / tile_size).astype(int)
    nx = np.ceil(out_shape[1] / tile_size).astype(int)
    for iy in np.arange(ny):
        for ix in np.arange(nx):
            y0 = bounds[3] - iy * tile_size * res
            x0 = bounds[0] + ix * tile_size * res
            y1 = np.max([bounds[1], bounds[3] - (iy + 1) * tile_size * res])
            x1 = np.min([bounds[2], bounds[0] + (ix + 1) * tile_size * res])
            transform = Affine(res, 0.0, x0, 0.0, -1 * res, y0)  # pixel ul
            sub_shape = np.abs((y1 - y0) / res).astype(int), np.abs((x1 - x0) / res).astype(int)
            arr = rasterize(shapes=gmw_africa.geometry,
                            out_shape=sub_shape,
                            transform=transform,
                            fill=0,
                            all_touched=True,
                            default_value=1,
                            dtype=np.uint8)
            xarr = xr.DataArray(arr,
                                # pixel center
                                coords={'y': y0 - np.arange(sub_shape[0]) * res - res / 2,
                                        'x': x0 + np.arange(sub_shape[1]) * res + res / 2},
                                dims=('y', 'x'),
                                name='gmw')
            xarr = assign_crs(xarr, str(crs))
            write_cog(xarr, f'gmw_africa_{year}_{ix}_{iy}.tif', overwrite=True)

    cmd = f"gdalbuildvrt gmw_africa_{year}.vrt gmw_africa_{year}_*_*.tif"
    r = subprocess.call(cmd, shell=True)

    cmd = f"rio cogeo create --overview-level 0 gmw_africa_{year}.vrt deafrica_gmw_{year}.tif"
    r = subprocess.call(cmd, shell=True)
def image_segmentation(ndvi, predict):
    write_cog(ndvi.to_array().compute(), "NDVI.tif", overwrite=True)

    # store temp files somewhere
    directory = "tmp"
    if not os.path.exists(directory):
        os.mkdir(directory)

    tmp = "tmp/"

    # inputs to image seg
    tiff_to_segment = "NDVI.tif"
    kea_file = "NDVI.kea"
    segmented_kea_file = "segmented.kea"

    # convert tiff to kea
    gdal.Translate(
        destName=kea_file, srcDS=tiff_to_segment, format="KEA", outputSRS="EPSG:6933"
    )

    # run image seg
    with HiddenPrints():
        segutils.runShepherdSegmentation(
            inputImg=kea_file,
            outputClumps=segmented_kea_file,
            tmpath=tmp,
            numClusters=60,
            minPxls=100,
        )

    # convert kea to tif
    kwargs = {
        'outputType': gdal.GDT_Float32,
    }
    gdal.Translate(
        destName=segmented_kea_file[:-3] + 'tif',
        srcDS=segmented_kea_file,
        outputSRS="EPSG:6933",
        format='GTiff',
        **kwargs
    )

    # open segments
    segments = xr.open_rasterio(segmented_kea_file[:-3] + 'tif').squeeze().values

    # calculate mode
    count, _sum = _stats(predict, labels=segments, index=segments)
    mode = _sum > (count / 2)
    mode = xr.DataArray(
        mode, coords=predict.coords, dims=predict.dims, attrs=predict.attrs
    )

    # remove the tmp folder
    shutil.rmtree(tmp)
    os.remove(kea_file)
    os.remove(segmented_kea_file)
    os.remove(tiff_to_segment)
    os.remove(segmented_kea_file[:-3] + 'tif')

    return mode.chunk({})