def create_mmaster_stack(filelist, extent=None, res=None, epsg=None, outfile='mmaster_stack.nc',
                         clobber=False, uncert=False, coreg=False, ref_tiles=None, exc_mask=None,
                         inc_mask=None, outdir='tmp', filt_dem=None, add_ref=False, add_corr=False,
                         latlontile_nodata=None, filt_mm_corr=False, l1a_zipped=False, y0=1900,
                         tmptag=None):
    """
    Given a list of DEM files, create a stacked NetCDF file.

    The DEMs intersecting the extent are sorted by date. The earliest one defines the output
    grid (extent, resolution, CRS); every DEM is then resampled onto that grid, optionally
    co-registered and filtered, buffered in memory and written to the NetCDF variables in one go.
    DEMs without any valid pixel in the extent, DEMs whose co-registration fails and DEMs whose
    statistics lack an 'RMSE' entry are skipped.

    :param filelist: List of DEM filenames to stack.
    :param extent: Spatial extent of DEMs to limit stack to [xmin, xmax, ymin, ymax], or a
        shapely Polygon whose boundary gives that extent. If None, the common bounding box of
        filelist is used.
    :param res: Output spatial resolution of DEMs. Defaults to the rounded resolution of the
        earliest DEM.
    :param epsg: EPSG code of output CRS. Defaults to the EPSG code of the earliest DEM.
    :param outfile: Filename for output NetCDF file.
    :param clobber: clobber existing dataset when creating NetCDF file.
    :param uncert: Include uncertainty variable in the output NetCDF.
    :param coreg: Co-register DEMs to an input DEM (given by ref_tiles). Requires ref_tiles.
    :param ref_tiles: Filename of input reference DEM: a shapefile of tiles (.shp), or a
        single raster (.vrt or .tif).
    :param exc_mask: Filename of exclusion mask (i.e., glaciers) to use in co-registration
    :param inc_mask: Filename of inclusion mask (i.e., land) to use in co-registration.
    :param outdir: Output directory for temporary files.
    :param filt_dem: Filename of DEM to filter elevation differences to. Elevations that are
        not within (-400, 1000) of this DEM are set to NaN.
    :param add_ref: Add reference DEM as a stack variable
    :param add_corr: Add correlation masks as a stack variable
    :param latlontile_nodata: Apply nodata for a lat/lon tile footprint to avoid overlapping and
        simplify xarray merging
    :param filt_mm_corr: Filter MMASTER DEM with correlation mask out of mmaster_tools when
        stacking (disk space),
    :param l1a_zipped: Use if files have been zipped to save on space.
    :param y0: Year 0 to reference NetCDF time variable to.
    :param tmptag: string to append to temporary files.

    :type filelist: array-like
    :type extent: array-like
    :type res: float
    :type epsg: int
    :type outfile: str
    :type clobber: bool
    :type uncert: bool
    :type coreg: bool
    :type ref_tiles: str
    :type exc_mask: str
    :type inc_mask: str
    :type outdir: str
    :type filt_dem: str
    :type add_ref: bool
    :type add_corr: bool
    :type latlontile_nodata: str
    :type filt_mm_corr: bool
    :type l1a_zipped: bool
    :type y0: int
    :type tmptag: str

    :returns nco: NetCDF Dataset of stacked DEMs.
    :raises ValueError: if extent has an unsupported type, if coreg is requested without
        ref_tiles, or if ref_tiles is not a .shp, .vrt or .tif file.
    """
    # Validate argument combinations before any file is read or created.
    if coreg and ref_tiles is None:
        raise ValueError('coreg=True requires ref_tiles to be provided')
    if ref_tiles is not None and not ref_tiles.endswith(('.shp', '.vrt', '.tif')):
        raise ValueError('ref_tiles should be a .shp, .vrt or .tif file')

    if extent is None:
        xmin, xmax, ymin, ymax = get_common_bbox(filelist, epsg)
    elif isinstance(extent, (list, tuple)):
        xmin, xmax, ymin, ymax = extent
    elif isinstance(extent, Polygon):
        x, y = extent.boundary.coords.xy
        xmin, xmax = min(x), max(x)
        ymin, ymax = min(y), max(y)
    else:
        raise ValueError('extent should be a list, tuple, or shapely.Polygon')
    out_bounds = (xmin, ymin, xmax, ymax)

    print('Searching for intersecting DEMs among the list of ' + str(len(filelist)) + '...')
    # check if each footprint falls within our given extent, and if not - remove from the list.
    if l1a_zipped:
        # if l1a are zipped, too long to extract archives and read extent from rasters ;
        # so read metadata instead
        l1a_filelist = [fn for fn in filelist if _is_aster_archive(fn)]
        rest_filelist = [fn for fn in filelist if not _is_aster_archive(fn)]
        l1a_inters = get_footprints_inters_ext(l1a_filelist, [xmin, ymin, xmax, ymax], epsg,
                                               use_l1a_met=True)
        rest_inters = get_footprints_inters_ext(rest_filelist, [xmin, ymin, xmax, ymax], epsg)
        filelist = l1a_inters + rest_inters
    else:
        filelist = get_footprints_inters_ext(filelist, [xmin, ymin, xmax, ymax], epsg)
    print('Found ' + str(len(filelist)) + '.')

    if len(filelist) == 0:
        print('Found no DEMs intersecting extent to stack. Skipping...')
        # NOTE: kept for backward compatibility - this terminates the interpreter.
        sys.exit()

    datelist = np.array([parse_date(f) for f in filelist])
    sorted_inds = np.argsort(datelist)

    # The earliest DEM defines the output grid.
    first_fn = filelist[sorted_inds[0]]
    print(first_fn)
    fn_tmp = None
    try:
        if l1a_zipped and _is_aster_archive(first_fn):
            z_name = '_'.join(os.path.basename(first_fn).split('_')[0:3]) + '_Z_adj_XAJ_final.tif'
            if tmptag is None:
                tmp_name = 'tmp_out.tif'
            else:
                tmp_name = 'tmp_out_' + tmptag + '.tif'
            fn_tmp = os.path.join(os.path.dirname(first_fn), tmp_name)
            mt.extract_file_from_zip(first_fn, z_name, fn_tmp)
            tmp_img = GeoImg(fn_tmp)
        else:
            tmp_img = GeoImg(first_fn)

        if res is None:
            res = np.round(tmp_img.dx)  # make sure that we have a nice resolution for gdal
        if epsg is None:
            epsg = tmp_img.epsg

        # now, reproject the first image to the extent, resolution, and coordinate system needed.
        dest = gdal.Warp('', tmp_img.gd, format='MEM', dstSRS='EPSG:{}'.format(epsg),
                         xRes=res, yRes=res, outputBounds=out_bounds,
                         resampleAlg=gdal.GRA_Bilinear)
    finally:
        # the extracted file is removed even if reading or warping failed
        if fn_tmp is not None:
            _remove_files([fn_tmp])

    first_img = GeoImg(dest)
    first_img.filename = first_fn

    # NetCDF assumes that coordinates are the cell center
    if first_img.is_area():
        first_img.to_point()

    nco, to, xo, yo = create_nc(first_img.img, outfile=outfile, clobber=clobber,
                                t0=np.datetime64('{}-01-01'.format(y0)))
    create_crs_variable(first_img.epsg, nco)

    go = nco.createVariable('dem_names', str, ('time',))
    go.long_name = 'Source DEM Filename'

    chunks_yx = [min(150, first_img.npix_y), min(150, first_img.npix_x)]

    zo = nco.createVariable('z', 'f4', ('time', 'y', 'x'), fill_value=-9999, zlib=True,
                            chunksizes=[500] + chunks_yx)
    zo.units = 'meters'
    zo.long_name = 'Height above WGS84 ellipsoid'
    zo.grid_mapping = 'crs'
    zo.coordinates = 'x y'
    zo.set_auto_mask(True)

    ref = None
    if ref_tiles is not None:
        if ref_tiles.endswith('.shp'):
            master_tiles = gpd.read_file(ref_tiles)
            s = STRtree(list(master_tiles['geometry'].values))
            bounds = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
            ref_vrt = get_tiles(bounds, master_tiles, s, outdir)
        else:
            # .vrt or .tif, as validated at the top of the function
            ref_vrt = ref_tiles
        ref = GeoImg(ref_vrt)

    filt_img = None
    if filt_dem is not None:
        filt_img = GeoImg(filt_dem).reproject(first_img)

    # 3 overlapping pixels on each side of the tile in case reprojection is necessary;
    # will be removed when merging
    use_tile_mask = latlontile_nodata is not None and epsg is not None
    mask = None
    if use_tile_mask:
        mask = binary_dilation(vt.latlontile_nodatamask(first_img, latlontile_nodata),
                               iterations=3)

    uo = None
    if uncert:
        uo = nco.createVariable('uncert', 'f4', ('time',))
        uo.long_name = 'RMSE of stable terrain differences.'
        uo.units = 'meters'

    if add_ref and ref is not None:
        ro = nco.createVariable('ref_z', 'f4', ('y', 'x'), fill_value=-9999,
                                chunksizes=chunks_yx)
        ro.units = 'meters'
        ro.long_name = 'Height above WGS84 ellipsoid'
        ro.grid_mapping = 'crs'
        ro.coordinates = 'x y'
        ro.set_auto_mask(True)
        ref_img = ref.reproject(first_img).img
        if use_tile_mask:
            ref_img[~mask] = np.nan
        ro[:, :] = ref_img

    co = None
    if add_corr:
        co = nco.createVariable('corr', 'i1', ('time', 'y', 'x'), fill_value=-1, zlib=True,
                                chunksizes=[500] + chunks_yx)
        co.units = 'percent'
        co.long_name = 'MMASTER correlation'
        co.grid_mapping = 'crs'
        co.coordinates = 'x y'
        co.set_auto_mask(True)

    x, y = first_img.xy(grid=False)
    xo[:] = x
    yo[:] = y

    # Layers are buffered and written in one go at the end: this is faster than writing
    # time slice by time slice into compressed chunks.
    list_img, list_corr, list_uncert, list_dt, list_name = [], [], [], [], []
    t0_ordinal = dt.date(y0, 1, 1).toordinal()

    for ind in sorted_inds:
        fn = filelist[ind]
        print(fn)
        # get instrument
        bname = os.path.splitext(os.path.basename(fn))[0]
        splitname = bname.split('_')
        is_aster = splitname[0] == 'AST'

        tmp_files = []
        try:
            # special case for MMASTER outputs (for disk usage)
            if is_aster:
                fn_z = '_'.join(splitname[0:3]) + '_Z_adj_XAJ_final.tif'
                fn_corr = '_'.join(splitname[0:3]) + '_CORR_adj_final.tif'
                # to avoid running into issues in parallel
                if tmptag is None:
                    fn_z_tmp = os.path.join(os.path.dirname(fn), fn_z)
                    fn_corr_tmp = os.path.join(os.path.dirname(fn), fn_corr)
                else:
                    fn_z_tmp = os.path.join(os.path.dirname(fn),
                                            os.path.splitext(fn_z)[0] + '_' + tmptag + '.tif')
                    fn_corr_tmp = os.path.join(os.path.dirname(fn),
                                               os.path.splitext(fn_corr)[0] + '_' + tmptag + '.tif')
                # unzip if needed
                if l1a_zipped:
                    # only files extracted here may be deleted once the DEM is processed
                    tmp_files = [fn_z_tmp, fn_corr_tmp]
                    mt.extract_file_from_zip(fn, fn_z, fn_z_tmp)
                    if filt_mm_corr or add_corr:
                        mt.extract_file_from_zip(fn, fn_corr, fn_corr_tmp)
                # open dem, filter with correlation mask if it comes out of MMASTER
                if filt_mm_corr:
                    img = corr_filter_aster(fn_z_tmp, fn_corr_tmp, 70)
                else:
                    img = GeoImg(fn_z_tmp)
            else:
                img = GeoImg(fn)

            if img.is_area():  # netCDF assumes coordinates are the cell center
                img.to_point()

            corr = None
            if add_corr and is_aster:
                corr = GeoImg(fn_corr_tmp)
                if corr.is_area():
                    corr.to_point()

            rmse = None
            if coreg:
                coreg_outdir = os.path.join(outdir, os.path.basename(fn).rsplit('.tif', 1)[0])
                try:
                    img, stats_final = _coregister_and_warp(ref, img, exc_mask, inc_mask,
                                                            coreg_outdir, epsg, res, out_bounds)
                    rmse = stats_final[3]
                except Exception:
                    # best effort: a DEM that cannot be co-registered is left out of the stack
                    print('Coregistration failed: skipping...')
                    continue
                target = img
            else:
                img = img.reproject(first_img)
                target = first_img

            if add_corr:
                if is_aster:
                    corr = corr.reproject(target)
                else:
                    # DEMs without a correlation raster get a constant value of 100
                    corr = img.copy()
                    corr.img[:] = 100

            if filt_img is not None:
                diff = img.img - filt_img.img
                valid = np.logical_and(diff > -400, diff < 1000)
                img.img[~valid] = np.nan

            if use_tile_mask:
                img.img[~mask] = np.nan
                if add_corr:
                    corr.img[~mask] = -1

            nvalid = np.count_nonzero(~np.isnan(img.img))
            if nvalid == 0:
                print('No valid pixel in the stack extent: skipping...')
                continue

            if coreg:
                print('Adding DEM that has ' + str(nvalid)
                      + ' valid pixels in this extent, with a global RMSE of ' + str(rmse))
            elif uncert:
                try:
                    stats = read_stats(os.path.dirname(fn))
                except Exception:
                    stats = None
                if stats is None:
                    # default uncertainty when no statistics can be read
                    rmse = 5.
                else:
                    try:
                        rmse = stats['RMSE']
                    except KeyError:
                        print('KeyError for RMSE here:' + fn)
                        continue

            list_img.append(img.img)
            if add_corr:
                list_corr.append(corr.img.astype(np.int8))
            if uncert:
                list_uncert.append(rmse)
            list_dt.append(datelist[ind].toordinal() - t0_ordinal)
            list_name.append(os.path.basename(fn).rsplit('.tif', 1)[0])
        finally:
            # runs on every exit from the iteration, including the skips above
            _remove_files(tmp_files)

    nlayers = len(list_img)
    if nlayers == 0:
        # np.stack cannot handle an empty list; leave the time-dependent variables empty
        print('No valid DEMs were added to the stack.')
        return nco

    # then write all at once
    zo[0:nlayers, :, :] = np.stack(list_img, axis=0)
    if add_corr:
        co[0:nlayers, :, :] = np.stack(list_corr, axis=0)
    if uncert:
        uo[0:nlayers] = np.array(list_uncert)
    to[0:nlayers] = np.array(list_dt)
    go[0:nlayers] = np.array(list_name)

    return nco


def _is_aster_archive(fn):
    """Return True if the basename of fn starts with 'AST'."""
    return os.path.basename(fn)[0:3] == 'AST'


def _remove_files(paths):
    """Delete every file of paths that exists on disk."""
    for path in paths:
        if os.path.exists(path):
            os.remove(path)


def _coregister_and_warp(ref, img, exc_mask, inc_mask, coreg_outdir, epsg, res, bounds):
    """Co-register img to ref, warp the result onto the stack grid and return (GeoImg, stats)."""
    ndv = img.NDV
    _, coreg_img, _, stats_final = dem_coregistration(ref, img, glaciermask=exc_mask,
                                                      landmask=inc_mask, outdir=coreg_outdir,
                                                      inmem=True)
    dest = gdal.Warp('', coreg_img.gd, format='MEM', dstSRS='EPSG:{}'.format(epsg),
                     xRes=res, yRes=res, outputBounds=bounds,
                     resampleAlg=gdal.GRA_Bilinear, srcNodata=ndv, dstNodata=-9999)
    return GeoImg(dest), stats_final
def reproj_stack(ds, utm_out, nice_latlon_tiling=False, write_ds=None, nproc=1):
    """
    Reproject the 'z' and 'z_ci' variables of a stack into another UTM zone.

    The output grid is obtained by warping the stack footprint to the EPSG code of utm_out
    at the resolution of the input stack (bilinear resampling). Every time slice of 'z' and
    'z_ci' is then reprojected onto that grid, either sequentially or with a pool of processes.

    :param ds: Stack to reproject, holding 'z', 'z_ci' and 'crs' variables along the 'time',
        'y' and 'x' dimensions (presumably an xarray.Dataset - confirm against callers).
    :param utm_out: UTM zone of the output stack, as understood by vt.epsg_from_utm.
    :param nice_latlon_tiling: Snap the output extent to the lat/lon tile of the stack and set
        the pixels outside of the tile footprint to NaN.
    :param write_ds: Filename to write the reprojected stack to. Nothing is written if None.
    :param nproc: Number of processes used for reprojection; 1 runs sequentially.

    :type utm_out: str
    :type nice_latlon_tiling: bool
    :type write_ds: str
    :type nproc: int

    :returns ds_out: the reprojected stack.
    """
    ds_out = ds.copy()

    tmp_img = make_geoimg(ds)
    res = tmp_img.dx
    epsg_out = vt.epsg_from_utm(utm_out)

    if nice_latlon_tiling:
        tile_name = tilename_stack(ds)
        outputBounds = vt.niceextent_utm_latlontile(tile_name, utm_out, res)
    else:
        tile_name = None
        outputBounds = None

    # warp the stack footprint to get the output grid
    dest = gdal.Warp('', tmp_img.gd, format='MEM', dstSRS='EPSG:{}'.format(epsg_out),
                     xRes=res, yRes=res, outputBounds=outputBounds,
                     resampleAlg=gdal.GRA_Bilinear)
    first_img = GeoImg(dest)
    if first_img.is_area():
        first_img.to_point()
    x, y = first_img.xy(grid=False)

    # swap the old spatial dimensions for the new ones
    ds_out = ds_out.drop(('z', 'z_ci', 'crs'))
    ds_out = ds_out.drop_dims(('x', 'y'))
    ds_out = ds_out.expand_dims(dim={'y': y, 'x': x})
    ds_out.x.attrs = ds.x.attrs
    ds_out.y.attrs = ds.y.attrs

    ntime = ds.time.size
    if nproc == 1:
        # Allocate once, before the loop: allocating inside it would wipe the slices
        # already reprojected and leave the arrays undefined for an empty stack.
        new_z = np.zeros((ntime, len(y), len(x)), dtype=np.float32)
        new_z_ci = np.zeros((ntime, len(y), len(x)), dtype=np.float32)
        for i in range(ntime):
            tmp_z = make_geoimg(ds, i, var='z')
            tmp_z_ci = make_geoimg(ds, i, var='z_ci')
            new_z[i, :] = tmp_z.reproject(first_img).img
            new_z_ci[i, :] = tmp_z_ci.reproject(first_img).img
    else:
        arr_z = ds.z.values
        arr_z_ci = ds.z_ci.values
        # plain tuples describing the input and output grids are passed to the workers
        in_met = (tmp_img.gt, tmp_img.proj_wkt, tmp_img.npix_x, tmp_img.npix_y)
        out_met = (res, outputBounds, utm_out)
        argsin_z = [(arr_z[i, :], in_met, out_met) for i in range(ntime)]
        argsin_z_ci = [(arr_z_ci[i, :], in_met, out_met) for i in range(ntime)]

        pool = mp.Pool(nproc, maxtasksperchild=1)
        try:
            outputs_z = pool.map(wrapper_reproj, argsin_z)
            outputs_z_ci = pool.map(wrapper_reproj, argsin_z_ci)
        finally:
            # release the worker processes even if a reprojection failed
            pool.close()
            pool.join()

        new_z = np.stack(outputs_z, axis=0)
        new_z_ci = np.stack(outputs_z_ci, axis=0)

    if nice_latlon_tiling:
        mask = vt.latlontile_nodatamask(first_img, tile_name)
        new_z[:, ~mask] = np.nan
        new_z_ci[:, ~mask] = np.nan

    ds_out['z'] = (['time', 'y', 'x'], new_z)
    ds_out['z_ci'] = (['time', 'y', 'x'], new_z_ci)
    ds_out['crs'] = ds['crs']

    ds_out.z.attrs = ds.z.attrs
    ds_out.z_ci.attrs = ds.z_ci.attrs
    ds_out.crs.attrs = create_crs_variable(epsg=epsg_out)

    if write_ds is not None:
        ds_out.to_netcdf(write_ds)

    return ds_out