def test_resize_bbox(self):
    """Padding the bbox grows the bbox edges/shape while leaving the inner mask untouched."""
    mask = SubsetMask(huc10190004.get('conus1_mask'))
    unpadded_edges = (716, 745, 1039, 1123)
    unpadded_shape = (30, 85)
    # before padding, bbox and inner mask coincide
    self.assertSequenceEqual(unpadded_edges, mask.inner_mask_edges)
    self.assertSequenceEqual(unpadded_edges, mask.bbox_edges)
    self.assertSequenceEqual(unpadded_shape, mask.bbox_shape)
    self.assertSequenceEqual(unpadded_shape, mask.inner_mask_shape)
    # pad 9 cells on every side (top, right, bottom, left)
    mask.add_bbox_to_mask(padding=(9, 9, 9, 9))
    self.assertSequenceEqual((707, 754, 1030, 1132), mask.bbox_edges)
    self.assertSequenceEqual(unpadded_edges, mask.inner_mask_edges)
    self.assertSequenceEqual(unpadded_shape, mask.inner_mask_shape)
    self.assertSequenceEqual((48, 103), mask.bbox_shape)
    self.assertEqual(-999, mask.no_data_value)
    self.assertEqual(0, mask.bbox_val)
    self.assertSequenceEqual((9, 9, 9, 9), mask.get_bbox().get_padding())
    # NOTE(review): output tif is left in cwd and never removed — confirm
    # whether a later step consumes it before adding cleanup
    mask.write_mask_to_tif("WBDHU8_conus1_mask_padded.tif")
def test_compare_box_clips(self):
    """A BoxClipper built from the MaskClipper's bbox must produce an identical subset."""
    dem = file_io_tools.read_file(test_files.conus1_dem.as_posix())
    mask = SubsetMask(
        test_files.huc10190004.get('conus1_mask').as_posix())
    mask_clipper = MaskClipper(subset_mask=mask, no_data_threshold=-1)
    mask_subset, _, _, bbox = mask_clipper.subset(dem, crop_inner=0)
    # drive a plain box clip from the bbox the mask clip reported
    box_clipper = BoxClipper(ref_array=dem, x=bbox[0], y=bbox[1],
                             nx=bbox[2], ny=bbox[3])
    box_subset, _, _, _ = box_clipper.subset()
    for axis in range(3):
        self.assertEqual(mask_subset.shape[axis], box_subset.shape[axis])
    self.assertIsNone(
        np.testing.assert_array_equal(mask_subset, box_subset))
def test_subset_tif_conus2(self):
    """Clipping the CONUS2 DEM with the HUC mask matches the reference DEM and bbox.

    Fix: the original called ``os.remove`` inline after each assertion, so a
    failing assertion skipped cleanup and left artifacts in the working
    directory. Cleanup is now registered with ``self.addCleanup`` right after
    each file is written, so it runs even on failure.
    """
    dem_out = 'conus_2_clip_dem_test.tif'
    bbox_out = 'bbox_conus2_full.txt'
    data_array = file_io_tools.read_file(test_files.conus2_dem.as_posix())
    my_mask = SubsetMask(
        test_files.huc10190004.get('conus2_mask').as_posix())
    clipper = MaskClipper(subset_mask=my_mask, no_data_threshold=-1)
    return_arr, new_geom, new_mask, bbox = clipper.subset(data_array)
    file_io_tools.write_array_to_geotiff(dem_out, return_arr, new_geom,
                                         my_mask.mask_tif.GetProjection())
    self.addCleanup(os.remove, dem_out)
    self.assertIsNone(
        np.testing.assert_array_equal(
            file_io_tools.read_file(
                test_files.huc10190004.get('conus2_dem').as_posix()),
            file_io_tools.read_file(dem_out)),
        'Clipping DEM matches reference')
    file_io_tools.write_bbox(bbox, bbox_out)
    self.addCleanup(os.remove, bbox_out)
    self.assertSequenceEqual(
        file_io_tools.read_bbox(bbox_out),
        test_files.huc10190004.get('conus2_bbox'),
        'Subset writes correct bounding box file')
def reproject_and_mask(self, dtype=gdal.GDT_Int32, no_data=None,
                       attribute_name='OBJECTID', attribute_ids=None):
    """Rasterize the shapefile onto the reference dataset's grid and projection.

    Fixes: ``ogr.Open`` returns ``None`` for a missing/unreadable shapefile,
    which previously surfaced as an opaque ``AttributeError`` on
    ``GetLayer()`` — now raised with a clear message. Docstring type for
    ``no_data`` corrected (it is a numeric value, not ``str``).

    Parameters
    ----------
    dtype : gdal.datatype
        the datatype to write (Default value = gdal.GDT_Int32)
    no_data : int or float
        no_data value to use (Default value = None, meaning ``self.no_data``)
    attribute_name : str
        field in the shapefile to trace (Default value = 'OBJECTID')
    attribute_ids : list of ints
        list of attribute ID values to select (Default value = None,
        meaning ``[1]``)

    Returns
    -------
    str
        path (virtual mem) to the reprojected full_dim_mask
    """
    if attribute_ids is None:
        attribute_ids = [1]
    if no_data is None:
        no_data = self.no_data
    geom_ref = self.ds_ref.GetGeoTransform()
    tif_path = f'/vsimem/{self.shapefile_name}.tif'
    # target raster mirrors the reference dataset's grid, filled with no_data
    target_ds = gdal.GetDriverByName('GTiff').Create(
        tif_path,
        self.ds_ref.RasterXSize,
        self.ds_ref.RasterYSize,
        1, dtype)
    target_ds.SetProjection(self.ds_ref.GetProjection())
    target_ds.SetGeoTransform(geom_ref)
    target_ds.GetRasterBand(1).SetNoDataValue(no_data)
    target_ds.GetRasterBand(1).Fill(no_data)
    # shapefile
    shp_source = ogr.Open(self.full_shapefile_path)
    if shp_source is None:
        # ogr.Open signals failure by returning None, not by raising
        msg = f'unable to open shapefile: {self.full_shapefile_path}'
        logging.error(msg)
        raise Exception(msg)
    shp_layer = shp_source.GetLayer()
    # TODO: How to detect if the shape geometries extend beyond our reference bounds?
    # Filter by the shapefile attribute IDs we want
    shp_layer.SetAttributeFilter(
        f'{attribute_name} in ({",".join([str(i) for i in attribute_ids])})'
    )
    # Rasterize layer; gdal returns 0 (CE_None) on success
    rtn_code = gdal.RasterizeLayer(target_ds, [1], shp_layer,
                                   burn_values=[1])
    if rtn_code == 0:
        target_ds.FlushCache()
        logging.info(
            f'reprojected shapefile from {str(shp_layer.GetSpatialRef()).replace(chr(10), "")} '
            f'with extents {shp_layer.GetExtent()} '
            f'to {self.ds_ref.GetProjectionRef()} with transform {self.ds_ref.GetGeoTransform()}'
        )
    else:
        msg = f'error rasterizing layer: {shp_layer}, gdal returned non-zero value: {rtn_code}'
        logging.exception(msg)
        raise Exception(msg)
    self.subset_mask = SubsetMask(tif_path)
    return tif_path
class ShapefileRasterizer:
    """Class for converting shapefile to raster for use as mask"""

    def __init__(self, input_path, shapefile_name, reference_dataset,
                 no_data=NO_DATA, output_path='.'):
        """
        Parameters
        ----------
        input_path : str
            path to input files (shapefile set)
        shapefile_name : str
            name of shapefile dataset
        reference_dataset : gdal.dataset
            gdal dataset defining the overall domain
        no_data : int, optional
            value to write for no_data cells (default -999)
        output_path : str, optional
            where to write the outputs (default '.')

        Returns
        -------
        ShapefileRasterizer
        """
        # 0 and 1 are reserved for bbox / mask cells in the output raster
        if no_data in [0, 1]:
            # fixed typos in the message: 'ShapfileRasterizer' / 'Do not used'
            raise Exception(
                f'ShapefileRasterizer: '
                f'Do not use reserved values 1 or 0 for no_data value: got no_data={no_data}'
            )
        self.shapefile_path = input_path
        self.output_path = output_path
        self.shapefile_name = shapefile_name
        self.ds_ref = reference_dataset
        self.no_data = no_data
        # TODO Handle shape extension using Pathlib.path
        self.full_shapefile_path = os.path.join(
            self.shapefile_path, '.'.join((self.shapefile_name, 'shp')))
        self.check_shapefile_parts()
        self.subset_mask = None

    def __repr__(self):
        return f"{self.__class__.__name__}(shapefile_path:{self.shapefile_path!r}, " \
               f"shapefile_name:{self.shapefile_name!r}, output_path:{self.output_path!r}, ds_ref:{self.ds_ref!r}, " \
               f"no_data:{self.no_data!r}, full_shapefile_path:{self.full_shapefile_path!r}, " \
               f"subset_mask:{self.subset_mask!r}"

    def check_shapefile_parts(self):
        """verify the required parts of a shapefile are present in the same
        folder

        logs a warning for each missing component file

        Returns
        -------
        None
        """
        shape_parts = [
            ".".join((self.shapefile_name, ext))
            for ext in ['shp', 'dbf', 'prj', 'shx']
        ]
        for shp_component_file in shape_parts:
            if not os.path.isfile(
                    os.path.join(self.shapefile_path, shp_component_file)):
                logging.warning(f'Shapefile path missing {shp_component_file}')

    def reproject_and_mask(self, dtype=gdal.GDT_Int32, no_data=None,
                           attribute_name='OBJECTID', attribute_ids=None):
        """Rasterize the shapefile onto the reference dataset's grid/projection.

        Fix: ``ogr.Open`` returns ``None`` for a missing/unreadable shapefile;
        raise a clear error instead of failing later on ``GetLayer()``.

        Parameters
        ----------
        dtype : gdal.datatype
            the datatype to write (Default value = gdal.GDT_Int32)
        no_data : int or float
            no_data value to use (Default value = None, meaning ``self.no_data``)
        attribute_name : str
            field in the shapefile to trace (Default value = 'OBJECTID')
        attribute_ids : list of ints
            list of attribute ID values to select (Default value = None,
            meaning ``[1]``)

        Returns
        -------
        str
            path (virtual mem) to the reprojected full_dim_mask
        """
        if attribute_ids is None:
            attribute_ids = [1]
        if no_data is None:
            no_data = self.no_data
        geom_ref = self.ds_ref.GetGeoTransform()
        tif_path = f'/vsimem/{self.shapefile_name}.tif'
        # target raster mirrors the reference dataset's grid, filled with no_data
        target_ds = gdal.GetDriverByName('GTiff').Create(
            tif_path,
            self.ds_ref.RasterXSize,
            self.ds_ref.RasterYSize,
            1, dtype)
        target_ds.SetProjection(self.ds_ref.GetProjection())
        target_ds.SetGeoTransform(geom_ref)
        target_ds.GetRasterBand(1).SetNoDataValue(no_data)
        target_ds.GetRasterBand(1).Fill(no_data)
        # shapefile
        shp_source = ogr.Open(self.full_shapefile_path)
        if shp_source is None:
            # ogr.Open signals failure by returning None, not by raising
            msg = f'unable to open shapefile: {self.full_shapefile_path}'
            logging.error(msg)
            raise Exception(msg)
        shp_layer = shp_source.GetLayer()
        # TODO: How to detect if the shape geometries extend beyond our reference bounds?
        # Filter by the shapefile attribute IDs we want
        shp_layer.SetAttributeFilter(
            f'{attribute_name} in ({",".join([str(i) for i in attribute_ids])})'
        )
        # Rasterize layer; gdal returns 0 (CE_None) on success
        rtn_code = gdal.RasterizeLayer(target_ds, [1], shp_layer,
                                       burn_values=[1])
        if rtn_code == 0:
            target_ds.FlushCache()
            logging.info(
                f'reprojected shapefile from {str(shp_layer.GetSpatialRef()).replace(chr(10), "")} '
                f'with extents {shp_layer.GetExtent()} '
                f'to {self.ds_ref.GetProjectionRef()} with transform {self.ds_ref.GetGeoTransform()}'
            )
        else:
            msg = f'error rasterizing layer: {shp_layer}, gdal returned non-zero value: {rtn_code}'
            logging.exception(msg)
            raise Exception(msg)
        self.subset_mask = SubsetMask(tif_path)
        return tif_path

    def rasterize_shapefile_to_disk(self, out_dir=None, out_name=None,
                                    padding=(0, 0, 0, 0),
                                    attribute_name='OBJECTID',
                                    attribute_ids=None):
        """rasterize a shapefile to disk in the projection and extents of the
        reference dataset

        Parameters
        ----------
        out_dir : str
            directory to write outputs (Default value = None, meaning
            ``self.output_path``)
        out_name : str
            filename for outputs (Default value = None, meaning
            ``<shapefile_name>.tif``)
        padding : tuple
            optional padding to add 0's around full_dim_mask
            (Default value = (0, 0, 0, 0))
        attribute_name : str
            optional name of shapefile attribute to select on
            (Default value = 'OBJECTID')
        attribute_ids : list
            optional list of attribute ids in shapefile to select for
            full_dim_mask (Default value = None, meaning ``[1]``)

        Returns
        -------
        ndarray
            3d array with no_data to extents, 0 in bounding box,
            1 in full_dim_mask region
        """
        if attribute_ids is None:
            attribute_ids = [1]
        if out_name is None:
            out_name = f'{self.shapefile_name}.tif'
        if out_dir is None:
            out_dir = self.output_path
        self.reproject_and_mask(attribute_ids=attribute_ids,
                                attribute_name=attribute_name)
        self.subset_mask.add_bbox_to_mask(padding=padding)
        self.subset_mask.write_mask_to_tif(
            filename=os.path.join(out_dir, out_name))
        self.subset_mask.write_bbox(os.path.join(out_dir, 'bbox.txt'))
        return self.subset_mask.mask_array
def subset_conus(input_path=None, shapefile=None, subset_tif=None,
                 mask_value=1, conus_version=1, conus_files='.', out_dir='.',
                 out_name=None, clip_clm=False, run_script=False,
                 padding=(0, 0, 0, 0), attribute_name='OBJECTID',
                 attribute_ids=None, write_tifs=False,
                 manifest_file=conus_manifest):
    """subset a conus domain inputs for running a regional model

    Fixes: input validation no longer uses ``assert`` (which is stripped
    under ``python -O``) — a ``ValueError`` is raised instead; the clipped
    domain/border masks are computed once each instead of three times
    (each ``clip.subset`` call re-clips the full raster); local-variable
    typo ``botom_patchIDs`` corrected.

    Parameters
    ----------
    input_path : str, optional
        path to input shapefile parts to use as mask. Only applicable if
        shapefile is also provided.
    shapefile : str, optional
        name of shapefile to use as mask. Required if subset_tif is not
        provided.
    subset_tif : str, optional
        path to tiff file containing mask. Required if shapefile is not
        provided.
    mask_value : int, optional
        integer value specifying the data value in the tiff file to consider
        as the masking value. Only applicable if subset_tif is provided.
    conus_version : int, optional
        version of the CONUS domain to use (1 or 2) (Default value = 1)
    conus_files : str, optional
        path to the CONUS source input files listed in conus_manifest.yaml
        (Default value = '.')
    out_dir : str, optional
        directory to write the outputs (default .)
    out_name : str, optional
        name to give the outputs (default shapefile name)
    clip_clm : int, optional
        whether or not to clip the CLM input files too (default no)
    run_script : int, optional
        whether or not to build and return a Run object for the subset
        (default no)
    padding : tuple, optional
        grid cells of no_data to add around domain mask. CSS Style
        (top, right, bottom, left) default 0
    attribute_name : str, optional
        attribute name defined in shapefile to select as mask
        default 'OBJECTID'
    attribute_ids : list, optional
        list of attribute ID's defined in shapefile to use as mask input.
        default [1]
    write_tifs : int, optional
        whether or not to write outputs as TIF's in addition to PFB's.
        (default no)

    Returns
    -------
    run_script : parflow.tools.Run
        The Run object which can be used to execute the ParFlow model subset
        that was created by subset_conus
    """
    # exactly one of shapefile / subset_tif must be provided (truthiness XOR)
    if not (bool(shapefile) ^ bool(subset_tif)):
        raise ValueError('Specify either a shapefile or a subset_tif file.')
    if out_name is None:
        out_name = shapefile or os.path.splitext(
            os.path.basename(subset_tif))[0]
    conus = Conus(version=conus_version, local_path=conus_files,
                  manifest_file=manifest_file)
    if attribute_ids is None:
        attribute_ids = [1]

    # Step 1, rasterize shapefile (or load the provided mask tif)
    if shapefile is not None:
        rasterizer = ShapefileRasterizer(
            input_path,
            shapefile,
            reference_dataset=conus.get_domain_tif(),
            no_data=TIF_NO_DATA_VALUE_OUT,
            output_path=out_dir,
        )
        mask_array = rasterizer.rasterize_shapefile_to_disk(
            out_name=f'{out_name}_raster_from_shapefile.tif',
            padding=padding,
            attribute_name=attribute_name,
            attribute_ids=attribute_ids)
        subset_mask = rasterizer.subset_mask
    else:
        subset_mask = SubsetMask(tif_file=subset_tif, mask_value=mask_value)
        mask_array = subset_mask.mask_array

    # Step 2, Generate solid file
    clip = MaskClipper(subset_mask, no_data_threshold=-1)
    sfb = SolidFileBuilder(top=3, bottom=6, side=1).mask(
        clip.subset(mask_array, crop_inner=0)[0][0, :, :])
    # hoisted: each clip.subset() call re-clips the full raster, so clip the
    # domain and border masks once and reuse the results below
    domain_mask_clip = clip.subset(conus.get_domain_mask())[0][0, :, :]
    border_mask_clip = clip.subset(conus.get_border_mask())[0][0, :, :]
    # identify the unique patch ID's assigned to the solid file
    # TODO: get patch defs from a class
    sfb.top_ids(domain_mask_clip)
    sfb.side_ids(border_mask_clip)
    top_patch_ids = np.unique(domain_mask_clip)
    side_patch_ids = np.unique(border_mask_clip)
    side_patch_ids[side_patch_ids == 0] = 2
    bottom_patch_ids = [6]
    patch_ids = np.unique(
        np.concatenate((top_patch_ids, side_patch_ids, bottom_patch_ids)))
    sfb = sfb.write(os.path.join(out_dir, f'{out_name}.pfsol'),
                    cellsize=1000, vtk=True)

    # Step 3. Clip all the domain data inputs
    bulk_clipper.clip_inputs(
        clip,
        [os.path.join(conus.local_path, value)
         for key, value in conus.required_files.items()
         if key not in ['DOMAIN_MASK', 'CHANNELS']],
        out_dir=out_dir,
        tif_outs=write_tifs)

    # Step 4. Clip CLM inputs
    if clip_clm == 1:
        clm_clipper = ClmClipper(subset_mask.get_bbox())
        latlon_formatted, latlon_data = clm_clipper.clip_latlon(
            os.path.join(conus.local_path,
                         conus.optional_files.get('LAT_LON')))
        clm_clipper.write_lat_lon(latlon_formatted,
                                  os.path.join(out_dir,
                                               f'{out_name}_latlon.sa'),
                                  x=latlon_data.shape[2],
                                  y=latlon_data.shape[1],
                                  z=latlon_data.shape[0])
        land_cover_data, vegm_data = clm_clipper.clip_land_cover(
            lat_lon_array=latlon_formatted,
            land_cover_file=os.path.join(
                conus.local_path, conus.optional_files.get('LAND_COVER')))
        clm_clipper.write_land_cover(
            vegm_data, os.path.join(out_dir, f'{out_name}_vegm.dat'))

    # Step 5. Generate Run Script
    if run_script == 1:
        # the Run object reads sys.argv, and this is problematic because
        # they share a common flag -r
        sys.argv = ['Run']
        slopex_file = os.path.join(
            out_dir,
            f'{Path(conus.required_files.get("SLOPE_X")).stem}_clip.pfb')
        slopey_file = os.path.join(
            out_dir,
            f'{Path(conus.required_files.get("SLOPE_Y")).stem}_clip.pfb')
        solid_file = os.path.join(out_dir, f'{out_name}.pfsol')
        bbox = subset_mask.get_bbox()
        extents = bbox.get_padded_extents()
        NX = int(extents[3] - extents[2])
        NY = int(extents[1] - extents[0])
        out_name = f'{out_name}.conus{conus_version}.parking_lot'
        # TODO: associate model templates with models and versions,
        # provide method to override boundary conditions
        run_script = parking_lot_template.get_parking_lot_model(
            out_name, slopex_file, slopey_file, solid_file, NX, NY)
        patch_names = [
            conus.get_patch_name(patch_id)
            for patch_id in patch_ids[patch_ids > TIF_NO_DATA_VALUE_OUT]
        ]
        run_script.Geom.domain.Patches = ' '.join(patch_names)
        # convert patch ID's to patch names for run script
        for patch in patch_names:
            bc = parking_lot_template.get_parking_lot_model_boundary(patch)
            for k, v in bc.items():
                # assign patch Boundary Conditions as defined by
                # CONUS Model 1 or 2
                run_script.Patch.pfset(key=f'{patch}.BCPressure.{k}',
                                       value=v)
        if conus_version == 1:
            # CONUS1 doesn't seem to work well with OverlandKinematic
            run_script.Patch.top.BCPressure.Type = 'OverlandFlow'
        run_script.validate()
        for file_format in ('pfidb', 'yaml', 'json'):
            run_script.write(file_name=os.path.join(out_dir, out_name),
                             file_format=file_format)
    return run_script
def test_mask_print_no_exception(self):
    """Printing a MaskClipper (exercising __repr__/__str__) must not raise."""
    mask = SubsetMask(
        test_files.huc10190004.get('conus1_mask').as_posix())
    clipper = MaskClipper(subset_mask=mask, no_data_threshold=-1)
    # print returns None; the assertion doubles as a smoke test that no
    # exception escaped
    self.assertIsNone(print(clipper))
def test_normal_startup(self):
    """A freshly loaded mask reports default no-data, bbox value, and zero padding."""
    mask = SubsetMask(huc10190004.get('conus1_mask'))
    for attr, expected in (('no_data_value', -999), ('bbox_val', 0)):
        self.assertEqual(expected, getattr(mask, attr))
    self.assertSequenceEqual((0, 0, 0, 0), mask.get_bbox().get_padding())
def test_normal_startup(self):
    """Loading a mask from a posix path string yields the default sentinel values."""
    mask = SubsetMask(huc10190004.get('conus1_mask').as_posix())
    for attr, expected in (('no_data_value', -999), ('bbox_val', 0)):
        self.assertEqual(expected, getattr(mask, attr))