def get_input_area_boundary(input_area):
    """Build the dissolved boundary for one input area.

    Looks up every raster value whose comma-delimited ``id`` entry in
    INPUT_AREA_VALUES contains ``input_area``, selects the matching polygons
    from the input areas feather file, and unions them.

    Parameters
    ----------
    input_area : str
        id of input area

    Returns
    -------
    (Multi)Polygon
        union of all polygons associated with the input area
    """
    values = []
    for entry in INPUT_AREA_VALUES:
        # an entry may be shared by several input areas: ids are comma-delimited
        entry_ids = set(entry["id"].split(","))
        if input_area in entry_ids:
            values.append(entry["value"])

    inputs_df = gp.read_feather(bnd_dir / "input_areas.feather")
    selected = inputs_df.loc[inputs_df.value.isin(values)]

    # have to make valid or we get errors during union for FL
    valid_geometries = pg.make_valid(selected.geometry.values.data)
    return pg.union_all(valid_geometries)
def make_valid(geometry: Optional[sh_geom.base.BaseGeometry]) -> Optional[sh_geom.base.BaseGeometry]:
    """
    Make a geometry valid.

    The repair itself is done by pygeos; the geometry is converted from
    shapely to pygeos and the result is brought back to shapely through WKB.

    Args:
        geometry (Optional[sh_geom.base.BaseGeometry]): A (possibly) invalid
            geometry.

    Returns:
        Optional[sh_geom.base.BaseGeometry]: The fixed geometry, or None if
            the input was None.
    """
    if geometry is None:
        return None

    pygeos_geometry = pygeos.io.from_shapely(geometry)
    repaired = pygeos.make_valid(pygeos_geometry)
    repaired_wkb = pygeos.io.to_wkb(repaired)
    return sh_wkb.loads(repaired_wkb)
def make_valid(geometries):
    """Repair any invalid geometries in the array.

    The input array is never modified: when at least one geometry is invalid
    a copy is made and only the invalid entries are replaced; otherwise the
    input array itself is returned.

    Parameters
    ----------
    geometries : ndarray of pygeos geometries

    Returns
    -------
    ndarray of pygeos geometries
    """
    invalid = ~pg.is_valid(geometries)
    num_invalid = invalid.sum()

    if not num_invalid:
        # nothing to repair; hand back the original array
        return geometries

    repaired = geometries.copy()
    print(f"Repairing {num_invalid} geometries")
    repaired[invalid] = pg.make_valid(repaired[invalid])
    return repaired
def constructive(arr, operation, *args, **kwargs):
    """Apply a pygeos constructive operation to WKB-encoded geometries.

    Parameters
    ----------
    arr : array-like of WKB geometries
        decoded with ``pg.from_wkb`` before the operation is applied
    operation : str
        name of the pygeos constructive function to call
    *args
        extra positional arguments; only forwarded for operations that take
        them (buffer, clip_by_rect, offset_curve, simplify, snap) and ignored
        for every other operation
    **kwargs
        keyword arguments forwarded to the pygeos function

    Returns
    -------
    WKB-encoded result of the operation (via ``pg.to_wkb``), or None (after
    emitting a warning) if the operation is not supported
    """
    # operations whose pygeos function receives the extra positional arguments
    takes_positional = (
        'buffer',
        'clip_by_rect',
        'offset_curve',
        'simplify',
        'snap',
    )
    # operations that are only ever called with keyword arguments
    keywords_only = (
        'boundary',
        'build_area',
        'centroid',
        'convex_hull',
        'delaunay_triangles',
        'envelope',
        'extract_unique_points',
        'make_valid',
        'normalize',
        'point_on_surface',
        'reverse',
        'voronoi_polygons',
    )

    if operation in takes_positional:
        forwarded = args
    elif operation in keywords_only:
        forwarded = ()
    else:
        warnings.warn(f'Operation {operation} not supported.')
        return None

    func = getattr(pg, operation)
    geometries = func(pg.from_wkb(arr), *forwarded, **kwargs)
    return pg.to_wkb(geometries)
where=f"State_Nm in ({states})", ) # set the CRS, it is same as 5070 but not recognized properly df = df.set_crs(DATA_CRS) # drop BOEM lease block groups df = df.loc[df.Agg_Src != "USGS_PADUS2_0Marine_BOEM_Block_Dissolve"].drop( columns=["Agg_Src"]) tree = pg.STRtree(df.geometry.values.data) ix = tree.query(bnd_df.geometry.values.data[0], predicate="intersects") df = df.iloc[ix].copy() print("making valid...") df["geometry"] = pg.make_valid(df.geometry.values.data) df = explode(df).reset_index() # there are some geometry errors after cleaning up above, keep only polys df = df.loc[pg.get_type_id(df.geometry.values.data) == 3].copy() print("Writing files") df.to_feather(out_dir / "ownership.feather") write_dataframe(df, data_dir / "boundaries/ownership.gpkg", driver="GPKG") # Write for tiles print("Writing GeoJSON for tiles") write_dataframe( df[["geometry", "Own_Type", "GAP_Sts"]].to_crs(GEO_CRS), tile_dir / "ownership.geojson", driver="GeoJSONSeq",
{"break_geometry": breaks.take(left)}, index=df.index.take(right) ) grouped = pairs.groupby(level=0).break_geometry.apply( lambda g: pg.multipolygons(g.values.data) ) df.loc[grouped.index, "geometry"] = pg.difference( df.loc[grouped.index].geometry.values.data, grouped.values ) df = explode(df).reset_index(drop=True) # make sure all polygons are valid ix = ~pg.is_valid(df.geometry.values.data) if ix.sum(): print(f"Repairing {ix.sum()} invalid waterbodies") df.loc[ix, "geometry"] = pg.make_valid(df.loc[ix].geometry.values.data) df = explode(explode(df)) df = df.loc[pg.get_type_id(df.geometry.values.data) == 3].reset_index() # assign a new unique wbID df["wbID"] = df.index.values.astype("uint32") + 1 + int(huc2) * 1000000 df["km2"] = pg.area(df.geometry.values.data) / 1e6 df.to_feather(huc2_dir / "waterbodies.feather") write_dataframe(df, huc2_dir / "waterbodies.gpkg") print("--------------------") print(f"HUC2: {huc2} done in {time() - huc2_start:.0f}s\n\n") print(f"Done in {time() - start:.2f}s\n============================")
county_df = (read_dataframe( county_filename, columns=["NAME", "GEOID", "STATEFP"], ).to_crs(CRS).rename(columns={ "NAME": "County", "GEOID": "COUNTYFIPS", "STATEFP": "STATEFIPS" })) # keep only those within the region HUC4 outer boundary tree = pg.STRtree(county_df.geometry.values.data) ix = np.unique( tree.query_bulk(huc4_df.geometry.values.data, predicate="intersects")[1]) ix.sort() county_df = county_df.iloc[ix].reset_index(drop=True) county_df.geometry = pg.make_valid(county_df.geometry.values.data) # keep larger set for spatial joins county_df.to_feather(out_dir / "counties.feather") # Subset these in the region and SARP for tiles write_dataframe( county_df.loc[county_df.STATEFIPS.isin(states)].rename(columns={ "COUNTYFIPS": "id", "County": "name" }), out_dir / "region_counties.gpkg", ) write_dataframe( county_df.loc[county_df.STATEFIPS.isin(sarp_states)].rename( columns={
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.
    Zip must contain either a shapefile or a file geodatabase.

    Progress is reported through ``set_progress`` using ``ctx["job_id"]`` at
    each major step (loading, preparing, calculating, mapping, PDF creation).

    Parameters
    ----------
    ctx : job context
        must provide the key "job_id"
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    (str, list of str)
        path to the output PDF file, and a list of non-fatal error messages
        collected while rendering maps (empty if there were none)

    Raises
    ------
    DataError
        Raised if the bounding box of the area of interest is too large
        (>= CUSTOM_REPORT_MAX_ACRES) or if the area of interest doesn't
        overlap the Blueprint (no results are returned for it)
    """
    # non-fatal problems are collected here and passed along with progress
    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    # read the dataset directly out of the zip file via the /vsizip/ prefix
    path = f"/vsizip/{zip_filename}/{dataset}"

    df = read_dataframe(path, layer=layer)

    geometry = pg.make_valid(df.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve all features into a single geometry (kept as a 1-element array)
    geometry = np.asarray([pg.union_all(geometry)])

    # geographic version of the geometry is used for the map bounds and overlay
    geo_geometry = to_crs(geometry, df.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area from the bounding box of the geometry in DATA_CRS
    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS))))
        * M2_ACRES)

    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10, "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name).get_results()

    # get_results() returning None is treated as "no overlap"
    if results is None:
        raise DataError(
            "area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    # determine which optional maps to render based on what is in the results
    # NOTE(review): index 4 of "proj_urban" is presumably a specific projection
    # period; confirm against CustomArea.get_results()
    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = []
    for input_area in results["inputs"]:
        for ecosystem in input_area.get("ecosystems", []):
            indicators.extend([i["id"] for i in ecosystem["indicators"]])

    await set_progress(ctx["job_id"], 25, "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    # map rendering errors are not fatal: log them and translate them into
    # user-facing messages that accompany the remaining progress updates
    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")
        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")

        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")

        # any key other than "basemap" / "aoi" is reported generically
        if set(map_errors.keys()).difference(["basemap", "aoi"]):
            errors.append("Error creating one or more maps")

    await set_progress(ctx["job_id"], 75, "Creating PDF (this might take a while)", errors=errors)

    results["scale"] = scale

    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    # NOTE: this rebinds `name` from the area-of-interest name to the path of
    # the temporary PDF file; `fp` is the open OS-level file descriptor, which
    # is closed when the `with` block exits
    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
# {"name": "TriState area at junction of MO, OK, KS", "path": "TriState"}, # {"name": "Quincy, FL area", "path": "Quincy"}, # {"name": "Doyle Springs, TN area", "path": "DoyleSprings"}, # {"name": "Cave Spring, VA area", "path": "CaveSpring"}, # {"name": "South Atlantic Offshore", "path": "SAOffshore"}, # {"name": "Florida Offshore", "path": "FLOffshore"} ] for aoi in aois: name = aoi["name"] path = aoi["path"] print(f"Creating report for {name}...") start = time() df = read_dataframe(f"examples/{path}.shp", columns=[]) geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) extent_area = ( pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES) print("Area of extent", extent_area.round()) ### calculate results, data must be in DATA_CRS print("Calculating results...") results = CustomArea(geometry, df.crs, name=name).get_results() if results is None: print(f"AOI: {path} does not overlap Blueprint")
def test_make_valid_1d(geom, expected):
    """make_valid on an array of geometries matches the expected geometries
    element-wise once both sides are normalized."""
    repaired = pygeos.make_valid(geom)

    # normalize needed to handle variation in output across GEOS versions
    normalized_actual = pygeos.normalize(repaired)
    normalized_expected = pygeos.normalize(expected)

    assert np.all(normalized_actual == normalized_expected)
tile_dir = data_dir / "for_tiles"

# make sure output directories exist before anything is written to them
if not out_dir.exists():
    os.makedirs(out_dir)

if not tile_dir.exists():
    os.makedirs(tile_dir)

### Extract the boundary
# only the geometry column is kept from the source layer
bnd_df = read_dataframe(
    src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb",
    layer="SECAS_Boundary_2021_20211117",
)[["geometry"]]

# boundary has multiple geometries: repair them, then union them together into
# a single-row GeoDataFrame that keeps the source CRS
bnd_df = gp.GeoDataFrame(
    geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))],
    index=[0],
    crs=bnd_df.crs,
)
bnd_df.to_feather(out_dir / "se_boundary.feather")
write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb")

# create GeoJSON for tiling (reprojected to GEO_CRS)
bnd_geo = bnd_df.to_crs(GEO_CRS)
write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq")

### Create mask by cutting the boundary out of world bounds
print("Creating mask...")
# world extent is limited to +/- 85 degrees latitude
# NOTE(review): presumably to stay within web-mercator tile limits; confirm
world = pg.box(-180, -85, 180, 85)
mask = pg.normalize(pg.difference(world, bnd_geo.geometry.values.data))
from analysis.lib.pygeos_util import explode src_dir = Path("source_data") data_dir = Path("data") out_dir = data_dir / "inputs/boundaries" # used as inputs for other steps tile_dir = data_dir / "for_tiles" sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp") ### Create mask by cutting SA bounds out of world bounds print("Creating mask...") world = pg.box(-180, -85, 180, 85) # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) bnd_geo = pg.union_all( pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data)) mask = pg.normalize(pg.difference(world, bnd_geo)) gp.GeoDataFrame(geometry=[bnd], crs=DATA_CRS).to_feather(out_dir / "sa_boundary.feather") write_dataframe( gp.GeoDataFrame({"geometry": bnd_geo}, index=[0], crs=GEO_CRS), tile_dir / "sa_boundary.geojson", driver="GeoJSONSeq", ) write_dataframe( gp.GeoDataFrame({"geometry": mask}, index=[0], crs=GEO_CRS),
def summarize_by_areas(df, state, rank_only=False):
    """Calculate acres by value and area-weighted value for each CHAT field in fields.

    Note: if the index of ``df`` has no name, it is named "index" in place on
    the DataFrame that was passed in.

    Parameters
    ----------
    df : GeoDataFrame
        area(s) of interest
    state : str
        one of ['ok', 'tx']
    rank_only : bool (default False)
        if True, will only calculate areas for CHAT Rank

    Returns
    -------
    DataFrame or None
        columns for total_acres, analysis_acres, chat_acres, the area-weighted
        average of each field (bare field name), and one ``<field>_<n>``
        column per area value of each field.
        None if no area of interest has a non-zero overlap with CHAT.
    """
    # the index is moved into a regular column so it survives the intersection
    if not df.index.name:
        df.index.name = "index"

    index_name = df.index.name

    df = df.reset_index()

    chat_df = gp.read_feather(chat_dir / f"{state}chat.feather")

    # chatrank is always summarized; other indicators only when requested
    fields = ["chatrank"]
    if not rank_only:
        fields += [e["id"] for e in INPUTS[f"{state}chat"]["indicators"]]

    print("Intersecting with CHAT...")
    chat_df = intersection(df, chat_df)
    # NOTE(review): geometry_right is presumably the intersected geometry
    # produced by intersection(); confirm against that helper
    chat_df["acres"] = pg.area(chat_df.geometry_right.values.data) * M2_ACRES
    chat_df = chat_df.loc[chat_df.acres > 0].copy()

    if not len(chat_df):
        return None

    # total acres of each area of interest that overlaps CHAT
    # total_acres = chat_df.groupby(index_name).geometry.first()
    total_acres = df.loc[df[index_name].isin(chat_df[index_name])].set_index(index_name)
    total_acres["total_acres"] = pg.area(total_acres.geometry.values.data) * M2_ACRES

    results = pd.DataFrame(
        chat_df.groupby(index_name).acres.sum().rename("chat_acres")
    ).join(total_acres[["total_acres"]], how="left")

    # intersect edge units with SE input areas to determine areas outside
    # edge units are those where CHAT covers at least 1 acre less than the
    # total area of the unit
    edge_df = explode(
        df.loc[
            df[index_name].isin(
                results.loc[(results.chat_acres < results.total_acres - 1)].index
            )
        ].copy()[[index_name, "geometry"]]
    )

    print("Intersecting with input areas, this may take a while...")
    input_df = gp.read_feather(input_filename).reset_index(drop=True)

    # this is inverted because input_df performs better if prepared (left side)
    # note: we don't do intersection() here because of topology errors
    left = pd.Series(input_df.geometry.values.data, index=input_df.index)
    right = pd.Series(edge_df.geometry.values.data, index=edge_df.index)
    intersects = sjoin_geometry(left, right, predicate="intersects")

    # only the input areas that intersect at least one edge unit
    tmp = input_df.loc[intersects.index.unique()]
    # have to make valid first or fails with topology errors
    tmp.geometry = pg.make_valid(tmp.geometry.values.data)
    # clip to general area, otherwise intersection takes a way long time
    clip_box = pg.box(*pg.total_bounds(edge_df.geometry.values.data))
    tmp.geometry = pg.intersection(tmp.geometry.values.data, clip_box)

    # pair each clipped input area with the edge units it intersects, then
    # replace the edge unit geometry with the actual intersection
    tmp = tmp.join(intersects, how="inner").join(
        edge_df, on="index_right", rsuffix="_right"
    )
    tmp.geometry_right = pg.intersection(
        tmp.geometry.values.data, tmp.geometry_right.values.data
    )
    tmp["acres"] = pg.area(tmp.geometry_right.values.data) * M2_ACRES

    analysis_acres = (
        tmp.groupby(index_name)
        .acres.sum()
        .round(ACRES_PRECISION)
        .rename("analysis_acres")
    )

    # join analysis acres back to results
    results = results.join(analysis_acres)
    # units that were not edge units have no analysis_acres from the join;
    # fall back to their total acres
    results.loc[results.analysis_acres.isnull(), "analysis_acres"] = results.total_acres

    area_results = dict()
    avg_results = dict()
    for field in fields:
        # Note: values are categorical, so this will add 0 area values for each category
        grouped = (
            chat_df.groupby([index_name, field])
            .acres.sum()
            .fillna(0)
            .round(ACRES_PRECISION)
            .reset_index()
        )

        # create an array of [<acres for value 0>, <acres for value 1>,... ]
        area_results[field] = grouped.groupby(index_name).acres.apply(np.array)

        # exclude nodata to calculate area-weighted average
        values = grouped.loc[grouped[field] > 0].set_index(index_name)
        # note: rebinds total_acres (a DataFrame above) to a per-unit Series
        total_acres = values.groupby(level=0).acres.sum().rename("total")
        values = values.join(total_acres)
        values["wtd_value"] = (values.acres / values.total) * values[field].astype(
            "uint8"
        )
        avg_results[field] = values.groupby(level=0).wtd_value.sum().round(1)

    area_results = pd.DataFrame(area_results)
    avg_results = pd.DataFrame(avg_results)

    # averages are stored under the bare field name; missing values become 0
    results = results.join(avg_results).fillna(0)

    for field in fields:
        # convert areas array to columns
        s = area_results[field].apply(pd.Series)
        s.columns = [f"{field}_{c}" for c in s.columns]
        # drop any that are all 0; these are not present
        s = s.drop(columns=s.columns[s.max() == 0].tolist())
        results = results.join(s)

    return results
message=".*initial implementation of Parquet.*") from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES src_dir = Path("source_data") data_dir = Path("data") analysis_dir = data_dir / "inputs/summary_units" bnd_dir = data_dir / "boundaries" # GPKGs output for reference tile_dir = data_dir / "for_tiles" ### Extract the boundary sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")[["geometry"]] # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) ### Extract HUC12 within boundary print("Reading source HUC12s...") merged = None for huc2 in [2, 3, 5, 6]: df = read_dataframe( src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb", layer="WBDHU12", )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"}) if merged is None: merged = df else: merged = merged.append(df, ignore_index=True)
def test_make_valid_none():
    """A missing geometry (None) passes through make_valid unchanged."""
    assert pygeos.make_valid(None) is None
def test_make_valid(geom, expected):
    """make_valid returns a new geometry that equals the expected geometry
    after normalization."""
    repaired = pygeos.make_valid(geom)

    # the result must be a distinct object, not the expected instance itself
    assert repaired is not expected

    # normalize needed to handle variation in output across GEOS versions
    normalized = pygeos.normalize(repaired)
    assert normalized == expected
### Process waterbodies # only keep that intersect flowlines print(f"Extracted {len(waterbodies):,} NWI lakes and ponds") left, right = tree.query_bulk(waterbodies.geometry.values.data, predicate="intersects") waterbodies = waterbodies.iloc[np.unique(left)].reset_index(drop=True) print(f"Kept {len(waterbodies):,} that intersect flowlines") # TODO: explode, repair, dissolve, explode, reset index waterbodies = explode(waterbodies) # make valid ix = ~pg.is_valid(waterbodies.geometry.values.data) if ix.sum(): print(f"Repairing {ix.sum():,} invalid waterbodies") waterbodies.loc[ix, "geometry"] = pg.make_valid( waterbodies.loc[ix].geometry.values.data) # note: nwi_code, nwi_type are discarded here since they aren't used later print("Dissolving adjacent waterbodies") waterbodies = dissolve(waterbodies, by=["altered"]) waterbodies = explode(waterbodies).reset_index(drop=True) waterbodies["km2"] = pg.area(waterbodies.geometry.values.data) / 1e6 waterbodies.to_feather(huc2_dir / "waterbodies.feather") write_dataframe(waterbodies, huc2_dir / "waterbodies.gpkg") ### Process riverine print(f"Extracted {len(rivers):,} NWI altered river polygons") left, right = tree.query_bulk(rivers.geometry.values.data, predicate="intersects")
def convert_to_polygon(input_array, trim_invalid_geometry=False, autocorrect_invalid_geometry=False):
    r"""Convert an input array to a Polygon array.

    Args:
        input_array (ndarray, list): A ndarray of Polygons optionally followed by a confidence value and/or a label
            where each row is: ``[[[outer_ring], [inner_rings]], (confidence), (label)]``
        trim_invalid_geometry (bool): Optional, default to ``False``. If set to ``True`` conversion will ignore invalid
            geometries and leave them out of ``output_array``. This means that the function will return an array where
            ``output_array.shape[0] <= input_array.shape[0]``. If set to ``False``, an invalid geometry will raise an
            :exc:`~playground_metrics.utils.geometry_utils.InvalidGeometryError`.
        autocorrect_invalid_geometry (Bool): Optional, default to ``False``. Whether to attempt correcting a faulty
            geometry to form a valid one. If set to ``True`` and the autocorrect attempt is unsuccessful, it falls back
            to the behaviour defined in ``trim_invalid_geometry``.

    Note:
        * Polygon auto-correction only corrects self-crossing exterior rings, in which case it creates one Polygon out
          of every simple ring which might be extracted from the original Polygon exterior.
        * Polygon auto-correction will systematically fail on Polygons with at least one inner ring.
        * For an empty ``input_array`` the function returns early with the tuple ``('undefined', input_array)``
          instead of a Polygon array.

    Returns:
        ndarray: A Polygon ndarray where each row contains a geometry followed by optionally confidence and a label
        e.g.: ``[Polygon, (confidence), (label)]``

    Raises:
        ValueError: If ``input_array`` have invalid dimensions, or if some geometries could not be converted to
            polygons.

    """
    input_array = np.array(input_array, dtype=np.dtype('O'))

    if input_array.size == 0:
        return 'undefined', input_array

    ndim = len(input_array.shape)

    # Only 2D arrays (plus the special 3D / 5D layouts checked below) are accepted; this also rejects 0-d input,
    # which would otherwise fail later with an opaque IndexError.
    if ndim not in (2, 3, 5):
        raise ValueError('Invalid array number of dimensions: '
                         'Expected a 2D array, found {}D.'.format(ndim))

    # Report the size of the offending dimension (not the number of dimensions) in the error message.
    if ndim == 5 and not input_array.shape[4] == 2:
        raise ValueError('Invalid array fifth dimension: '
                         'Expected 2, found {}.'.format(input_array.shape[4]))
    elif ndim == 3 and not input_array.shape[2] == 1:
        raise ValueError('Invalid array third dimension: '
                         'Expected 1, found {}.'.format(input_array.shape[2]))

    object_array = np.ndarray((input_array.shape[0], input_array.shape[1]), dtype=np.dtype('O'))

    for i, coordinate in enumerate(input_array[:, 0]):
        # coordinate[0] is the outer ring, any following entries are inner rings
        line = [polygons(np.array(coordinate[0], dtype=np.float64),
                         np.array(coordinate[1:], dtype=np.float64))] \
            if len(coordinate) > 1 else [polygons(np.array(coordinate[0], dtype=np.float64))]
        # carry over the optional confidence / label columns untouched
        line.extend(input_array[i, 1:])
        object_array[i] = np.array(line, dtype=np.dtype('O'))

    if autocorrect_invalid_geometry:
        object_array[:, 0] = _clean_multi_geometries(make_valid(object_array[:, 0]))

    if trim_invalid_geometry:
        object_array = object_array[is_valid(object_array[:, 0]), :]

    if not np.all(is_type(object_array[:, 0], GeometryType.POLYGON)):
        raise ValueError(
            'Conversion is impossible: Some geometries could not be converted to valid polygons.'
        )

    return object_array
def test_make_valid(geom, expected):
    """make_valid returns a new geometry equal to the expected geometry."""
    repaired = pygeos.make_valid(geom)

    # the result must be a distinct object, not the expected instance itself
    assert repaired is not expected
    assert repaired == expected
def test_make_valid_1d(geom, expected):
    """make_valid on an array of geometries matches the expected geometries
    element-wise."""
    repaired = pygeos.make_valid(geom)
    matches = repaired == expected
    assert np.all(matches)
ui_dir = Path("ui/data") state_filename = data_dir / "boundaries/source/tl_2019_us_state/tl_2019_us_state.shp" wbd_gdb = data_dir / "nhd/source/wbd/WBD_National_GDB/WBD_National_GDB.gdb" ### Construct region and SARP boundaries from states print("Processing states...") state_df = (read_dataframe( state_filename, columns=["STUSPS", "STATEFP", "NAME"], ).to_crs(CRS).rename(columns={ "STUSPS": "id", "NAME": "State", "STATEFP": "STATEFIPS" })) state_df.geometry = pg.make_valid(state_df.geometry.values.data) # save all states for spatial joins state_df.to_feather(out_dir / "states.feather") state_df = state_df.loc[state_df.id.isin(STATES.keys())].copy() state_df.to_feather(out_dir / "region_states.feather") write_dataframe( state_df[["State", "geometry"]].rename(columns={"State": "id"}), out_dir / "region_states.gpkg", ) # dissolve to create outer state boundary for total analysis area and regions bnd_df = gp.GeoDataFrame( [ {
nhd_dams = dissolve( explode(nhd_dams), by=["HUC2", "source", "group"], agg={ "GNIS_Name": lambda n: ", ".join({s for s in n if s}), # set missing NHD fields as 0 "FType": lambda n: ", ".join({str(s) for s in n}), "FCode": lambda n: ", ".join({str(s) for s in n}), "NHDPlusID": lambda n: ", ".join({str(s) for s in n}), }, ).reset_index(drop=True) # fill in missing values nhd_dams.GNIS_Name = nhd_dams.GNIS_Name.fillna("") nhd_dams.geometry = pg.make_valid(nhd_dams.geometry.values.data) nhd_dams["damID"] = nhd_dams.index.copy() nhd_dams.damID = nhd_dams.damID.astype("uint32") nhd_dams = nhd_dams.set_index("damID") merged = None for huc2 in huc2s: region_start = time() print(f"----- {huc2} ------") dams = nhd_dams.loc[nhd_dams.HUC2 == huc2, ["geometry"]].copy() print("Reading flowlines...")
else: merged = merged.append(df, ignore_index=True) print("Projecting to match SE region data...") huc12 = merged.to_crs(DATA_CRS) # select out those within the SE states print("Selecting HUC12s in region...") tree = pg.STRtree(huc12.geometry.values.data) ix = tree.query(bnd, predicate="intersects") huc12 = huc12.iloc[ix].copy().reset_index(drop=True) # make sure data are valid huc12["geometry"] = pg.make_valid(huc12.geometry.values.data) # calculate area huc12["acres"] = (pg.area(huc12.geometry.values.data) * M2_ACRES).round().astype("uint") # for those that touch the edge of the region, drop any that are not >= 50% in # raster input area. We are not able to use polygon intersection because it # takes too long. tree = pg.STRtree(huc12.geometry.values.data) ix = tree.query(bnd, predicate="contains") edge_df = huc12.loc[~huc12.id.isin(huc12.iloc[ix].id)].copy() geometries = pd.Series(edge_df.geometry.values.data, index=edge_df.id) drop_ids = [] for id, geometry in Bar( "Calculating HUC12 overlap with input area", max=len(geometries)