def test_polygons_geofeather(tmpdir, pg_polygons_wgs84):
    """Confirm that we can round-trip polygons to / from feather file"""

    filename = tmpdir / "polygons_wgs84.feather"
    to_geofeather(pg_polygons_wgs84, filename, crs="EPSG:4326")

    assert os.path.exists(filename)

    df = from_geofeather(filename)
    cols = df.columns.drop("geometry")
    assert_frame_equal(df[cols], pg_polygons_wgs84[cols])
    assert_geometry_equal(df.geometry, pg_polygons_wgs84.geometry)

    assert df.crs == GEO_CRS
def test_points_geofeather_no_crs(tmpdir, pg_points_wgs84):
    """Confirm that we can round-trip points to / from feather file"""

    filename = tmpdir / "points_wgs84.feather"
    to_geofeather(pg_points_wgs84, filename)

    assert os.path.exists(filename)

    # reading a file without a stored CRS should raise a warning
    with pytest.warns(UserWarning):
        df = from_geofeather(filename)

    cols = df.columns.drop("geometry")
    assert_frame_equal(df[cols], pg_points_wgs84[cols])
    assert_geometry_equal(df.geometry, pg_points_wgs84.geometry)

    assert df.crs is None
def snap_to_large_waterbodies(df, to_snap):
    """Snap to nearest large waterbody.

    NOTE: only run this on dams that could not snap to flowlines, to avoid
    moving them far away.

    This captures large dam centerpoints that are not near enough to flowlines.

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    wb = from_geofeather(nhd_dir / "merged" / "large_waterbodies.feather").set_index(
        "wbID"
    )
    drains = (
        from_geofeather(nhd_dir / "merged" / "large_waterbody_drain_points.feather")
        .rename(columns={"id": "drainID"})
        .set_index("drainID")
    )

    # find dams within NEAR_WB_TOLERANCE of the boundary of a large waterbody,
    # then join in the drain point(s) for that waterbody
    near_wb = nearest(to_snap.geometry, pg.boundary(wb.geometry), NEAR_WB_TOLERANCE)
    near_wb = (
        pd.DataFrame(near_wb)
        .join(to_snap.geometry)
        .join(
            drains.reset_index()
            .set_index("wbID")[["geometry", "drainID", "lineID"]]
            .rename(columns={"geometry": "drain"}),
            on="wbID",
        )
        .dropna(subset=["drain"])
    )
    near_wb["snap_dist"] = pg.distance(near_wb.geometry, near_wb.drain)

    # drop any that are > 250m from the drain; these aren't useful
    near_wb = near_wb.loc[near_wb.snap_dist <= WB_DRAIN_MAX_TOLERANCE].copy()

    # take the closest drain point
    near_wb = near_wb.sort_values(by="snap_dist").groupby(level=0).first()

    ix = near_wb.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = near_wb.drain
    # record distance to the drain point the dam was moved to
    df.loc[ix, "snap_dist"] = near_wb.snap_dist
    df.loc[ix, "snap_ref_id"] = near_wb.drainID
    df.loc[ix, "lineID"] = near_wb.lineID
    df.loc[ix, "wbID"] = near_wb.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        WB_DRAIN_MAX_TOLERANCE,
        "m tolerance of drain point of large waterbody that is within ",
        NEAR_WB_TOLERANCE,
        "m of dam",
    )

    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

    print(
        "Found {:,} dams within {}m of large waterbodies and within {}m of the drain point of those waterbodies".format(
            len(near_wb), NEAR_WB_TOLERANCE, WB_DRAIN_MAX_TOLERANCE
        )
    )

    return df, to_snap
def snap_to_flowlines(df, to_snap):
    """Snap to nearest flowline, within tolerance.

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    for region, HUC2s in list(REGION_GROUPS.items()):
        region_start = time()

        print("\n----- {} ------\n".format(region))
        print("Reading flowlines...")
        flowlines = from_geofeather(
            nhd_dir / "clean" / region / "flowlines.feather"
        ).set_index("lineID")

        in_region = to_snap.loc[to_snap.HUC2.isin(HUC2s)]
        print(
            "Selected {:,} barriers in region to snap against {:,} flowlines".format(
                len(in_region), len(flowlines)
            )
        )

        if len(in_region) == 0:
            print("No barriers in region to snap")
            continue

        print("Finding nearest flowlines...")
        # TODO: can use near instead of nearest, and persist list of near lineIDs
        # per barrier so that we can construct subnetworks with just those
        lines = nearest(in_region.geometry, flowlines.geometry, in_region.snap_tolerance)
        lines = lines.join(in_region.geometry).join(
            flowlines.geometry.rename("line"),
            on="lineID",
        )

        # project the point to the line,
        # find out its distance on the line,
        # then interpolate its new coordinates
        lines["geometry"] = pg.line_interpolate_point(
            lines.line, pg.line_locate_point(lines.line, lines.geometry)
        )

        ix = lines.index
        df.loc[ix, "snapped"] = True
        df.loc[ix, "geometry"] = lines.geometry
        df.loc[ix, "snap_dist"] = lines.distance
        df.loc[ix, "snap_ref_id"] = lines.lineID
        df.loc[ix, "lineID"] = lines.lineID
        df.loc[ix, "snap_log"] = ndarray_append_strings(
            "snapped: within ",
            to_snap.loc[ix].snap_tolerance,
            "m tolerance of flowline",
        )

        to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

        print(
            "{:,} barriers snapped in region in {:.2f}s".format(
                len(ix), time() - region_start
            )
        )

    # TODO: flag those that joined to loops

    return df, to_snap
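def _demo_line_snap():
    # Tiny worked example (hypothetical; not part of the pipeline) of the
    # projection trick used in snap_to_flowlines: pg.line_locate_point returns
    # the distance along the line of the position nearest to the point, and
    # pg.line_interpolate_point converts that distance back into on-line
    # coordinates.
    line = pg.linestrings([[0, 0], [10, 0]])
    pt = pg.points(3, 4)
    snapped = pg.line_interpolate_point(line, pg.line_locate_point(line, pt))
    # snapped is POINT (3 0): the closest position on the line to pt
    return snapped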
def snap_to_nhd_dams(df, to_snap):
    """Attempt to snap points from to_snap to NHD dams.

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    print("Snapping to NHD dams...")
    # NOTE: id is not unique for points
    nhd_dams_poly = (
        from_geofeather(nhd_dir / "merged" / "nhd_dams_poly.feather")
        .rename(columns={"id": "damID"})
        .set_index("damID")
        .drop(columns=["index"], errors="ignore")
    )
    nhd_dams = (
        from_geofeather(nhd_dir / "merged" / "nhd_dams_pt.feather")
        .rename(columns={"id": "damID"})
        .set_index("damID")
        .drop(columns=["index"], errors="ignore")
    )
    # set nulls back to na
    nhd_dams.wbID = nhd_dams.wbID.replace(-1, np.nan)

    ### Find dams that are really close (50m) to NHD dam polygons
    # Those that have multiple dams nearby are usually part of a dam complex
    snap_start = time()
    near_nhd = nearest(
        to_snap.geometry, nhd_dams_poly.geometry, distance=NHD_DAM_TOLERANCE
    )[["damID"]]

    # snap to nearest dam point for that dam (some are > 1 km away)
    # NOTE: this will create multiple entries for some dams
    near_nhd = near_nhd.join(to_snap.geometry.rename("source_pt")).join(
        nhd_dams, on="damID"
    )
    near_nhd["snap_dist"] = pg.distance(near_nhd.geometry, near_nhd.source_pt)
    near_nhd = (
        near_nhd.reset_index().sort_values(by=["id", "snap_dist"]).groupby("id").first()
    )

    ix = near_nhd.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = near_nhd.geometry
    df.loc[ix, "snap_dist"] = near_nhd.snap_dist
    df.loc[ix, "snap_ref_id"] = near_nhd.damID
    df.loc[ix, "lineID"] = near_nhd.lineID
    df.loc[ix, "wbID"] = near_nhd.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ", NHD_DAM_TOLERANCE, "m of NHD dam polygon"
    )
    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()
    print(
        "Snapped {:,} dams to NHD dam polygons in {:.2f}s".format(
            len(ix), time() - snap_start
        )
    )

    ### Find dams that are close (within snapping tolerance) of NHD dam points
    snap_start = time()
    tmp = nhd_dams.reset_index()  # reset index so we have unique index to join on
    near_nhd = nearest(
        to_snap.geometry, tmp.geometry, distance=to_snap.snap_tolerance
    ).rename(columns={"distance": "snap_dist"})

    near_nhd = near_nhd.join(to_snap.geometry.rename("source_pt")).join(
        tmp, on="index_right"
    )
    near_nhd = (
        near_nhd.reset_index().sort_values(by=["id", "snap_dist"]).groupby("id").first()
    )

    ix = near_nhd.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = near_nhd.geometry
    df.loc[ix, "snap_dist"] = near_nhd.snap_dist
    df.loc[ix, "snap_ref_id"] = near_nhd.damID
    df.loc[ix, "lineID"] = near_nhd.lineID
    df.loc[ix, "wbID"] = near_nhd.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        to_snap.loc[ix].snap_tolerance,
        "m tolerance of NHD dam point but >",
        NHD_DAM_TOLERANCE,
        "m from NHD dam polygon",
    )
    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()
    print(
        "Snapped {:,} dams to NHD dam points in {:.2f}s".format(
            len(ix), time() - snap_start
        )
    )

    ### TODO: identify any NHD dam points that didn't get claimed
    # (need to do this after snapping others)

    return df, to_snap
def snap_to_waterbodies(df, to_snap):
    """Attempt to snap points from to_snap to waterbody drain points.

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    ### Attempt to snap to waterbody drain points for major waterbodies
    # Use larger tolerance for larger waterbodies
    print("Snapping to waterbodies and drain points..")
    wb = from_geofeather(nhd_dir / "merged" / "waterbodies.feather").set_index("wbID")
    drains = (
        from_geofeather(nhd_dir / "merged" / "waterbody_drain_points.feather")
        .rename(columns={"id": "drainID"})
        .set_index("drainID")
    )

    ### First pass - find the dams that are contained by waterbodies
    contained_start = time()
    in_wb = sjoin(to_snap, wb, how="inner").index_right.rename("wbID")

    # update wbID in dataset, but this doesn't mean it is snapped
    ix = in_wb.index
    df.loc[ix, "wbID"] = in_wb

    print(
        "Found {:,} dams in waterbodies in {:.2f}s".format(
            len(in_wb), time() - contained_start
        )
    )

    print("Finding nearest drain points...")
    snap_start = time()
    # join back to pygeos geoms and join to drains
    # NOTE: this may produce multiple drains for some waterbodies
    in_wb = (
        pd.DataFrame(in_wb)
        .join(to_snap[["geometry", "snap_tolerance"]])
        .join(
            drains.reset_index()
            .set_index("wbID")[["geometry", "drainID", "lineID"]]
            .rename(columns={"geometry": "drain"}),
            on="wbID",
        )
        .dropna(subset=["drain"])
    )
    in_wb["snap_dist"] = pg.distance(in_wb.geometry, in_wb.drain)

    # drop any that are > 500m away; these aren't useful
    in_wb = in_wb.loc[in_wb.snap_dist <= 500].copy()

    # take the closest drain point
    in_wb.index.name = "index"
    in_wb = (
        in_wb.reset_index()
        .sort_values(by=["index", "snap_dist"])
        .groupby("index")
        .first()
    )

    # Any that are within the snap tolerance just snap to that drain
    close_enough = in_wb.loc[in_wb.snap_dist <= in_wb.snap_tolerance]
    ix = close_enough.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = close_enough.drain
    df.loc[ix, "snap_dist"] = close_enough.snap_dist
    df.loc[ix, "snap_ref_id"] = close_enough.drainID
    df.loc[ix, "lineID"] = close_enough.lineID
    df.loc[ix, "wbID"] = close_enough.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        to_snap.loc[ix].snap_tolerance,
        "m tolerance of drain point for waterbody that contains this dam",
    )

    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

    print(
        "Found {:,} dams within tolerance of the drain points for their waterbody in {:.2f}s".format(
            len(ix), time() - snap_start
        )
    )

    # Any that are > tolerance away from their own drain, but within tolerance
    # of another drain, should snap to the other drain; these are in chains of
    # multiple waterbodies. Visually confirmed this by looking at several.
    snap_start = time()
    further = in_wb.loc[in_wb.snap_dist > in_wb.snap_tolerance].copy()
    nearest_drains = nearest(further.geometry, drains.geometry, further.snap_tolerance)

    maybe_near_neighbor = further.join(nearest_drains, rsuffix="_nearest")

    # keep only those where the neighboring drain is closer than their own
    ix = maybe_near_neighbor.loc[
        maybe_near_neighbor.distance < maybe_near_neighbor.snap_dist
    ].index
    near_neighbor = (
        (
            maybe_near_neighbor.loc[ix]
            .drop(columns=["drain", "drainID", "wbID", "lineID", "snap_dist"])
            .rename(columns={"drainID_nearest": "drainID", "distance": "snap_dist"})
            .join(
                drains[["geometry", "lineID", "wbID"]].rename(
                    columns={"geometry": "drain"}
                ),
                on="drainID",
            )
        )
        .sort_values(by="snap_dist")
        .groupby(level=0)
        .first()
    )

    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = near_neighbor.drain
    df.loc[ix, "snap_dist"] = near_neighbor.snap_dist
    df.loc[ix, "snap_ref_id"] = near_neighbor.drainID
    df.loc[ix, "lineID"] = near_neighbor.lineID
    df.loc[ix, "wbID"] = near_neighbor.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        to_snap.loc[ix].snap_tolerance,
        "m tolerance of drain point for adjacent waterbody",
    )

    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

    print(
        "Found {:,} dams close to drain points for an adjacent waterbody in {:.2f}s".format(
            len(ix), time() - snap_start
        )
    )

    # Any that remain and are within WB_DRAIN_MAX_TOLERANCE (250m) of their
    # own waterbody's drain snap to that drain
    further = further.loc[
        ~further.index.isin(ix) & (further.snap_dist <= WB_DRAIN_MAX_TOLERANCE)
    ].copy()

    ix = further.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = further.drain
    df.loc[ix, "snap_dist"] = further.snap_dist
    df.loc[ix, "snap_ref_id"] = further.drainID
    df.loc[ix, "lineID"] = further.lineID
    df.loc[ix, "wbID"] = further.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        to_snap.loc[ix].snap_tolerance,
        "-",
        WB_DRAIN_MAX_TOLERANCE,
        "m tolerance of drain point of waterbody that contains this dam",
    )

    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

    print(
        "Found {:,} dams within {}m of the drain points for their waterbody".format(
            len(ix), WB_DRAIN_MAX_TOLERANCE
        )
    )

    ### Find the ones that are not in a waterbody but within tolerance of a drain
    # Visually inspected several that had multiple waterbodies nearby;
    # in all cases, the nearest one was sufficient
    print("Finding nearest waterbody drains for unsnapped dams...")
    snap_start = time()
    nearest_drains = nearest(to_snap.geometry, drains.geometry, to_snap.snap_tolerance)

    nearest_drains = nearest_drains.join(to_snap.geometry).join(
        drains[["geometry", "wbID", "lineID"]].rename(columns={"geometry": "drain"}),
        on="drainID",
    )

    ix = nearest_drains.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = nearest_drains.drain
    df.loc[ix, "snap_dist"] = nearest_drains.distance
    df.loc[ix, "snap_ref_id"] = nearest_drains.drainID
    df.loc[ix, "lineID"] = nearest_drains.lineID
    df.loc[ix, "wbID"] = nearest_drains.wbID
    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        to_snap.loc[ix].snap_tolerance,
        "m tolerance of drain point of waterbody (dam not in waterbody)",
    )

    to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

    print(
        "Found {:,} dams within {}m of waterbody drain points".format(
            len(ix), to_snap.snap_tolerance.max()
        )
    )

    # TODO: need to track which waterbodies were claimed by dams

    return df, to_snap
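# A minimal sketch (an assumption, not the pipeline's actual driver code) of
# how these passes might chain together: each pass records results in df and
# returns the still-unsnapped rows, so the output of one pass feeds the next.
# Per the note in snap_to_large_waterbodies, that pass runs only on dams that
# could not snap via the earlier passes; the function name snap_dams and the
# exact ordering here are illustrative.
def snap_dams(df, to_snap):
    df, to_snap = snap_to_nhd_dams(df, to_snap)
    df, to_snap = snap_to_waterbodies(df, to_snap)
    df, to_snap = snap_to_flowlines(df, to_snap)
    df, to_snap = snap_to_large_waterbodies(df, to_snap)
    return df, to_snap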
df["dup_count"] = np.nan df["dup_log"] = "not a duplicate" df["dup_sort"] = 0 # not meaningful for waterfalls df["ManualReview"] = 0 # not meaningful for waterfalls dedup_start = time() df, to_dedup = find_duplicates(df, to_dedup=df.copy(), tolerance=DUPLICATE_TOLERANCE) print("Found {:,} total duplicates in {:.2f}s".format( len(df.loc[df.duplicate]), time() - dedup_start)) ### Deduplicate by dams # any that are within duplicate tolerance of dams may be duplicating those dams dams = from_geofeather(master_dir / "dams.feather") near_dams = nearest(df.geometry, dams.geometry, DUPLICATE_TOLERANCE) ix = near_dams.index df.loc[ix, "duplicate"] = True df.loc[ix, "dup_log"] = "Within {}m of an existing dam".format(DUPLICATE_TOLERANCE) print("Found {} waterfalls within {}m of dams".format(len(ix), DUPLICATE_TOLERANCE)) ### Join to line atts flowlines = deserialize_dfs( [ nhd_dir / "clean" / region / "flowlines.feather" for region in REGION_GROUPS
from analysis.constants import REGION_GROUPS, CRS_WKT

data_dir = Path("data")
networks_dir = data_dir / "networks"

df = deserialize_dfs(
    [
        networks_dir / region / "small_barriers/barriers_network.feather"
        for region in REGION_GROUPS
    ],
)
networkIDs = df.loc[df.kind == "small_barrier"].upNetID.unique()

for region in list(REGION_GROUPS.keys()):
    print("\n----------------\n processing {}".format(region))

    networks = from_geofeather(
        networks_dir / region / "small_barriers" / "network.feather"
    )

    # Extract only the networks associated with small barriers; the rest are dams
    networks = networks.loc[
        networks.networkID.isin(networkIDs), ["networkID", "geometry"]
    ]

    if len(networks) == 0:
        print("No small barriers in this region, skipping")
        continue

    print("Writing to GPKG")
    to_gpkg(
        networks.reset_index(drop=True),
        data_dir / "tiles" / "small_barriers_network{}".format(region),
        index=False,
        name="networks",
    )
def add_spatial_joins(df):
    """Add spatial joins needed for network analysis.

    Parameters
    ----------
    df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        has fields added by spatial joins to other datasets
    """

    print("Joining to HUC12")
    huc12 = from_geofeather(boundaries_dir / "HUC12.feather")
    df = spatial_join(df, huc12)

    # Expected: not all barriers fall cleanly within the HUC12 dataset
    print("{:,} barriers were not assigned HUC12".format(len(df.loc[df.HUC12.isnull()])))

    # Calculate HUC codes for other levels from HUC12
    df["HUC2"] = df["HUC12"].str.slice(0, 2)  # region
    df["HUC6"] = df["HUC12"].str.slice(0, 6)  # basin
    df["HUC8"] = df["HUC12"].str.slice(0, 8)  # subbasin

    # Read in HUC6 and join in basin name
    huc6 = (
        from_geofeather(boundaries_dir / "HUC6.feather")[["HUC6", "NAME"]]
        .rename(columns={"NAME": "Basin"})
        .set_index("HUC6")
    )
    df = df.join(huc6, on="HUC6")

    print("Joining to counties")
    counties = from_geofeather(boundaries_dir / "counties.feather")[
        ["geometry", "County", "COUNTYFIPS", "STATEFIPS"]
    ]
    df = spatial_join(df, counties)

    # Join in state name based on STATEFIPS from county
    states = deserialize_df(boundaries_dir / "states.feather")[
        ["STATEFIPS", "State"]
    ].set_index("STATEFIPS")
    df = df.join(states, on="STATEFIPS")

    # Expected: not all barriers fall cleanly within the states dataset
    print("{:,} barriers were not assigned states".format(len(df.loc[df.STATEFIPS.isnull()])))

    ### Level 3 & 4 Ecoregions
    print("Joining to ecoregions")
    # Only need to join in ECO4 dataset since it has both ECO3 and ECO4 codes
    eco4 = from_geofeather(boundaries_dir / "eco4.feather")[["geometry", "ECO3", "ECO4"]]
    df = spatial_join(df, eco4)

    # Expected: not all barriers fall cleanly within the ecoregions dataset
    print("{:,} barriers were not assigned ecoregions".format(len(df.loc[df.ECO4.isnull()])))

    return df
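# Hypothetical call site for add_spatial_joins (the input path below is an
# assumption for illustration): read snapped barriers, then layer on
# HUC12/HUC6/HUC8 codes, county/state, and ecoregion attributes.
def _example_spatial_joins():
    df = from_geofeather(master_dir / "dams.feather")
    return add_spatial_joins(df)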
nhd_dir = Path("data/nhd") src_dir = nhd_dir / "raw" start = time() for region, HUC2s in list(REGION_GROUPS.items())[4:]: region_start = time() print("\n----- {} ------\n".format(region)) out_dir = nhd_dir / "clean" / region if not os.path.exists(out_dir): os.makedirs(out_dir) print("Reading flowlines...") flowlines = from_geofeather(src_dir / region / "flowlines.feather").set_index("lineID") joins = deserialize_df(src_dir / region / "flowline_joins.feather") print("Read {:,} flowlines".format(len(flowlines))) ### Drop underground conduits ix = flowlines.loc[flowlines.FType == 420].index print("Removing {:,} underground conduits".format(len(ix))) flowlines = flowlines.loc[~flowlines.index.isin(ix)].copy() joins = remove_joins(joins, ix, downstream_col="downstream_id", upstream_col="upstream_id") ### Manual fixes for flowlines exclude_ids = EXCLUDE_IDS.get(region, []) if exclude_ids:
SNAP_TOLERANCE = 50
DUPLICATE_TOLERANCE = 10  # meters

data_dir = Path("data")
boundaries_dir = data_dir / "boundaries"
nhd_dir = data_dir / "nhd"
barriers_dir = data_dir / "barriers"
src_dir = barriers_dir / "source"
master_dir = barriers_dir / "master"
snapped_dir = barriers_dir / "snapped"
qa_dir = barriers_dir / "qa"

start = time()

df = from_geofeather(src_dir / "sarp_small_barriers.feather")
print("Read {:,} small barriers".format(len(df)))

### Add IDs for internal use
# internal ID
df["id"] = df.index.astype("uint32")
df = df.set_index("id", drop=False)

######### Fix data issues

# Fix mixed casing of values
for column in ("CrossingType", "RoadType", "Stream", "Road"):
    df[column] = df[column].fillna("Unknown").str.title().str.strip()
    df.loc[df[column].str.len() == 0, column] = "Unknown"

# Fix line returns in stream name and road name
src_dir = barriers_dir / "source"
master_dir = barriers_dir / "master"
snapped_dir = barriers_dir / "snapped"
qa_dir = barriers_dir / "qa"

dams_filename = "Raw_Featureservice_SARPUniqueID.gdb"
gdb = src_dir / dams_filename

# dams that fall outside SARP
outside_layer = "Dams_Non_SARP_States_09052019"

start = time()

### Read in SARP states and merge
print("Reading dams in SARP states")
df = from_geofeather(src_dir / "sarp_dams.feather")
print("Read {:,} dams in SARP states".format(len(df)))

### Read in non-SARP states and join in
# these are for states that overlap with HUC4s that overlap with SARP states
print(
    "Reading dams that fall outside SARP states, but within HUC4s that overlap with SARP states..."
)

outside_df = (
    gp.read_file(gdb, layer=outside_layer)
    # SARPID is old; use SARPUniqueID for it instead
    .drop(columns=["SARPID"])
    .rename(columns={"SARPUniqueID": "SARPID", "Snap2018": "ManualReview"})[
        DAM_COLS + ["geometry"]
    ]
    .to_crs(CRS)
)
from analysis.pygeos_compat import sjoin_geometry as sjoin, dissolve, to_gdf
from analysis.constants import REGION_GROUPS, CRS
from analysis.util import append
from analysis.prep.barriers.lib.points import nearest, neighborhoods

nhd_dir = Path("data/nhd")
src_dir = nhd_dir / "clean"
out_dir = nhd_dir / "merged"
extra_dir = nhd_dir / "extra"

start = time()

### Merge NHD lines and areas that represent dams and dam-related features
print("Reading NHD lines and areas, and merging...")

nhd_lines = from_geofeather(extra_dir / "nhd_lines.feather")
nhd_lines = nhd_lines.loc[
    (nhd_lines.FType.isin([343, 369, 398])) & nhd_lines.geometry.notnull()
].copy()

# create buffers (10m) to merge with NHD areas
# from visual inspection, this helps coalesce those that are in pairs
nhd_lines["geometry"] = pg.buffer(nhd_lines.geometry, 10, quadsegs=1)

# All NHD areas indicate a dam-related feature
nhd_areas = from_geofeather(extra_dir / "nhd_areas.feather")
nhd_areas = nhd_areas.loc[nhd_areas.geometry.notnull()].copy()

# buffer polygons slightly so we can dissolve touching ones together
nhd_areas["geometry"] = pg.buffer(nhd_areas.geometry, 5)

# Dissolve adjacent nhd lines and waterbodies together
nhd_dams = nhd_lines.append(nhd_areas, ignore_index=True, sort=False)
nearby = sjoin(nhd_dams.geometry, nhd_dams.geometry, how="inner")
from analysis.constants import REGION_GROUPS, CRS_WKT

data_dir = Path("data")
networks_dir = data_dir / "networks"

df = deserialize_dfs(
    [
        networks_dir / region / "dams/barriers_network.feather"
        for region in REGION_GROUPS
    ],
)
networkIDs = df.loc[df.kind == "dam"].upNetID.unique()

for region in list(REGION_GROUPS.keys()):
    print("\n----------------\n processing {}".format(region))

    networks = from_geofeather(networks_dir / region / "dams" / "network.feather")

    # Extract only the networks associated with dams; the rest are small barriers
    networks = networks.loc[
        networks.networkID.isin(networkIDs), ["networkID", "geometry"]
    ]

    if len(networks) == 0:
        print("No dams in this region, skipping")
        continue

    print("Writing to GPKG")
    to_gpkg(
        networks.reset_index(drop=True),
        data_dir / "tiles" / "dam_networks{}".format(region),
        index=False,
        name="networks",
    )