# Shared imports for the functions in this section. Project-level helpers
# (getDriver, PREP_PROJECTION, VIZ_PROJECTION, FIM_ID, WBD_NATIONAL_URL,
# pull_file, subset_wbd_to_nwm_domain, subset_nhd_network, adjust_headwaters,
# find_nwm_incoming_streams, get_downstream_segments, plot_profile,
# build_stream_traversal, bathy_rc_lookup) come from this project's own
# utility modules.
import argparse
import json
import os
import shutil
import sys
from collections import OrderedDict, deque
from multiprocessing import Pool
from os import remove
from os.path import isfile

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rasterio.sample
from numpy import unique
from rasterio import features
from rasterio.features import shapes
from rasterstats import zonal_stats
from shapely.geometry import LineString, MultiPolygon, Point, Polygon
from tqdm import tqdm


def aggregate_stream_networks(nhdplus_vectors_dir,
                              agg_nhd_headwaters_adj_fileName,
                              agg_nhd_streams_adj_fileName, huc_list):

    for huc in huc_list:

        # Aggregated final filenames
        nhd_agg_adj_huc_subset = os.path.join(
            nhdplus_vectors_dir, huc,
            'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg')
        nhd_agg_adj_headwaters_subset = os.path.join(
            nhdplus_vectors_dir, huc,
            'nhd' + str(huc) + '_headwaters_adj.gpkg')

        if os.path.isfile(nhd_agg_adj_huc_subset):
            adj_nhd_streams_all = gpd.read_file(nhd_agg_adj_huc_subset)

            # Write out FR adjusted streams; append if the aggregate exists
            if os.path.isfile(agg_nhd_streams_adj_fileName):
                adj_nhd_streams_all.to_file(
                    agg_nhd_streams_adj_fileName,
                    driver=getDriver(agg_nhd_streams_adj_fileName),
                    index=False, mode='a')
            else:
                adj_nhd_streams_all.to_file(
                    agg_nhd_streams_adj_fileName,
                    driver=getDriver(agg_nhd_streams_adj_fileName),
                    index=False)

            del adj_nhd_streams_all

        if os.path.isfile(nhd_agg_adj_headwaters_subset):
            adj_nhd_headwater_points_all = gpd.read_file(
                nhd_agg_adj_headwaters_subset)

            # Write out FR adjusted headwaters; append if the aggregate exists
            if os.path.isfile(agg_nhd_headwaters_adj_fileName):
                adj_nhd_headwater_points_all.to_file(
                    agg_nhd_headwaters_adj_fileName,
                    driver=getDriver(agg_nhd_headwaters_adj_fileName),
                    index=False, mode='a')
            else:
                adj_nhd_headwater_points_all.to_file(
                    agg_nhd_headwaters_adj_fileName,
                    driver=getDriver(agg_nhd_headwaters_adj_fileName),
                    index=False)

            del adj_nhd_headwater_points_all
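
# Illustrative usage of aggregate_stream_networks (the paths and HUC4 codes
# below are assumptions for the example, not values from this module):
#
#   aggregate_stream_networks('/data/prep/nhdplus_vectors',
#                             '/data/prep/agg_nhd_headwaters_adj.gpkg',
#                             '/data/prep/agg_nhd_streams_adj.gpkg',
#                             huc_list=['0204', '1003'])
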
def convert_grid_cells_to_points(raster, index_option,
                                 output_points_filename=False):

    # Input raster: accept either a filepath or an open rasterio dataset
    if isinstance(raster, str):
        raster = rasterio.open(raster, 'r')
    elif isinstance(raster, rasterio.io.DatasetReader):
        pass
    else:
        raise TypeError("Pass raster dataset or filepath for raster")

    (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation,
     y_size) = raster.get_transform()
    indices = np.nonzero(raster.read(1) >= 1)

    id = [None] * len(indices[0])
    points = [None] * len(indices[0])

    # Iterate over the numpy indices and build one centered point per cell
    i = 1
    for y_index, x_index in zip(*indices):
        x = x_index * x_size + upper_left_x + (x_size / 2)  # add half the cell size
        y = y_index * y_size + upper_left_y + (y_size / 2)  # to center the point
        points[i - 1] = Point(x, y)
        if index_option == 'reachID':
            reachID = np.array(list(
                raster.sample((Point(x, y).coords),
                              indexes=1))).item()  # check this; needs to add raster cell value + index
            id[i - 1] = reachID * 10000 + i  # reachID + i/100
        elif (index_option == 'featureID') | (index_option == 'pixelID'):
            id[i - 1] = i
        i += 1

    pointGDF = gpd.GeoDataFrame({'id': id, 'geometry': points},
                                crs=PREP_PROJECTION, geometry='geometry')

    if output_points_filename == False:
        return pointGDF
    else:
        pointGDF.to_file(output_points_filename,
                         driver=getDriver(output_points_filename),
                         index=False)
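
# Illustrative usage of convert_grid_cells_to_points; the raster path is an
# assumed example. With output_points_filename left as False, the
# GeoDataFrame is returned instead of written to disk:
#
#   reach_points = convert_grid_cells_to_points(
#       '/data/outputs/flows_grid_reaches.tif', index_option='reachID')
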
def identify_nwm_ms_streams(nwm_streams_filename, ahps_filename,
                            nwm_streams_all_filename):

    # Subset the NWM network to mainstems (MS)
    ahps_headwaters = gpd.read_file(ahps_filename)
    nwm_streams = gpd.read_file(nwm_streams_filename)

    # Remove mainstem column if it already exists
    nwm_streams = nwm_streams.drop(['mainstem'], axis=1, errors='ignore')

    nwm_streams['is_headwater'] = False
    nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),
                    'is_headwater'] = True

    # Subset NHDPlus HR
    nwm_streams['is_relevant_stream'] = nwm_streams['is_headwater'].copy()
    nwm_streams = nwm_streams.explode()

    # Trace down from headwaters: breadth-first walk on the to-node graph
    nwm_streams.set_index('ID', inplace=True, drop=False)

    Q = deque(nwm_streams.loc[nwm_streams['is_headwater'], 'ID'].tolist())
    visited = set()

    while Q:
        q = Q.popleft()
        if q in visited:
            continue
        visited.add(q)
        toNode = nwm_streams.loc[q, 'to']
        if not toNode == 0:
            nwm_streams.loc[nwm_streams.ID == toNode,
                            'is_relevant_stream'] = True
            if toNode not in visited:
                Q.append(toNode)

    nwm_streams_ms = nwm_streams.loc[nwm_streams['is_relevant_stream'], :]
    ms_segments = nwm_streams_ms.ID.to_list()

    nwm_streams.reset_index(drop=True, inplace=True)

    # Add column to FR NWM layer to indicate MS segments
    nwm_streams['mainstem'] = np.where(nwm_streams.ID.isin(ms_segments), 1, 0)
    nwm_streams = nwm_streams.drop(['is_relevant_stream', 'is_headwater'],
                                   axis=1, errors='ignore')

    nwm_streams.to_file(nwm_streams_all_filename,
                        driver=getDriver(nwm_streams_all_filename),
                        index=False, layer='nwm_streams')

    return ms_segments
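
# Illustrative usage of identify_nwm_ms_streams (assumed paths). The returned
# list holds the NWM feature IDs flagged as mainstem:
#
#   ms_segments = identify_nwm_ms_streams('/data/nwm/nwm_flows.gpkg',
#                                         '/data/ahps/nws_lid.gpkg',
#                                         '/data/nwm/nwm_flows_ms.gpkg')
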
def reformat_inundation_maps(args):

    try:
        lid, grid_path, gpkg_dir, fim_version, huc, magnitude, \
            nws_lid_attributes_filename = args

        # Convert raster to shapes
        with rasterio.open(grid_path) as src:
            image = src.read(1)
            mask = image > 0

        # Aggregate shapes
        results = ({'properties': {'extent': 1}, 'geometry': s}
                   for i, (s, v) in enumerate(
                       shapes(image, mask=mask, transform=src.transform)))

        # Convert list of shapes to polygon
        extent_poly = gpd.GeoDataFrame.from_features(list(results),
                                                     crs=PREP_PROJECTION)

        # Dissolve polygons
        extent_poly_diss = extent_poly.dissolve(by='extent')

        # Update attributes
        extent_poly_diss = extent_poly_diss.reset_index(drop=True)
        extent_poly_diss['ahps_lid'] = lid
        extent_poly_diss['magnitude'] = magnitude
        extent_poly_diss['version'] = fim_version
        extent_poly_diss['huc'] = huc

        # Project to Web Mercator
        extent_poly_diss = extent_poly_diss.to_crs(VIZ_PROJECTION)

        # Join attributes
        nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename,
                                               dtype={'huc': str})
        nws_lid_attributes_table = nws_lid_attributes_table.loc[
            (nws_lid_attributes_table.magnitude == magnitude)
            & (nws_lid_attributes_table.nws_lid == lid)]
        extent_poly_diss = extent_poly_diss.merge(
            nws_lid_attributes_table,
            left_on=['ahps_lid', 'magnitude', 'huc'],
            right_on=['nws_lid', 'magnitude', 'huc'])
        extent_poly_diss = extent_poly_diss.drop(columns='nws_lid')

        # Save dissolved multipolygon
        handle = os.path.split(grid_path)[1].replace('.tif', '')
        diss_extent_filename = os.path.join(gpkg_dir,
                                            handle + "_dissolved.gpkg")
        extent_poly_diss["geometry"] = [
            MultiPolygon([feature]) if type(feature) == Polygon else feature
            for feature in extent_poly_diss["geometry"]]

        if not extent_poly_diss.empty:
            extent_poly_diss.to_file(
                diss_extent_filename,
                driver=getDriver(diss_extent_filename), index=False)

    except Exception as e:
        # Log the error so the failed layer is not merged in later
        try:
            with open(log_file, 'a+') as f:
                f.write(str(diss_extent_filename) + " - dissolve error: "
                        + str(e))
        except Exception:
            pass
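
# reformat_inundation_maps takes a single argument list, which makes it easy
# to fan out over a multiprocessing Pool (as post_process_cat_fim_for_viz
# does below). A minimal sketch with assumed example values:
#
#   from multiprocessing import Pool
#
#   procs_list = [['bltn7', '/maps/bltn7_major_extent_06010105.tif',
#                  '/maps/gpkg', 'fim_3_0_5', '06010105', 'major',
#                  '/maps/nws_lid_attributes.csv']]
#   with Pool(processes=4) as pool:
#       pool.map(reformat_inundation_maps, procs_list)
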
def collect_stream_attributes(nhdplus_vectors_dir, huc):

    print(f"Starting attribute collection for HUC {huc}", flush=True)

    # Collecting NHDPlus HR attributes
    burnline_filename = os.path.join(
        nhdplus_vectors_dir, huc,
        'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')
    vaa_filename = os.path.join(nhdplus_vectors_dir, huc,
                                'NHDPlusFlowLineVAA' + str(huc) + '.gpkg')
    flowline_filename = os.path.join(nhdplus_vectors_dir, huc,
                                     'NHDFlowline' + str(huc) + '.gpkg')

    if os.path.exists(burnline_filename):

        burnline = gpd.read_file(burnline_filename)
        burnline = burnline[['NHDPlusID', 'ReachCode', 'geometry']]

        flowline = gpd.read_file(flowline_filename)
        flowline = flowline[['NHDPlusID', 'FType', 'FCode']]
        # flowline = flowline.loc[flowline["FType"].isin([334,420,428,460,558])]
        flowline = flowline.loc[~flowline["FType"].isin([566, 420])]

        nhd_streams_vaa = gpd.read_file(vaa_filename)
        nhd_streams_vaa = nhd_streams_vaa[[
            'FromNode', 'ToNode', 'NHDPlusID', 'StreamOrde', 'DnLevelPat',
            'LevelPathI']]

        nhd_streams = burnline.merge(nhd_streams_vaa, on='NHDPlusID',
                                     how='inner')
        nhd_streams = nhd_streams.merge(flowline, on='NHDPlusID',
                                        how='inner')
        del burnline, flowline, nhd_streams_vaa

        nhd_streams = nhd_streams.to_crs(PREP_PROJECTION)
        # Special case: remove segments without geometries
        nhd_streams = nhd_streams.loc[~nhd_streams.geometry.isna(), :]
        nhd_streams['HUC4'] = str(huc)

        # Special case: breach in network at Tiber Dam
        if (huc == '1003'
                and (nhd_streams.loc[
                    nhd_streams.NHDPlusID == 23001300078682.0,
                    'DnLevelPat'] == 23001300001574.0).any()):
            nhd_streams = nhd_streams.loc[
                nhd_streams.NHDPlusID != 23001300009084.0]
            nhd_streams.loc[nhd_streams.NHDPlusID == 23001300078682.0,
                            'DnLevelPat'] = 23001300001566.0

        # Write out NHDPlus HR aggregated
        nhd_streams_agg_fileName = os.path.join(
            nhdplus_vectors_dir, huc,
            'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')
        nhd_streams.to_file(nhd_streams_agg_fileName,
                            driver=getDriver(nhd_streams_agg_fileName),
                            index=False)
        del nhd_streams

        print(f"Finished attribute collection for HUC {huc}", flush=True)
    else:
        print(f"Missing data for HUC {huc}", flush=True)
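
# Illustrative usage of collect_stream_attributes for one HUC4 (assumed
# directory layout); the aggregated gpkg is written alongside the inputs:
#
#   collect_stream_attributes('/data/prep/nhdplus_vectors', '1003')
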
def subset_stream_networks(args, huc):

    nwm_headwaters_filename = args[0]
    ahps_filename = args[1]
    wbd4 = args[2]
    wbd8 = args[3]
    nhdplus_vectors_dir = args[4]
    nwm_huc4_intersections_filename = args[5]

    print(f"Starting stream subset for HUC {huc}", flush=True)
    nwm_headwater_id = 'ID'
    ahps_headwater_id = 'nws_lid'
    headwater_pts_id = 'site_id'

    column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry']
    nhd_streams_filename = os.path.join(
        nhdplus_vectors_dir, huc,
        'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')

    # Subset to reduce footprint
    selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(huc)]
    del wbd4
    selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)]
    del wbd8

    huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(huc)]
    huc_mask = huc_mask.explode()
    huc_mask = huc_mask.reset_index(drop=True)

    if len(selected_wbd8.HUC8) > 0:
        selected_wbd8 = selected_wbd8.reset_index(drop=True)

        # Identify FR/NWM headwaters and subset HR network
        try:
            nhd_streams_fr = subset_nhd_network(
                huc, huc_mask, selected_wbd8, nhd_streams_filename,
                nwm_headwaters_filename, nwm_headwater_id,
                nwm_huc4_intersections_filename)
        except Exception:
            print(f"Error subsetting NHD HR network for HUC {huc}",
                  flush=True)

        # Identify NHD mainstem streams
        try:
            nhd_streams_all = subset_nhd_network(
                huc, huc_mask, selected_wbd8, nhd_streams_fr, ahps_filename,
                ahps_headwater_id, nwm_huc4_intersections_filename, True)
        except Exception:
            print(f"Error identifying MS network for HUC {huc}", flush=True)

        # Identify HUC8 intersection points
        nhd_huc8_intersections = find_nwm_incoming_streams(
            nhd_streams_all, selected_wbd8, 8)

        # Load NWM headwaters
        nwm_headwaters = gpd.read_file(nwm_headwaters_filename,
                                       mask=huc_mask)
        nwm_headwaters['pt_type'] = 'nwm_headwater'
        nwm_headwaters = nwm_headwaters.rename(
            columns={"ID": headwater_pts_id})

        # Load NWS lids
        nws_lids = gpd.read_file(ahps_filename, mask=huc_mask)
        nws_lids = nws_lids.drop(columns=[
            'name', 'nwm_feature_id', 'usgs_site_code', 'states', 'HUC8',
            'is_headwater', 'is_colocated'])
        nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id})
        nws_lids['pt_type'] = 'nws_lid'
        nws_lids['mainstem'] = True

        if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0):

            # Adjust FR/NWM headwater segments
            adj_nhd_streams_all, adj_nhd_headwater_points = \
                adjust_headwaters(huc, nhd_streams_all, nwm_headwaters,
                                  nws_lids, headwater_pts_id)

            adj_nhd_headwater_points = adj_nhd_headwater_points[column_order]
            nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections'
            nhd_huc8_intersections = nhd_huc8_intersections.rename(
                columns={"NHDPlusID": headwater_pts_id})
            nhd_huc8_intersections = nhd_huc8_intersections[column_order]
            adj_nhd_headwater_points_all = adj_nhd_headwater_points.append(
                nhd_huc8_intersections)
            adj_nhd_headwater_points_all = \
                adj_nhd_headwater_points_all.reset_index(drop=True)

            adj_nhd_streams_all_fileName = os.path.join(
                nhdplus_vectors_dir, huc,
                'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg')
            adj_nhd_headwaters_all_fileName = os.path.join(
                nhdplus_vectors_dir, huc,
                'nhd' + str(huc) + '_headwaters_adj.gpkg')

            # Write out FR adjusted
            adj_nhd_streams_all.to_file(
                adj_nhd_streams_all_fileName,
                driver=getDriver(adj_nhd_streams_all_fileName), index=False)
            adj_nhd_headwater_points_all.to_file(
                adj_nhd_headwaters_all_fileName,
                driver=getDriver(adj_nhd_headwaters_all_fileName),
                index=False)

            del adj_nhd_streams_all, adj_nhd_headwater_points_all

        else:
            print(f"Skipping headwater adjustments for HUC {huc}")

        del nhd_streams_fr

    print(f"Finished stream subset for HUC {huc}", flush=True)
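
# Illustrative usage of subset_stream_networks (assumed inputs; wbd4 and wbd8
# are GeoDataFrames already read from the national WBD geopackage):
#
#   subset_args = [nwm_headwaters_filename, ahps_filename, wbd4, wbd8,
#                  nhdplus_vectors_dir, nwm_huc4_intersections_filename]
#   subset_stream_networks(subset_args, '1003')
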
def add_crosswalk(input_catchments_fileName, input_flows_fileName,
                  input_srcbase_fileName, output_catchments_fileName,
                  output_flows_fileName, output_src_fileName,
                  output_src_json_fileName, output_crosswalk_fileName,
                  output_hydro_table_fileName, input_huc_fileName,
                  input_nwmflows_fileName, input_nwmcatras_fileName,
                  mannings_n, input_nwmcat_fileName, extent,
                  calibration_mode=False):

    input_catchments = gpd.read_file(input_catchments_fileName)
    input_flows = gpd.read_file(input_flows_fileName)
    input_huc = gpd.read_file(input_huc_fileName)
    input_nwmflows = gpd.read_file(input_nwmflows_fileName)

    if extent == 'FR':
        ## Crosswalk using majority catchment method

        # Calculate majority catchments
        majority_calc = zonal_stats(input_catchments,
                                    input_nwmcatras_fileName,
                                    stats=['majority'], geojson_out=True)
        input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
        input_majorities = input_majorities.rename(
            columns={'majority': 'feature_id'})

        input_majorities = input_majorities[:][
            input_majorities['feature_id'].notna()]
        if input_majorities.feature_id.dtype != 'int':
            input_majorities.feature_id = \
                input_majorities.feature_id.astype(int)
        if input_majorities.HydroID.dtype != 'int':
            input_majorities.HydroID = input_majorities.HydroID.astype(int)

        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
        if input_nwmflows.feature_id.dtype != 'int':
            input_nwmflows.feature_id = \
                input_nwmflows.feature_id.astype(int)
        relevant_input_nwmflows = input_nwmflows[
            input_nwmflows['feature_id'].isin(
                input_majorities['feature_id'])]
        relevant_input_nwmflows = relevant_input_nwmflows.filter(
            items=['feature_id', 'order_'])

        if calibration_mode == False:
            if input_catchments.HydroID.dtype != 'int':
                input_catchments.HydroID = \
                    input_catchments.HydroID.astype(int)
            output_catchments = input_catchments.merge(
                input_majorities[['HydroID', 'feature_id']], on='HydroID')
            output_catchments = output_catchments.merge(
                relevant_input_nwmflows[['order_', 'feature_id']],
                on='feature_id')

        if input_flows.HydroID.dtype != 'int':
            input_flows.HydroID = input_flows.HydroID.astype(int)
        output_flows = input_flows.merge(
            input_majorities[['HydroID', 'feature_id']], on='HydroID')
        if output_flows.HydroID.dtype != 'int':
            output_flows.HydroID = output_flows.HydroID.astype(int)
        output_flows = output_flows.merge(
            relevant_input_nwmflows[['order_', 'feature_id']],
            on='feature_id')

    elif extent == 'MS':
        ## Crosswalk using stream segment midpoint method
        input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc)
        input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
        if input_nwmcat.feature_id.dtype != 'int':
            input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
        input_nwmcat = input_nwmcat.set_index('feature_id')

        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
        if input_nwmflows.feature_id.dtype != 'int':
            input_nwmflows.feature_id = \
                input_nwmflows.feature_id.astype(int)

        # Get stream midpoint
        stream_midpoint = []
        hydroID = []
        for i, lineString in enumerate(input_flows.geometry):
            hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
            stream_midpoint = stream_midpoint + [
                lineString.interpolate(0.5, normalized=True)]

        input_flows_midpoint = gpd.GeoDataFrame(
            {'HydroID': hydroID, 'geometry': stream_midpoint},
            crs=input_flows.crs, geometry='geometry')
        input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

        # Create crosswalk
        crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left',
                              op='within').reset_index()
        crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})
        crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
        crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']],
                                    on='feature_id')

        if len(crosswalk) < 1:
            print("No relevant streams within HUC boundaries.")
            sys.exit(0)

        if calibration_mode == False:
            if input_catchments.HydroID.dtype != 'int':
                input_catchments.HydroID = \
                    input_catchments.HydroID.astype(int)
            output_catchments = input_catchments.merge(crosswalk,
                                                       on='HydroID')

        if input_flows.HydroID.dtype != 'int':
            input_flows.HydroID = input_flows.HydroID.astype(int)
        output_flows = input_flows.merge(crosswalk, on='HydroID')

    # Read in Manning's n values
    if calibration_mode == False:
        with open(mannings_n, "r") as read_file:
            mannings_dict = json.load(read_file)
    else:
        mannings_dict = {}
        for cnt, value in enumerate(mannings_n.split(",")[2:]):
            streamorder = cnt + 1
            mannings_dict[str(streamorder)] = value

    output_flows['ManningN'] = output_flows['order_'].astype(str).map(
        mannings_dict)

    # Calculate src_full
    input_src_base = pd.read_csv(input_srcbase_fileName, dtype=object)
    if input_src_base.CatchId.dtype != 'int':
        input_src_base.CatchId = input_src_base.CatchId.astype(int)

    input_src_base = input_src_base.merge(
        output_flows[['ManningN', 'HydroID']], left_on='CatchId',
        right_on='HydroID')

    input_src_base = input_src_base.rename(columns=lambda x: x.strip(" "))
    input_src_base = input_src_base.apply(pd.to_numeric,
                                          **{'errors': 'coerce'})
    input_src_base['TopWidth (m)'] = input_src_base[
        'SurfaceArea (m2)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['WettedPerimeter (m)'] = input_src_base[
        'BedArea (m2)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['WetArea (m2)'] = input_src_base[
        'Volume (m3)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['HydraulicRadius (m)'] = input_src_base[
        'WetArea (m2)'] / input_src_base['WettedPerimeter (m)']
    input_src_base['HydraulicRadius (m)'].fillna(0, inplace=True)
    # Manning's equation: Q = A * R^(2/3) * S^(1/2) / n
    input_src_base['Discharge (m3s-1)'] = input_src_base['WetArea (m2)'] * \
        pow(input_src_base['HydraulicRadius (m)'], 2.0 / 3) * \
        pow(input_src_base['SLOPE'], 0.5) / input_src_base['ManningN']

    # Set discharge at zero stage to 0
    input_src_base.loc[input_src_base['Stage'] == 0,
                       ['Discharge (m3s-1)']] = 0

    output_src = input_src_base.drop(columns=['CatchId'])
    if output_src.HydroID.dtype != 'int':
        output_src.HydroID = output_src.HydroID.astype(int)

    if extent == 'FR':
        output_src = output_src.merge(
            input_majorities[['HydroID', 'feature_id']], on='HydroID')
    elif extent == 'MS':
        output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']],
                                      on='HydroID')

    output_crosswalk = output_src[['HydroID', 'feature_id']]
    output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)

    # Make hydroTable
    output_hydro_table = output_src.loc[:, [
        'HydroID', 'feature_id', 'Stage', 'Discharge (m3s-1)']]
    output_hydro_table.rename(columns={
        'Stage': 'stage',
        'Discharge (m3s-1)': 'discharge_cms'}, inplace=True)

    if output_hydro_table.HydroID.dtype != 'str':
        output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str)
    output_hydro_table['HydroID'] = output_hydro_table.HydroID.str.zfill(8)
    output_hydro_table['fossid'] = \
        output_hydro_table.loc[:, 'HydroID'].apply(lambda x: str(x)[0:4])
    if input_huc.fossid.dtype != 'str':
        input_huc.fossid = input_huc.fossid.astype(str)

    output_hydro_table = output_hydro_table.merge(
        input_huc.loc[:, ['fossid', 'HUC8']], how='left', on='fossid')
    if output_flows.HydroID.dtype != 'str':
        output_flows.HydroID = output_flows.HydroID.astype(str)
    output_flows['HydroID'] = output_flows.HydroID.str.zfill(8)
    output_hydro_table = output_hydro_table.merge(
        output_flows.loc[:, ['HydroID', 'LakeID']], how='left', on='HydroID')
    output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int)
    output_hydro_table = output_hydro_table.rename(columns={'HUC8': 'HUC'})
    if output_hydro_table.HUC.dtype != 'str':
        output_hydro_table.HUC = output_hydro_table.HUC.astype(str)
    output_hydro_table.HUC = output_hydro_table.HUC.str.zfill(8)

    output_hydro_table.drop(columns='fossid', inplace=True)
    # Cast feature_id to int (drops any decimals), then to str for output
    if output_hydro_table.feature_id.dtype != 'int':
        output_hydro_table.feature_id = \
            output_hydro_table.feature_id.astype(int)
    if output_hydro_table.feature_id.dtype != 'str':
        output_hydro_table.feature_id = \
            output_hydro_table.feature_id.astype(str)

    # Write out based on mode
    if calibration_mode == True:
        output_hydro_table.to_csv(output_hydro_table_fileName, index=False)
    else:
        # Make src json
        output_src_json = dict()
        hydroID_list = unique(output_src['HydroID'])

        for hid in hydroID_list:
            indices_of_hid = output_src['HydroID'] == hid
            stage_list = output_src['Stage'][indices_of_hid].astype(float)
            q_list = output_src['Discharge (m3s-1)'][indices_of_hid].astype(
                float)
            stage_list = stage_list.tolist()
            q_list = q_list.tolist()
            output_src_json[str(hid)] = {'q_list': q_list,
                                         'stage_list': stage_list}

        # Write out
        output_catchments.to_file(
            output_catchments_fileName,
            driver=getDriver(output_catchments_fileName), index=False)
        output_flows.to_file(output_flows_fileName,
                             driver=getDriver(output_flows_fileName),
                             index=False)
        output_src.to_csv(output_src_fileName, index=False)
        output_crosswalk.to_csv(output_crosswalk_fileName, index=False)
        output_hydro_table.to_csv(output_hydro_table_fileName, index=False)

        with open(output_src_json_fileName, 'w') as f:
            json.dump(output_src_json, f, sort_keys=True, indent=2)
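
# The 'Discharge (m3s-1)' column above is Manning's equation,
# Q = A * R^(2/3) * sqrt(S) / n, applied row-wise to the synthetic rating
# curve table. A minimal standalone sketch of the same calculation
# (illustrative numbers, not values from this module):

def mannings_discharge(wet_area_m2, hydraulic_radius_m, slope, mannings_n):
    """Return discharge in m3/s from Manning's equation."""
    return (wet_area_m2 * hydraulic_radius_m ** (2.0 / 3)
            * slope ** 0.5 / mannings_n)

# e.g. mannings_discharge(12.0, 0.8, 0.001, 0.06) -> roughly 5.5 m3/s
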
parser.add_argument('--headwaters-filename',
                    help='Headwaters points layer name', required=True,
                    type=str)
parser.add_argument('-s', '--subset-nhd-streams-fileName',
                    help='Output streams layer name', required=False,
                    type=str, default=None)
parser.add_argument('-i', '--headwater-id',
                    help='Headwater points ID column', required=True)
parser.add_argument('--nwm-intersections-filename',
                    help='NWM HUC4 intersection points', required=True)

args = vars(parser.parse_args())

subset_streams_gdf = subset_nhd_network(
    args['huc_number'], args['huc4_mask'], args['selected_wbd8'],
    args['nhd_streams'], args['headwaters_filename'], args['headwater_id'],
    args['nwm_intersections_filename'])

if args['subset_nhd_streams_fileName'] is not None:
    subset_streams_gdf.to_file(
        args['subset_nhd_streams_fileName'],
        driver=getDriver(args['subset_nhd_streams_fileName']),
        index=False)
def pull_and_prepare_wbd(path_to_saved_data_parent_dir, nwm_dir_name,
                         nwm_file_to_use, overwrite_wbd, num_workers):
    """
    This helper function pulls and unzips Watershed Boundary Dataset (WBD)
    data. It uses the WBD URL defined by WBD_NATIONAL_URL. This function
    also subsets the WBD layers (HU4, HU6, HU8) to CONUS and converts them
    to geopackage layers.

    Args:
        path_to_saved_data_parent_dir (str): The system path to where the
            WBD will be downloaded, unzipped, and preprocessed.
    """

    # Construct path to wbd_directory and create if not existent.
    wbd_directory = os.path.join(path_to_saved_data_parent_dir, 'wbd')
    if not os.path.exists(wbd_directory):
        os.mkdir(wbd_directory)

    wbd_gdb_path = os.path.join(wbd_directory, 'WBD_National_GDB.gdb')
    pulled_wbd_zipped_path = os.path.join(wbd_directory,
                                          'WBD_National_GDB.zip')
    multilayer_wbd_geopackage = os.path.join(wbd_directory,
                                             'WBD_National.gpkg')
    nwm_huc_list_file_template = os.path.join(wbd_directory, 'nwm_wbd{}.csv')

    nwm_file_to_use = os.path.join(path_to_saved_data_parent_dir,
                                   nwm_dir_name, nwm_file_to_use)
    if not os.path.isfile(nwm_file_to_use):
        raise IOError(
            "NWM file to use for subsetting is not available: {}".format(
                nwm_file_to_use))

    if not os.path.exists(multilayer_wbd_geopackage) or overwrite_wbd:
        # Download WBD and unzip if it's not already done.
        if not os.path.exists(wbd_gdb_path):
            if not os.path.exists(pulled_wbd_zipped_path):
                pull_file(WBD_NATIONAL_URL, pulled_wbd_zipped_path)
            os.system(
                "7za x {pulled_wbd_zipped_path} -o{wbd_directory}".format(
                    pulled_wbd_zipped_path=pulled_wbd_zipped_path,
                    wbd_directory=wbd_directory))

        procs_list, wbd_gpkg_list = [], []
        multilayer_wbd_geopackage = os.path.join(wbd_directory,
                                                 'WBD_National.gpkg')

        # Add fimid to HU8, project, and convert to geopackage.
        if os.path.isfile(multilayer_wbd_geopackage):
            os.remove(multilayer_wbd_geopackage)
        print("Making National WBD GPKG...")
        print("\tWBDHU8")
        wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8')
        wbd_hu8 = wbd_hu8.rename(columns={'huc8': 'HUC8'})  # rename column to caps
        wbd_hu8 = wbd_hu8.sort_values('HUC8')
        fimids = [str(item).zfill(4)
                  for item in list(range(1000, 1000 + len(wbd_hu8)))]
        wbd_hu8[FIM_ID] = fimids
        wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION)  # Project.
        wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8, nwm_file_to_use)
        wbd_hu8.geometry = wbd_hu8.buffer(0)
        wbd_hu8.to_file(multilayer_wbd_geopackage, layer='WBDHU8',
                        driver=getDriver(multilayer_wbd_geopackage),
                        index=False)  # Save.
        wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),
                            index=False, header=False)
        #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg'))  # Append to wbd_gpkg_list for subsetting later.
        del wbd_hu8

        # Prepare procs_list for multiprocessed geopackaging.
        for wbd_layer_num in ['4', '6']:
            wbd_layer = 'WBDHU' + wbd_layer_num
            print("\t{}".format(wbd_layer))
            wbd = gpd.read_file(wbd_gdb_path, layer=wbd_layer)
            wbd = wbd.to_crs(PREP_PROJECTION)
            wbd = wbd.rename(
                columns={'huc' + wbd_layer_num: 'HUC' + wbd_layer_num})
            wbd = subset_wbd_to_nwm_domain(wbd, nwm_file_to_use)
            wbd.geometry = wbd.buffer(0)
            wbd.to_file(multilayer_wbd_geopackage, layer=wbd_layer,
                        driver=getDriver(multilayer_wbd_geopackage),
                        index=False)
            wbd['HUC{}'.format(wbd_layer_num)].to_csv(
                nwm_huc_list_file_template.format(wbd_layer_num),
                index=False, header=False)
            #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg')
            #wbd_gpkg_list.append(output_gpkg)
            #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)])

        # with Pool(processes=num_workers) as pool:
        #     pool.map(run_system_command, procs_list)

        # Subset WBD layers to CONUS and add to single geopackage.
        #print("Subsetting WBD layers to CONUS...")
        #multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg')
        #for gpkg in wbd_gpkg_list:
        #    subset_wbd_gpkg(gpkg, multilayer_wbd_geopackage)

    # Clean up temporary files.
    #for temp_layer in ['WBDHU4', 'WBDHU6', 'WBDHU8']:
    #    delete_file(os.path.join(wbd_directory, temp_layer + '.gpkg'))
    #pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip')
    #delete_file(pulled_wbd_zipped_path)
    #delete_file(os.path.join(wbd_directory, 'WBD_National_GDB.jpg'))

    return wbd_directory
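
# Illustrative usage of pull_and_prepare_wbd (assumed paths and filenames):
#
#   pull_and_prepare_wbd('/data/inputs', 'nwm_hydrofabric',
#                        'nwm_flows.gpkg', overwrite_wbd=False,
#                        num_workers=4)
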
def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename,
                dem_filename, split_flows_filename, split_points_filename,
                wbd8_clp_filename, lakes_filename):

    toMetersConversion = 1e-3

    print('Loading data ...')
    flows = gpd.read_file(flows_filename)

    if not len(flows) > 0:
        print("No relevant streams within HUC boundaries.")
        sys.exit(0)

    wbd8 = gpd.read_file(wbd8_clp_filename)
    dem = rasterio.open(dem_filename, 'r')

    if isfile(lakes_filename):
        lakes = gpd.read_file(lakes_filename)
    else:
        lakes = None

    wbd8 = wbd8.filter(items=[FIM_ID, 'geometry'])
    wbd8 = wbd8.set_index(FIM_ID)
    flows = flows.explode()

    # temp
    flows = flows.to_crs(wbd8.crs)

    split_flows = []
    slopes = []
    hydro_id = 'HydroID'

    # Split at HUC8 boundaries
    print('splitting stream segments at HUC8 boundaries')
    flows = gpd.overlay(flows, wbd8,
                        how='union').explode().reset_index(drop=True)

    # Check for lake features
    lakes_buffer = None
    if lakes is not None and len(lakes) > 0:
        print('splitting stream segments at ' + str(len(lakes)) +
              ' waterbodies')
        # Create splits at lake boundaries
        lakes = lakes.filter(items=['newID', 'geometry'])
        lakes = lakes.set_index('newID')
        flows = gpd.overlay(flows, lakes,
                            how='union').explode().reset_index(drop=True)
        lakes_buffer = lakes.copy()
        # Adding X meter buffer for spatial join comparison (currently
        # using 20 meters)
        lakes_buffer['geometry'] = lakes.buffer(lakes_buffer_input)

    print('splitting ' + str(len(flows)) + ' stream segments based on ' +
          str(max_length) + ' m max length')

    # Remove empty geometries
    flows = flows.loc[~flows.is_empty, :]

    for i, lineString in tqdm(enumerate(flows.geometry),
                              total=len(flows.geometry)):

        # Reverse geometry order (necessary for BurnLines)
        lineString = LineString(lineString.coords[::-1])

        # Skip lines of zero length
        if lineString.length == 0:
            continue

        # Existing reaches of less than max_length
        if lineString.length < max_length:
            split_flows = split_flows + [lineString]
            line_points = [point for point in zip(*lineString.coords.xy)]

            # Calculate channel slope
            start_point = line_points[0]
            end_point = line_points[-1]
            start_elev, end_elev = [
                i[0] for i in rasterio.sample.sample_gen(
                    dem, [start_point, end_point])]
            slope = float(abs(start_elev - end_elev) / lineString.length)
            if slope < slope_min:
                slope = slope_min
            slopes = slopes + [slope]
            continue

        splitLength = lineString.length / np.ceil(
            lineString.length / max_length)

        cumulative_line = []
        line_points = []
        last_point = []

        last_point_in_entire_lineString = list(
            zip(*lineString.coords.xy))[-1]

        for point in zip(*lineString.coords.xy):

            cumulative_line = cumulative_line + [point]
            line_points = line_points + [point]
            numberOfPoints_in_cumulative_line = len(cumulative_line)

            if last_point:
                cumulative_line = [last_point] + cumulative_line
                numberOfPoints_in_cumulative_line = len(cumulative_line)
            elif numberOfPoints_in_cumulative_line == 1:
                continue

            cumulative_length = LineString(cumulative_line).length

            if cumulative_length >= splitLength:

                splitLineString = LineString(cumulative_line)
                split_flows = split_flows + [splitLineString]

                # Calculate channel slope
                start_point = cumulative_line[0]
                end_point = cumulative_line[-1]
                start_elev, end_elev = [
                    i[0] for i in rasterio.sample.sample_gen(
                        dem, [start_point, end_point])]
                slope = float(
                    abs(start_elev - end_elev) / splitLineString.length)
                if slope < slope_min:
                    slope = slope_min
                slopes = slopes + [slope]

                last_point = end_point

                if last_point == last_point_in_entire_lineString:
                    continue

                cumulative_line = []
                line_points = []

        splitLineString = LineString(cumulative_line)
        split_flows = split_flows + [splitLineString]

        # Calculate channel slope
        start_point = cumulative_line[0]
        end_point = cumulative_line[-1]
        start_elev, end_elev = [
            i[0] for i in rasterio.sample.sample_gen(
                dem, [start_point, end_point])]
        slope = float(abs(start_elev - end_elev) / splitLineString.length)
        if slope < slope_min:
            slope = slope_min
        slopes = slopes + [slope]

    split_flows_gdf = gpd.GeoDataFrame(
        {'S0': slopes, 'geometry': split_flows}, crs=flows.crs,
        geometry='geometry')
    split_flows_gdf['LengthKm'] = \
        split_flows_gdf.geometry.length * toMetersConversion

    if lakes_buffer is not None:
        # options: intersects, within, contains, crosses
        split_flows_gdf = gpd.sjoin(split_flows_gdf, lakes_buffer,
                                    how='left', op='within')
        split_flows_gdf = split_flows_gdf.rename(
            columns={"index_right": "LakeID"}).fillna(-999)
    else:
        split_flows_gdf['LakeID'] = -999

    # need to figure out why so many duplicate stream segments for 04010101 FR
    split_flows_gdf = split_flows_gdf.drop_duplicates()

    # Create IDs and Network Traversal Columns
    addattributes = build_stream_traversal.build_stream_traversal_columns()
    tResults = None
    tResults = addattributes.execute(split_flows_gdf, wbd8, hydro_id)
    if tResults[0] == 'OK':
        split_flows_gdf = tResults[1]
    else:
        print('Error: Could not add network attributes to stream segments')

    # Remove single node segments
    split_flows_gdf = split_flows_gdf.query("From_Node != To_Node")

    # Get all vertices
    split_points = OrderedDict()
    for index, segment in split_flows_gdf.iterrows():
        lineString = segment.geometry

        for point in zip(*lineString.coords.xy):
            if point in split_points:
                if segment.NextDownID == split_points[point]:
                    pass
                else:
                    split_points[point] = segment[hydro_id]
            else:
                split_points[point] = segment[hydro_id]

    hydroIDs_points = [hidp for hidp in split_points.values()]
    split_points = [Point(*point) for point in split_points]

    split_points_gdf = gpd.GeoDataFrame(
        {'id': hydroIDs_points, 'geometry': split_points}, crs=flows.crs,
        geometry='geometry')

    print('Writing outputs ...')

    if isfile(split_flows_filename):
        remove(split_flows_filename)
    split_flows_gdf.to_file(split_flows_filename,
                            driver=getDriver(split_flows_filename),
                            index=False)

    if isfile(split_points_filename):
        remove(split_points_filename)
    split_points_gdf.to_file(split_points_filename,
                             driver=getDriver(split_points_filename),
                             index=False)
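
# Illustrative usage of split_flows (assumed paths; 1500 m max reach length
# and a 0.001 minimum slope are example values only):
#
#   split_flows(max_length=1500, slope_min=0.001, lakes_buffer_input=20,
#               flows_filename='demDerived_reaches.gpkg',
#               dem_filename='dem_thalwegCond.tif',
#               split_flows_filename='demDerived_reaches_split.gpkg',
#               split_points_filename='demDerived_reaches_split_points.gpkg',
#               wbd8_clp_filename='wbd8_clp.gpkg',
#               lakes_filename='nwm_lakes_proj_subset.gpkg')
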
def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,
                                 nws_lid_attributes_filename, log_file):

    # Create workspace
    gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg')
    if not os.path.exists(gpkg_dir):
        os.mkdir(gpkg_dir)

    # Find the FIM version
    fim_version = os.path.basename(output_cat_fim_dir)
    merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp')

    if not os.path.exists(merged_layer):  # prevents appending to existing output

        huc_ahps_dir_list = os.listdir(output_cat_fim_dir)
        skip_list = ['errors', 'logs', 'gpkg', merged_layer]

        for magnitude in magnitude_list:

            procs_list = []

            # Loop through all categories
            for huc in huc_ahps_dir_list:

                if huc not in skip_list:

                    huc_dir = os.path.join(output_cat_fim_dir, huc)
                    ahps_dir_list = os.listdir(huc_dir)

                    # Loop through ahps sites
                    for ahps_lid in ahps_dir_list:
                        ahps_lid_dir = os.path.join(huc_dir, ahps_lid)
                        extent_grid = os.path.join(
                            ahps_lid_dir, ahps_lid + '_' + magnitude +
                            '_extent_' + huc + '.tif')

                        if os.path.exists(extent_grid):
                            procs_list.append([
                                ahps_lid, extent_grid, gpkg_dir, fim_version,
                                huc, magnitude,
                                nws_lid_attributes_filename])
                        else:
                            try:
                                with open(log_file, 'a+') as f:
                                    f.write(
                                        f"Missing layers: {extent_grid}\n")
                            except Exception:
                                pass

            # Multiprocess with instructions
            with Pool(processes=number_of_jobs) as pool:
                pool.map(reformat_inundation_maps, procs_list)

        # Merge all layers
        print(f"Merging {len(os.listdir(gpkg_dir))} layers...")

        for layer in os.listdir(gpkg_dir):

            diss_extent_filename = os.path.join(gpkg_dir, layer)

            # Open diss_extent
            diss_extent = gpd.read_file(diss_extent_filename)
            diss_extent['viz'] = 'yes'

            # Write/append aggregate diss_extent
            if os.path.isfile(merged_layer):
                diss_extent.to_file(merged_layer,
                                    driver=getDriver(merged_layer),
                                    index=False, mode='a')
            else:
                diss_extent.to_file(merged_layer,
                                    driver=getDriver(merged_layer),
                                    index=False)

            del diss_extent

        shutil.rmtree(gpkg_dir)

    else:
        print(f"{merged_layer} already exists.")
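
# Illustrative usage of post_process_cat_fim_for_viz (assumed paths; also
# assumes the module-level magnitude_list used above, e.g.
# ['action', 'minor', 'moderate', 'major'], has been defined):
#
#   post_process_cat_fim_for_viz(4, '/data/catfim/fim_3_0_5',
#                                '/data/catfim/nws_lid_attributes.csv',
#                                '/data/catfim/logs/log.txt')
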
    #line_points = np.append(line_points, g_points)
    for i, sp in enumerate(starting_points):
        #print(sp)
        if sp not in end_points:
            headwater_points += [sp]
    #print(headwater_points)

    headwater_points_geometries = [Point(*hwp) for hwp in headwater_points]
    hw_gdf = gpd.GeoDataFrame({'geometry': headwater_points_geometries},
                              crs=flows.crs, geometry='geometry')

    return hw_gdf


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Derive headwater points from flowlines. '
                    'Linestrings must flow downstream')
    parser.add_argument('-f', '--input-flows',
                        help='Input flowlines. Linestrings must flow downstream',
                        required=True, type=str)
    parser.add_argument('-l', '--input-flows-layer',
                        help='Input layer name', required=False, type=str,
                        default=None)
    parser.add_argument('-o', '--output-headwaters',
                        help='Output headwaters points', required=False,
                        type=str, default=None)

    args = vars(parser.parse_args())

    flows = gpd.read_file(args['input_flows'],
                          layer=args['input_flows_layer'])

    hw_gdf = findHeadWaterPoints(flows)

    if args['output_headwaters'] is not None:
        hw_gdf.to_file(args['output_headwaters'],
                       driver=getDriver(args['output_headwaters']))
parser.add_argument('-b', '--nws-lids', help='NWS lid points',
                    required=True)
parser.add_argument('-i', '--headwater-id',
                    help='Headwater id column name', required=True)

args = vars(parser.parse_args())

#TODO variables below (huc, nhd_streams, nwm_headwaters, nws_lids,
# headwater_id) are not defined in this fragment
adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(
    huc, nhd_streams, nwm_headwaters, nws_lids, headwater_id)

if args['subset_nhd_streams_fileName'] is not None:
    adj_streams_gdf.to_file(
        args['subset_nhd_streams_fileName'],
        driver=getDriver(args['subset_nhd_streams_fileName']))

#TODO adjust_headwaters does not return an unadjusted headwater points
# layer, so there is no GeoDataFrame to write to headwater_points_fileName

if args['adj_headwater_points_fileName'] is not None:
    adj_headwaters_gdf.to_file(
        args['adj_headwater_points_fileName'],
        driver=getDriver(args['adj_headwater_points_fileName']))
    for y_index, x_index in tqdm(zip(*indices), total=len(indices[0])):
        x = x_index * x_size + upper_left_x + (x_size / 2)  # add half the cell size
        y = y_index * y_size + upper_left_y + (y_size / 2)  # to centre the point

        # get raster value
        #reachID = a[y_index, x_index]
        #point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint)
        #point.SetPoint(0, x, y)
        points[i - 1] = Point(x, y)
        #feature = osgeo.ogr.Feature(layerDefinition)
        #feature.SetGeometry(point)
        #feature.SetFID(i)

        if writeOption == 'reachID':
            reachID = a[y_index, x_index]
            id[i - 1] = reachID
            #feature.SetField("id", reachID)
        elif (writeOption == 'featureID') | (writeOption == 'pixelID'):
            #feature.SetField("id", i)
            id[i - 1] = i

        #layer.CreateFeature(feature)
        i += 1

    pointGDF = gpd.GeoDataFrame({'id': id, 'geometry': points},
                                crs=boolean.proj, geometry='geometry')
    pointGDF.to_file(outputFileName, driver=getDriver(outputFileName),
                     index=False)

    print("Complete")
    #shapeData.Destroy()
def compare_thalweg(args):

    huc_dir = args[0]
    stream_type = args[1]
    point_density = args[2]
    huc = args[3]
    dem_meters_filename = args[4]
    dem_lateral_thalweg_adj_filename = args[5]
    dem_thalwegCond_filename = args[6]
    profile_plots_filename = args[7]
    profile_gpkg_filename = args[8]
    profile_table_filename = args[9]
    flows_grid_boolean_filename = args[10]

    if stream_type == 'derived':

        dem_derived_reaches_filename = os.path.join(
            huc_dir, 'demDerived_reaches_split.gpkg')
        streams = gpd.read_file(dem_derived_reaches_filename)
        nhd_headwater_filename = os.path.join(
            huc_dir, 'nhd_headwater_points_subset.gpkg')
        wbd_filename = os.path.join(huc_dir, 'wbd.gpkg')
        wbd = gpd.read_file(wbd_filename)
        headwaters_layer = gpd.read_file(nhd_headwater_filename, mask=wbd)
        headwater_list = headwaters_layer.loc[
            headwaters_layer.pt_type == 'nws_lid']
        stream_id = 'HydroID'

    elif stream_type == 'burnline':

        nhd_reaches_filename = os.path.join(
            huc_dir, 'NHDPlusBurnLineEvent_subset.gpkg')
        nhd_reaches = gpd.read_file(nhd_reaches_filename)
        streams = nhd_reaches.copy()
        headwaters_layer = None

        # Get lists of all complete reaches using headwater attributes
        headwater_list = streams.loc[streams.nws_lid != ''].nws_lid
        stream_id = 'NHDPlusID'

    headwater_col = 'is_headwater'
    streams[headwater_col] = False
    headwater_list = headwater_list.reset_index(drop=True)

    if stream_type == 'derived':

        streams['nws_lid'] = ''

        if streams.NextDownID.dtype != 'int':
            streams.NextDownID = streams.NextDownID.astype(int)

        # Tag each headwater site to its nearest stream segment
        min_dist = np.empty(len(headwater_list))
        streams['min_dist'] = 1000
        for i, point in headwater_list.iterrows():
            streams['min_dist'] = [point.geometry.distance(line)
                                   for line in streams.geometry]
            streams.loc[streams.min_dist == np.min(streams.min_dist),
                        'nws_lid'] = point.site_id

        headwater_list = headwater_list.site_id

    streams.set_index(stream_id, inplace=True, drop=False)

    # Collect headwater streams
    single_stream_paths = []
    dem_meters = rasterio.open(dem_meters_filename, 'r')
    index_option = 'reachID'

    for index, headwater_site in enumerate(headwater_list):
        stream_path = get_downstream_segments(streams.copy(), 'nws_lid',
                                              headwater_site, 'downstream',
                                              stream_id, stream_type)
        stream_path = stream_path.reset_index(drop=True)
        stream_path = stream_path.sort_values(by=['downstream_count'])
        stream_path = stream_path.loc[stream_path.downstream == True]

        if stream_type == 'burnline':

            geom_value = []
            for index, segment in stream_path.iterrows():
                lineString = LineString(segment.geometry.coords[::-1])
                geom_value = geom_value + [
                    (lineString, segment.downstream_count)]

            nhd_reaches_raster = features.rasterize(
                shapes=geom_value,
                out_shape=[dem_meters.height, dem_meters.width],
                fill=dem_meters.nodata, transform=dem_meters.transform,
                all_touched=True, dtype=np.float32)

            flow_bool = rasterio.open(flows_grid_boolean_filename)
            flow_bool_data = flow_bool.read(1)
            nhd_reaches_raster = np.where(
                flow_bool_data == int(0), -9999.0,
                (nhd_reaches_raster).astype(rasterio.float32))

            out_dem_filename = os.path.join(
                huc_dir, 'NHDPlusBurnLineEvent_raster.tif')
            with rasterio.open(out_dem_filename, "w", **dem_meters.profile,
                               BIGTIFF='YES') as dest:
                dest.write(nhd_reaches_raster, indexes=1)

            stream_path = convert_grid_cells_to_points(
                out_dem_filename, index_option)

        stream_path["headwater_path"] = headwater_site
        single_stream_paths = single_stream_paths + [stream_path]
        print(f"length of {headwater_site} path: {len(stream_path)}")

    # Collect elevation values from multiple grids along each individual
    # reach point
    dem_lateral_thalweg_adj = rasterio.open(
        dem_lateral_thalweg_adj_filename, 'r')
    dem_thalwegCond = rasterio.open(dem_thalwegCond_filename, 'r')

    thalweg_points = gpd.GeoDataFrame()
    for path in single_stream_paths:

        split_points = []
        stream_ids = []
        dem_m_elev = []
        dem_burned_filled_elev = []
        dem_lat_thal_adj_elev = []
        dem_thal_adj_elev = []
        headwater_path = []
        index_count = []

        for index, segment in path.iterrows():

            if stream_type == 'derived':

                linestring = segment.geometry

                if point_density == 'midpoints':

                    midpoint = linestring.interpolate(0.5, normalized=True)
                    stream_ids = stream_ids + [segment[stream_id]]
                    split_points = split_points + [midpoint]
                    index_count = index_count + [segment.downstream_count]
                    dem_m_elev = dem_m_elev + [np.array(list(
                        dem_meters.sample((Point(midpoint).coords),
                                          indexes=1))).item()]
                    dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [
                        np.array(list(dem_lateral_thalweg_adj.sample(
                            (Point(midpoint).coords), indexes=1))).item()]
                    dem_thal_adj_elev = dem_thal_adj_elev + [
                        np.array(list(dem_thalwegCond.sample(
                            (Point(midpoint).coords), indexes=1))).item()]
                    headwater_path = headwater_path + [
                        segment.headwater_path]

                elif point_density == 'all_points':

                    count = 0
                    for point in zip(*linestring.coords.xy):
                        stream_ids = stream_ids + [segment[stream_id]]
                        split_points = split_points + [Point(point)]
                        count = count + 1
                        index_count = index_count + [
                            segment.downstream_count * 1000 + count]
                        dem_m_elev = dem_m_elev + [np.array(list(
                            dem_meters.sample((Point(point).coords),
                                              indexes=1))).item()]
                        dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [
                            np.array(list(dem_lateral_thalweg_adj.sample(
                                (Point(point).coords), indexes=1))).item()]
                        dem_thal_adj_elev = dem_thal_adj_elev + [
                            np.array(list(dem_thalwegCond.sample(
                                (Point(point).coords), indexes=1))).item()]
                        headwater_path = headwater_path + [
                            segment.headwater_path]

            elif stream_type == 'burnline':

                stream_ids = stream_ids + [segment['id']]
                split_points = split_points + [Point(segment.geometry)]
                index_count = index_count + [segment['id']]
                dem_m_elev = dem_m_elev + [np.array(list(
                    dem_meters.sample((Point(segment.geometry).coords),
                                      indexes=1))).item()]
                dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [
                    np.array(list(dem_lateral_thalweg_adj.sample(
                        (Point(segment.geometry).coords),
                        indexes=1))).item()]
                dem_thal_adj_elev = dem_thal_adj_elev + [
                    np.array(list(dem_thalwegCond.sample(
                        (Point(segment.geometry).coords),
                        indexes=1))).item()]
                headwater_path = headwater_path + [segment.headwater_path]

        # gpd.GeoDataFrame({**data, "source": "dem_m"})
        dem_m_pts = gpd.GeoDataFrame(
            {'stream_id': stream_ids, 'source': 'dem_m',
             'elevation_m': dem_m_elev, 'headwater_path': headwater_path,
             'index_count': index_count, 'geometry': split_points},
            crs=path.crs, geometry='geometry')

        dem_lat_thal_adj_pts = gpd.GeoDataFrame(
            {'stream_id': stream_ids, 'source': 'dem_lat_thal_adj',
             'elevation_m': dem_lat_thal_adj_elev,
             'headwater_path': headwater_path, 'index_count': index_count,
             'geometry': split_points},
            crs=path.crs, geometry='geometry')

        dem_thal_adj_pts = gpd.GeoDataFrame(
            {'stream_id': stream_ids, 'source': 'thal_adj_dem',
             'elevation_m': dem_thal_adj_elev,
             'headwater_path': headwater_path, 'index_count': index_count,
             'geometry': split_points},
            crs=path.crs, geometry='geometry')

        for raster in [dem_m_pts, dem_lat_thal_adj_pts, dem_thal_adj_pts]:
            raster = raster.sort_values(by=['index_count'])
            raster.set_index('index_count', inplace=True, drop=True)
            raster = raster.reset_index(drop=True)
            raster.index.names = ['index_count']
            raster = raster.reset_index(drop=False)
            thalweg_points = thalweg_points.append(raster,
                                                   ignore_index=True)
            del raster

        del dem_m_pts, dem_lat_thal_adj_pts, dem_thal_adj_pts

    del dem_lateral_thalweg_adj, dem_thalwegCond, dem_meters

    try:
        # Remove nodata points and convert elevation to feet
        thalweg_points = thalweg_points.loc[
            thalweg_points.elevation_m > 0.0]
        thalweg_points.elevation_m = np.round(thalweg_points.elevation_m, 3)
        thalweg_points['elevation_ft'] = np.round(
            thalweg_points.elevation_m * 3.28084, 3)

        # Plot thalweg profile
        plot_profile(thalweg_points, profile_plots_filename)

        # Filter final thalweg adjusted layer
        thal_adj_points = thalweg_points.loc[
            thalweg_points.source == 'thal_adj_dem'].copy()
        # thal_adj_points.to_file(profile_gpkg_filename,
        #                         driver=getDriver(profile_gpkg_filename))

        # Identify significant rises/drops in elevation
        thal_adj_points['elev_change'] = thal_adj_points.groupby(
            ['headwater_path', 'source'])['elevation_m'].apply(
                lambda x: x - x.shift())
        elev_changes = thal_adj_points.loc[
            (thal_adj_points.elev_change <= -lateral_elevation_threshold)
            | (thal_adj_points.elev_change > 0.0)]

        if not elev_changes.empty:
            # elev_changes.to_csv(profile_table_filename, index=False)
            elev_changes.to_file(profile_gpkg_filename, index=False,
                                 driver=getDriver(profile_gpkg_filename))

        # Zoom in to plot only areas with steep elevation changes
        # select_streams = elev_changes.stream_id.to_list()
        # downstream_segments = [index + 1 for index in select_streams]
        # upstream_segments = [index - 1 for index in select_streams]
        # select_streams = list(set(upstream_segments + downstream_segments
        #                           + select_streams))
        # thal_adj_points_select = thal_adj_points.loc[
        #     thal_adj_points.stream_id.isin(select_streams)]
        # plot_profile(thal_adj_points_select, profile_plots_filename_zoom)

    except Exception:
        print(f"huc {huc} has {len(thalweg_points)} thalweg points")
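
# compare_thalweg, like reformat_inundation_maps, takes one argument list so
# it can be mapped over a Pool. A sketch with assumed filenames:
#
#   compare_args = [huc_dir, 'derived', 'midpoints', '1003',
#                   'dem_meters.tif', 'dem_lateral_thalweg_adj.tif',
#                   'dem_thalwegCond.tif', 'profile_plots.png',
#                   'profile_elev_changes.gpkg', 'profile_elev_changes.csv',
#                   'flows_grid_boolean.tif']
#   compare_thalweg(compare_args)
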
spatial_list = os.listdir(spatial_dir)
agg_thalweg_elevations_gpkg_fileName = os.path.join(
    output_dir,
    f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg")
agg_thalweg_elevation_table_fileName = os.path.join(
    output_dir,
    f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv")

for layer in spatial_list:

    huc_gpd = gpd.read_file(os.path.join(spatial_dir, layer))

    # Write aggregate layer; append if the file already exists
    if os.path.isfile(agg_thalweg_elevations_gpkg_fileName):
        huc_gpd.to_file(
            agg_thalweg_elevations_gpkg_fileName,
            driver=getDriver(agg_thalweg_elevations_gpkg_fileName),
            index=False, mode='a')
    else:
        huc_gpd.to_file(
            agg_thalweg_elevations_gpkg_fileName,
            driver=getDriver(agg_thalweg_elevations_gpkg_fileName),
            index=False)

    del huc_gpd

# Create csv of elevation table
huc_table = gpd.read_file(agg_thalweg_elevations_gpkg_fileName)
huc_table.to_csv(agg_thalweg_elevation_table_fileName, index=False)

# Close log file
def subset_vector_layers(hucCode, nwm_streams_filename, nhd_streams_filename,
                         nwm_lakes_filename, nld_lines_filename,
                         nwm_catchments_filename, nhd_headwaters_filename,
                         landsea_filename, wbd_filename,
                         wbd_buffer_filename, subset_nhd_streams_filename,
                         subset_nld_lines_filename,
                         subset_nwm_lakes_filename,
                         subset_nwm_catchments_filename,
                         subset_nhd_headwaters_filename,
                         subset_nwm_streams_filename,
                         subset_landsea_filename, dissolveLinks=False):

    hucUnitLength = len(str(hucCode))

    # Get wbd buffer
    wbd = gpd.read_file(wbd_filename)
    wbd_buffer = gpd.read_file(wbd_buffer_filename)
    projection = wbd_buffer.crs

    # Clip ocean water polygon for future masking ocean areas (where
    # applicable)
    landsea = gpd.read_file(landsea_filename, mask=wbd_buffer)
    if not landsea.empty:
        landsea.to_file(subset_landsea_filename,
                        driver=getDriver(subset_landsea_filename),
                        index=False)
    del landsea

    # Find intersecting lakes and write out
    print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength, hucCode),
          flush=True)
    nwm_lakes = gpd.read_file(nwm_lakes_filename, mask=wbd_buffer)

    if not nwm_lakes.empty:
        # Perform fill process to remove holes/islands in the NWM lake
        # polygons
        nwm_lakes = nwm_lakes.explode()
        nwm_lakes_fill_holes = MultiPolygon(
            Polygon(p.exterior)
            for p in nwm_lakes['geometry'])  # remove donut hole geometries
        # Loop through the filled polygons and insert the new geometry
        for i in range(len(nwm_lakes_fill_holes)):
            nwm_lakes.loc[i, 'geometry'] = nwm_lakes_fill_holes[i]
        nwm_lakes.to_file(subset_nwm_lakes_filename,
                          driver=getDriver(subset_nwm_lakes_filename),
                          index=False)
    del nwm_lakes

    # Find intersecting levee lines
    print("Subsetting NLD levee lines for HUC{} {}".format(
        hucUnitLength, hucCode), flush=True)
    nld_lines = gpd.read_file(nld_lines_filename, mask=wbd_buffer)
    if not nld_lines.empty:
        nld_lines.to_file(subset_nld_lines_filename,
                          driver=getDriver(subset_nld_lines_filename),
                          index=False)
    del nld_lines

    # Find intersecting nwm_catchments
    print("Subsetting NWM Catchments for HUC{} {}".format(
        hucUnitLength, hucCode), flush=True)
    nwm_catchments = gpd.read_file(nwm_catchments_filename, mask=wbd_buffer)
    nwm_catchments.to_file(subset_nwm_catchments_filename,
                           driver=getDriver(subset_nwm_catchments_filename),
                           index=False)
    del nwm_catchments

    # Subset nhd headwaters
    print("Subsetting NHD Headwater Points for HUC{} {}".format(
        hucUnitLength, hucCode), flush=True)
    nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask=wbd_buffer)

    # Subset nhd streams
    print("Querying NHD Streams for HUC{} {}".format(hucUnitLength, hucCode),
          flush=True)
    nhd_streams = gpd.read_file(nhd_streams_filename, mask=wbd_buffer)

    ## Identify local headwater stream segments
    nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask=wbd)
    nhd_streams_subset = nhd_streams_subset.loc[
        ~nhd_streams_subset.FromNode.isin(
            list(set(nhd_streams_subset.ToNode)
                 & set(nhd_streams_subset.FromNode)))]
    nhd_streams_subset = nhd_streams_subset[
        ~nhd_streams_subset['is_headwater']]

    if not nhd_streams_subset.empty:
        nhd_streams_subset = nhd_streams_subset.reset_index(drop=True)
        start_coords = []
        NHDPlusIDs = []

        for index, linestring in enumerate(nhd_streams_subset.geometry):
            start_coords = start_coords + [linestring.coords[-1]]
            NHDPlusIDs = NHDPlusIDs + [
                nhd_streams_subset.iloc[index].NHDPlusID]

        start_geoms = [Point(point) for point in start_coords]
        local_headwaters = gpd.GeoDataFrame(
            {'NHDPlusID': NHDPlusIDs, 'geometry': start_geoms},
            crs=projection, geometry='geometry')
        nhd_headwaters = nhd_headwaters.append(local_headwaters)

        # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)]

    if len(nhd_streams) > 0:
        nhd_streams.to_file(subset_nhd_streams_filename,
                            driver=getDriver(subset_nhd_streams_filename),
                            index=False)
    else:
        print("No NHD streams within HUC " + str(hucCode) + " boundaries.")
        sys.exit(0)

    if len(nhd_headwaters) > 0:
        nhd_headwaters.to_file(
            subset_nhd_headwaters_filename,
            driver=getDriver(subset_nhd_headwaters_filename), index=False)
        del nhd_headwaters, nhd_streams
    else:
        print("No headwater point(s) within HUC " + str(hucCode) +
              " boundaries.")
        sys.exit(0)

    # Subset nwm streams
    print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(
        hucUnitLength, hucCode), flush=True)
    nwm_streams = gpd.read_file(nwm_streams_filename, mask=wbd_buffer)
    nwm_streams.to_file(subset_nwm_streams_filename,
                        driver=getDriver(subset_nwm_streams_filename),
                        index=False)
    del nwm_streams
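
# Illustrative usage of subset_vector_layers for one HUC8 (assumed paths,
# abbreviated for the example):
#
#   subset_vector_layers('12090301', 'nwm_flows.gpkg', 'nhd_streams.gpkg',
#                        'nwm_lakes.gpkg', 'nld_lines.gpkg',
#                        'nwm_catchments.gpkg', 'nhd_headwaters.gpkg',
#                        'landsea.gpkg', 'wbd.gpkg', 'wbd_buffered.gpkg',
#                        'nhd_streams_subset.gpkg', 'nld_subset.gpkg',
#                        'nwm_lakes_subset.gpkg',
#                        'nwm_catchments_subset.gpkg',
#                        'nhd_headwaters_subset.gpkg',
#                        'nwm_flows_subset.gpkg', 'landsea_subset.gpkg')
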
parser.add_argument('--mainstem-flag',
                    help='flag for mainstems network', required=False,
                    default=False)

args = vars(parser.parse_args())

huc_number = args['huc_number']
huc4_mask = args['huc4_mask']
selected_wbd8 = args['selected_wbd8']
nhd_streams = args['nhd_streams']
headwaters_filename = args['headwaters_filename']
subset_nhd_streams_fileName = args['subset_nhd_streams_fileName']
headwater_id = args['headwater_id']
nwm_intersections_filename = args['nwm_intersections_filename']
mainstem_flag = args['mainstem_flag']

subset_streams_gdf = subset_nhd_network(huc_number, huc4_mask,
                                        selected_wbd8, nhd_streams,
                                        headwaters_filename, headwater_id,
                                        nwm_intersections_filename,
                                        mainstem_flag=mainstem_flag)

if subset_nhd_streams_fileName is not None:
    subset_streams_gdf.to_file(
        subset_nhd_streams_fileName,
        driver=getDriver(subset_nhd_streams_fileName), index=False)
def add_crosswalk(input_catchments_fileName, input_flows_fileName,
                  input_srcbase_fileName, input_bathy_fileName,
                  output_bathy_fileName, output_bathy_streamorder_fileName,
                  output_bathy_thalweg_fileName,
                  output_bathy_xs_lookup_fileName,
                  output_catchments_fileName, output_flows_fileName,
                  output_src_fileName, output_src_json_fileName,
                  output_crosswalk_fileName, output_hydro_table_fileName,
                  input_huc_fileName, input_nwmflows_fileName,
                  input_nwmcatras_fileName, mannings_n,
                  input_nwmcat_fileName, extent, small_segments_filename,
                  calibration_mode=False):

    input_catchments = gpd.read_file(input_catchments_fileName)
    input_flows = gpd.read_file(input_flows_fileName)
    input_huc = gpd.read_file(input_huc_fileName)
    input_nwmflows = gpd.read_file(input_nwmflows_fileName)
    min_catchment_area = float(os.environ['min_catchment_area'])  #0.25#
    min_stream_length = float(os.environ['min_stream_length'])  #0.5#
    # Env variable to toggle on/off the bathy calc and src modifications
    bathy_src_calc = os.environ['bathy_src_modification'] == "True"

    if extent == 'FR':
        ## Crosswalk using majority catchment method

        # Calculate majority catchments
        majority_calc = zonal_stats(input_catchments,
                                    input_nwmcatras_fileName,
                                    stats=['majority'], geojson_out=True)
        input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
        input_majorities = input_majorities.rename(
            columns={'majority': 'feature_id'})

        input_majorities = input_majorities[:][
            input_majorities['feature_id'].notna()]
        if input_majorities.feature_id.dtype != 'int':
            input_majorities.feature_id = \
                input_majorities.feature_id.astype(int)
        if input_majorities.HydroID.dtype != 'int':
            input_majorities.HydroID = input_majorities.HydroID.astype(int)

        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
        if input_nwmflows.feature_id.dtype != 'int':
            input_nwmflows.feature_id = \
                input_nwmflows.feature_id.astype(int)
        relevant_input_nwmflows = input_nwmflows[
            input_nwmflows['feature_id'].isin(
                input_majorities['feature_id'])]
        relevant_input_nwmflows = relevant_input_nwmflows.filter(
            items=['feature_id', 'order_'])

        if input_catchments.HydroID.dtype != 'int':
            input_catchments.HydroID = input_catchments.HydroID.astype(int)
        output_catchments = input_catchments.merge(
            input_majorities[['HydroID', 'feature_id']], on='HydroID')
        output_catchments = output_catchments.merge(
            relevant_input_nwmflows[['order_', 'feature_id']],
            on='feature_id')

        if input_flows.HydroID.dtype != 'int':
            input_flows.HydroID = input_flows.HydroID.astype(int)
        output_flows = input_flows.merge(
            input_majorities[['HydroID', 'feature_id']], on='HydroID')
        if output_flows.HydroID.dtype != 'int':
            output_flows.HydroID = output_flows.HydroID.astype(int)
        output_flows = output_flows.merge(
            relevant_input_nwmflows[['order_', 'feature_id']],
            on='feature_id')
        output_flows = output_flows.merge(
            output_catchments.filter(items=['HydroID', 'areasqkm']),
            on='HydroID')

    elif extent == 'MS':
        ## Crosswalk using stream segment midpoint method
        input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc)
        input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
        input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
        if input_nwmcat.feature_id.dtype != 'int':
            input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
        input_nwmcat = input_nwmcat.set_index('feature_id')

        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
        if input_nwmflows.feature_id.dtype != 'int':
            input_nwmflows.feature_id = \
                input_nwmflows.feature_id.astype(int)

        # Get stream midpoint
        stream_midpoint = []
        hydroID = []
        for i, lineString in enumerate(input_flows.geometry):
            hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
            stream_midpoint = stream_midpoint + [
                lineString.interpolate(0.5, normalized=True)]

        input_flows_midpoint = gpd.GeoDataFrame(
            {'HydroID': hydroID, 'geometry': stream_midpoint},
            crs=input_flows.crs, geometry='geometry')
        input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

        # Create crosswalk
        crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left',
                              op='within').reset_index()
        crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})

        # Fill in missing MS
        crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
        for index, stream in crosswalk_missing.iterrows():

            # Find closest NWM catchment by distance
            distances = [stream.geometry.distance(poly)
                         for poly in input_nwmcat.geometry]
            min_dist = min(distances)
            nwmcat_index = distances.index(min_dist)

            # Update crosswalk
            crosswalk.loc[crosswalk.HydroID == stream.HydroID,
                          'feature_id'] = \
                input_nwmcat.iloc[nwmcat_index].name
            crosswalk.loc[crosswalk.HydroID == stream.HydroID,
                          'AreaSqKM'] = \
                input_nwmcat.iloc[nwmcat_index].AreaSqKM
            crosswalk.loc[crosswalk.HydroID == stream.HydroID,
                          'Shape_Length'] = \
                input_nwmcat.iloc[nwmcat_index].Shape_Length
            crosswalk.loc[crosswalk.HydroID == stream.HydroID,
                          'Shape_Area'] = \
                input_nwmcat.iloc[nwmcat_index].Shape_Area

        crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
        crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']],
                                    on='feature_id')

        if len(crosswalk) < 1:
            print("No relevant streams within HUC boundaries.")
            sys.exit(0)

        if input_catchments.HydroID.dtype != 'int':
            input_catchments.HydroID = input_catchments.HydroID.astype(int)
        output_catchments = input_catchments.merge(crosswalk, on='HydroID')

        if input_flows.HydroID.dtype != 'int':
            input_flows.HydroID = input_flows.HydroID.astype(int)
        output_flows = input_flows.merge(crosswalk, on='HydroID')
        output_flows = output_flows.merge(
            output_catchments.filter(items=['HydroID', 'areasqkm']),
            on='HydroID')

    # Read in Manning's n values
    if calibration_mode == False:
        with open(mannings_n, "r") as read_file:
            mannings_dict = json.load(read_file)
    else:
        mannings_dict = {}
        for cnt, value in enumerate(mannings_n.split(",")[2:]):
            streamorder = cnt + 1
            mannings_dict[str(streamorder)] = value

    output_flows['ManningN'] = output_flows['order_'].astype(str).map(
        mannings_dict)

    if output_flows.NextDownID.dtype != 'int':
        output_flows.NextDownID = output_flows.NextDownID.astype(int)

    # Adjust short model reach rating curves
    print("Adjusting model reach rating curves")
    sml_segs = pd.DataFrame()

    # Replace small segment geometry with neighboring stream
    for stream_index in output_flows.index:

        if (output_flows["areasqkm"][stream_index] < min_catchment_area
                and output_flows["LengthKm"][stream_index] < min_stream_length
                and output_flows["LakeID"][stream_index] < 0):

            short_id = output_flows['HydroID'][stream_index]
            to_node = output_flows['To_Node'][stream_index]
            from_node = output_flows['From_Node'][stream_index]

            # Multiple upstream segments
            if len(output_flows.loc[output_flows['NextDownID'] == short_id]
                   ['HydroID']) > 1:
                try:
                    # Drainage area would be better than stream order, but
                    # we would need to calculate it
                    max_order = max(output_flows.loc[
                        output_flows['NextDownID'] == short_id]['order_'])
                except Exception:
                    print(f"short_id {short_id} cannot calculate max stream "
                          "order for multiple upstream segments scenario")

                if len(output_flows.loc[
                        (output_flows['NextDownID'] == short_id)
                        & (output_flows['order_'] == max_order)]
                        ['HydroID']) == 1:
                    update_id = output_flows.loc[
                        (output_flows['NextDownID'] == short_id)
                        & (output_flows['order_'] == max_order)][
                            'HydroID'].item()
                else:
                    # Get the first one (same stream order; without drainage
                    # area info it is hard to know which is the main channel)
                    update_id = output_flows.loc[
                        (output_flows['NextDownID'] == short_id)
                        & (output_flows['order_'] == max_order)][
                            'HydroID'].values[0]

            # Single upstream segment
            elif len(output_flows.loc[output_flows['NextDownID'] == short_id]
                     ['HydroID']) == 1:
                update_id = output_flows.loc[
                    output_flows.To_Node == from_node]['HydroID'].item()

            # No upstream segments; multiple downstream segments
            elif len(output_flows.loc[output_flows.From_Node == to_node]
                     ['HydroID']) > 1:
                try:
                    # Drainage area would be better than stream order, but
                    # we would need to calculate it
                    max_order = max(output_flows.loc[
                        output_flows.From_Node == to_node]['order_'])
                except Exception:
                    print(f"To Node {to_node} cannot calculate max stream "
                          "order for no upstream segments; multiple "
                          "downstream segments scenario")

                if len(output_flows.loc[
                        (output_flows['NextDownID'] == short_id)
                        & (output_flows['order_'] == max_order)]
                        ['HydroID']) == 1:
                    update_id = output_flows.loc[
                        (output_flows.From_Node == to_node)
                        & (output_flows['order_'] == max_order)][
                            'HydroID'].item()
                else:
                    # Get the first one (same stream order; without drainage
                    # area info it is hard to know which is the main channel)
                    update_id = output_flows.loc[
                        (output_flows.From_Node == to_node)
                        & (output_flows['order_'] == max_order)][
                            'HydroID'].values[0]

            # No upstream segments; single downstream segment
            elif len(output_flows.loc[output_flows.From_Node == to_node]
                     ['HydroID']) == 1:
                update_id = output_flows.loc[
                    output_flows.From_Node == to_node]['HydroID'].item()

            else:
                update_id = output_flows.loc[
                    output_flows.HydroID == short_id]['HydroID'].item()

            str_order = output_flows.loc[
                output_flows.HydroID == short_id]['order_'].item()
            sml_segs = sml_segs.append(
                {'short_id': short_id, 'update_id': update_id,
                 'str_order': str_order}, ignore_index=True)

    print("Number of short reaches [{} < {} and {} < {}] = {}".format(
        "areasqkm", min_catchment_area, "LengthKm", min_stream_length,
        len(sml_segs)))

    # Calculate src_full
    input_src_base = pd.read_csv(input_srcbase_fileName, dtype=object)
    if input_src_base.CatchId.dtype != 'int':
        input_src_base.CatchId = input_src_base.CatchId.astype(int)

    input_src_base = input_src_base.merge(
        output_flows[['ManningN', 'HydroID', 'NextDownID', 'order_']],
        left_on='CatchId', right_on='HydroID')

    input_src_base = input_src_base.rename(columns=lambda x: x.strip(" "))
    input_src_base = input_src_base.apply(pd.to_numeric,
                                          **{'errors': 'coerce'})
    input_src_base['TopWidth (m)'] = input_src_base[
        'SurfaceArea (m2)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['WettedPerimeter (m)'] = input_src_base[
        'BedArea (m2)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['WetArea (m2)'] = input_src_base[
        'Volume (m3)'] / input_src_base['LENGTHKM'] / 1000
    input_src_base['HydraulicRadius (m)'] = input_src_base[
        'WetArea (m2)'] / input_src_base['WettedPerimeter (m)']
    input_src_base['HydraulicRadius (m)'].fillna(0, inplace=True)
    # Manning's equation: Q = A * R^(2/3) * S^(1/2) / n
    input_src_base['Discharge (m3s-1)'] = input_src_base['WetArea (m2)'] * \
        pow(input_src_base['HydraulicRadius (m)'], 2.0 / 3) * \
        pow(input_src_base['SLOPE'], 0.5) / input_src_base['ManningN']

    # Set discharge at zero stage to 0
    input_src_base.loc[input_src_base['Stage'] == 0,
                       ['Discharge (m3s-1)']] = 0

    output_src = input_src_base.drop(columns=['CatchId'])
    if output_src.HydroID.dtype != 'int':
        output_src.HydroID = output_src.HydroID.astype(int)

    # Update rating curves
    if len(sml_segs) > 0:

        sml_segs.to_csv(small_segments_filename, index=False)
        print("Update rating curves for short reaches.")

        for index, segment in sml_segs.iterrows():

            short_id = segment['short_id']
            update_id = segment['update_id']
            new_values = output_src.loc[
                output_src['HydroID'] == update_id][
                    ['Stage', 'Discharge (m3s-1)']]

            for src_index, src_stage in new_values.iterrows():
                output_src.loc[
                    (output_src['HydroID'] == short_id)
                    & (output_src['Stage'] == src_stage['Stage']),
                    ['Discharge (m3s-1)']] = src_stage['Discharge (m3s-1)']

    if extent == 'FR':
        output_src = output_src.merge(
            input_majorities[['HydroID', 'feature_id']], on='HydroID')
    elif extent == 'MS':
        output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']],
                                      on='HydroID')

    output_crosswalk = output_src[['HydroID', 'feature_id']]
    output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)

    ## Bathy estimation integration in synthetic rating curve calculations
    if bathy_src_calc == True and extent == 'MS':
        output_src = bathy_rc_lookup(output_src, input_bathy_fileName,
                                     output_bathy_fileName,
                                     output_bathy_streamorder_fileName,
                                     output_bathy_thalweg_fileName,
                                     output_bathy_xs_lookup_fileName)
    else:
        print('Note: NOT using bathy estimation approach to modify the SRC...')

    # Make hydroTable
    output_hydro_table = output_src.loc[:, [
        'HydroID', 'feature_id', 'NextDownID', 'order_', 'Stage',
        'Discharge (m3s-1)', 'HydraulicRadius (m)', 'WetArea (m2)', 'SLOPE',
        'ManningN']]
    output_hydro_table.rename(columns={
        'Stage': 'stage',
        'Discharge (m3s-1)': 'discharge_cms'}, inplace=True)

    if output_hydro_table.HydroID.dtype != 'str':
        output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str)
    output_hydro_table[FIM_ID] = output_hydro_table.loc[:, 'HydroID'].apply(
        lambda x: str(x)[0:4])

    if input_huc[FIM_ID].dtype != 'str':
        input_huc[FIM_ID] = input_huc[FIM_ID].astype(str)
    output_hydro_table = output_hydro_table.merge(
        input_huc.loc[:, [FIM_ID, 'HUC8']], how='left', on=FIM_ID)

    if output_flows.HydroID.dtype != 'str':
        output_flows.HydroID = output_flows.HydroID.astype(str)
    output_hydro_table = output_hydro_table.merge(
        output_flows.loc[:, ['HydroID', 'LakeID']], how='left', on='HydroID')
    output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int)
    output_hydro_table = output_hydro_table.rename(columns={'HUC8': 'HUC'})

    if output_hydro_table.HUC.dtype != 'str':
        output_hydro_table.HUC = output_hydro_table.HUC.astype(str)

    output_hydro_table.drop(columns=FIM_ID, inplace=True)
    # Cast feature_id to int (drops any decimals), then to str for output
    if output_hydro_table.feature_id.dtype != 'int':
        output_hydro_table.feature_id = \
            output_hydro_table.feature_id.astype(int)
    if output_hydro_table.feature_id.dtype != 'str':
        output_hydro_table.feature_id = \
            output_hydro_table.feature_id.astype(str)

    # Write out based on mode
    if calibration_mode == True:
        output_hydro_table.to_csv(output_hydro_table_fileName, index=False)
    else:
        # Make src json
        output_src_json = dict()
        hydroID_list = unique(output_src['HydroID'])

        for hid in hydroID_list:
            indices_of_hid = output_src['HydroID'] == hid
            stage_list = output_src['Stage'][indices_of_hid].astype(float)
            q_list = output_src['Discharge (m3s-1)'][indices_of_hid].astype(
                float)
            stage_list = stage_list.tolist()
            q_list = q_list.tolist()
            output_src_json[str(hid)] = {'q_list': q_list,
                                         'stage_list': stage_list}

        # Write out
        output_catchments.to_file(
            output_catchments_fileName,
            driver=getDriver(output_catchments_fileName), index=False)
        output_flows.to_file(output_flows_fileName,
                             driver=getDriver(output_flows_fileName),
                             index=False)
        output_src.to_csv(output_src_fileName, index=False)
output_crosswalk.to_csv(output_crosswalk_fileName, index=False) output_hydro_table.to_csv(output_hydro_table_fileName, index=False) with open(output_src_json_fileName, 'w') as f: json.dump(output_src_json, f, sort_keys=True, indent=2)
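# A quick standalone check of the Manning's-equation form used in the SRC calculation
# above (Q = A * R^(2/3) * S^(1/2) / n). The channel-geometry numbers here are made up
# purely for illustration; they are not project data.
wet_area = 12.0          # WetArea (m2)
hydraulic_radius = 0.8   # HydraulicRadius (m)
slope = 0.002            # SLOPE (dimensionless)
mannings_n = 0.06        # ManningN

discharge_cms = wet_area * hydraulic_radius ** (2.0 / 3) * slope ** 0.5 / mannings_n
print(f"{discharge_cms:.2f} m^3/s")  # ~7.71 m^3/s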
def subset_vector_layers(hucCode, nwm_streams_filename, nhd_streams_filename, nwm_lakes_filename,
                         nld_lines_filename, nwm_catchments_filename, nhd_headwaters_filename,
                         landsea_filename, wbd_filename, wbd_buffer_filename,
                         subset_nhd_streams_filename, subset_nld_lines_filename,
                         subset_nwm_lakes_filename, subset_nwm_catchments_filename,
                         subset_nhd_headwaters_filename, subset_nwm_streams_filename,
                         subset_landsea_filename, extent, great_lakes_filename,
                         wbd_buffer_distance, lake_buffer_distance):

    hucUnitLength = len(str(hucCode))

    # Get WBD buffer
    wbd = gpd.read_file(wbd_filename)
    wbd_buffer = wbd.copy()
    wbd_buffer.geometry = wbd.geometry.buffer(wbd_buffer_distance, resolution=32)
    projection = wbd_buffer.crs

    great_lakes = gpd.read_file(great_lakes_filename, mask=wbd_buffer).reset_index(drop=True)

    if not great_lakes.empty:
        print("Masking Great Lakes for HUC{} {}".format(hucUnitLength, hucCode), flush=True)

        # Clip excess lake area
        great_lakes = gpd.clip(great_lakes, wbd_buffer)

        # Buffer remaining lake area
        great_lakes.geometry = great_lakes.buffer(lake_buffer_distance)

        # Remove the buffered Great Lakes from the WBD buffer
        wbd_buffer = gpd.overlay(wbd_buffer, great_lakes, how='difference')

    wbd_buffer = wbd_buffer[['geometry']]
    wbd_buffer.to_file(wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False)

    del great_lakes

    # Clip ocean water polygon for later masking of ocean areas (where applicable)
    landsea = gpd.read_file(landsea_filename, mask=wbd_buffer)
    if not landsea.empty:
        landsea.to_file(subset_landsea_filename, driver=getDriver(subset_landsea_filename),
                        index=False)
    del landsea

    # Find intersecting lakes and write out
    print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength, hucCode), flush=True)
    nwm_lakes = gpd.read_file(nwm_lakes_filename, mask=wbd_buffer)
    nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0]

    if not nwm_lakes.empty:
        # Perform fill process to remove holes/islands in the NWM lake polygons
        # (reset the exploded MultiIndex so the positional .loc assignment below lines up)
        nwm_lakes = nwm_lakes.explode().reset_index(drop=True)
        nwm_lakes_fill_holes = MultiPolygon(
            Polygon(p.exterior) for p in nwm_lakes['geometry'])  # remove donut-hole geometries

        # Loop through the filled polygons and insert the new geometry
        # (.geoms is required to iterate a MultiPolygon under shapely 2.x)
        for i, fill in enumerate(nwm_lakes_fill_holes.geoms):
            nwm_lakes.loc[i, 'geometry'] = fill

        nwm_lakes.to_file(subset_nwm_lakes_filename,
                          driver=getDriver(subset_nwm_lakes_filename), index=False)
    del nwm_lakes

    # Find intersecting levee lines
    print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength, hucCode), flush=True)
    nld_lines = gpd.read_file(nld_lines_filename, mask=wbd_buffer)
    if not nld_lines.empty:
        nld_lines.to_file(subset_nld_lines_filename,
                          driver=getDriver(subset_nld_lines_filename), index=False)
    del nld_lines

    # Subset NHD headwaters
    print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength, hucCode), flush=True)
    nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask=wbd_buffer)
    if extent == 'MS':
        nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem == 1]

    if len(nhd_headwaters) > 0:
        nhd_headwaters.to_file(subset_nhd_headwaters_filename,
                               driver=getDriver(subset_nhd_headwaters_filename), index=False)
    else:
        print("No headwater point(s) within HUC " + str(hucCode) + " boundaries.")
        sys.exit(0)
    del nhd_headwaters

    # Subset NHD streams
    print("Querying NHD Streams for HUC{} {}".format(hucUnitLength, hucCode), flush=True)
    nhd_streams = gpd.read_file(nhd_streams_filename, mask=wbd_buffer)
    if extent == 'MS':
        nhd_streams = nhd_streams.loc[nhd_streams.mainstem == 1]

    if len(nhd_streams) > 0:
        # Find incoming stream segments (to the WBD buffer) and identify which are upstream
        threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference')
        from_list = threshold_segments.FromNode.to_list()
        to_list = nhd_streams.ToNode.to_list()
        missing_segments = list(set(from_list) - set(to_list))

        # Special case: stream meanders in and out of the WBD buffer boundary
        if str(hucCode) == '10030203':
            missing_segments = missing_segments + [23001300001840.0, 23001300016571.0]

        # Remove incoming stream segments so they are not routed as outflow during hydroconditioning
        nhd_streams = nhd_streams.loc[~nhd_streams.FromNode.isin(missing_segments)]

        nhd_streams.to_file(subset_nhd_streams_filename,
                            driver=getDriver(subset_nhd_streams_filename), index=False)
    else:
        print("No NHD streams within HUC " + str(hucCode) + " boundaries.")
        sys.exit(0)
    del nhd_streams

    # Find intersecting NWM catchments
    print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength, hucCode), flush=True)
    nwm_catchments = gpd.read_file(nwm_catchments_filename, mask=wbd_buffer)
    if extent == 'MS':
        nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem == 1]

    if len(nwm_catchments) > 0:
        nwm_catchments.to_file(subset_nwm_catchments_filename,
                               driver=getDriver(subset_nwm_catchments_filename), index=False)
    else:
        print("No NWM catchments within HUC " + str(hucCode) + " boundaries.")
        sys.exit(0)
    del nwm_catchments

    # Subset NWM streams
    print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(
        hucUnitLength, hucCode), flush=True)
    nwm_streams = gpd.read_file(nwm_streams_filename, mask=wbd_buffer)
    if extent == 'MS':
        nwm_streams = nwm_streams.loc[nwm_streams.mainstem == 1]

    if len(nwm_streams) > 0:
        nwm_streams.to_file(subset_nwm_streams_filename,
                            driver=getDriver(subset_nwm_streams_filename), index=False)
    else:
        print("No NWM stream segments within HUC " + str(hucCode) + " boundaries.")
        sys.exit(0)
    del nwm_streams
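# A minimal sketch of the donut-hole removal applied to the NWM lake polygons above:
# rebuild each polygon from its exterior ring only, dropping interior rings (islands).
# Toy geometry, not project data.
from shapely.geometry import Polygon

lake_with_island = Polygon(
    shell=[(0, 0), (10, 0), (10, 10), (0, 10)],
    holes=[[(4, 4), (6, 4), (6, 6), (4, 6)]])

filled = Polygon(lake_with_island.exterior)
print(lake_with_island.area, filled.area)  # 96.0 100.0

# For a GeoDataFrame column, the same idea applied row-wise:
# gdf['geometry'] = gdf.geometry.apply(lambda p: Polygon(p.exterior))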
    nhd_headwater_points_adj = gpd.GeoDataFrame({'NHDPlusID': nhd_headwater_streams_adj['NHDPlusID'],
                                                 'geometry': hw_points},
                                                geometry='geometry', crs=PREP_PROJECTION)

    del nhd_headwater_streams_adj

    return nhd_streams, nhd_headwater_points_adj


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Adjust headwater stream geometry based on headwater start points')
    parser.add_argument('-f', '--huc', help='HUC number', required=True)
    parser.add_argument('-l', '--nhd-streams', help='NHDPlus HR geodataframe', required=True)
    parser.add_argument('-p', '--headwaters', help='Headwater points layer', required=True, type=str)
    parser.add_argument('-s', '--subset-nhd-streams-fileName', help='Output streams layer name',
                        required=False, type=str, default=None)
    # Note: the original script reused '-s' for this flag, which argparse rejects;
    # '-a' is assumed here as the short option
    parser.add_argument('-a', '--adj-headwater-points-fileName',
                        help='Output adjusted headwater points layer name',
                        required=False, type=str, default=None)
    parser.add_argument('-g', '--headwater-points-fileName', help='Output headwater points layer name',
                        required=False, type=str, default=None)
    parser.add_argument('-i', '--headwater-id', help='Headwater points ID column', required=True)

    args = vars(parser.parse_args())

    # Unpack arguments (the original referenced these names without defining them)
    huc = args['huc']
    nhd_streams = args['nhd_streams']
    headwaters = args['headwaters']
    headwater_id = args['headwater_id']
    subset_nhd_streams_fileName = args['subset_nhd_streams_fileName']
    headwater_points_fileName = args['headwater_points_fileName']
    adj_headwater_points_fileName = args['adj_headwater_points_fileName']

    adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc, nhd_streams, headwaters, headwater_id)

    if subset_nhd_streams_fileName is not None:
        adj_streams_gdf.to_file(subset_nhd_streams_fileName,
                                driver=getDriver(subset_nhd_streams_fileName), index=False)

    if headwater_points_fileName is not None:
        # The original called .to_file on the filename string itself; writing the
        # adjusted headwater points GeoDataFrame is assumed to be the intent here
        adj_headwaters_gdf.to_file(headwater_points_fileName,
                                   driver=getDriver(headwater_points_fileName), index=False)

    if adj_headwater_points_fileName is not None:
        adj_headwaters_gdf.to_file(adj_headwater_points_fileName,
                                   driver=getDriver(adj_headwater_points_fileName), index=False)
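# A minimal sketch of snapping a headwater point onto a stream line, a common building
# block for this kind of geometry adjustment (whether adjust_headwaters uses exactly
# this pattern is not shown above). Coordinates here are made up for illustration.
from shapely.geometry import Point, LineString

stream = LineString([(0, 0), (10, 0)])
raw_headwater = Point(3, 2)  # off-channel point

# project() gives the distance along the line closest to the point;
# interpolate() turns that distance back into a point on the line
snapped = stream.interpolate(stream.project(raw_headwater))
print(snapped)  # POINT (3 0)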
def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster,
            hydroid_fileName, dem_reaches_filename):
    """
    Calculates REM/HAND/Detrended DEM

    Parameters
    ----------
    dem_fileName : str
        File name of pit filled DEM raster.
    pixel_watersheds_fileName : str
        File name of stream pixel watersheds raster.
    rem_fileName : str
        File name of output relative elevation raster.
    thalweg_raster : str
        File name of the thalweg raster.
    hydroid_fileName : str
        File name of the hydroid raster (i.e. gw_catchments_reaches.tif).
    dem_reaches_filename : str
        File name of the reaches layer to populate HAND elevation attribute values
        and overwrite as output.
    """

    # ------------------------------- Get catchment_hydroid_dict ------------------------------- #
    # The following creates a dictionary of catchment ids (key) and their hydroid along the
    # thalweg (value). This is needed to produce a HAND zero-reference elevation by hydroid
    # dataframe (helpful for evaluating rating curves and bathymetry properties).
    @njit
    def make_catchment_hydroid_dict(flat_value_raster, catchment_hydroid_dict, flat_catchments,
                                    thalweg_window):
        for i, cm in enumerate(flat_catchments):
            if thalweg_window[i] == 1:  # Only allow reference hydroid to be within the thalweg
                catchment_hydroid_dict[cm] = flat_value_raster[i]
        return catchment_hydroid_dict

    # Open files
    gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName)
    hydroid_pixels_object = rasterio.open(hydroid_fileName)
    thalweg_raster_object = rasterio.open(thalweg_raster)

    # Specify raster object metadata
    meta = hydroid_pixels_object.meta.copy()
    meta['tiled'], meta['compress'] = True, 'lzw'

    # -- Create catchment_hydroid_dict -- #
    # Initialize an empty numba typed dictionary to store the catchment hydroids
    catchment_hydroid_dict = typed.Dict.empty(types.int64, types.int64)

    # Update catchment_hydroid_dict with each pixel catchment's hydroid,
    # iterating over raster windows
    for ji, window in hydroid_pixels_object.block_windows(1):

        hydroid_window = hydroid_pixels_object.read(1, window=window).ravel()
        catchments_window = gw_catchments_pixels_masked_object.read(1, window=window).ravel()
        thalweg_window = thalweg_raster_object.read(1, window=window).ravel()

        # Call numba-optimized function to update catchment_hydroid_dict
        # with pixel catchments overlapping the hydroid raster
        catchment_hydroid_dict = make_catchment_hydroid_dict(hydroid_window, catchment_hydroid_dict,
                                                             catchments_window, thalweg_window)

    hydroid_pixels_object.close()
    gw_catchments_pixels_masked_object.close()
    thalweg_raster_object.close()

    # ------------------------------- Get catchment_min_dict ------------------------------- #
    # The following creates a dictionary of catchment ids (key) and their minimum elevation
    # along the thalweg (value).
    @njit
    def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window):
        for i, cm in enumerate(flat_catchments):
            if thalweg_window[i] == 1:  # Only allow reference elevation to be within the thalweg
                # If the catchment is already in the dictionary, keep the lower elevation
                if cm in catchment_min_dict:
                    if flat_dem[i] < catchment_min_dict[cm]:
                        catchment_min_dict[cm] = flat_dem[i]
                else:
                    catchment_min_dict[cm] = flat_dem[i]
        return catchment_min_dict

    # Open files
    gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName)
    dem_thalwegCond_masked_object = rasterio.open(dem_fileName)
    thalweg_raster_object = rasterio.open(thalweg_raster)

    # Specify raster object metadata
    meta = dem_thalwegCond_masked_object.meta.copy()
    meta['tiled'], meta['compress'] = True, 'lzw'

    # -- Create catchment_min_dict -- #
    # Initialize an empty numba typed dictionary to store the catchment minimums
    catchment_min_dict = typed.Dict.empty(types.int64, types.float32)

    # Update catchment_min_dict with each pixel catchment's minimum thalweg elevation,
    # iterating over raster windows
    for ji, window in dem_thalwegCond_masked_object.block_windows(1):

        dem_window = dem_thalwegCond_masked_object.read(1, window=window).ravel()
        catchments_window = gw_catchments_pixels_masked_object.read(1, window=window).ravel()
        thalweg_window = thalweg_raster_object.read(1, window=window).ravel()

        # Call numba-optimized function to update catchment_min_dict with pixel catchment minimums
        catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict,
                                                     catchments_window, thalweg_window)

    dem_thalwegCond_masked_object.close()
    gw_catchments_pixels_masked_object.close()
    thalweg_raster_object.close()

    # Merge both dictionaries and export to csv
    catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index')
    catchment_min_dict_df.columns = ['Median_Thal_Elev_m']
    catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index')
    catchment_hydroid_dict_df.columns = ['HydroID']
    merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df,
                                               left_index=True, right_index=True)
    merge_df.index.name = 'pixelcatch_id'

    # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches
    # layer (add new layer attributes)
    min_by_hydroid = merge_df.groupby(['HydroID']).min()  # min thalweg elevation over all pixel catchments in each HydroID reach
    min_by_hydroid.columns = ['min_thal_elev']
    med_by_hydroid = merge_df.groupby(['HydroID']).median()  # median thalweg elevation over all pixel catchments in each HydroID reach
    med_by_hydroid.columns = ['med_thal_elev']
    max_by_hydroid = merge_df.groupby(['HydroID']).max()  # max thalweg elevation over all pixel catchments in each HydroID reach
    max_by_hydroid.columns = ['max_thal_elev']

    input_reaches = gpd.read_file(dem_reaches_filename)
    input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID')
    input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID')
    input_reaches = input_reaches.merge(max_by_hydroid, on='HydroID')
    input_reaches.to_file(dem_reaches_filename, driver=getDriver(dem_reaches_filename), index=False)

    # ------------------------------- Produce relative elevation model ------------------------------- #
    @njit
    def calculate_rem(flat_dem, catchmentMinDict, flat_catchments, ndv):
        rem_window = np.zeros(len(flat_dem), dtype=np.float32)
        for i, cm in enumerate(flat_catchments):
            if cm in catchmentMinDict:
                if catchmentMinDict[cm] == ndv:
                    rem_window[i] = ndv
                else:
                    rem_window[i] = flat_dem[i] - catchmentMinDict[cm]
        return rem_window

    # Open rasters: REM output for writing, pixel catchments and DEM for reading
    rem_rasterio_object = rasterio.open(rem_fileName, 'w', **meta)
    pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName)
    dem_rasterio_object = rasterio.open(dem_fileName)

    # Produce the relative elevation model raster window by window
    for ji, window in dem_rasterio_object.block_windows(1):

        dem_window = dem_rasterio_object.read(1, window=window)
        window_shape = dem_window.shape
        dem_window = dem_window.ravel()
        catchments_window = pixel_catchments_rasterio_object.read(1, window=window).ravel()

        rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window,
                                   meta['nodata'])
        rem_window = rem_window.reshape(window_shape).astype(np.float32)

        rem_rasterio_object.write(rem_window, window=window, indexes=1)

    dem_rasterio_object.close()
    pixel_catchments_rasterio_object.close()
    rem_rasterio_object.close()
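# A minimal numpy sketch of the REM idea implemented above: subtract each pixel's
# catchment minimum thalweg elevation from its DEM elevation. Toy arrays only;
# no windowing, numba, or nodata handling.
import numpy as np

dem = np.array([[101.0, 102.0], [105.0, 103.5]])
catchments = np.array([[1, 1], [2, 2]])
catchment_min = {1: 100.0, 2: 103.0}  # minimum thalweg elevation per catchment

rem = np.zeros_like(dem)
for cm, zmin in catchment_min.items():
    rem[catchments == cm] = dem[catchments == cm] - zmin
print(rem)  # [[1.  2. ] [2.  0.5]]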