def add_spatial_joins(df):
    """Add spatial joins of data provided by API, but not needed for network analysis.

    Parameters
    ----------
    df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        has fields added by spatial joins to other datasets
    """

    ### Protected lands
    print("Joining to protected areas")
    protected = from_geofeather(boundaries_dir / "protected_areas.feather")
    df = spatial_join(df, protected)
    df.OwnerType = df.OwnerType.fillna(-1).astype("int8")
    df.ProtectedLand = df.ProtectedLand.fillna(False).astype("bool")

    ### Priority layers
    print("Joining to priority watersheds")
    priorities = (
        deserialize_df(boundaries_dir / "priorities.feather")
        .set_index("HUC8")
        .rename(columns={"usfs": "HUC8_USFS", "coa": "HUC8_COA", "sgcn": "HUC8_SGCN"})
    )

    df = df.join(priorities, on="HUC8")
    df[priorities.columns] = df[priorities.columns].fillna(0).astype("uint8")

    return df
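# A minimal usage sketch for add_spatial_joins() above, not taken from the original
# source. It assumes the function's module already defines `boundaries_dir` (containing
# protected_areas.feather and priorities.feather) plus the spatial_join() and
# deserialize_df() helpers; the input path below is hypothetical.
from pathlib import Path

from geofeather import from_geofeather

barriers_dir = Path("data/barriers/master")           # assumed project layout
df = from_geofeather(barriers_dir / "dams.feather")   # any GeoDataFrame with a HUC8 column
df = add_spatial_joins(df)
print(df[["OwnerType", "ProtectedLand", "HUC8_USFS", "HUC8_COA", "HUC8_SGCN"]].head())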
def test_points_geofeather_wkt(tmpdir, points_albers_conus_wkt):
    """Confirm that we can round-trip points to / from feather file with a wkt defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_wkt, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_wkt)
    assert df.crs == points_albers_conus_wkt.crs
def main(tile, input_pt_feather):

    # Data Directories
    source_dir = '/Users/arbailey/natcap/idb/data/source/'
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')
    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir, "gliht_srtm_{}.feather".format(tile))

    #--- Load the G-LiHT points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source, time_elapsed(start_time)))
    print(gliht_pts.dtypes)
    print(gliht_pts)

    #--- SRTM elevation data
    srtm_source = os.path.join(source_dir, 'srtm/nasa', ".".join((tile, 'SRTMGL1', 'hgt', 'zip')))

    # Clip the points to SRTM raster extent (1 degree tile)
    # gliht_pts_clip = clip_pts_with_raster(gliht_pts[1:100], srtm_source)  # subset for testing
    gliht_pts_clip = clip_pts_with_raster(gliht_pts, srtm_source)

    # Sample the SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_source, 'srtm_m')
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Create unique index value for SRTM raster
    srtm_unique_source = os.path.join(work_dir, "{}_{}_{}.{}".format(tile, 'srtm', 'uniqueid', 'tif'))
    make_unique_raster(srtm_source, srtm_unique_source)

    # Sample Unique ID SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_unique_source, 'srtm_idx')
    gliht_pts_clip.reset_index(inplace=True)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Add columns to show the tile and unique index plus tile
    gliht_pts_clip['tile'] = tile
    gliht_pts_clip['tile_srtmidx'] = gliht_pts_clip['tile'] + '_' + gliht_pts_clip['srtm_idx'].astype(str)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(out_feather_path, time_elapsed(start_time)))
def test_polygons_geofeather(tmpdir, polygons_wgs84):
    """Confirm that we can round-trip polygons to / from feather file"""

    filename = tmpdir / "polygons_wgs84.feather"
    to_geofeather(polygons_wgs84, filename)

    assert os.path.exists(filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, polygons_wgs84)
    assert df.crs == polygons_wgs84.crs
def cut_flowlines_at_barriers(region, barriers):
    """Read in flowlines and joins between segments, cut flowlines at barriers,
    and return updated flowlines, joins, and joins at each of the barriers.

    NOTE: loops are dropped from the analysis.

    Parameters
    ----------
    region : str
        ID of region group
    barriers : GeoDataFrame
        Barriers to cut the network

    Returns
    -------
    (GeoDataFrame, DataFrame, DataFrame)
        cut flowlines, updated joins, barrier joins
    """

    ### Read NHD flowlines and joins
    print("Reading flowlines...")
    flowline_start = time()

    flowlines = (
        from_geofeather(nhd_dir / region / "flowlines.feather")
        .set_index("lineID", drop=False)
        .drop(columns=["HUC2"], errors="ignore")
    )
    joins = deserialize_df(nhd_dir / region / "flowline_joins.feather")

    # Fix data issue; remove on next full run of prepare_flowlines_waterbodies.py
    ix = flowlines.loc[flowlines.loop].index
    joins.loc[joins.upstream_id.isin(ix) | joins.downstream_id.isin(ix), "loop"] = True
    joins.loop = joins.loop.fillna(False)

    ix = flowlines.loop == True
    print("Found {:,} loops, dropping...".format(ix.sum()))

    flowlines = flowlines.loc[~ix].copy()
    joins = joins.loc[~joins.loop].copy()

    print("Read {:,} flowlines in {:.2f}s".format(len(flowlines), time() - flowline_start))

    ### Cut flowlines at barriers
    cut_start = time()

    # since all other lineIDs use HUC4 prefixes, this should be unique
    # Use the first HUC2 for the region group
    next_segment_id = int(REGION_GROUPS[region][0]) * 1000000 + 1
    flowlines, joins, barrier_joins = cut_flowlines(
        flowlines, barriers, joins, next_segment_id=next_segment_id
    )

    print("Done cutting flowlines in {:.2f}".format(time() - cut_start))

    return flowlines, joins, barrier_joins
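# Hypothetical call sequence for cut_flowlines_at_barriers() above (illustration only,
# not from the original source). It assumes the read_barriers() helper shown elsewhere
# in this collection and a valid region group id; the outputs are the cut flowlines,
# the updated joins between segments, and the joins at each barrier.
region = "02"                              # assumed region group identifier
barriers = read_barriers(region, "dams")
flowlines, joins, barrier_joins = cut_flowlines_at_barriers(region, barriers)
print("{:,} flowline segments after cutting at {:,} barriers".format(len(flowlines), len(barriers)))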
def test_missing_crs_warning(tmpdir, points_wgs84):
    """Confirm that a warning is raised if the crs file is missing"""

    filename = tmpdir / "points_wgs84.feather"
    to_geofeather(points_wgs84, filename)

    os.remove("{}.crs".format(filename))

    with pytest.warns(UserWarning) as warning:
        df = from_geofeather(filename)
        assert "coordinate reference system file is missing" in warning[0].message.args[0]
        assert df.crs is None
def test_points_geofeather_proj4(tmpdir, points_albers_conus_proj4):
    """Confirm that we can round-trip points to / from feather file with a proj4 defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_proj4, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_proj4)

    # equality comparison fails for a CRS object constructed from proj4, even though they are still the same
    if hasattr(df.crs, "to_proj4"):
        assert df.crs.to_proj4() == points_albers_conus_proj4.crs.to_proj4()
    else:
        assert df.crs == points_albers_conus_proj4.crs
def main(raster_source, uniqueid_file, work_dir, input_pt_feather, out_feather):

    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir, out_feather)

    #--- Load the points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    in_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source, time_elapsed(start_time)))
    print(in_pts.dtypes)
    print(in_pts)

    # Clip the points to raster extent
    # in_pts_clip = clip_pts_with_raster(in_pts[1:100], raster_source)  # subset for testing
    in_pts_clip = clip_pts_with_raster(in_pts, raster_source)

    # Sample the raster
    in_pts_clip = sample_raster(in_pts_clip, raster_source, 'tncdep_m')
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Create unique index value for the raster
    raster_unique_source = os.path.join(work_dir, uniqueid_file)
    make_unique_raster(raster_source, raster_unique_source)

    # Sample the unique ID raster
    in_pts_clip = sample_raster(in_pts_clip, raster_unique_source, 'tncdep_idx')
    in_pts_clip.reset_index(inplace=True)
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(in_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(out_feather_path, time_elapsed(start_time)))
columns=["miles", "free_miles"]) serialize_df(barrier_networks.reset_index(drop=False), out_dir / "barriers_network.feather") ### Update network geometries and barrier networks # Cut networks from upstream regions and paste them into downstream regions cut_networks = None for region in cross_region.from_region.unique(): out_dir = data_dir / "networks" / region / network_type ### Update network geometries print( "Cutting downstream network from upstream region {}...".format(region)) network = from_geofeather(data_dir / "networks" / region / network_type / "raw/network.feather") # select the affected networks idx = network.networkID.isin(cross_region.upstream_network) cut = network.loc[idx].copy() if cut_networks is None: cut_networks = cut else: cut_networks = cut_networks.append(cut, ignore_index=True, sort=False) # write the updated network back out network = network.loc[~idx].copy() print("Serializing updated network...")
### Read in master
print("Reading master...")
df = (
    from_geofeather(barriers_dir / "small_barriers.feather")
    .set_index("id")
    .drop(
        columns=[
            "level_0",
            "index",
            "dup_group",
            "dup_count",
            "dup_log",
            "snap_dist",
            "snap_tolerance",
            "snap_ref_id",
            "snap_log",
            "snapped",
            "ProtectedLand",
            "log",
            "lineID",
            "wbID",
        ],
        errors="ignore",
    )
    .rename(columns={"streamorder": "StreamOrder", "excluded": "Excluded"})
)

# drop any that should be DROPPED (dropped or duplicate) from the analysis
# NOTE: excluded ones are retained but don't have networks
def main(tile, input_pt_feather):

    # Data Directories
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')
    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir, "gliht_srtm_mangroves_{}.feather".format(tile))

    #--- Mangrove Max Height rasters
    hmax_source = os.path.join(data_dir, 'gmc_hmax95_bahamas_MAR.tif')
    hba_source = os.path.join(data_dir, 'gmc_hba95_bahamas_MAR.tif')

    #--- Load the G-LiHT/SRTM points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source, time_elapsed(start_time)))
    gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Sample the Canopy Height rasters
    # Max Height - hmax95
    # gliht_pts = sample_raster(gliht_pts[0:100], hmax_source, 'hmax95')  # subset for testing
    gliht_pts = sample_raster(gliht_pts, hmax_source, 'hmax95')
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Weighted Average Height - hba95
    gliht_pts = sample_raster(gliht_pts, hba_source, 'hba95')
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Create unique index value for Canopy raster
    gmc_unique_source = os.path.join(work_dir, "gmc_uniqueid.tif")
    # make_unique_raster(hmax_source, gmc_unique_source)  # Takes 1:55:14.79

    # Sample Unique ID raster
    gliht_pts = sample_raster(gliht_pts, gmc_unique_source, 'hmax_idx')
    # gliht_pts.reset_index(inplace=True)
    # gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Add columns to show the tile and unique index plus tile
    gliht_pts['tile'] = tile
    gliht_pts['tile_hmaxidx'] = gliht_pts['tile'] + '_' + gliht_pts['hmax_idx'].astype(str)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Mangrove Extent Vector shapefile paths to join to Points
    #-- World Atlas of Mangroves
    wam_path = os.path.join(data_dir, 'wam_Bahamas_MAR.shp')
    wam_att = 'wam'
    wam = mangrove_poly_to_gdf(wam_path, wam_att)
    print(wam)
    gliht_pts = mangrove_join(gliht_pts, wam)
    print(gliht_pts)

    #-- Global Mangrove Watch
    gmw2016_path = os.path.join(data_dir, 'gmw2016_Bahamas_MAR.shp')
    gmw2016_att = 'gmw2016'
    gmw2016 = mangrove_poly_to_gdf(gmw2016_path, gmw2016_att)
    print(gmw2016)
    gliht_pts = mangrove_join(gliht_pts, gmw2016)
    print(gliht_pts)

    #-- Global Mangrove Forests
    gmf_path = os.path.join(data_dir, 'gmf_bahamas_MAR.shp')
    gmf_att = 'gmf'
    gmf = mangrove_poly_to_gdf(gmf_path, gmf_att)
    print(gmf)
    gliht_pts = mangrove_join(gliht_pts, gmf)
    print(gliht_pts)

    #-- NatCap Mangrove compilation for MAR region (Mexico, Belize, Guatemala, Honduras)
    ncmar_path = os.path.join(data_dir, 'natcap_mangrovesV4_MAR.shp')
    ncmar_att = 'ncMAR'
    ncmar = mangrove_poly_to_gdf(ncmar_path, ncmar_att)
    print(ncmar)
    gliht_pts = mangrove_join(gliht_pts, ncmar)
    print(gliht_pts)

    print(gliht_pts.dtypes)
    print(gliht_pts.describe())

    # Export to GeoFeather format
    # reset_index() is required, otherwise the feather export raises:
    # ValueError: feather does not support serializing a non-default index for the index;
    # you can .reset_index() to make the index into column(s)
    gliht_pts.reset_index(inplace=True)
    print(gliht_pts.dtypes)
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts, out_feather_path)
    print("Export execution time for {0}: {1}".format(out_feather_path, time_elapsed(start_time)))
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import plotly
import plotly.express as px
from flask import Flask

# API keys and datasets
mb_token = 'pk.eyJ1IjoiamF2aS1hbGZhcm8iLCJhIjoiY2tiMnR0cm5zMDBoejJ4cWNxb3Bzcno5aiJ9.Zh0OEJmyiH27YG4Yw_KLyg'
map_shape = gpd.read_file('./data/slv_adm2/SLV_adm2.shp')
map_shape.columns = map(str.lower, map_shape.columns)
map_shape['codigomunic'] = map_shape.name_2
map_shape['depto'] = map_shape.name_1

gdf = gf.from_geofeather('./data/ehpm19_merged_sample.feather')
gdf.crs = "EPSG:4326"
map_data = gdf.copy()
map_data["lon"] = gdf.centroid.x
map_data["lat"] = gdf.centroid.y
del gdf

# Preparing geojson
map_shape.to_file("./data/esa.json", driver="GeoJSON")
with open('./data/esa.json') as response:
    esa_geoj = json.load(response)

px.set_mapbox_access_token(mb_token)
### Read in master
print("Reading master...")
df = (
    from_geofeather(barriers_dir / "dams.feather")
    .set_index("id")
    .drop(
        columns=[
            "level_0",
            "index",
            "dup_group",
            "dup_count",
            "dup_sort",
            "dup_log",
            "snap_dist",
            "snap_tolerance",
            "snap_ref_id",
            "snap_log",
            "snapped",
            "ProtectedLand",
            "NHDPlusID",
            "SourceState",
            "lineID",
            "wbID",
            "waterbody",
            "src",
            "kind",
            "log",
        ],
        errors="ignore",
    )
    .rename(columns={"streamorder": "StreamOrder", "excluded": "Excluded"})
)

# drop any that should be DROPPED (dropped or duplicate) from the analysis
print("{:,} duplicate road crossings".format(len(df) - len(keep_ids))) df = df.loc[keep_ids].copy() ### Remove crossings that are very close print("Removing nearby road crossings...") # consider 5 m nearby df = mark_duplicates(df, 5) print("{:,} very close road crossings dropped".format(len( df.loc[df.duplicate]))) df = df.loc[~df.duplicate].drop( columns=["duplicate", "dup_count", "dup_group"]) ### Remove those that otherwise duplicate existing small barriers print("Removing crossings that duplicate existing barriers") barriers = from_geofeather(barriers_dir / "master/small_barriers.feather") barriers = barriers.loc[~barriers.duplicate] barriers["kind"] = "barrier" df["joinID"] = (df.index * 1e6).astype("uint32") df["kind"] = "crossing" merged = barriers[["kind", "geometry"]].append(df[["joinID", "kind", "geometry"]], sort=False, ignore_index=True) merged = mark_duplicates(merged, tolerance=DUPLICATE_TOLERANCE) dup_groups = merged.loc[(merged.dup_count > 1) & (merged.kind == "barrier")].dup_group.unique() remove_ids = merged.loc[merged.dup_group.isin(dup_groups)
def read_barriers(region, mode):
    """Read files created by prep_dams.py, prep_waterfalls.py, prep_small_barriers.py,
    merge them together, and assign a uniqueID for internal use in the network analysis.

    NOTE: barriers on loops are dropped

    Parameters
    ----------
    region : str
        region group identifier, e.g., "02"
    mode : str
        One of "natural", "dams", "small_barriers"

    Returns
    -------
    GeoDataFrame
        Merged barriers file
    """

    start = time()

    print("Reading waterfalls")
    wf = from_geofeather(barriers_dir / "waterfalls.feather")
    wf = wf.loc[wf.HUC2.isin(REGION_GROUPS[region])].copy()
    print("Selected {:,} waterfalls".format(len(wf)))

    wf["barrierID"] = WATERFALLS_ID + wf.id
    wf["kind"] = "waterfall"

    barriers = wf

    if mode != "natural":
        print("Reading dams")
        dams = from_geofeather(barriers_dir / "dams.feather")
        dams = dams.loc[dams.HUC2.isin(REGION_GROUPS[region])].copy()
        print("Selected {:,} dams".format(len(dams)))

        dams["barrierID"] = DAMS_ID + dams.id
        dams["kind"] = "dam"

        if len(dams):
            barriers = barriers.append(dams, ignore_index=True, sort=False)

    if mode == "small_barriers":
        print("Reading small barriers")
        sb = from_geofeather(barriers_dir / "small_barriers.feather")
        sb = sb.loc[sb.HUC2.isin(REGION_GROUPS[region])].copy()
        print("Selected {:,} small barriers".format(len(sb)))

        sb["barrierID"] = SB_ID + sb.id
        sb["kind"] = "small_barrier"

        if len(sb):
            barriers = barriers.append(sb, ignore_index=True, sort=False)

    # Update dtypes
    # TODO: not needed after rerun of prep_*.py scripts
    barriers.id = barriers.id.astype("uint32")
    barriers.lineID = barriers.lineID.astype("uint32")
    barriers.NHDPlusID = barriers.NHDPlusID.astype("uint64")
    barriers.barrierID = barriers.barrierID.astype("uint64")

    ix = barriers.loop == True
    print("Found {:,} barriers on loops, dropping".format(ix.sum()))
    barriers = barriers.loc[~ix].copy()

    print("Extracted {:,} barriers in {:.2f}s".format(len(barriers), time() - start))

    return barriers[
        ["geometry", "id", "lineID", "NHDPlusID", "barrierID", "kind"]
    ].set_index("barrierID", drop=False)
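# A short, assumed example of calling read_barriers() above; "02" and the
# "small_barriers" mode are illustrative values, not from the original source.
barriers = read_barriers("02", "small_barriers")
print(barriers.kind.value_counts())        # counts of waterfalls, dams, small barriers
print(barriers.columns.tolist())           # geometry, id, lineID, NHDPlusID, barrierID, kind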
    os.makedirs(tile_dir)

### Read in master
print("Reading master...")
df = (
    from_geofeather(barriers_dir / "waterfalls.feather")
    .set_index("id")
    .drop(
        columns=[
            "level_0",
            "index",
            "dup_group",
            "dup_count",
            "dup_log",
            "snap_dist",
            "snap_tolerance",
            "snap_log",
            "snapped",
            "log",
            "lineID",
            "wbID",
        ],
        errors="ignore",
    )
    .rename(
        columns={
            "streamorder": "StreamOrder",
            "name": "Name",
            "watercours": "Stream",
            "gnis_name_": "GNIS_Name",
        }
    )
)

### Fix data type issues
# TODO: move to prep script
df.Name = df.Name.fillna("").str.strip()
                parallel_runner, enumerate(chunks)), total=chunk_count):
            partial_results[index] = chunk

    df_join = pd.concat(partial_results)
    return df_join


if __name__ == "__main__":

    with open(PROCESSED_DATA_SOURCES + TEMP + "TEMP_2019.pkl", 'rb') as f:
        data_TEMP = pickle.load(f)

    crs = {'init': 'EPSG:4326'}
    geometry = [Point(xy) for xy in zip(data_TEMP['x'], data_TEMP['y'])]
    df_points = gpd.GeoDataFrame(data_TEMP, crs=crs, geometry=geometry)

    df_zones = from_geofeather(PROCESSED_DATA_SOURCES + 'Shape_Joined.feather')

    df_joined = run(df_zones, df_points, use_parallel=True, processes=10)
    df_joined = df_joined.reset_index(drop=True)

    aggregations = {
        'NAME_0': 'first',
        'value': 'mean',
        'TYPE_1': 'first',
        'ENGTYPE_1': 'first',
        'GID_0': 'first',
        'GID_1': 'first',
    }

    temp_grouped = df_joined.groupby(['date_range', 'NAME_1']).agg(aggregations)
    temp_grouped.to_csv(PROCESSED_DATA_SOURCES + 'temp_19.csv')