def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp and shp2df and check the results.

    Three cases are exercised: an attribute table without geometry (written
    as a .dbf), a table with point geometries, and a table with a datetime
    index written with ``index=True``.
    """
    # The files below are written under 'temp/', so that folder has to exist;
    # 'output' is still created because the original test created it.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int),
                       'value': np.arange(1, 100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
                       'isTrue': [True, False] * 49 + [True]})
    df2shp(df, 'temp/junk.dbf')
    df = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    assert [d.name for d in df.dtypes] == ['bool', 'object', 'int64', 'float64']
    assert df.isTrue.sum() == 50

    # test with geometry
    df = pd.DataFrame({'reach': np.arange(1, 101, dtype=int),
                       'value': np.arange(100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(100)],
                       'geometry': [Point([i, i]) for i in range(100)]})
    original_columns = df.columns.tolist()
    df2shp(df, 'temp/junk.shp')
    df = shp2df('temp/junk.shp')
    assert df.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df.index.values, np.arange(100))  # check ordering of rows
    assert df.columns.tolist() == original_columns  # check column order

    # test datetime handling and retention of index
    # (100 one-second stamps, one per row of the geometry table)
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:39', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.shp', index=True)
    df = shp2df('temp/junk.shp')
    assert 'datetime' in df.columns
    assert df.datetime[0] == '2016-01-01 01:00:00'
def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp and shp2df, timing the dbf case.

    Three cases are exercised: an attribute table without geometry (written
    as a .dbf with ``retain_order=True``), a table with point geometries,
    and a table with a datetime index written with ``index=True``.
    """
    # The files below are written under 'temp/', so that folder has to exist;
    # 'output' is still created because the original test created it.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({
        'reach': np.arange(10000001, 10000100, dtype=int),
        'value': np.arange(1, 100, dtype=float),
        'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
        'isTrue': [True, False] * 49 + [True]
    })
    cols = ['reach', 'value', 'name', 'isTrue']
    df1 = df[cols]  # designate a column order
    ta = time.time()
    df2shp(df1, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - ta))
    ta = time.time()
    df2 = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - ta))
    # column-name check was disabled in the original test:
    # assert list(df2.columns) == cols
    # the dtype order below only holds if the designated column order survived
    assert [d.name for d in df2.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert df2.isTrue.sum() == 50

    # test with geometry
    df1 = pd.DataFrame({
        'reach': np.arange(1, 101, dtype=int),
        'value': np.arange(100, dtype=float),
        'name': ['stuff{}'.format(i) for i in np.arange(100)],
        'geometry': [Point([i, i]) for i in range(100)]
    })
    # geometry is placed in last column when shp is read in
    cols = ['reach', 'value', 'name', 'geometry']
    df1 = df1[cols]
    df2shp(df1, 'temp/junk.shp', retain_order=True)
    df2 = shp2df('temp/junk.shp')
    assert df2.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df2.index.values, np.arange(100))  # check ordering of rows
    assert df2.columns.tolist() == cols  # check column order

    # test datetime handling and retention of index
    # (99 one-second stamps, one per row of the first, non-spatial table)
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:38', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.dbf', index=True)
    df = shp2df('temp/junk.dbf')
    assert 'datetime' in df.columns
    assert df.datetime[0] == '2016-01-01 01:00:00'
def _load_shapefile(self, shp, index_field, convert_coordinates, remove_offset, simplify):
    """Read a shapefile into a DataFrame and prepare its geometries for plotting.

    Parameters
    ----------
    shp : str
        Path passed to shp2df and get_proj4.
    index_field : str or None
        Column to use as the DataFrame index; None keeps the index
        returned by shp2df.
    convert_coordinates : number
        Multiplier applied to the x and y coordinates of every geometry.
    remove_offset : bool
        If True, shift geometries by ``-self.extent_proj[0]`` and
        ``-self.extent_proj[1]`` (llcorner = 0,0).
    simplify : number
        Tolerance passed to ``geometry.simplify``; values <= 0 skip it.

    Returns
    -------
    df : DataFrame
        Attributes plus the processed 'geometry' column.
    """
    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid of z values if the
    # shapefile has them. Every geometry is inspected (not only the first),
    # which also keeps an empty shapefile from raising IndexError.
    if convert_coordinates != 1 or any(getattr(g, 'has_z', False) for g in df.geometry):
        def scale_xy(x, y, z=None):
            # returning only (x, y) drops any z coordinate
            return x * convert_coordinates, y * convert_coordinates

        df['geometry'] = [transform(scale_xy, g) for g in df.geometry]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        xoff = -1 * self.extent_proj[0]
        yoff = -1 * self.extent_proj[1]
        df['geometry'] = [translate(g, xoff, yoff) for g in df.geometry]

    if simplify > 0:
        df['geometry'] = [g.simplify(simplify) for g in df.geometry]
    return df
def _load_shapefile(self, shp, index_field, convert_coordinates, remove_offset, simplify):
    """Read a shapefile into a DataFrame and prepare its geometries for plotting.

    Parameters
    ----------
    shp : str
        Path passed to shp2df and get_proj4.
    index_field : str or None
        Column to use as the DataFrame index; None keeps the index
        returned by shp2df.
    convert_coordinates : number
        Multiplier applied to the x and y coordinates of every geometry.
    remove_offset : bool
        If True, shift geometries by ``-self.extent_proj[0]`` and
        ``-self.extent_proj[1]`` (llcorner = 0,0).
    simplify : number
        Tolerance passed to ``geometry.simplify``; values <= 0 skip it.

    Returns
    -------
    df : DataFrame
        Attributes plus the processed 'geometry' column.
    """
    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid of z values if the
    # shapefile has them. Every geometry is inspected (not only the first),
    # which also keeps an empty shapefile from raising IndexError.
    if convert_coordinates != 1 or any(getattr(g, 'has_z', False) for g in df.geometry):
        def scale_xy(x, y, z=None):
            # returning only (x, y) drops any z coordinate
            return x * convert_coordinates, y * convert_coordinates

        df['geometry'] = [transform(scale_xy, g) for g in df.geometry]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        xoff = -1 * self.extent_proj[0]
        yoff = -1 * self.extent_proj[1]
        df['geometry'] = [translate(g, xoff, yoff) for g in df.geometry]

    if simplify > 0:
        df['geometry'] = [g.simplify(simplify) for g in df.geometry]
    return df
def __init__(self, shapefile='', shapefile_hru_col='nhru'):
    """Load shapefile geometries and index them by the HRU column.

    Parameters
    ----------
    shapefile : str
        Path passed to shp2df.
    shapefile_hru_col : str
        Column used to sort the shapefile records and to index
        ``self.geometry``.

    If sorting or indexing fails, the error and a hint are printed and
    ``self.geometry`` may be left unset or only partially set up.
    """
    shp = shp2df(shapefile)
    try:
        # NOTE(review): DataFrame.sort is the legacy pandas API (later
        # replaced by sort_values) -- confirm the pandas version in use.
        shp.sort(shapefile_hru_col, inplace=True)
        self.geometry = shp['geometry']
        self.geometry.index = shp[shapefile_hru_col].values
    except Exception as e:
        # deliberately best-effort: report the actual error type and continue
        print('{}: {}'.format(type(e).__name__, e))
        print('\nPlease supply an index field relating shapefile geometries '
              'to columns in GDP data (e.g. hru number)')
def test_integer_dtypes():
    """Check integer handling when writing a DataFrame to a dbf file.

    First verifies that pandas recasts numpy ints as python ints when an
    object-typed frame is converted to a list of record dicts (numpy ints
    are invalid for shapefiles), then writes and re-reads an all-integer
    table. The write/read step only has to complete without raising.
    """
    ones = pd.DataFrame(np.ones((3, 3)), dtype=int)
    records = ones.astype(object).to_dict(orient='records')
    # only the first column (key 0) of each of the three records is checked
    for row_number in (0, 1, 2):
        value = records[row_number][0]
        assert isinstance(value, int)

    int_table = pd.DataFrame({'r': np.arange(100), 'c': np.arange(100)})
    df2shp(int_table, 'temp/ints.dbf')
    df2 = shp2df('temp/ints.dbf')
    assert True
def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp and shp2df, timing the dbf case.

    Three cases are exercised: an attribute table without geometry (written
    as a .dbf with ``retain_order=True``), a table with point geometries,
    and a table with a datetime index written with ``index=True``.
    """
    # The files below are written under 'temp/', so that folder has to exist;
    # 'output' is still created because the original test created it.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int),
                       'value': np.arange(1, 100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
                       'isTrue': [True, False] * 49 + [True]})
    cols = ['reach', 'value', 'name', 'isTrue']
    df1 = df[cols]  # designate a column order
    ta = time.time()
    df2shp(df1, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - ta))
    ta = time.time()
    df2 = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - ta))
    # column-name check was disabled in the original test:
    # assert list(df2.columns) == cols
    # the dtype order below only holds if the designated column order survived
    assert [d.name for d in df2.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert df2.isTrue.sum() == 50

    # test with geometry
    df1 = pd.DataFrame({'reach': np.arange(1, 101, dtype=int),
                        'value': np.arange(100, dtype=float),
                        'name': ['stuff{}'.format(i) for i in np.arange(100)],
                        'geometry': [Point([i, i]) for i in range(100)]})
    # geometry is placed in last column when shp is read in
    cols = ['reach', 'value', 'name', 'geometry']
    df1 = df1[cols]
    df2shp(df1, 'temp/junk.shp', retain_order=True)
    df2 = shp2df('temp/junk.shp')
    assert df2.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df2.index.values, np.arange(100))  # check ordering of rows
    assert df2.columns.tolist() == cols  # check column order

    # test datetime handling and retention of index
    # (99 one-second stamps, one per row of the first, non-spatial table)
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:38', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.dbf', index=True)
    df = shp2df('temp/junk.dbf')
    assert 'datetime' in df.columns
    assert df.datetime[0] == '2016-01-01 01:00:00'
def __init__(self, dir, variable='tmin', scenarios=['20c3m', 'early', 'late'],
             shapefile='', shapefile_hru_col='nhru'):
    """List the data files for one variable and optionally load HRU geometries.

    Parameters
    ----------
    dir : str
        Folder that is scanned for files whose name contains `variable`.
    variable : str
        Substring used to select files in `dir`.
    scenarios : list of str
        Not used in the part of the constructor shown here.
    shapefile : str or None
        Path passed to shp2df. An empty string (the default) or None
        skips loading geometries.
    shapefile_hru_col : str
        Column used to sort the shapefile records.

    If sorting fails, the error and a hint are printed and
    ``self.geometry`` may be left unset.
    """
    # NOTE(review): tminfiles is not used further in this snippet; it is
    # kept because os.listdir also validates that `dir` exists.
    tminfiles = [os.path.join(dir, f) for f in os.listdir(dir) if variable in f]

    # The default is '', which the former `is not None` test let through
    # to shp2df; only try to read when a path was actually supplied.
    if shapefile:
        shp = shp2df(shapefile)
        try:
            # NOTE(review): DataFrame.sort is the legacy pandas API (later
            # replaced by sort_values) -- confirm the pandas version in use.
            shp.sort(shapefile_hru_col, inplace=True)
            self.geometry = shp[['geometry']]
            # geometries are numbered 1..n in sorted order
            self.geometry.index = np.arange(len(shp)) + 1
        except Exception as e:
            # deliberately best-effort: report the actual error type and continue
            print('{}: {}'.format(type(e).__name__, e))
            print('\nPlease supply an index field relating shapefile geometries '
                  'to columns in GDP data (e.g. hru number)')
def __init__(
    self,
    NHDFlowline,
    PlusFlowlineVAA,
    PlusFlow,
    mf_grid=None,
    mf_grid_node_col=None,
    nrows=None,
    ncols=None,
    mfdis=None,
    xul=None,
    yul=None,
    rot=0,
    model_domain=None,
    flowlines_proj4=None,
    mfgrid_proj4=None,
    domain_proj4=None,
    mf_units_mult=1,
):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Parameters
    ==========
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute.
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute.
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid and NHD flowlines.
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for coordinate system of the model grid.
        Read from the grid shapefile if not given and mf_grid is not a dataframe.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units_mult : float
        multiplier to convert GIS units to MODFLOW units
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.fl_cols = [
        "COMID",
        "FCODE",
        "FDATE",
        "FLOWDIR",
        "FTYPE",
        "GNIS_ID",
        "GNIS_NAME",
        "LENGTHKM",
        "REACHCODE",
        "RESOLUTION",
        "WBAREACOMI",
        "geometry",
    ]
    self.pfvaa_cols = ["ArbolateSu", "Hydroseq", "DnHydroseq", "LevelPathI", "StreamOrde"]

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    self.mf_units_mult = mf_units_mult
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    print("Reading input...")
    # handle dataframes or shapefiles as arguments
    # (results are stored as self.fl, self.pf, self.pfvaa and self.grid)
    sources = {"fl": NHDFlowline, "pf": PlusFlow, "pfvaa": PlusFlowlineVAA, "grid": mf_grid}
    for attr, source in sources.items():
        if isinstance(source, pd.DataFrame):
            self.__dict__[attr] = source
        else:
            self.__dict__[attr] = shp2df(source)

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif model_domain is not None:
        # model_domain=None (the default) used to fall through to
        # fiona.open(None); only open a file when one was supplied, and
        # close it again once the first feature has been read.
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))["geometry"])
        self.domain_proj4 = get_proj4(model_domain)

    # sort and pair down the grid
    if mf_grid_node_col is not None:
        # NOTE(review): DataFrame.sort is the legacy pandas API -- confirm
        # the pandas version in use.
        self.grid.sort(mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    self.grid = self.grid[["geometry"]]

    # get projections
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    for attr, index in {"fl": "COMID", "pfvaa": "ComID"}.items():
        if not self.__dict__[attr].index.name == index:
            self.__dict__[attr].index = self.__dict__[attr][index]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split("proj=")[1].split()[0].strip() == "longlat":
        raise ProjectionError(self.mf_grid)

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    # 1 ft = 0.3048 m, so feet -> km is 0.001 * 0.3048 (was 0.001 / 0.3048)
    self.to_km = 0.001 if self.GISunits == "m" else 0.001 * 0.3048

    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl["geometry"] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)
def make_collection(self, shp, index_field=None, s=20, fc='0.8', ec='k', lw=0.5, alpha=0.5,
                    color_field=None, cbar=False, clim=(), cmap='jet', cbar_label=None,
                    simplify_patches=100, zorder=5, convert_coordinates=1, remove_offset=True,
                    collection_name=None, **kwargs):
    """Read a shapefile and build a matplotlib collection from its geometries.

    Polygons become a PatchCollection, lines a LineCollection, and anything
    else is treated as points and drawn with ``self.ax.scatter``. The
    DataFrame, the collection, and the index label of each drawn part are
    stored in ``self.layers``, ``self.collections`` and
    ``self.collection_inds`` under `collection_name`.

    Parameters
    ----------
    shp : str
        Path to the shapefile.
    index_field : str, optional
        Column to use as the DataFrame index.
    s : number
        Marker size (points only).
    fc, ec, lw, alpha :
        Face color, edge color, line width and transparency. For lines,
        if `fc` names a column of the shapefile, that column's values are
        attached to the collection with ``set_array``.
    color_field, cbar, clim, cbar_label :
        Accepted but not used in this method body.
    cmap : str
        Colormap given to the PatchCollection.
    simplify_patches : number
        Tolerance passed to ``geometry.simplify``; values <= 0 skip it.
    zorder : number
        Drawing order for lines and points.
    convert_coordinates : number
        Multiplier applied to x and y coordinates.
    remove_offset : bool
        If True, shift geometries by ``-self.extent_proj[0]`` and
        ``-self.extent_proj[1]`` (llcorner = 0,0).
    collection_name : str, optional
        Key for the stored layer; defaults to the shapefile name without
        folder or extension.
    **kwargs :
        Passed to LineCollection or ``scatter``.

    Returns
    -------
    collection : matplotlib collection
    """
    if collection_name is None:
        collection_name = os.path.split(shp)[-1].split('.')[0]

    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid z values if the shapefile has them
    if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
        df['geometry'] = [transform(lambda x, y, z=None: (x * convert_coordinates,
                                                          y * convert_coordinates), g)
                          for g in df.geometry]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        df['geometry'] = [translate(g, -1 * self.extent_proj[0], -1 * self.extent_proj[1])
                          for g in df.geometry]

    if simplify_patches > 0:
        df['geometry'] = [g.simplify(simplify_patches) for g in df.geometry]

    # Positional access: once index_field has replaced the index, a label
    # lookup such as df.geometry[0] can raise KeyError.
    first_type = df.iloc[0].geometry.geom_type

    if 'Polygon' in first_type:
        print("building PatchCollection...")
        inds = []
        patches = []
        for i, g in zip(df.index, df.geometry):
            # one patch per polygon; multipolygons contribute one per part
            parts = g.geoms if g.geom_type == 'MultiPolygon' else [g]
            for part in parts:
                inds.append(i)
                patches.append(PolygonPatch(part))

        collection = PatchCollection(patches, cmap=cmap,
                                     facecolor=fc, linewidth=lw, edgecolor=ec, alpha=alpha,
                                     )

    elif 'LineString' in first_type:
        print("building LineCollection...")
        inds = []
        lines = []
        for i, g in zip(df.index, df.geometry):
            # plot each line in a multilinestring; .geoms matches how the
            # polygon branch walks multi-part geometries
            parts = g.geoms if 'Multi' in g.geom_type else [g]
            for part in parts:
                x, y = part.xy
                inds.append(i)
                lines.append(list(zip(x, y)))

        collection = LineCollection(lines, colors=ec, linewidths=lw, alpha=alpha,
                                    zorder=zorder, **kwargs)
        # set the color scheme (could set line thickness by same procedure)
        if fc in df.columns:
            colors = np.array([df[fc][ind] for ind in inds])
            collection.set_array(colors)

    else:
        print("plotting points...")
        x = np.array([g.x for g in df.geometry])
        y = np.array([g.y for g in df.geometry])

        collection = self.ax.scatter(x, y, s=s, c=fc, ec=ec, lw=lw, alpha=alpha,
                                     zorder=zorder, **kwargs)
        inds = list(range(len(x)))

    self.layers[collection_name] = df
    self.collections[collection_name] = collection
    self.collection_inds[collection_name] = inds

    return collection
def get_upstream_area(points, PlusFlow, NHDFlowlines, NHDCatchments, nearfield=None):
    """For each point in points, get upstream drainage area in km2, using
    NHDPlus PlusFlow routing table and NHDPlus Catchment areas. Upstream area
    within the containing catchment is estimated as a fraction proportional
    to the distance of the measurement point along the NHDPlus Flowline
    associated with the catchment.

    Parameters
    ----------
    points : list of shapely Point objects
        Locations of streamflow measurements. Must be in same coordinate system as NHDCatchments
    PlusFlow : str or list of strings
        Path(s) to PlusFlow routing tables
    NHDFlowlines : str or list of strings
        Path(s) to Flowlines shapefiles
    NHDCatchments : str or list of strings
        Path(s) to Catchment shapefiles
    nearfield : shapefile or shapely Polygon
        Nearfield area of model. Used to filter NHDPlus flowlines and catchments to
        greatly speed reading them in and finding the COMIDs associated with points.
        Must be in same coordinate system as points and NHDPlus shapefiles.
        If omitted, the flowlines and catchments are read without a filter.

    Returns
    -------
    upstream_area : list
        List of areas in km2, one entry for each (point, containing catchment)
        match, in the order of `points`. A point that falls in no catchment
        contributes no entry.

    Raises
    ------
    ImportError
        If fiona, shapely or GISio cannot be imported.
    """
    try:
        import fiona
        from shapely.geometry import Polygon, shape
        from GISio import shp2df
    except ImportError:
        print('This method requires fiona, shapely and GIS_utils.')
        # re-raise: continuing would only fail later with a NameError
        raise

    if isinstance(nearfield, Polygon):
        bbox = nearfield.bounds
    elif isinstance(nearfield, str):
        # bounds is a property (not a method); close the file once the
        # first feature has been read
        with fiona.open(nearfield) as src:
            bbox = shape(next(iter(src))['geometry']).bounds
    else:
        bbox = None

    spatial_filter = {}
    if bbox is not None:
        # dilate the bounding box by half, so that features aren't missed.
        x = 0.5 * (bbox[2] - bbox[0])
        y = 0.5 * (bbox[3] - bbox[1])
        bbox = (bbox[0] - x, bbox[1] - y, bbox[2] + x, bbox[3] + y)
        spatial_filter['filter'] = bbox

    pf = shp2df(PlusFlow)
    fl = shp2df(NHDFlowlines, index='COMID', **spatial_filter)
    cmt = shp2df(NHDCatchments, index='FEATUREID', **spatial_filter)

    # find the catchment containing each point in points; keep the point
    # paired with its comid so the two cannot drift out of step when a
    # point matches no catchment (or more than one)
    located = []
    for p in points:
        inside = np.array([p.within(g) for g in cmt.geometry], dtype=bool)
        for comid in cmt.FEATUREID[inside].tolist():
            located.append((p, comid))

    upstream_area = []
    for point, comid in located:
        # walk the routing table upstream, one level per pass
        # (bounded at 1000 passes, as in the original)
        contributing = {comid}
        upstream = [comid]
        for _ in range(1000):
            upstream = set(pf.ix[pf.TOCOMID.isin(upstream), 'FROMCOMID']).difference({0})
            if not upstream:
                break
            contributing.update(upstream)

        # NOTE(review): `contributing` includes the containing catchment
        # itself, so its full area is summed here and a fraction of it is
        # added again below -- confirm whether that double count is intended.
        total_upstream_area = cmt.ix[list(contributing), 'AreaSqKM'].sum()

        # estimate fraction of containing catchment that is upstream
        # by projecting the measurement point onto the flowline and
        # dividing the distance along the line by the total length
        line = fl.ix[comid, 'geometry']
        frac = line.project(point) / line.length
        upstream_in_catchment = cmt.ix[comid, 'AreaSqKM'] * frac
        total_upstream_area += upstream_in_catchment
        upstream_area.append(total_upstream_area)

    return upstream_area
from flopy.utils.reference import SpatialReference # basic grid parameters name = 'map_test' epsg = 5070 xul, yul = 520487.3, 1194668.3 nrow, ncol = 20, 20 dxy = 5280 * .3048 buf = 1e4 bounds = xul - buf, \ yul - dxy * nrow - buf, \ xul + dxy * ncol + buf, \ yul + buf # make version of preprocessed flowlines filtered to bounding box df = shp2df('/Users/aleaf/Documents/MAP/repos/sfr_output/preprocessed/flowlines_gt20km/flowlines_gt20km_edited.shp', filter=bounds) df2shp(df, 'data/{}_flowlines.shp'.format(name), epsg=epsg) # make a spatial reference object defining the grid sr = SpatialReference(delr=np.ones(ncol, dtype=float) * dxy, delc=np.ones(nrow, dtype=float) * dxy, xul=xul, yul=yul, epsg=epsg) # export sr info to json file model_info = sr.attribute_dict model_info['nrow'] = sr.nrow model_info['ncol'] = sr.ncol model_info['delr'] = sr.delr[0] model_info['delc'] = sr.delc[0] model_info['epsg'] = sr.epsg with open('data/{}_grid.json'.format(name), 'w') as output:
def make_collection(self, shp, index_field=None, s=20, fc='0.8', ec='k', lw=0.5, alpha=0.5,
                    color_field=None, cbar=False, clim=(), cmap='jet', cbar_label=None,
                    simplify_patches=100, zorder=5, convert_coordinates=1, remove_offset=True,
                    collection_name=None, **kwargs):
    """Read a shapefile and build a matplotlib collection from its geometries.

    Polygons become a PatchCollection, lines a LineCollection, and anything
    else is treated as points and drawn with ``self.ax.scatter``. The
    DataFrame, the collection, and the index label of each drawn part are
    stored in ``self.layers``, ``self.collections`` and
    ``self.collection_inds`` under `collection_name`.

    Parameters
    ----------
    shp : str
        Path to the shapefile.
    index_field : str, optional
        Column to use as the DataFrame index.
    s : number
        Marker size (points only).
    fc, ec, lw, alpha :
        Face color, edge color, line width and transparency. For lines,
        if `fc` names a column of the shapefile, that column's values are
        attached to the collection with ``set_array``.
    color_field, cbar, clim, cbar_label :
        Accepted but not used in this method body.
    cmap : str
        Colormap given to the PatchCollection.
    simplify_patches : number
        Tolerance passed to ``geometry.simplify``; values <= 0 skip it.
    zorder : number
        Drawing order for lines and points.
    convert_coordinates : number
        Multiplier applied to x and y coordinates.
    remove_offset : bool
        If True, shift geometries by ``-self.extent_proj[0]`` and
        ``-self.extent_proj[1]`` (llcorner = 0,0).
    collection_name : str, optional
        Key for the stored layer; defaults to the shapefile name without
        folder or extension.
    **kwargs :
        Passed to LineCollection or ``scatter``.

    Returns
    -------
    collection : matplotlib collection
    """
    if collection_name is None:
        collection_name = os.path.split(shp)[-1].split('.')[0]

    df = shp2df(shp)
    if index_field is not None:
        df.index = df[index_field]

    proj4 = get_proj4(shp)
    if proj4 != self.proj4:
        df['geometry'] = projectdf(df, proj4, self.proj4)

    # convert projected coordinate units and/or get rid z values if the shapefile has them
    if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
        df['geometry'] = [
            transform(lambda x, y, z=None: (x * convert_coordinates, y * convert_coordinates), g)
            for g in df.geometry
        ]

    # remove model offset from projected coordinates (llcorner = 0,0)
    if remove_offset:
        df['geometry'] = [
            translate(g, -1 * self.extent_proj[0], -1 * self.extent_proj[1])
            for g in df.geometry
        ]

    if simplify_patches > 0:
        df['geometry'] = [g.simplify(simplify_patches) for g in df.geometry]

    # Positional access: once index_field has replaced the index, a label
    # lookup such as df.geometry[0] can raise KeyError.
    first_type = df.iloc[0].geometry.geom_type

    if 'Polygon' in first_type:
        print("building PatchCollection...")
        inds = []
        patches = []
        for i, g in zip(df.index, df.geometry):
            # one patch per polygon; multipolygons contribute one per part
            parts = g.geoms if g.geom_type == 'MultiPolygon' else [g]
            for part in parts:
                inds.append(i)
                patches.append(PolygonPatch(part))

        collection = PatchCollection(
            patches,
            cmap=cmap,
            facecolor=fc,
            linewidth=lw,
            edgecolor=ec,
            alpha=alpha,
        )

    elif 'LineString' in first_type:
        print("building LineCollection...")
        inds = []
        lines = []
        for i, g in zip(df.index, df.geometry):
            # plot each line in a multilinestring; .geoms matches how the
            # polygon branch walks multi-part geometries
            parts = g.geoms if 'Multi' in g.geom_type else [g]
            for part in parts:
                x, y = part.xy
                inds.append(i)
                lines.append(list(zip(x, y)))

        collection = LineCollection(lines, colors=ec, linewidths=lw, alpha=alpha,
                                    zorder=zorder, **kwargs)
        # set the color scheme (could set line thickness by same procedure)
        if fc in df.columns:
            colors = np.array([df[fc][ind] for ind in inds])
            collection.set_array(colors)

    else:
        print("plotting points...")
        x = np.array([g.x for g in df.geometry])
        y = np.array([g.y for g in df.geometry])

        collection = self.ax.scatter(x, y, s=s, c=fc, ec=ec, lw=lw, alpha=alpha,
                                     zorder=zorder, **kwargs)
        inds = list(range(len(x)))

    self.layers[collection_name] = df
    self.collections[collection_name] = collection
    self.collection_inds[collection_name] = inds

    return collection
def __init__(self, NHDFlowline, PlusFlowlineVAA, PlusFlow,
             mf_grid=None, mf_grid_node_col=None,
             nrows=None, ncols=None,
             mfdis=None, xul=None, yul=None, rot=0,
             model_domain=None,
             flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
             mf_units_mult=1):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Parameters
    ==========
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute.
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute.
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid and NHD flowlines.
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for coordinate system of the model grid.
        Read from the grid shapefile if not given and mf_grid is not a dataframe.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units_mult : float
        multiplier to convert GIS units to MODFLOW units

    NOTE(review): the methods of this class use legacy pandas calls
    (DataFrame.sort, .ix, set_value) -- confirm the pandas version in use.
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                    'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                    'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
    self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                       'LevelPathI', 'StreamOrde']

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    self.mf_units_mult = mf_units_mult
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    print("Reading input...")
    # handle dataframes or shapefiles as arguments
    # (results are stored as self.fl, self.pf, self.pfvaa and self.grid)
    sources = {'fl': NHDFlowline, 'pf': PlusFlow, 'pfvaa': PlusFlowlineVAA,
               'grid': mf_grid}
    for attr, source in sources.items():
        if isinstance(source, pd.DataFrame):
            self.__dict__[attr] = source
        else:
            self.__dict__[attr] = shp2df(source)

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif isinstance(model_domain, str):
        # close the file once the first feature has been read
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))['geometry'])
        self.domain_proj4 = get_proj4(model_domain)
    # Otherwise no domain is set. The original `else:` branch held only
    # commented-out code (a unary union of the grid cells), which left the
    # branch without a body.

    # sort and pair down the grid
    if mf_grid_node_col is not None:
        self.grid.sort(mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    self.grid = self.grid[['geometry']]

    # get projections
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    for attr, index in {'fl': 'COMID', 'pfvaa': 'ComID'}.items():
        if not self.__dict__[attr].index.name == index:
            self.__dict__[attr].index = self.__dict__[attr][index]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
        raise ProjectionError(self.mf_grid)

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    # 1 ft = 0.3048 m, so feet -> km is 0.001 * 0.3048 (was 0.001 / 0.3048)
    self.to_km = 0.001 if self.GISunits == 'm' else 0.001 * 0.3048

    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None \
            and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)


def list_updown_comids(self):
    """Add 'dncomids' and 'upcomids' columns to self.df from the PlusFlow table.

    Each entry is the list of COMIDs routed to / from that flowline. Routing
    targets that are not in self.df are replaced by whatever find_next
    returns for them; sources outside self.df are set to 0.
    """
    # setup local variables and cull plusflow table to comids in model
    comids = self.df.index.tolist()
    pf = self.pf.ix[(self.pf.FROMCOMID.isin(comids)) |
                    (self.pf.TOCOMID.isin(comids))].copy()

    # subset PlusFlow entries for comids that are not in flowlines dataset
    # comids may be missing because they are outside of the model
    # or if the flowlines dataset was edited (resulting in breaks in the routing)
    missing_tocomids = ~pf.TOCOMID.isin(comids) & (pf.TOCOMID != 0)
    missing = pf.ix[missing_tocomids, ['FROMCOMID', 'TOCOMID']].copy()
    # recursively crawl the PlusFlow table
    # to try to find a downstream comid in the flowlines dataset
    missing['nextCOMID'] = [find_next(tc, self.pf, comids) for tc in missing.TOCOMID]
    pf.loc[missing_tocomids, 'TOCOMID'] = missing.nextCOMID

    # set any remaining comids not in model to zero
    # (outlets or inlets from outside model)
    # (TOCOMIDs outside the model should all be handled above)
    pf.loc[~pf.FROMCOMID.isin(comids), 'FROMCOMID'] = 0
    tocomid = pf.TOCOMID.values
    fromcomid = pf.FROMCOMID.values
    self.df['dncomids'] = [tocomid[fromcomid == c].tolist() for c in comids]
    self.df['upcomids'] = [fromcomid[tocomid == c].tolist() for c in comids]


def assign_segments(self):
    """Number the flowlines as segments and derive segment-level routing.

    Adds 'segment' (1..n in COMID order), 'upsegs', 'dnsegs' and 'outseg'
    columns to self.df, then sorts self.df by segment. Flowlines with more
    than one downstream COMID are reduced to the one(s) sharing their
    LevelPathI, or to the first listed if none match.
    """
    # create segment numbers
    self.df.sort('COMID', inplace=True)
    self.df['segment'] = np.arange(len(self.df)) + 1

    # reduce dncomids to 1 per segment
    braids = self.df[np.array([len(d) for d in self.df.dncomids]) > 1]
    for i, r in braids.iterrows():
        # select the dncomid that has a matching levelpath
        # NOTE(review): the mask follows the row order of self.df while
        # r.dncomids keeps its own order -- confirm the two always agree.
        matching_levelpaths = np.array(r.dncomids)[
            self.df.ix[self.df.COMID.isin(r.dncomids), 'LevelPathI'].values == r.LevelPathI]
        # if none match, select the first dncomid
        if len(matching_levelpaths) == 0:
            matching_levelpaths = [r.dncomids[0]]
        self.df.set_value(i, 'dncomids', matching_levelpaths)

    # assign upsegs and outsegs based on NHDPlus routing
    self.df['upsegs'] = [[self.df.segment[c] if c != 0 else 0 for c in comids]
                         for comids in self.df.upcomids]
    self.df['dnsegs'] = [[self.df.segment[c] if c != 0 else 0 for c in comids]
                         for comids in self.df.dncomids]

    # make a column of outseg integers
    self.df['outseg'] = [d[0] for d in self.df.dnsegs]
    self.df.sort('segment', inplace=True)


def to_sfr(self, roughness=0.037, streambed_thickness=1, streambedK=1,
           icalc=1,
           iupseg=0, iprior=0, nstrpts=0, flow=0, runoff=0, etsw=0, pptsw=0,
           roughch=0, roughbk=0, cdepth=0, fdepth=0, awdth=0, bwdth=0):
    """Intersect the flowlines with the grid and build the SFR reach and segment tables.

    The reach table is stored as self.m1 and the segment table as self.m2.
    Only roughness, streambed_thickness, streambedK and icalc are used in
    this method body; the remaining keyword arguments are accepted but not
    used here.
    """
    # create a working dataframe
    self.df = self.fl[self.fl_cols].join(self.pfvaa[self.pfvaa_cols], how='inner')

    print('\nclipping flowlines to active area...')
    inside = [g.intersects(self.domain) for g in self.df.geometry]
    self.df = self.df.ix[inside].copy()
    self.df.sort('COMID', inplace=True)
    flowline_geoms = self.df.geometry.tolist()
    grid_geoms = self.grid.geometry.tolist()

    print("intersecting flowlines with grid cells...")
    grid_intersections = GISops.intersect_rtree(grid_geoms, flowline_geoms)

    print("setting up segments...")
    self.list_updown_comids()
    self.assign_segments()
    fl_segments = self.df.segment.tolist()
    fl_comids = self.df.COMID.tolist()

    m1 = make_mat1(flowline_geoms, fl_segments, fl_comids, grid_intersections, grid_geoms)

    print("computing widths...")
    m1['length'] = np.array([g.length for g in m1.geometry])
    lengths = m1[['segment', 'length']].copy()
    groups = lengths.groupby('segment')
    # for each reach: length from the start of the reach to the end of its segment
    reach_asums = np.concatenate([np.cumsum(grp.length.values[::-1])[::-1] for s, grp in groups])
    segment_asums = np.array([self.df.ArbolateSu.values[s - 1] for s in m1.segment.values])
    reach_asums = -1 * self.to_km * reach_asums + segment_asums  # arbolate sums are computed in km
    width = width_from_arbolate(reach_asums)  # widths are returned in m
    if self.GISunits != 'm':
        width = width / 0.3048

    m1['width'] = width * self.mf_units_mult
    m1['length'] = m1.length * self.mf_units_mult

    m1['roughness'] = roughness
    m1['sbthick'] = streambed_thickness
    m1['sbK'] = streambedK
    m1['sbtop'] = 0

    if self.nrows is not None:
        # Node numbers are taken to be 1-based (the column logic below maps
        # a remainder of 0 to the last column), so subtract 1 before
        # dividing; otherwise the last cell of every row lands in the next row.
        m1['row'] = np.floor((m1.node - 1) / self.ncols) + 1
    if self.ncols is not None:
        column = m1.node.values % self.ncols
        column[column == 0] = self.ncols  # last column has remainder of 0
        m1['column'] = column
    m1['layer'] = 1

    self.m1 = m1

    print("setting up Mat2...")
    # copy so that adding 'icalc' does not write into a slice of self.df
    self.m2 = self.df[['segment', 'outseg']].copy()
    self.m2['icalc'] = icalc
    self.m2.index = self.m2.segment
    print('Done')


def write_tables(self, basename='SFR'):
    """Write tables with SFR reach (Mat1) and segment (Mat2) information out to csv files.

    Parameters
    ----------
    basename: string
        e.g. Mat1 is written to <basename>Mat1.csv
    """
    m1_cols = ['node', 'layer', 'segment', 'reach', 'sbtop', 'width', 'length',
               'sbthick', 'sbK', 'roughness', 'reachID']
    m2_cols = ['segment', 'icalc', 'outseg']
    # row/column are only present when the grid dimensions were supplied
    if self.nrows is not None:
        m1_cols.insert(1, 'row')

    if self.ncols is not None:
        m1_cols.insert(2, 'column')
    print("writing Mat1 to {0}{1}, Mat2 to {0}{2}".format(basename, 'Mat1.csv', 'Mat2.csv'))
    self.m1[m1_cols].to_csv(basename + 'Mat1.csv', index=False)
    self.m2[m2_cols].to_csv(basename + 'Mat2.csv', index=False)


def write_linework_shapefile(self, basename='SFR'):
    """Write a shapefile containing linework for each SFR reach,
    with segment, reach, model node number, and NHDPlus COMID attribute information

    Parameters
    ----------
    basename: string
        Output will be written to <basename>.shp
    """
    print("writing reach geometries to {}".format(basename + '.shp'))
    df2shp(self.m1[['reachID', 'node', 'segment', 'reach', 'comid', 'geometry']],
           basename + '.shp', proj4=self.mf_grid_proj4)
def __init__(self, NHDFlowline=None, PlusFlowlineVAA=None, PlusFlow=None, NHDFcode=None,
             elevslope=None,
             mf_grid=None, mf_grid_node_col=None,
             nrows=None, ncols=None,
             mfdis=None, xul=None, yul=None, rot=0,
             model_domain=None,
             flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
             mf_units='feet'):
    """Class for working with information from NHDPlus v2.
    See the user's guide for more information:
    <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

    Dataframes passed as arguments are used directly (not copied), so the
    index, sorting and column changes made here also affect the caller's
    objects.

    Parameters
    ==========
    NHDFlowline : str, list of strings or dataframe
        Shapefile, list of shapefiles, or dataframe defining SFR network;
        assigned to the Flowline attribute (loaded table is stored as `fl`).
    PlusFlowlineVAA : str, list of strings or dataframe
        DBF file, list of DBF files with NHDPlus attribute information;
        assigned to PlusFlowlineVAA attribute (loaded table is stored as `pfvaa`).
    PlusFlow : str, list of strings or dataframe
        DBF file, list of DBF files with routing information;
        assigned to PlusFlow attribute (loaded table is stored as `pf`).
    NHDFcode : optional
        Accepted but not referenced by this constructor.
    elevslope : str or dataframe
        Table with MAXELEVSMO and MINELEVSMO columns (loaded table is stored
        as `elevs`); 'Max' and 'Min' columns in model units are added to it.
    mf_grid : str or dataframe
        Shapefile or dataframe containing MODFLOW grid
    mf_grid_node_col : str
        Column in grid shapefile or dataframe with unique node numbers.
        In case the grid isn't sorted!
        (which will result in mixup if rows and columns are assigned later using the node numbers)
    nrows : int
        (structured grids) Number of model rows
    ncols : int
        (structured grids) Number of model columns
    mfdis : str
        MODFLOW discretization file (not yet supported for this class)
    xul : float, optional
        x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    yul : float, optional
        y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
    rot : float, optional (default 0)
        Grid rotation; only needed if using mfdis instead of shapefile.
    model_domain : str (shapefile) or shapely polygon, optional
        Polygon defining area in which to create SFR cells.
        Default is to create SFR at all intersections between the model grid and NHD flowlines.
    flowlines_proj4 : str, optional
        Proj4 string for coordinate system of NHDFlowlines.
        Only needed if flowlines are supplied in a dataframe.
    mfgrid_proj4 : str, optional
        Proj4 string for coordinate system of the model grid.
        Only needed if mf_grid is supplied in a dataframe.
    domain_proj4 : str, optional
        Proj4 string for coordinate system of model_domain.
        Only needed if model_domain is supplied as a polygon.
    mf_units : str, 'feet' or 'meters'
        Length units of MODFLOW model

    Raises
    ======
    ProjectionError
        If the model grid projection is geographic ('longlat').
    """
    self.Flowline = NHDFlowline
    self.PlusFlowlineVAA = PlusFlowlineVAA
    self.PlusFlow = PlusFlow
    self.elevslope = elevslope
    self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                    'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                    'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
    self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                       'LevelPathI', 'StreamOrde']

    self.mf_grid = mf_grid
    self.model_domain = model_domain
    self.nrows = nrows
    self.ncols = ncols
    self.mfdis = mfdis
    self.xul = xul
    self.yul = yul
    self.rot = rot

    # unit conversions (set below after grid projection is verified)
    self.mf_units = mf_units
    self.mf_units_mult = 1.0  # go from GIS units to model units
    self.GISunits = None
    self.to_km = None  # converts GIS units to km for arbolate sum

    self.fl_proj4 = flowlines_proj4
    self.mf_grid_proj4 = mfgrid_proj4
    self.domain_proj4 = domain_proj4

    print("Reading input...")
    # handle dataframes or shapefiles as arguments:
    # dataframes are stored as-is, anything else is read with shp2df
    sources = {'fl': NHDFlowline, 'pf': PlusFlow, 'pfvaa': PlusFlowlineVAA,
               'elevs': elevslope, 'grid': mf_grid}
    for attr, data in sources.items():
        if isinstance(data, pd.DataFrame):
            setattr(self, attr, data)
        else:
            setattr(self, attr, shp2df(data))

    if isinstance(model_domain, Polygon):
        self.domain = model_domain
    elif isinstance(model_domain, str):
        # only the first feature in the shapefile is used as the domain;
        # the context manager closes the collection once it has been read
        with fiona.open(model_domain) as src:
            self.domain = shape(next(iter(src))['geometry'])
        self.domain_proj4 = get_proj4(model_domain)
    else:
        print('setting model domain to extent of grid '
              'by performing unary union of grid cell geometries...\n'
              '(may take a few minutes for large grids)')
        # add tiny buffer to overcome floating point errors in gridcell geometries
        # (otherwise a multipolygon feature may be returned)
        geoms = [g.buffer(0.001) for g in self.grid.geometry.tolist()]
        self.domain = unary_union(geoms)

    # sort and pare down the grid
    if mf_grid_node_col is not None:
        self.grid.sort_values(by=mf_grid_node_col, inplace=True)
        self.grid.index = self.grid[mf_grid_node_col].values
    else:
        print('Warning: Node field for grid shape file not supplied. '
              'Node numbers will be assigned using index. '
              'This may result in incorrect location of SFR reaches.')
    self.grid = self.grid[['geometry']]

    # get projections for anything that was supplied as a file
    if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
        self.mf_grid_proj4 = get_proj4(mf_grid)
    if self.fl_proj4 is None:
        if isinstance(NHDFlowline, list):
            self.fl_proj4 = get_proj4(NHDFlowline[0])
        elif not isinstance(NHDFlowline, pd.DataFrame):
            self.fl_proj4 = get_proj4(NHDFlowline)

    # set the indices
    index_cols = {'fl': 'COMID', 'pfvaa': 'ComID', 'elevs': 'COMID'}
    for attr, index_col in index_cols.items():
        table = getattr(self, attr)
        if table.index.name != index_col:
            table.index = table[index_col]

    # first check that grid is in projected units
    if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
        raise ProjectionError(self.mf_grid)

    # set GIS units from modflow grid projection (used for arbolate sum computation)
    # assumes either m or ft!
    self.GISunits = parse_proj4_units(self.mf_grid_proj4)
    gis_in_meters = self.GISunits == 'm'
    if gis_in_meters and self.mf_units == 'feet':
        self.mf_units_mult = 1 / 0.3048
    elif not gis_in_meters and self.mf_units == 'meters':
        self.mf_units_mult = 0.3048
    else:
        self.mf_units_mult = 1.0
    # 1 ft = 0.3048 m, so a length in feet is multiplied by 0.3048 * 0.001 to get km
    self.to_km = 0.001 if gis_in_meters else 0.3048 * 0.001

    # convert the elevations from elevslope table
    elev_mult = self.convert_elevslope_to_model_units[self.mf_units]
    self.elevs['Max'] = self.elevs.MAXELEVSMO * elev_mult
    self.elevs['Min'] = self.elevs.MINELEVSMO * elev_mult

    # reproject the NHD Flowlines and model domain to model grid if they aren't
    # (prob a better way to check for same projection)
    if different_projections(self.fl_proj4, self.mf_grid_proj4):
        print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
        self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

    if model_domain is not None \
            and different_projections(self.domain_proj4, self.mf_grid_proj4):
        print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
        self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)