def test_large_integers():
    """Verify that large integer attribute values (e.g. USGS GW site
    numbers) can be written to a dbf."""
    site_numbers = [424825088223301, 424825088223302, 424825088223303]
    df = pd.DataFrame({'site_no': site_numbers})
    df2shp(df, 'temp/junk.dbf')
    # repeat the write with attribute data read from a csv
    df = pd.read_csv('../examples/data/gw_field_sites.csv')
    df2shp(df, 'temp/junk.dbf')
def test_large_integers():
    """Write 15-digit integer attributes (e.g. USGS GW site numbers) to a dbf."""
    sites = pd.DataFrame(
        {'site_no': [424825088223301, 424825088223302, 424825088223303]})
    df2shp(sites, 'temp/junk.dbf')
    # same write, with attribute data loaded from a csv
    sites = pd.read_csv('../examples/data/gw_field_sites.csv')
    df2shp(sites, 'temp/junk.dbf')
def test_integer_dtypes():
    """Verify that pandas recasts numpy ints as python ints when converting
    to dict (numpy ints are invalid for shapefiles), and that an all-integer
    dataframe can be round-tripped through a dbf."""
    records = (pd.DataFrame(np.ones((3, 3)), dtype=int)
               .astype(object)
               .to_dict(orient='records'))
    assert all(isinstance(rec[0], int) for rec in records[:3])
    df = pd.DataFrame({'r': np.arange(100), 'c': np.arange(100)})
    df2shp(df, 'temp/ints.dbf')
    df2 = shp2df('temp/ints.dbf')
    assert True
def write_linework_shapefile(self, basename='SFR'):
    """Write a shapefile containing linework for each SFR reach, with
    segment, reach, model node number, and NHDPlus COMID attribute information

    Parameters
    ----------
    basename: string
        Output will be written to <basename>.shp
    """
    # Python 2 print statement converted to a print() call for Python 3
    # compatibility (other versions of this method in the file use print())
    print("writing reach geometries to {}".format(basename + '.shp'))
    df2shp(self.m1[['reachID', 'node', 'segment', 'reach', 'comid', 'geometry']],
           basename + '.shp', proj4=self.mf_grid_proj4)
def write_linework_shapefile(self, basename='SFR'):
    """Write a shapefile containing linework for each SFR reach, with
    segment, reach, model node number, and NHDPlus COMID attribute information

    Parameters
    ----------
    basename: string
        Output will be written to <basename>.shp
    """
    shpname = basename + '.shp'
    print("writing reach geometries to {}".format(shpname))
    # attribute columns carried through to the shapefile
    columns = ['reachID', 'node', 'segment', 'reach', 'outseg', 'comid', 'geometry']
    df2shp(self.m1[columns], shpname, proj4=self.mf_grid_proj4)
def write_linework_shapefile(self, basename="SFR"):
    """Write a shapefile containing linework for each SFR reach, with
    segment, reach, model node number, and NHDPlus COMID attribute information

    Parameters
    ----------
    basename: string
        Output will be written to <basename>.shp
    """
    # Python 2 print statement converted to a print() call for Python 3
    # compatibility (other versions of this method in the file use print())
    print("writing reach geometries to {}".format(basename + ".shp"))
    df2shp(
        self.m1[["reachID", "node", "segment", "reach", "comid", "geometry"]],
        basename + ".shp",
        proj4=self.mf_grid_proj4,
    )
def plot_segment_linkages(self, linkshp='segment_linkages.shp', outletshp='outlets.shp'):
    """Write shapefiles showing the linkages between SFR segments and
    the locations of segment outlets.

    Parameters
    ----------
    linkshp : str
        Output shapefile of lines connecting the end of each segment
        to the start of its outseg.
    outletshp : str
        Output shapefile of outlet locations (segments with outseg == 0).
    """
    from shapely.geometry import LineString, Point
    from GISio import df2shp
    # Python 2 print statement converted to print() for Python 3 compatibility
    print('\nMaking shapefiles of SFR segment linkages and outlet locations...')
    self._get_sfr_cell_geoms()
    segments = self.m1[['segment', 'geometry']].groupby('segment')
    # (start centroid, end centroid) for each segment, in segment order
    geoms = [(df.geometry.tolist()[0].centroid, df.geometry.tolist()[-1].centroid)
             for s, df in segments]
    linksdf = self.m2[['segment', 'outseg']].copy()
    # line from the end of each segment to the start of its outseg;
    # outlets (outseg == 0) get a point at the segment end instead of a
    # bogus line built from geoms[-1]
    linksdf['geometry'] = [LineString([geoms[s][1], geoms[o-1][0]]) if o != 0
                           else Point(geoms[s][1])
                           for s, o in enumerate(linksdf.outseg)]
    # bug fix: the outlet shapefile was previously written from the
    # linkage dataframe; split outlets into their own dataframe
    outletsdf = linksdf[linksdf.outseg == 0].copy()
    linksdf = linksdf[linksdf.outseg != 0].copy()
    df2shp(linksdf, linkshp, prj=self.prj)
    df2shp(outletsdf, outletshp, prj=self.prj)
def plot_segment_linkages(self, linkshp='segment_linkages.shp', outletshp='outlets.shp'):
    """Write shapefiles showing the linkages between SFR segments and
    the locations of segment outlets.

    Parameters
    ----------
    linkshp : str
        Output shapefile of lines connecting the end of each segment
        to the start of its outseg.
    outletshp : str
        Output shapefile of outlet locations (outseg of 0 or 999999).
    """
    from shapely.geometry import LineString, Point
    from GISio import df2shp
    print('\nMaking shapefiles of SFR segment linkages and outlet locations...')
    self._get_sfr_cell_geoms()
    # (start centroid, end centroid) for each segment, in segment order
    endpoints = []
    for seg, group in self.m1[['segment', 'geometry']].groupby('segment'):
        pieces = group.geometry.tolist()
        endpoints.append((pieces[0].centroid, pieces[-1].centroid))
    linksdf = self.m2[['segment', 'outseg']].copy()
    terminal = {0, 999999}  # outseg codes that mark an outlet
    geometry = []
    for i, outseg in enumerate(linksdf.outseg):
        if outseg in terminal:
            # outlet: mark the downstream end of the segment with a point
            geometry.append(Point(endpoints[i][1]))
        else:
            # link: line from the segment end to the start of its outseg
            geometry.append(LineString([endpoints[i][1], endpoints[outseg - 1][0]]))
    linksdf['geometry'] = geometry
    is_outlet = linksdf.outseg.isin(terminal)
    outletsdf = linksdf.loc[is_outlet].copy()
    linksdf = linksdf.loc[~is_outlet].copy()
    df2shp(linksdf, linkshp, prj=self.prj)
    df2shp(outletsdf, outletshp, prj=self.prj)
def contour2shp(contours, outshape='contours.shp', add_fields=None, **kwargs):
    """Convert matplotlib contour plot object to shapefile.

    Parameters
    ----------
    contours : matplotlib.contour.QuadContourSet or list of them
        (object returned by matplotlib.pyplot.contour)
    outshape : str
        path of output shapefile
    add_fields : dict of lists or 1D arrays, optional
        Add fields (keys=fieldnames), with attribute data (values=lists) to shapefile.
        Attribute data must be of the same length, and in the same order as
        the total number of contour objects x number of levels in each object.
    **kwargs : key-word arguments to GISio.df2shp

    Returns
    -------
    df : dataframe of shapefile contents
    """
    from GISio import df2shp
    # mutable-default-argument fix: the {} default was never mutated here,
    # but a None sentinel is the safe idiom
    if add_fields is None:
        add_fields = {}
    if not isinstance(contours, list):
        contours = [contours]
    geoms = []
    level = []
    for ctr in contours:
        levels = ctr.levels
        for i, c in enumerate(ctr.collections):
            paths = c.get_paths()
            # one LineString per path; every path in a collection shares
            # the collection's contour level
            geoms += [LineString(p.vertices) for p in paths]
            level += list(np.ones(len(paths)) * levels[i])
    d = {'geometry': geoms, 'level': level}
    d.update(add_fields)
    df = pd.DataFrame(d)
    df2shp(df, outshape, **kwargs)
    return df
def test_shp_read_and_write():
    """Round-trip dataframes through dbf/shp files, checking dtypes,
    boolean coercion, row/column order, and datetime index handling."""
    if not os.path.isdir('output'):
        os.makedirs('output')

    # table with no geometry column -> written as a .dbf
    attrs = {'reach': np.arange(10000001, 10000100, dtype=int),
             'value': np.arange(1, 100, dtype=float),
             'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
             'isTrue': [True, False] * 49 + [True]}
    df = pd.DataFrame(attrs)
    df2shp(df, 'temp/junk.dbf')
    df = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    assert [d.name for d in df.dtypes] == ['bool', 'object', 'int64', 'float64']
    assert df.isTrue.sum() == 50

    # table with point geometries -> written as a .shp
    points = [Point([i, i]) for i in range(100)]
    df = pd.DataFrame({'reach': np.arange(1, 101, dtype=int),
                       'value': np.arange(100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(100)],
                       'geometry': points})
    original_columns = df.columns.tolist()
    df2shp(df, 'temp/junk.shp')
    df = shp2df('temp/junk.shp')
    assert df.geometry[0] == Point([0.0, 0.0])
    # row order and column order should survive the round trip
    assert np.array_equal(df.index.values, np.arange(100))
    assert df.columns.tolist() == original_columns

    # a datetime index should come back as a 'datetime' attribute column
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:39', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.shp', index=True)
    df = shp2df('temp/junk.shp')
    assert 'datetime' in df.columns
    assert df.datetime[0] == '2016-01-01 01:00:00'
def test_shp_read_and_write():
    """Round-trip dataframes through dbf/shp files with retain_order=True,
    checking dtypes, boolean coercion, column order, and datetime handling."""
    if not os.path.isdir('output'):
        os.makedirs('output')

    # attribute-only table (no geometry) written to a .dbf
    df = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int),
                       'value': np.arange(1, 100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
                       'isTrue': [True, False] * 49 + [True]})
    cols = ['reach', 'value', 'name', 'isTrue']
    ordered = df[cols]  # designate a column order
    start = time.time()
    df2shp(ordered, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - start))
    start = time.time()
    readback = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - start))
    #assert list(readback.columns) == cols
    assert [dtype.name for dtype in readback.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert readback.isTrue.sum() == 50

    # table with point geometries written to a .shp
    ordered = pd.DataFrame({'reach': np.arange(1, 101, dtype=int),
                            'value': np.arange(100, dtype=float),
                            'name': ['stuff{}'.format(i) for i in np.arange(100)],
                            'geometry': [Point([i, i]) for i in range(100)]})
    # geometry is placed in last column when shp is read in
    cols = ['reach', 'value', 'name', 'geometry']
    ordered = ordered[cols]
    df2shp(ordered, 'temp/junk.shp', retain_order=True)
    readback = shp2df('temp/junk.shp')
    assert readback.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(readback.index.values, np.arange(100))  # check ordering of rows
    assert readback.columns.tolist() == cols  # check column order

    # a datetime index should come back as a 'datetime' attribute column
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:38', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.dbf', index=True)
    df = shp2df('temp/junk.dbf')
    assert 'datetime' in df.columns
    assert df.datetime[0] == '2016-01-01 01:00:00'
def test_shp_read_and_write():
    """Write dataframes to shapefile/dbf and read them back, verifying
    dtype preservation, column order (retain_order=True), and the export
    of a datetime index."""
    if not os.path.isdir('output'):
        os.makedirs('output')

    # no geometry: plain attribute table -> .dbf
    idx = np.arange(1, 100)
    table = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int),
                          'value': np.arange(1, 100, dtype=float),
                          'name': ['stuff{}'.format(i) for i in idx],
                          'isTrue': [True, False] * 49 + [True]})
    cols = ['reach', 'value', 'name', 'isTrue']
    subset = table[cols]  # designate a column order
    t0 = time.time()
    df2shp(subset, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - t0))
    t0 = time.time()
    result = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - t0))
    #assert list(result.columns) == cols
    assert [d.name for d in result.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert result.isTrue.sum() == 50

    # with geometry: point features -> .shp
    subset = pd.DataFrame({'reach': np.arange(1, 101, dtype=int),
                           'value': np.arange(100, dtype=float),
                           'name': ['stuff{}'.format(i) for i in np.arange(100)],
                           'geometry': [Point([i, i]) for i in range(100)]})
    cols = ['reach', 'value', 'name', 'geometry']  # geometry is placed in last column when shp is read in
    subset = subset[cols]
    df2shp(subset, 'temp/junk.shp', retain_order=True)
    result = shp2df('temp/junk.shp')
    assert result.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(result.index.values, np.arange(100))  # check ordering of rows
    assert result.columns.tolist() == cols  # check column order

    # datetime index handling and retention of the index on write
    table.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:38', freq='s')
    table.index.name = 'datetime'
    df2shp(table, 'temp/junk.dbf', index=True)
    table = shp2df('temp/junk.dbf')
    assert 'datetime' in table.columns
    assert table.datetime[0] == '2016-01-01 01:00:00'
# basic grid parameters
name = 'map_test'
epsg = 5070
xul, yul = 520487.3, 1194668.3
nrow, ncol = 20, 20
dxy = 5280 * .3048  # 5280 ft converted to meters
buf = 1e4
# (xmin, ymin, xmax, ymax) of the grid footprint, padded by buf on each side
bounds = (xul - buf,
          yul - dxy * nrow - buf,
          xul + dxy * ncol + buf,
          yul + buf)

# make version of preprocessed flowlines filtered to bounding box
df = shp2df('/Users/aleaf/Documents/MAP/repos/sfr_output/preprocessed/flowlines_gt20km/flowlines_gt20km_edited.shp',
            filter=bounds)
df2shp(df, 'data/{}_flowlines.shp'.format(name), epsg=epsg)

# make a spatial reference object defining the grid
sr = SpatialReference(delr=np.ones(ncol, dtype=float) * dxy,
                      delc=np.ones(nrow, dtype=float) * dxy,
                      xul=xul, yul=yul, epsg=epsg)

# export sr info to json file
model_info = sr.attribute_dict
model_info.update({'nrow': sr.nrow,
                   'ncol': sr.ncol,
                   'delr': sr.delr[0],
                   'delc': sr.delc[0],
                   'epsg': sr.epsg})
with open('data/{}_grid.json'.format(name), 'w') as output:
    json.dump(model_info, output, indent=4, sort_keys=True)