Example #1
def make_lakarr2d(grid, lakesdata, include_ids, id_column='hydroid'):
    """
    Make a nrow x ncol array with lake package extent for each lake,
    using the numbers in the 'id' column in the lakes shapefile.
    """
    if isinstance(lakesdata, str):
        # implement automatic reprojection in gis-utils
        # maintaining backwards compatibility
        kwargs = {'dest_crs': grid.crs}
        kwargs = get_input_arguments(kwargs, shp2df)
        lakes = shp2df(lakesdata, **kwargs)
    elif isinstance(lakesdata, pd.DataFrame):
        lakes = lakesdata.copy()
    else:
        raise ValueError(
            'unrecognized input for "lakesdata": {}'.format(lakesdata))
    id_column = id_column.lower()
    lakes.columns = [c.lower() for c in lakes.columns]
    lakes.index = lakes[id_column]
    lakes = lakes.loc[include_ids]
    lakes['lakid'] = np.arange(1, len(lakes) + 1)
    lakes['geometry'] = [Polygon(g.exterior) for g in lakes.geometry]
    arr = rasterize(lakes, grid=grid, id_column='lakid')

    # ensure that order of hydroids is unchanged
    # (used to match features to lake IDs in lake package)
    assert lakes[id_column].tolist() == include_ids
    return arr
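# A minimal usage sketch, assuming a hypothetical 'lakes.shp' with a 'hydroid'
# field and a grid object (with a .crs attribute) accepted by rasterize():
#
#     lakarr = make_lakarr2d(grid, 'lakes.shp', include_ids=[13400250, 13400251])
#
# Cells in the first lake are coded 1, the second 2, all other cells 0;
# the order of include_ids sets the Lake Package numbering.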
Example #2
def test_add_observations_from_line_ids(shellmound_sfrdata,
                                        flux_observation_data, outdir):
    obs = shellmound_sfrdata.add_observations(flux_observation_data,
                                              obstype='downstream-flow',
                                              line_id_column='line_id',
                                              obsname_column='site_no')
    assert np.all(obs == shellmound_sfrdata._observations)
    assert set(obs.columns) == {'obsname', 'obstype', 'rno', 'iseg', 'ireach'}
    # get the last reach in each segment
    rd = shellmound_sfrdata.reach_data.sort_values(
        by=['iseg', 'ireach'], axis=0).groupby('iseg').last()
    rno = dict(zip(rd.line_id, rd.rno))
    assert set(obs.rno) == set(
        [rno[lid] for lid in flux_observation_data.line_id])
    rd = shellmound_sfrdata.reach_data
    iseg_ireach = dict(list(zip(rd.rno, zip(rd.iseg, rd.ireach))))
    for i, r in obs.iterrows():
        assert (r.iseg, r.ireach) == iseg_ireach[r.rno]

    out_shapefile = os.path.join(outdir, 'obs.shp')

    # test shapefile export
    shellmound_sfrdata.export_observations(filename=out_shapefile)
    df = shp2df(out_shapefile)
    pd.testing.assert_frame_equal(df.drop('geometry', axis=1),
                                  shellmound_sfrdata.observations,
                                  check_dtype=False)

    # test assigning obs from custom reach number column?
    obs = shellmound_sfrdata.add_observations(flux_observation_data,
                                              obstype='downstream-flow',
                                              rno_column='junk',
                                              obsname_column='site_no')
    assert set(obs.rno) == set(flux_observation_data.junk)
Example #3
def cull_data_to_active_area(data,
                             active_area,
                             active_area_id_column=None,
                             active_area_feature_id=None,
                             data_crs=None,
                             metadata=None):
    df = data.copy()
    if metadata is not None:
        md = metadata.copy()
    if isinstance(active_area, Path) or isinstance(active_area, str):
        active_area = [active_area]
    active_area = [str(filepath) for filepath in active_area]
    active_area_df = shp2df(active_area, dest_crs=data_crs)
    if active_area_id_column is not None and active_area_feature_id is not None:
        loc = active_area_df[active_area_id_column] == active_area_feature_id
        assert any(loc), "feature {} not found!".format(active_area_feature_id)
        # select the single matching polygon (not a one-row Series)
        active_area_polygon = active_area_df.loc[loc, 'geometry'].values[0]
    else:
        active_area_polygon = MultiPolygon(active_area_df.geometry.tolist())

    if metadata is not None:
        within = np.array([g.within(active_area_polygon) for g in md.geometry])
        md = md.loc[within]
        df_within = df.site_no.isin(md['site_no'])
    else:
        within = np.array([g.within(active_area_polygon) for g in df.geometry])
        df_within = within
    if not np.all(within):
        print(
            'Culling {} sites outside of the model area defined by {}.'.format(
                np.sum(~within), active_area))
    df = df.loc[df_within]
    if metadata is not None:
        return df, md
    return df
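# Runnable illustration of the core culling test above, using shapely directly
# (toy geometry, not the project's data):
import numpy as np
from shapely.geometry import Point, box

active_area_polygon = box(0, 0, 10, 10)
site_geoms = [Point(5, 5), Point(20, 20), Point(1, 9)]
within = np.array([g.within(active_area_polygon) for g in site_geoms])
print('would cull {} of {} sites'.format(np.sum(~within), len(site_geoms)))  # 1 of 3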
Example #4
    def from_shapefile(cls,
                       shapefile=None,
                       node_col='node',
                       kcol='k',
                       icol='i',
                       jcol='j',
                       isfr_col='isfr',
                       active_area=None,
                       crs=None,
                       epsg=None,
                       proj_str=None,
                       prjfile=None):

        if crs is None:
            crs = get_shapefile_crs(shapefile)
        crs = get_crs(prjfile=prjfile, epsg=epsg, proj_str=proj_str, crs=crs)

        with fiona.open(shapefile) as src:
            bounds = src.bounds

        df = shp2df(shapefile)
        assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
            shapefile)

        return cls.from_dataframe(df,
                                  node_col=node_col,
                                  kcol=kcol,
                                  icol=icol,
                                  jcol=jcol,
                                  isfr_col=isfr_col,
                                  bounds=bounds,
                                  active_area=active_area,
                                  crs=crs)
Example #5
    def from_shapefile(cls,
                       shapefile=None,
                       node_col='node',
                       kcol='k',
                       icol='i',
                       jcol='j',
                       isfr_col='isfr',
                       active_area=None,
                       epsg=None,
                       proj_str=None,
                       prjfile=None):

        if prjfile is None:
            prjfile = shapefile.replace('.shp', '.prj')
            prjfile = prjfile if os.path.exists(prjfile) else None
        with fiona.open(shapefile) as src:
            bounds = src.bounds

        df = shp2df(shapefile)
        assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
            shapefile)

        return cls.from_dataframe(df,
                                  node_col=node_col,
                                  kcol=kcol,
                                  icol=icol,
                                  jcol=jcol,
                                  isfr_col=isfr_col,
                                  bounds=bounds,
                                  active_area=active_area,
                                  epsg=epsg,
                                  proj_str=proj_str,
                                  prjfile=prjfile)
Example #6
def make_bdlknc_zones(grid,
                      lakesshp,
                      include_ids,
                      feat_id_column='feat_id',
                      lake_package_id_column='lak_id'):
    """
    Make zones for populating with lakebed leakance values. Same as
    lakarr, but with a buffer around each lake so that horizontal
    connections have non-zero values of bdlknc, and near-shore
    areas can be assigned higher leakance values.
    """
    print('setting up lakebed leakance zones...')
    t0 = time.time()
    if isinstance(lakesshp, str):
        # implement automatic reprojection in gis-utils
        # maintaining backwards compatibility
        kwargs = {'dest_crs': grid.crs}
        kwargs = get_input_arguments(kwargs, shp2df)
        lakes = shp2df(lakesshp, **kwargs)
    elif isinstance(lakesshp, pd.DataFrame):
        lakes = lakesshp.copy()
    else:
        raise ValueError(
            'unrecognized input for "lakesshp": {}'.format(lakesshp))
    # Exterior buffer
    id_column = feat_id_column.lower()
    lakes.columns = [c.lower() for c in lakes.columns]
    exterior_buffer = 30  # m
    lakes.index = lakes[id_column]
    lakes = lakes.loc[include_ids]
    if lake_package_id_column not in lakes.columns:
        lakes[lake_package_id_column] = np.arange(1, len(lakes) + 1)
    # speed up buffer construction by getting exteriors once
    # and probably more importantly,
    # simplifying possibly complex geometries of lakes generated from 2ft lidar
    unbuffered_exteriors = [
        Polygon(g.exterior).simplify(5) for g in lakes.geometry
    ]
    lakes['geometry'] = [
        g.buffer(exterior_buffer) for g in unbuffered_exteriors
    ]
    arr = rasterize(lakes, grid=grid, id_column=lake_package_id_column)

    # Interior buffer for lower leakance, assumed to be 20 m around the lake
    interior_buffer = -20  # m
    lakes['geometry'] = [
        g.buffer(interior_buffer) for g in unbuffered_exteriors
    ]
    arr2 = rasterize(lakes, grid=grid, id_column=lake_package_id_column)
    arr2 = arr2 * 100  # Create new ids for the interior, as multiples of 100

    arr[arr2 > 0] = arr2[arr2 > 0]
    # ensure that order of hydroids is unchanged
    # (used to match features to lake IDs in lake package)
    assert lakes[id_column].tolist() == list(include_ids)
    print('finished in {:.2f}s'.format(time.time() - t0))
    return arr
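# Runnable sketch of how the two rasterized zone arrays combine: interior
# (-20 m buffer) cells overwrite the exterior-buffer cells with ids * 100.
import numpy as np

arr = np.array([[0, 1, 1, 1, 0],
                [0, 1, 1, 1, 0]])          # lake 1 with 30 m exterior buffer
arr2 = np.array([[0, 0, 1, 0, 0],
                 [0, 0, 1, 0, 0]]) * 100   # interior zone -> id 100
arr[arr2 > 0] = arr2[arr2 > 0]
print(arr)
# [[  0   1 100   1   0]
#  [  0   1 100   1   0]]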
Example #7
def test_export_period_data(shellmound_sfrdata_with_period_data, outdir):
    sfrd = shellmound_sfrdata_with_period_data
    outfile = '{}/test_mf6_sfr_period_data_inflow.shp'.format(outdir)
    sfrd.export_period_data(outfile)
    df = shp2df(outfile)
    nodes = dict(zip(sfrd.reach_data.rno, sfrd.reach_data.node))
    pers = [int(c.strip('inflow')) for c in df.columns if 'inflow' in c]
    assert set(pers) == set(sfrd.period_data.per)
    assert set(df['rno']) == set(sfrd.period_data.rno)
    assert np.allclose(df['0inflow'].append(df['1inflow']).values,
                       sfrd.period_data['inflow'].values)
    assert np.array_equal(df.node.values, np.array([nodes[rno] for rno in df.rno], dtype=int))

    # check export still works if there are multiple items in a reach
    sfrd._period_data = sfrd.period_data.append(sfrd.period_data)
    sfrd.export_period_data(outfile)
    df = shp2df(outfile)
    assert np.allclose(sorted(df['0inflow'].append(df['1inflow']).values),
                       sorted(sfrd.period_data.groupby(['rno', 'per']).sum().inflow.values))
Example #8
def test_grid_write_shapefile(modelgrid, tmpdir):
    filename = os.path.join(tmpdir, 'grid.shp')
    modelgrid.write_shapefile(filename)
    with fiona.open(filename) as src:
        assert src.crs['init'] == 'epsg:3070'
        assert np.allclose(src.bounds, modelgrid.bounds)
    df = shp2df(filename)
    i, j = np.indices((modelgrid.nrow, modelgrid.ncol))
    assert np.array_equal(np.arange(len(df), dtype=int), df.node.values)
    assert np.array_equal(i.ravel(), df.i.values)
    assert np.array_equal(j.ravel(), df.j.values)
Example #9
def test_points_to_raster(point_data, test_output_path):
    bottom_shapefiles = [test_output_path / 'test_points.shp']
    outfile = test_output_path / 'test_points_raster.tif'
    points_to_raster(bottom_shapefiles,
                     data_col='values',
                     output_resolution=0.1,
                     outfile=outfile)
    source_data = shp2df(str(bottom_shapefiles[0]))
    x = [g.x for g in source_data.geometry]
    y = [g.y for g in source_data.geometry]
    results = get_values_at_points(outfile, x, y)
    assert np.allclose(results, source_data['values'].values)
Example #10
    def from_shapefile(cls,
                       shapefile,
                       id_column='id',
                       routing_column='toid',
                       arbolate_sum_column2='asum2',
                       width1_column='width1',
                       width2_column='width2',
                       up_elevation_column='elevup',
                       dn_elevation_column='elevdn',
                       name_column='name',
                       attr_length_units='meters',
                       attr_height_units='meters',
                       filter=None,
                       epsg=None,
                       proj_str=None,
                       prjfile=None):
        """
        Parameters
        ----------

        filter : tuple or str
            Bounding box (tuple) or shapefile of model stream network area.
        """

        if prjfile is None:
            prjfile = shapefile.replace('.shp', '.prj')
            prjfile = prjfile if os.path.exists(prjfile) else None

        shpfile_crs = crs(epsg=epsg, proj_str=proj_str, prjfile=prjfile)

        # ensure that filter bbox is in same crs as flowlines
        if filter is not None and not isinstance(filter, tuple):
            filter = get_bbox(filter, shpfile_crs)

        df = shp2df(shapefile, filter=filter)
        assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
            shapefile)

        return cls.from_dataframe(df,
                                  id_column=id_column,
                                  routing_column=routing_column,
                                  arbolate_sum_column2=arbolate_sum_column2,
                                  width1_column=width1_column,
                                  width2_column=width2_column,
                                  up_elevation_column=up_elevation_column,
                                  dn_elevation_column=dn_elevation_column,
                                  name_column=name_column,
                                  attr_length_units=attr_length_units,
                                  attr_height_units=attr_height_units,
                                  epsg=epsg,
                                  proj_str=proj_str,
                                  prjfile=prjfile)
Example #11
def read_nhdplus(shpfiles, bbox_filter=None, index_col='comid'):
    # read shapefile into dataframe and find the index column
    df = shp2df(shpfiles, filter=bbox_filter)
    if len(df) > 0:
        matches = [c for c in df.columns if c.lower() == index_col]
        if len(matches) == 0:
            if isinstance(shpfiles, list):
                shpfiles = '\n'.join(shpfiles)
            # report the requested column name, not the empty match list
            raise IndexError('No {} column found in: \n{}'.format(
                index_col, shpfiles))
        else:
            df.index = df[matches[0]]
        return df
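# Hypothetical usage sketch (placeholder path and bounding box):
#
#     flowlines = read_nhdplus('NHDFlowline.shp',
#                              bbox_filter=(682000., 5130000., 700000., 5150000.),
#                              index_col='comid')
#
# The returned DataFrame is indexed by COMID, so routing lookups
# become flowlines.loc[comid].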
Example #12
def test_transient_list_export(model):
    m, grid, output_path = model
    outfiles = export(m, grid, 'wel', output_path=output_path)
    variables = ['wel0_stress_period_data']
    if m.version != 'mf6':
        variables = ['wel_stress_period_data']
    check_files(outfiles, variables=variables)
    df = mftransientlist_to_dataframe(m.wel.stress_period_data, squeeze=True)
    df.index = range(len(df))
    if 'cellid' in df.columns:
        df['cellid'] = df['cellid'].astype(str)
    df2 = shp2df(outfiles[0]).drop('geometry', axis=1)
    assert np.allclose(df.drop('cellid', axis=1), df2.drop('cellid', axis=1))
Example #13
def test_get_upstream_area():

    catchments = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDPlusCatchment/Catchment.shp',
                  '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDPlusCatchment/Catchment.shp']
    plusflow = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDPlusAttributes/PlusFlow.dbf',
                '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDPlusAttributes/PlusFlow.dbf']
    nodasites = '/Users/aleaf/Documents/USFS/Nicolet/targets/north/flux_field_no_da.shp'
    flowlines = ['/Users/aleaf/Documents/NHDPlus/NHDPlusGL/NHDPlus04/NHDSnapshot/Hydrography/NHDFlowline.shp',
                 '/Users/aleaf/Documents/NHDPlus/NHDPlusMS/NHDPlus07/NHDSnapshot/Hydrography/NHDFlowline.shp']
    nearfield = '/Users/aleaf/Documents/USFS/Nicolet/shps/Nicolet_north_NF.shp'

    nf = shape(next(iter(fiona.open(nearfield)))['geometry'])
    nf = project(nf, '+init=epsg:26716', '+init=epsg:4269')
    bbox = nf.bounds

    noda = shp2df(nodasites)

    get_upstream_area(noda.geometry.tolist(), plusflow, flowlines, catchments, nf)
Example #14
def read_polygon_feature(feature, dest_crs, feature_crs=None):
    """Read a geometric feature from a shapefile, shapely geometry object,
    or collection of shapely geometry objects. Reproject to dest_crs
    if the feature is in a different CRS.

    Parameters
    ----------
    feature : shapely Polygon, list of Polygons, or shapefile path
            Polygons must be in same CRS as linework; shapefile
            features will be reprojected if their crs is different.
    dest_crs : instance of sfrmaker.crs
        Output CRS for the feature.
    feature_crs : instance of sfrmaker.crs, optional
        CRS of the feature, if it is not a shapefile
        (a shapefile's CRS is read from its .prj file). By default None.

    Returns
    -------
    feature : shapely geometry object
    """
    if isinstance(feature, str):
        with fiona.open(feature) as src:
            feature_crs = crs(src.crs)
        geoms = shp2df(feature)['geometry'].values
        feature = unary_union(geoms)
    elif isinstance(feature, collections.abc.Iterable):
        if isinstance(feature[0], dict):
            try:
                feature = [shape(f) for f in feature]
            except Exception as ex:
                print(ex)
                print(
                    "Supplied dictionary doesn't appear to be valid GeoJSON.")
        feature = unary_union(feature)
    elif isinstance(feature, dict):
        try:
            feature = shape(feature)
        except Exception as ex:
            print(ex)
            print("Supplied dictionary doesn't appear to be valid GeoJSON.")
    elif isinstance(feature, Polygon):
        pass
    else:
        raise TypeError("Unrecognized feature input.")
    if feature_crs is not None and feature_crs != dest_crs:
        feature = project(feature, feature_crs.proj_str, dest_crs.proj_str)
    return feature.buffer(0)
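# Hedged sketch of the accepted input types (dest_crs construction assumed
# from the crs() usage elsewhere in this module):
#
#     dest_crs = crs(epsg=5070)
#     feature = read_polygon_feature(Polygon(...), dest_crs)       # shapely geometry
#     feature = read_polygon_feature([poly1, poly2], dest_crs)     # iterable -> unary_union
#     feature = read_polygon_feature('active_area.shp', dest_crs)  # shapefile; reprojected
#                                                                  # if its CRS differs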
Example #15
def test_edit_flowlines(flowlines, preprocessed_flowlines, test_data_path):
    if flowlines is None:
        flowlines = preprocessed_flowlines
    flowline_edits_file = os.path.join(test_data_path, 'flowline_edits.yml')
    edited_flowlines = edit_flowlines(flowlines,
                                      flowline_edits_file,
                                      logger=None)
    with open(flowline_edits_file) as src:
        cfg = yaml.load(src, Loader=yaml.Loader)
    # verify that flowlines were dropped
    assert not any(
        set(cfg['drop_flowlines']).intersection(edited_flowlines.COMID))
    # verify routing changes
    for comid, tocomid in cfg['reroute_flowlines'].items():
        assert edited_flowlines.loc[comid, 'tocomid'] == tocomid
    add_flowlines = shp2df(os.path.join(test_data_path, 'yazoo.shp'))
    assert not any(set(add_flowlines.comid).difference(edited_flowlines.index))
    if isinstance(flowlines, str):
        assert os.path.exists(flowlines[:-4] + '.prj')
Example #16
def parent_model_sfr_flow_results():
    parent_model_sfrlines = 'sfrmaker/test/data/shellmound/merasnwt_sfrlines.shp'
    rd = shp2df(parent_model_sfrlines)
    rd['Qout'] = 0.
    rd = rd.rename(columns={'iseg': 'segment', 'ireach': 'reach'}) \
        [['rno', 'segment', 'reach', 'Qout']].copy()
    rd.loc[rd.rno == 13933, 'Qout'] = 353146.667
    rd.loc[rd.rno == 11780, 'Qout'] = 3531.46667
    rd.loc[rd.rno == 11949, 'Qout'] = 3.53146667
    rd.loc[rd.rno == 11483, 'Qout'] = 353.146667
    rd.loc[rd.rno == 13070, 'Qout'] = 7062.93334
    rd.loc[rd.rno == 15682, 'Qout'] = 7.06293334
    rd.loc[rd.rno == 15684, 'Qout'] = 35314.6667
    rd['kstpkper'] = [(0, 0)] * len(rd)
    rd2 = rd.copy()
    rd2['kstpkper'] = [(1, 1)] * len(rd)
    rd2['Qout'] *= 2
    rd = rd.append(rd2).copy()
    return rd
Example #17
def get_nhdplus_v2_routing(PlusFlow_file,
                           from_col='FROMCOMID',
                           to_col='TOCOMID'):
    """Read PlusFlow file and return the routing
    information as a dictionary of to:from COMID numbers.
    """
    fname, ext = os.path.splitext(PlusFlow_file)
    if ext in ['.shp', '.dbf']:
        df = shp2df(PlusFlow_file)
    elif ext == '.csv':
        df = pd.read_csv(PlusFlow_file)
    else:
        raise Exception("Unrecognized file-type for PlusFlow table: {}".format(
            PlusFlow_file))
    flowline_routing = dict(zip(df[from_col], df[to_col]))
    comids = set(df[from_col])
    flowline_routing = {
        k: v if v in comids else 0
        for k, v in flowline_routing.items()
    }
    return flowline_routing
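# Runnable illustration of the terminal-reach handling above: any COMID whose
# downstream TOCOMID is absent from the FROMCOMID set is routed to 0 (outlet).
from_comids = [1001, 1002, 1003]
to_comids = [1002, 1003, 9999]  # 9999 never appears as a FROMCOMID
flowline_routing = dict(zip(from_comids, to_comids))
comids = set(from_comids)
flowline_routing = {k: v if v in comids else 0
                    for k, v in flowline_routing.items()}
print(flowline_routing)  # {1001: 1002, 1002: 1003, 1003: 0}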
Example #18
def assign_geographic_obsgroups(metadata, geographic_groups,
                                geographic_groups_col, metadata_crs):

    md = metadata.copy()
    if geographic_groups is not None:
        if isinstance(geographic_groups, dict):
            # already a mapping of group name to polygon
            geo_group_dict = geographic_groups
        else:
            geo_group_dict = {}
            if isinstance(geographic_groups, str) or isinstance(
                    geographic_groups, Path):
                geographic_groups = [geographic_groups]
            for item in reversed(geographic_groups):
                group_info = shp2df(str(item), dest_crs=metadata_crs)
                groups = dict(
                    zip(group_info[geographic_groups_col],
                        group_info['geometry']))
                geo_group_dict.update(groups)
        for group_name, polygon in geo_group_dict.items():
            within = [g.within(polygon) for g in md.geometry]
            md.loc[within, 'geo_group'] = group_name
    return md
Example #19
def test_export_sfr(model):
    m, grid, output_path = model
    # mf2005 style SFR export not implemented yet
    # TODO: implement mf2005 sfr package export
    if m.version != 'mf6':
        return
    outfiles = export(m, grid, 'sfr', output_path=output_path)
    # TODO: finish this test
    variables = ['shellmound.sfr']
    if m.version != 'mf6':
        variables = ['wel_stress_period_data']
        df = pd.DataFrame(m.sfr.reach_data.array)
        compare_cols = ['strtop']
    else:
        df = pd.DataFrame(m.sfr.packagedata.array)
        compare_cols = ['rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk']
    check_files(outfiles, variables=variables)
    df.index = range(len(df))
    if 'cellid' in df.columns:
        df['cellid'] = df['cellid'].astype(str)
    df2 = shp2df(outfiles[0]).drop('geometry', axis=1)
    df2['cellid'] = list(zip(df2['k'], df2['i'], df2['j']))
    df2['cellid'] = df2['cellid'].astype(str)
    assert np.allclose(df[compare_cols], df2[compare_cols])
Example #20
    def from_shapefile(cls,
                       shapefile,
                       id_column='id',
                       routing_column='toid',
                       arbolate_sum_column2='asum2',
                       width1_column='width1',
                       width2_column='width2',
                       up_elevation_column='elevup',
                       dn_elevation_column='elevdn',
                       name_column='name',
                       attr_length_units='meters',
                       attr_height_units='meters',
                       filter=None,
                       crs=None,
                       epsg=None,
                       proj_str=None,
                       prjfile=None):
        """Create a Lines instance from a shapefile.

        Parameters
        ----------
        shapefile : str
            Input shapefile
        id_column : str, optional
            Attribute field with line identifiers, 
            by default 'id'
        routing_column : str, optional
            Attribute field with downstream routing connections,
            by default 'toid'
        arbolate_sum_column2 : str, optional
            Attribute field with arbolate sums at downstream ends of lines, 
            by default 'asum2'
        width1_column : str, optional
            Attribute field with channel widths at upstream ends of lines,
            by default 'width1'
        width2_column : str, optional
            Attribute field with channel widths at downstream ends of lines, 
            by default 'width2'
        up_elevation_column : str, optional
            Attribute field with elevations at upstream ends of lines, 
            by default 'elevup'
        dn_elevation_column : str, optional
            Attribute field with elevations at downstream ends of lines,
            by default 'elevdn'
        name_column : str, optional
            Attribute field with feature names, 
            by default 'name'
        attr_length_units : str, optional
            Length units for feature attributes (e.g. width, arbolate sum, etc.)
            By default, meters.
        attr_height_units : str, optional
            Length units for elevation attributes
            By default, 'meters'.
        filter : tuple, optional
            (xmin, ymin, xmax, ymax) bounding box to filter which records 
            are read from the shapefile. By default None.
        epsg: int, optional
            EPSG code identifying Coordinate Reference System (CRS)
            for features in the input shapefile.
        proj_str: str, optional
            proj_str string identifying CRS for features in the input shapefile.
        prjfile: str, optional
            File path to projection (.prj) file identifying CRS
            for features in the input shapefile. By default,
            the projection file included with the input shapefile
            will be used.

        Returns
        -------
        lines : :class:`Lines` instance
        """

        if prjfile is None:
            prjfile = shapefile.replace('.shp', '.prj')
            prjfile = prjfile if os.path.exists(prjfile) else None

        shpfile_crs = get_crs(prjfile=prjfile,
                              epsg=epsg,
                              proj_str=proj_str,
                              crs=crs)

        # ensure that filter bbox is in same crs as flowlines
        if filter is not None and not isinstance(filter, tuple):
            filter = get_bbox(filter, shpfile_crs)

        df = shp2df(shapefile, filter=filter)
        assert 'geometry' in df.columns, "No feature geometries found in {}.".format(
            shapefile)

        return cls.from_dataframe(df,
                                  id_column=id_column,
                                  routing_column=routing_column,
                                  arbolate_sum_column2=arbolate_sum_column2,
                                  width1_column=width1_column,
                                  width2_column=width2_column,
                                  up_elevation_column=up_elevation_column,
                                  dn_elevation_column=dn_elevation_column,
                                  name_column=name_column,
                                  attr_length_units=attr_length_units,
                                  attr_height_units=attr_height_units,
                                  epsg=epsg,
                                  proj_str=proj_str,
                                  prjfile=prjfile)
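# Hypothetical usage sketch (placeholder path and attribute field names):
#
#     lines = Lines.from_shapefile('flowlines.shp',
#                                  id_column='COMID',
#                                  routing_column='tocomid',
#                                  width1_column='width1',
#                                  width2_column='width2',
#                                  attr_length_units='meters',
#                                  filter=(682000., 5130000., 700000., 5150000.))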
Example #21
def add_observations(sfrdata,
                     data,
                     flowline_routing=None,
                     obstype=None,
                     sfrlines_shapefile=None,
                     rno_column_in_sfrlines='rno',
                     x_location_column=None,
                     y_location_column=None,
                     line_id_column=None,
                     rno_column=None,
                     obstype_column=None,
                     obsname_column='site_no'):
    """Add SFR observations to the observations DataFrame
    attribute of an sfrdata instance. Observations can
    by located on the SFR network by specifying reach number
    directly (rno_column_in_data), by x, y location (x_column_in_data and y_column in data),
    or by specifying the source hydrography lines that they are located on
    (line_id_column_in_data).

    Parameters
    ----------
    sfrdata : sfrmaker.SFRData instance
        SFRData instance with reach_data table attribute. To add observations from x, y coordinates,
        the reach_data table must have a geometry column with LineStrings representing each reach, or
        an sfrlines_shapefile is required. Reach numbers are assumed to be in an 'rno' column.
    data : DataFrame
        Table with information on the observation sites to be located. Must have
        either reach numbers (rno_column), line_ids (line_id_column),
        or x and y locations (x_location_column and y_location_column).
    obstype : str (optional)
        Type of observation to record, for MODFLOW-6 (default 'downstream-flow'; see
        MODFLOW-6 IO documentation for more details). Alternatively, observation
        types can be specified by row in data, using the obstype_column argument.
    x_location_column : str (optional)
        Column in data with site x-coordinates (in same CRS as SFR network).
    y_location_column : str (optional)
        Column in data with site y-coordinates (in same CRS as SFR network).
    sfrlines_shapefile : str (optional)
        Shapefile version of SFRdata.reach_data. Only needed if SFRdata.reach_data doesn't
        have LineString geometries for the reaches.
    rno_column_in_sfrlines : str (optional)
        Column in sfrlines with reach numbers for matching lines with reaches in sfrdata, or
        reach numbers assigned to observation sites. (default 'rno')
    line_id_column : str
        Column in data matching observation sites to line_ids in the source hydrography data.
    rno_column : str
        Column in data matching observation sites to reach numbers in the SFR network.
    flowline_routing : dict
        Optional dictionary of routing for source hydrography. Only needed
        if locating by line_id, and SFR network is a subset of the full source
        hydrography (i.e. some lines were dropped in the creation of the SFR package,
        or if the sites are inflow points corresponding to lines outside of the model perimeter).
        In this case, observation points referenced to line_ids that are missing from the SFR
        network are placed at the first reach corresponding to the next downstream line_id
        that is represented in the SFR network.
    obstype_column : str (optional)
        Column in data with MODFLOW-6 observation types, for adding observations of different types.
        If obstype and obstype_column are both None, the default of 'downstream-flow' will be used.
    obsname_column : str
        Column in data with unique identifier (e.g. site number or name) for observation sites.


    Notes
    -----
    Sites located by line_id (source hydrography) will be assigned to the last reach in the
    segment corresponding to the line_id. Locating by x, y or reach number is more accurate.

    """
    sfrd = sfrdata
    reach_data = sfrdata.reach_data.copy()

    # allow input via a list of tables or single table
    input_data = data
    if not isinstance(input_data, list):
        input_data = [input_data]
    dfs = []
    for item in input_data:
        if isinstance(item, str):
            dfs.append(pd.read_csv(item))
        elif isinstance(item, pd.DataFrame):
            dfs.append(item.copy())
        else:
            raise Exception(
                'Unrecognized input type for data:\n{}'.format(item))
    data = pd.concat(dfs).reset_index(drop=True)

    # read reach geometries from a shapefile
    if sfrlines_shapefile is not None:
        sfrlines = shp2df(sfrlines_shapefile)
        geoms = dict(
            zip(sfrlines[rno_column_in_sfrlines], sfrlines['geometry']))
        reach_data['geometry'] = [geoms[rno] for rno in reach_data['rno']]

    # if no reach number is provided
    msg = "Observation sites need reach number, (x,y) coordinates, or source hydrography IDs"
    if rno_column not in data.columns:

        rno_column = 'rno'

        # get reach numbers by x, y location of sites
        if x_location_column in data.columns and y_location_column in data.columns:
            locs = locate_sites(
                data,
                reach_data,
                x_column_in_data=x_location_column,
                y_column_in_data=y_location_column,
                reach_id_col='rno',  # reach number column in reach_data
                site_number_col=obsname_column)
            data[rno_column] = locs['rno']

        # get reach number from site locations in source hydrography (line_ids)
        elif line_id_column in data.columns:
            # map NHDPlus COMIDs to reach numbers
            if flowline_routing is None:
                line_id = dict(zip(reach_data.iseg, reach_data.line_id))
                sfr_routing = sfrdata.segment_routing.copy()

                # routing for source hydrography
                flowline_routing = {
                    line_id.get(k, 0): line_id.get(v, 0)
                    for k, v in sfr_routing.items()
                }
            # get the last reach in each segment
            r1 = reach_data.sort_values(by=['iseg', 'ireach'],
                                        axis=0).groupby('iseg').last()
            line_id_rno_mapping = dict(zip(r1['line_id'], r1['rno']))
            line_ids = get_next_id_in_subset(r1.line_id, flowline_routing,
                                             data[line_id_column])
            data[rno_column] = [line_id_rno_mapping[lid] for lid in line_ids]

        else:
            raise ValueError(msg)

    # create observations dataframe
    obsdata = pd.DataFrame(columns=sfrd.observations.columns)

    # remove duplicate locations
    data = data.groupby(rno_column).first().reset_index()
    obsdata['rno'] = data[rno_column]

    # segment and reach info
    iseg_ireach = dict(
        list(zip(reach_data.rno, zip(reach_data.iseg, reach_data.ireach))))
    obsdata['iseg'] = [iseg_ireach[rno][0] for rno in obsdata.rno]
    obsdata['ireach'] = [iseg_ireach[rno][1] for rno in obsdata.rno]
    for col in ['rno', 'iseg', 'ireach']:
        obsdata[col] = obsdata[col].astype(int)

    if obstype is not None:
        obsdata['obstype'] = obstype
    elif obstype_column in data.columns:
        obsdata['obstype'] = data[obstype_column]
    else:
        obsdata['obstype'] = 'downstream-flow'
    obsdata['obsname'] = data[obsname_column].astype(str)

    return obsdata
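# Hedged usage sketch (sfrd and 'flux_sites.csv' are placeholders). Sites
# located by line_id land on the last reach of the matching segment (see Notes):
#
#     obs = add_observations(sfrd, 'flux_sites.csv',
#                            obstype='downstream-flow',
#                            line_id_column='comid',
#                            obsname_column='site_no')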
Example #22
def export_sfr_results(mf2005_sfr_outputfile=None,
                       mf2005_SfrFile_instance=None,
                       mf6_sfr_stage_file=None,
                       mf6_sfr_budget_file=None,
                       model=None,
                       grid=None,
                       kstpkper=(0, 0),
                       sfrlinesfile=None,
                       pointsize=0.5,
                       output_length_units='feet',
                       output_time_units='seconds',
                       gis=True,
                       pdfs=True,
                       output_path='postproc',
                       suffix='',
                       verbose=False):

    pdfs_dir, rasters_dir, shps_dir = make_output_folders(output_path)
    m = model
    if not isinstance(kstpkper, list):
        kstpkper = [kstpkper]
    print('Exporting SFR results...')
    for f in [mf2005_sfr_outputfile, mf6_sfr_stage_file, mf6_sfr_budget_file]:
        if f is not None:
            print('file: {}'.format(f))

    df = read_sfr_output(mf2005_sfr_outputfile=mf2005_sfr_outputfile,
                         mf2005_SfrFile_instance=mf2005_SfrFile_instance,
                         mf6_sfr_stage_file=mf6_sfr_stage_file,
                         mf6_sfr_budget_file=mf6_sfr_budget_file,
                         model=model)
    lmult = convert_length_units(get_length_units(m), output_length_units)
    tmult = convert_time_units(get_time_units(m), output_time_units)
    unit_text = get_unit_text(output_length_units, output_time_units, 3)

    if 'GWF' in df.columns:
        df['Qaquifer'] = -df.GWF  # for consistency with MF2005
    if 'Qmean' not in df.columns:
        df['Qmean'] = df[['Qin', 'Qout']].abs().mean(axis=1)

    # write columns in the output units
    df['Qmean_{}'.format(unit_text)] = df.Qmean * lmult**3 / tmult
    df['Qaq_{}'.format(unit_text)] = df.Qaquifer * lmult**3 / tmult

    # add model top comparison if available
    if m.dis is not None and 'i' in df.columns and 'j' in df.columns:
        df['model_top'] = m.dis.top.array[df.i.values, df.j.values]
    if 'stage' in df.columns:
        df['above'] = df.stage - df.model_top
    groups = df.groupby('kstpkper')

    outfiles = []
    if gis:
        prj_file = None
        if sfrlinesfile is not None:
            sfrlines = shp2df(sfrlinesfile)
            prj_file = sfrlinesfile[:-4] + '.prj'
            sfrlines.sort_values(by=['iseg', 'ireach'], inplace=True)
            geoms = sfrlines.geometry
        else:
            #assert sr is not None, \
            #    'need SpatialReference instance to locate model grid cells'
            #dfp = groups.get_group((0, 0)).copy()
            geoms = None
            #vertices = sr.get_vertices(dfp.i, dfp.j)
            #geoms = [Polygon(vrt) for vrt in vertices]

        for kstp, kper in kstpkper:
            print('stress period {}, timestep {}'.format(kper, kstp))
            dfp = groups.get_group((kstp, kper)).copy()
            if geoms is not None:
                dfp['geometry'] = geoms
            #dfp = gp.GeoDataFrame(dfp)
            #dfp.crs = sr.proj4_str
            # to use cell polygons instead of lines
            # verts = m.sr.get_vertices(df.i.values, df.j.values)
            #df['geometry'] = [Polygon(v) for v in verts]
            dfp['stp'] = [t[0] for t in dfp['kstpkper']]
            dfp['per'] = [t[1] for t in dfp['kstpkper']]
            dfp.drop('kstpkper', axis=1,
                     inplace=True)  # geopandas doesn't like tuples
            outfile = '{}/sfrout_per{}_stp{}{}.shp'.format(
                shps_dir, kper, kstp, suffix)

            export_shapefile(outfile, dfp, modelgrid=grid, prj=prj_file)
            outfiles.append(outfile)
            #dfp.to_file(outfile)
            #print('wrote {}'.format(outfile))

    if pdfs:
        # need to add a scale that addresses units
        for kstp, kper in kstpkper:
            print('stress period {}, timestep {}'.format(kper, kstp))
            df = groups.get_group((kstp, kper)).copy()
            bf_outfile = '{}/baseflow_per{}_stp{}{}.pdf'.format(
                pdfs_dir, kper, kstp, suffix)
            sfr_baseflow_pdf(bf_outfile,
                             df,
                             pointsize=pointsize,
                             verbose=verbose)

            qaq_outfile = '{}/qaquifer_per{}_stp{}{}.pdf'.format(
                pdfs_dir, kper, kstp, suffix)
            sfr_qaquifer_pdf(qaq_outfile,
                             df,
                             pointsize=pointsize,
                             verbose=verbose)
            outfiles += [bf_outfile, qaq_outfile]
    return outfiles
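# Runnable check of the unit-conversion arithmetic above: converting a flow of
# 1 m3/day to cubic feet per second with lmult (m -> ft) and tmult
# (days -> seconds), as in df.Qmean * lmult**3 / tmult.
lmult = 3.28084   # feet per meter
tmult = 86400.    # seconds per day
q_m3d = 1.0
print('{:.3e} cfs'.format(q_m3d * lmult ** 3 / tmult))  # ~4.087e-04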
Example #23
def locate_sites(site_data,
                 reach_data,
                 active_area_shapefile=None,
                 x_column_in_data=None,
                 y_column_in_data=None,
                 reach_id_col='rno',
                 site_number_col='site_no',
                 keep_columns=None,
                 perimeter_buffer=1000,
                 distance_threshold=1000):
    """Get SFR reach locations corresponding to x, y points
    (e.g. measurement site locations).

    Parameters
    ----------
    site_data : DataFrame or str (shapefile path)
        DataFrame or shapefile with point locations and attribute data for
        stream flow observation sites. Point locations can be specified
        in a DataFrame by either x_column_in_data and y_column_in_data, or
        a 'geometry' column of shapely points. If shapefiles are provided
        for both site_data and reach_data, they can be in any CRS, but both must have .prj files.
    reach_data : DataFrame or str (shapefile path)
        SFRData.reach_data DataFrame, or shapefile equivalent
        with line-arcs representing all segments and/or reaches.
        If shapefiles are provided for both site_data and reach_data,
        they can be in any CRS, but both must have .prj files.
    active_area_shapefile: ESRI shapefile or shapely polygon (optional)
        Shapefile or polygon, in same CRS as reach_data,
        defining areal extent (perimeter) of SFR network.
    x_column_in_data : str (optional)
        Column in data with site x-coordinates (in same CRS as SFR network).
    y_column_in_data : str (optional)
        Column in data with site y-coordinates (in same CRS as SFR network).
    reach_id_col: str
        Column with unique number for each stream line-arc. default "rno"
    site_number_col : str
        Name of column in site_data with a number identifying each
        site to be located. default "site_no"
    keep_columns: list of strings
        List of columns in site_data to retain in the
        returned locs DataFrame.
    perimeter_buffer : scalar
        Exclude flows within this distance of perimeter defined
        by active_area_shapefile. For example, a value of 1000 would
        mean that sites must be at least 1 km inside of the active area perimeter to
        be included.
    distance_threshold : scalar
        Only consider sites within this distance of a stream line-arc.


    Returns
    -------
    locs : DataFrame

    """
    sfrproj4 = None
    locsproj4 = None
    # read in sfr lines
    if not isinstance(reach_data, pd.DataFrame):
        sfrlines = shp2df(reach_data)
        sfrproj4 = get_proj_str(reach_data)
    elif isinstance(reach_data, pd.DataFrame):
        sfrlines = reach_data.copy()
    else:
        raise TypeError(
            'Datatype for reach_data not understood: {}'.format(reach_data))
    sfrlines.index = sfrlines[reach_id_col]

    # sites to locate
    if not isinstance(site_data, pd.DataFrame):
        locs = shp2df(site_data)
        if isinstance(site_data, list):
            locsproj4 = get_proj_str(site_data[0])
        else:
            locsproj4 = get_proj_str(site_data)
        locs['site_no'] = locs[site_number_col]  # str_ids(locs.site_no)
    elif isinstance(site_data, pd.DataFrame):
        locs = site_data.copy()
    else:
        raise TypeError(
            'Datatype for site_data not understood: {}'.format(site_data))

    # reproject if crs are available
    if locsproj4 is not None and sfrproj4 is not None:
        locs['geometry'] = project(locs.geometry.values, locsproj4, sfrproj4)

    # get the x and y coordinates
    if x_column_in_data is not None and y_column_in_data is not None:
        x = locs[x_column_in_data]
        y = locs[y_column_in_data]
    else:
        x = [p.x for p in locs.geometry]
        y = [p.y for p in locs.geometry]

    ids, distances = get_closest_reach(x, y, sfrlines, rno_column=reach_id_col)
    reach_id_col = reach_id_col.lower()
    locs[reach_id_col] = ids
    locs['distance'] = distances
    if 'iseg' in sfrlines.columns:
        locs['segment'] = sfrlines.loc[ids, 'iseg'].values
        locs['reach'] = sfrlines.loc[ids, 'ireach'].values
    locs = locs.loc[locs.distance <= distance_threshold]

    # cull observations at or outside of model perimeter
    # to only those along model perimeter
    if active_area_shapefile is not None:
        active_area = active_area_shapefile
        if not isinstance(active_area_shapefile, Polygon):
            active_area = shp2df(active_area_shapefile).geometry[0]
        perimeter = active_area.exterior.buffer(perimeter_buffer)
        perimeter_inside_buffer = Polygon(perimeter.interiors[0])

        keep = []
        for rn in locs[reach_id_col]:
            geom = sfrlines.loc[rn, 'geometry']
            keep.append(geom.within(perimeter_inside_buffer))
    else:
        keep = slice(None)

    if keep_columns is None:
        keep_columns = locs.columns.tolist()
    for c in [reach_id_col, 'segment', 'reach', 'geometry']:
        if c not in keep_columns and c in locs.columns:
            keep_columns.append(c)

    locs = locs.loc[keep, keep_columns]
    return locs
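# Hedged usage sketch (placeholder shapefile paths):
#
#     locs = locate_sites('gage_sites.shp', 'sfrlines.shp',
#                         active_area_shapefile='active_area.shp',
#                         reach_id_col='rno',
#                         site_number_col='site_no',
#                         distance_threshold=250)
#
# locs includes the closest reach number, segment/reach, and the distance
# from each site to that reach.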
Example #24
def read_wdnr_monthly_water_use(wu_file, wu_points, model,
                                active_area=None,
                                drop_ids=None,
                                minimum_layer_thickness=2
                                ):
    """Read water use data from a master file generated from
    WDNR_wu_data.ipynb. Cull data to area of model. Reshape
    to one month-year-site value per row.

    Parameters
    ----------
    wu_file : csv file
        Water use data output from the WDNR_wu_data.ipynb.
    wu_points : point shapefile
        Water use locations, generated in the WDNR_wu_data.ipynb
        Must be in same CRS as sr.
    model : flopy.modflow.Modflow instance
        Must have a valid attached .sr attribute defining the model grid.
        Only wells within the bounds of the sr will be retained.
        Sr is also used for row/column lookup.
        Must be in same CRS as wu_points.
    active_area : str (shapefile path) or shapely.geometry.Polygon
        Polygon denoting active area of the model. If specified,
        wells are culled to this area instead of the model bounding box.
        (default None)
    minimum_layer_thickness : scalar
        Minimum layer thickness to have pumping.

    Returns
    -------
    monthly_data : DataFrame

    """
    col_fmt = '{}_wdrl_gpm_amt'
    data_renames = {'site_seq_no': 'site_no',
                    'wdrl_year': 'year'}
    df = pd.read_csv(wu_file)
    drop_cols = [c for c in df.columns if 'unnamed' in c.lower()]
    drop_cols += ['objectid']
    df.drop(drop_cols, axis=1, inplace=True, errors='ignore')
    df.rename(columns=data_renames, inplace=True)
    if drop_ids is not None:
        df = df.loc[~df.site_no.isin(drop_ids)].copy()

    # implement automatic reprojection in gis-utils
    # maintaining backwards compatibility
    kwargs = {'dest_crs': model.modelgrid.crs}
    kwargs = get_input_arguments(kwargs, shp2df)
    locs = shp2df(wu_points, **kwargs)
    site_seq_col = [c for c in locs if 'site_se' in c.lower()]
    locs_renames = {c: 'site_no' for c in site_seq_col}
    locs.rename(columns=locs_renames, inplace=True)
    if drop_ids is not None:
        locs = locs.loc[~locs.site_no.isin(drop_ids)].copy()

    if active_area is None:
        # cull the data to the model bounds
        features = model.modelgrid.bbox
        txt = "No wells are inside the model bounds of {}"\
            .format(model.modelgrid.extent)
    elif isinstance(active_area, str):
        # implement automatic reprojection in gis-utils
        # maintaining backwards compatibility
        kwargs = {'dest_crs': model.modelgrid.crs}
        kwargs = get_input_arguments(kwargs, shp2df)
        features = shp2df(active_area, **kwargs).geometry.tolist()
        if len(features) > 1:
            features = MultiPolygon(features)
        else:
            features = Polygon(features[0])
        txt = "No wells are inside the area of {}"\
            .format(active_area)
    elif isinstance(active_area, Polygon):
        features = active_area
        txt = "No wells are inside the supplied active area polygon"

    within = [g.within(features) for g in locs.geometry]
    assert len(within) > 0, txt
    locs = locs.loc[within].copy()
    if len(locs) == 0:
        print('No wells within model area:\n{}\n{}'.format(wu_file, wu_points))
        return None, None
    df = df.loc[df.site_no.isin(locs.site_no)]
    df.sort_values(by=['site_no', 'year'], inplace=True)

    # create separate dataframe with well info
    well_info = df[['site_no',
                    'well_radius_mm',
                    'borehole_radius_mm',
                    'well_depth_m',
                    'elev_open_int_top_m',
                    'elev_open_int_bot_m',
                    'screen_length_m',
                    'screen_midpoint_elev_m']].copy()
    # groupby site number to cull duplicate information
    well_info = well_info.groupby('site_no').first()
    well_info['site_no'] = well_info.index

    # add top elevation, screen midpoint elev, row, column and layer
    points = dict(zip(locs['site_no'], locs.geometry))
    well_info['x'] = [points[sn].x for sn in well_info.site_no]
    well_info['y'] = [points[sn].y for sn in well_info.site_no]

    # have to do a loop because modelgrid.rasterize currently only works with scalars
    print('intersecting wells with model grid...')
    t0 = time.time()
    #i, j = [], []
    #for x, y in zip(well_info.x.values, well_info.y.values):
    #    iy, jx = model.modelgrid.rasterize(x, y)
    #    i.append(iy)
    #    j.append(jx)
    i, j = get_ij(model.modelgrid, well_info.x.values, well_info.y.values)
    print("took {:.2f}s\n".format(time.time() - t0))

    top = model.dis.top.array
    botm = model.dis.botm.array
    thickness = get_layer_thicknesses(top, botm)
    well_info['i'] = i
    well_info['j'] = j
    well_info['elv_m'] = top[i, j]
    well_info['elv_top_m'] = well_info.elev_open_int_top_m
    well_info['elv_botm_m'] = well_info.elev_open_int_bot_m
    well_info['elv_mdpt_m'] = well_info.screen_midpoint_elev_m
    well_info['k'] = get_layer(botm, i, j, elev=well_info['elv_mdpt_m'].values)
    well_info['laythick'] = thickness[well_info.k.values, i, j]
    well_info['ktop'] = get_layer(botm, i, j, elev=well_info['elv_top_m'].values)
    well_info['kbotm'] = get_layer(botm, i, j, elev=well_info['elv_botm_m'].values)

    # for wells in a layer below minimum thickness
    # move to layer with screen top, then screen botm,
    # put remainder in layer 1 and hope for the best
    well_info = wells.assign_layers_from_screen_top_botm(well_info, model,
                                       flux_col='q',
                                       screen_top_col='elv_top_m',
                                       screen_botm_col='elv_botm_m',
                                       across_layers=False,
                                       distribute_by='transmissivity',
                                       minimum_layer_thickness=2.)
    #isthin = well_info.laythick < minimum_layer_thickness
    #well_info.loc[isthin, 'k'] = well_info.loc[isthin, 'ktop'].values
    #well_info.loc[isthin, 'laythick'] = model.dis.thickness.array[well_info.k[isthin].values,
    #                                                              well_info.i[isthin].values,
    #                                                              well_info.j[isthin].values]
    #isthin = well_info.laythick < minimum_layer_thickness
    #well_info.loc[isthin, 'k'] = well_info.loc[isthin, 'kbotm'].values
    #well_info.loc[isthin, 'laythick'] = model.dis.thickness.array[well_info.k[isthin].values,
    #                                                              well_info.i[isthin].values,
    #                                                              well_info.j[isthin].values]
    #isthin = well_info.laythick < minimum_layer_thickness
    #well_info.loc[isthin, 'k'] = 1
    #well_info.loc[isthin, 'laythick'] = model.dis.thickness.array[well_info.k[isthin].values,
    #                                                              well_info.i[isthin].values,
    #                                                              well_info.j[isthin].values]
    isthin = well_info.laythick < minimum_layer_thickness
    assert not np.any(isthin)

    # make a datetime column
    monthlyQ_cols = [col_fmt.format(calendar.month_abbr[i]).lower()
                     for i in range(1, 13)]
    monthly_data = df[['site_no', 'year'] + monthlyQ_cols]
    monthly_data.columns = ['site_no', 'year'] + np.arange(1, 13).tolist()

    # stack the data
    # so that each row is a site number, year, month
    # reset the index to move multi-index levels back out to columns
    stacked = monthly_data.set_index(['site_no', 'year']).stack().reset_index()
    stacked.columns = ['site_no', 'year', 'month', 'gallons']
    stacked['datetime'] = pd.to_datetime(['{}-{:02d}'.format(y, m)
                                          for y, m in zip(stacked.year, stacked.month)])
    monthly_data = stacked
    return well_info, monthly_data
Example #25
def get_flowline_routing(plusflow_file, dest_routing_file):
    if not os.path.exists(dest_routing_file):
        df = gisutils.shp2df(plusflow_file)
        routing = df[['FROMCOMID', 'TOCOMID']]
        routing.to_csv(dest_routing_file, index=False)
Example #26
def get_inflow_locations_from_parent_model(parent_reach_data,
                                           inset_reach_data,
                                           inset_grid,
                                           active_area=None):
    """Get places in an inset model SFR network where the parent SFR network crosses
    the inset model boundary, using common line ID numbers from parent and inset reach datasets.
    MF2005 or MF6 supported; if either dataset contains only reach numbers (is MODFLOW-6),
    the reach numbers are used as segment numbers, with each segment only having one reach.

    Parameters
    ----------
    parent_reach_data : str (filepath) or DataFrame
        SFR reach data for parent model. Must include columns:
        line_id : int; unique identifier for hydrography line that each reach is based on
        rno : int; unique identifier for each reach. Optional if iseg and ireach columns are included.
        iseg : int; unique identifier for each segment. Optional if rno is included.
        ireach : int; unique identifier for each reach. Optional if rno is included.
        geometry : shapely.geometry object representing location of each reach
    inset_reach_data : str (filepath) or DataFrame
        SFR reach data for inset model. Same columns as parent_reach_data,
        except a geometry column isn't needed. line_id values must correspond to
        same source hydrography as those in parent_reach_data.
    inset_grid : flopy.discretization.StructuredGrid instance describing model grid
        Must be in same coordinate system as geometries in parent_reach_data.
        Required only if active_area is None.
    active_area : shapely.geometry.Polygon object
        Describes the area of the inset model where SFR is applied. Used to find
        inset reaches from parent model. Must be in same coordinate system as
        geometries in parent_reach_data. Required only if inset_grid is None.

    Returns
    -------
    locations : DataFrame
        Columns:
        parent_segment : parent model segment
        parent_reach : parent model reach
        parent_rno : parent model reach number
        line_id : unique identifier for hydrography line that each reach is based on
    """

    # spatial reference instances defining parent and inset grids
    if isinstance(inset_grid, str):
        grid = load_modelgrid(inset_grid)
    elif isinstance(inset_grid, flopy.discretization.grid.Grid):
        grid = inset_grid
    else:
        raise ValueError('Unrecognized input for inset_grid')

    if active_area is None:
        l, r, b, t = grid.extent
        active_area = box(l, b, r, t)

    # parent and inset reach data
    if isinstance(parent_reach_data, str):
        prd = shp2df(parent_reach_data)
    elif isinstance(parent_reach_data, pd.DataFrame):
        prd = parent_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for parent_reach_data')
    if 'rno' in prd.columns and 'iseg' not in prd.columns:
        prd['iseg'] = prd['rno']
        prd['ireach'] = 1
    mustinclude_cols = {'line_id', 'rno', 'iseg', 'ireach', 'geometry'}
    assert len(mustinclude_cols.intersection(
        prd.columns)) == len(mustinclude_cols)

    if isinstance(inset_reach_data, str):
        if inset_reach_data.endswith('.shp'):
            ird = shp2df(inset_reach_data)
        else:
            ird = pd.read_csv(inset_reach_data)
    elif isinstance(inset_reach_data, pd.DataFrame):
        ird = inset_reach_data.copy()
    else:
        raise ValueError('Unrecognized input for inset_reach_data')
    if 'rno' in ird.columns and 'iseg' not in ird.columns:
        ird['iseg'] = ird['rno']
        ird['ireach'] = 1
    mustinclude_cols = {'line_id', 'rno', 'iseg', 'ireach'}
    assert len(mustinclude_cols.intersection(
        ird.columns)) == len(mustinclude_cols)

    graph = make_graph(ird.rno.values, ird.outreach.values, one_to_many=False)

    # cull parent reach data to only lines that cross or are just upstream of inset boundary
    buffered = active_area.buffer(5000, cap_style=2)
    close = [g.intersects(buffered) for g in prd.geometry]
    prd = prd.loc[close]
    prd.index = prd.rno
    boundary = active_area.exterior
    inset_line_id_connections = {}  # parent rno: inset line_id
    for i, r in prd.iterrows():
        if r.outreach not in prd.index:
            continue
        downstream_line = prd.loc[r.outreach, 'geometry']
        intersects = r.geometry.intersects(boundary)
        intersects_downstream = downstream_line.within(active_area)
        in_inset_model = r.geometry.within(active_area)
        if intersects_downstream:
            if intersects:
                # lines that cross the boundary route to
                # their counterparts in the inset model
                inset_line_id_connections[r.rno] = r.line_id
            elif not in_inset_model:
                # lines that route to a line within the inset model
                # route to that line's inset counterpart
                inset_line_id_connections[r.rno] = prd.loc[r.outreach,
                                                           'line_id']

    prd = prd.loc[prd.rno.isin(inset_line_id_connections.keys())]

    # parent rno lookup
    parent_rno_lookup = {v: k for k, v in inset_line_id_connections.items()}

    # inlet reaches in inset model
    ird = ird.loc[ird.ireach == 1]
    ird = ird.loc[ird.line_id.isin(inset_line_id_connections.values())]

    # for each reach in ird (potential inset inlets)
    # check that there isn't another inlet downstream
    drop_reaches = []
    for i, r in ird.iterrows():
        path = find_path(graph, r.rno)
        another_inlet_downstream = len(
            set(path[1:]).intersection(set(ird.rno))) > 0
        if another_inlet_downstream:
            drop_reaches.append(r.rno)

    ird = ird.loc[~ird.rno.isin(drop_reaches)]
    # look up the parent (iseg, ireach) for each parent reach number
    iseg_ireach = zip(prd.iseg, prd.ireach)
    parent_outlet_iseg_ireach = dict(zip(prd.rno, iseg_ireach))

    df = ird[['line_id', 'name', 'rno', 'iseg', 'ireach']].copy()
    df['parent_rno'] = [parent_rno_lookup[lid] for lid in df['line_id']]
    df['parent_iseg'] = [
        parent_outlet_iseg_ireach[rno][0] for rno in df['parent_rno']
    ]
    df['parent_ireach'] = [
        parent_outlet_iseg_ireach[rno][1] for rno in df['parent_rno']
    ]
    return df.reset_index(drop=True)
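
# The inlet-culling loop above walks the routing graph built by make_graph
# and traversed by find_path. A minimal sketch of that idea, assuming
# one-to-one routing keyed by rno with 0 denoting an outlet (the *_sketch
# names are hypothetical stand-ins, not the actual helper implementations):
def _make_graph_sketch(rnos, outreaches):
    """Map each reach number to the rno of its downstream reach."""
    return dict(zip(rnos, outreaches))

def _find_path_sketch(graph, start):
    """Walk downstream from start, collecting reach numbers in order."""
    path = [start]
    next_rno = graph.get(start, 0)
    while next_rno != 0 and next_rno not in path:  # stop at an outlet or cycle
        path.append(next_rno)
        next_rno = graph.get(next_rno, 0)
    return path

# e.g. for reaches routing 1 -> 2 -> 3 -> outlet:
assert _find_path_sketch({1: 2, 2: 3, 3: 0}, 1) == [1, 2, 3]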
Example #27
def rasterize(feature, grid, id_column=None,
              include_ids=None,
              epsg=None,
              proj4=None, dtype=np.float32):
    """Rasterize a feature onto the model grid, using
    the rasterio.features.rasterize method. Features are intersected
    if they contain the cell center.

    Parameters
    ----------
    feature : str (shapefile path), list of shapely objects,
              or dataframe with geometry column
    id_column : str
        Column with unique integer identifying each feature; values
        from this column will be assigned to the output raster.
    grid : grid.StructuredGrid instance
    epsg : int
        EPSG code for the feature coordinate reference system. Optional,
        but an EPSG code or PROJ4 string must be supplied if the features
        aren't read from a shapefile and aren't in the same CRS as the model.
    proj4 : str
        Proj4 string for feature CRS (optional)
    dtype : dtype
        Datatype for the output array

    Returns
    -------
    2D numpy array with intersected values

    """
    try:
        from rasterio import Affine, features
    except ImportError:
        print('This method requires rasterio.')
        return

    trans = grid.transform

    if isinstance(feature, str):
        proj4 = get_proj_str(feature)
        df = shp2df(feature)
    elif isinstance(feature, pd.DataFrame):
        df = feature.copy()
    elif isinstance(feature, collections.abc.Iterable):
        # list of shapefiles
        if isinstance(feature[0], str):
            proj4 = get_proj_str(feature[0])
            df = shp2df(feature)
        else:
            df = pd.DataFrame({'geometry': feature})
    else:
        # single shapely geometry
        df = pd.DataFrame({'geometry': [feature]})

    # handle shapefiles in different CRS than model grid
    reproject = False
    if proj4 is not None:
        if proj4 != grid.proj_str:
            reproject = True
    elif epsg is not None and grid.epsg is not None:
        if epsg != grid.epsg:
            reproject = True
            from fiona.crs import to_string, from_epsg
            proj4 = to_string(from_epsg(epsg))
    if reproject:
        df['geometry'] = project(df.geometry.values, proj4, grid.proj_str)

    # subset to include_ids
    if id_column is not None and include_ids is not None:
        df = df.loc[df[id_column].isin(include_ids)].copy()

    # create list of GeoJSON features, with unique value for each feature
    if id_column is None:
        numbers = range(1, len(df)+1)
    # if IDs are strings, get a number for each one
    # pd.DataFrame.unique() generally preserves order
    elif df[id_column].dtype == object:
        unique_values = df[id_column].unique()
        values = dict(zip(unique_values, range(1, len(unique_values) + 1)))
        numbers = [values[n] for n in df[id_column]]
    else:
        numbers = df[id_column].tolist()

    geoms = list(zip(df.geometry, numbers))
    result = features.rasterize(geoms,
                                out_shape=(grid.nrow, grid.ncol),
                                transform=trans)
    assert result.sum(axis=(0, 1)) != 0, "Nothing was intersected!"
    return result.astype(dtype)
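
# A self-contained sketch of the burn-in step at the heart of rasterize(),
# calling rasterio.features.rasterize directly; the grid dimensions,
# spacing, and feature below are made-up values for illustration:
from rasterio import Affine, features
from shapely.geometry import box

nrow, ncol, dxy = 10, 10, 100.  # hypothetical 10 x 10 grid of 100 m cells
xul, yul = 0., 1000.            # upper-left corner of the grid
trans = Affine(dxy, 0., xul,
               0., -dxy, yul)

poly = box(150., 550., 450., 850.)  # feature to burn in with the value 7
result = features.rasterize([(poly, 7)],
                            out_shape=(nrow, ncol),
                            transform=trans)
assert result.max() == 7  # cells whose centers fall inside poly get 7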
Example #28
def load_nhdplus_v2(NHDPlus_paths=None,
                    NHDFlowlines=None,
                    PlusFlowlineVAA=None,
                    PlusFlow=None,
                    elevslope=None,
                    filter=None,
                    epsg=None,
                    proj_str=None,
                    prjfile=None):
    """
    Parameters
    ----------
    NHDPlus_paths : str or list of strings.
        Path(s) to NHDPlus v2 data directories. If provided, the NHDFlowlines,
        PlusFlowlineVAA, PlusFlow and elevslope file paths are located
        automatically (via get_nhdplus_v2_filepaths).
    NHDFlowlines : str or list of strings.
        Shapefile or list of NHDFlowline shapefiles containing
        feature geometries (line arcs) for stream network. Must contain
        the following attribute fields:
        COMID : common identifier number
    PlusFlowlineVAA : str or list of strings.
        DBF file or list of DBF files with NHDPlus attribute information.
        Must contain the following attribute fields:
        COMID : common identifier number
    PlusFlow : str or list of strings.
        DBF file or list of DBF files with NHDPlus routing information.
        Must contain the following attribute fields:
        COMID : common identifier number
    elevslope : str or list of strings.
        DBF file or list of DBF files with end elevations for each
        line arc in NHDFlowlines. Must contain the following attribute fields:
        COMID : common identifier number
    filter : tuple, str (filepath), shapely Polygon or GeoJSON polygon
        Bounding box (tuple) or polygon feature of model stream network area.
        Shapefiles will be reprojected to the CRS of the flowlines; all other
        feature types must be supplied in same CRS as flowlines.
    """
    print("\nloading NHDPlus v2 hydrography data...")
    ta = time.time()

    if NHDPlus_paths is not None:
        NHDFlowlines, PlusFlowlineVAA, PlusFlow, elevslope = \
            get_nhdplus_v2_filepaths(NHDPlus_paths)

    # get crs information from flowline projection file
    if prjfile is None:
        prjfile = get_prj_file(NHDPlus_paths, NHDFlowlines)
    nhdcrs = crs(epsg=epsg, proj_str=proj_str, prjfile=prjfile)

    # ensure that filter bbox is in same crs as flowlines
    # get filter from a shapefile, shapely Polygon or GeoJSON polygon
    if filter is not None and not isinstance(filter, tuple):
        filter = get_bbox(filter, dest_crs=nhdcrs)

    fl_cols = [
        'COMID',  # 'FCODE', 'FDATE', 'FLOWDIR',
        # 'FTYPE', 'GNIS_ID',
        'GNIS_NAME',
        'LENGTHKM',
        # 'REACHCODE', 'RESOLUTION', 'WBAREACOMI',
        'geometry'
    ]
    pfvaa_cols = [
        'ArbolateSu',  # 'Hydroseq', 'DnHydroseq',
        'StreamOrde',  # 'LevelPathI',
    ]
    elevs_cols = ['MAXELEVSMO', 'MINELEVSMO']

    # read flowlines and attribute tables into dataframes
    fl = read_nhdplus(NHDFlowlines, bbox_filter=filter)
    pfvaa = read_nhdplus(PlusFlowlineVAA)
    pf = shp2df(PlusFlow)
    elevs = read_nhdplus(elevslope)

    # join flowline and attribute dataframes
    df = fl[fl_cols].copy()
    df = df.join(pfvaa[pfvaa_cols], how='inner')
    df = df.join(elevs[elevs_cols], how='inner')
    print("\nload finished in {:.2f}s".format(time.time() - ta))

    # add routing information from the PlusFlow table
    df['tocomid'] = get_tocomids(pf, df.index.tolist())
    return df
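
# Hypothetical call to load_nhdplus_v2 (the path and bounding box are
# placeholders); with NHDPlus_paths given, the individual NHDFlowlines,
# PlusFlowlineVAA, PlusFlow and elevslope arguments are located automatically:
flowlines = load_nhdplus_v2(
    NHDPlus_paths='/data/NHDPlusMS/NHDPlus08',
    filter=(515000., 1185000., 535000., 1205000.),  # bbox in the flowline CRS
)
print(flowlines[['GNIS_NAME', 'LENGTHKM', 'tocomid']].head())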
Example #29
def rasterize(feature,
              grid,
              id_column=None,
              include_ids=None,
              crs=None,
              epsg=None,
              proj4=None,
              dtype=np.float32,
              **kwargs):
    """Rasterize a feature onto the model grid, using
    the rasterio.features.rasterize method. Features are intersected
    if they contain the cell center.

    Parameters
    ----------
    feature : str (shapefile path), list of shapely objects,
              or dataframe with geometry column
    id_column : str
        Column with unique integer identifying each feature; values
        from this column will be assigned to the output raster.
    grid : grid.StructuredGrid instance
    crs : obj
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to :meth:`pyproj.crs.CRS.from_user_input`
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class

    dtype : dtype
        Datatype for the output array
    **kwargs : keyword arguments to rasterio.features.rasterize()
        https://rasterio.readthedocs.io/en/stable/api/rasterio.features.html

    Returns
    -------
    2D numpy array with intersected values

    """
    try:
        from rasterio import Affine, features
    except ImportError:
        print('This method requires rasterio.')
        return

    if epsg is not None:
        warnings.warn(
            "The epsg argument is deprecated. Use crs instead, "
            "which requires gisutils >= 0.2", DeprecationWarning)
    if proj4 is not None:
        warnings.warn(
            "The proj4 argument is deprecated. Use crs instead, "
            "which requires gisutils >= 0.2", DeprecationWarning)
    if crs is not None:
        if version.parse(gisutils.__version__) < version.parse('0.2.0'):
            raise ValueError("The crs argument requires gisutils >= 0.2")
        from gisutils import get_authority_crs
        crs = get_authority_crs(crs)

    trans = grid.transform

    # keep the shp2df arguments separate from the **kwargs
    # destined for rasterio.features.rasterize
    shp2df_kwargs = {}
    if isinstance(feature, str):
        proj4 = get_proj_str(feature)
        shp2df_kwargs = {'dest_crs': grid.crs}
        shp2df_kwargs = get_input_arguments(shp2df_kwargs, shp2df)
        df = shp2df(feature, **shp2df_kwargs)
    elif isinstance(feature, pd.DataFrame):
        df = feature.copy()
    elif isinstance(feature, collections.abc.Iterable):
        # list of shapefiles
        if isinstance(feature[0], str):
            proj4 = get_proj_str(feature[0])
            shp2df_kwargs = {'dest_crs': grid.crs}
            shp2df_kwargs = get_input_arguments(shp2df_kwargs, shp2df)
            df = shp2df(feature, **shp2df_kwargs)
        else:
            df = pd.DataFrame({'geometry': feature})
    else:
        # single shapely geometry
        df = pd.DataFrame({'geometry': [feature]})

    # handle shapefiles in different CRS than model grid
    if 'dest_crs' not in shp2df_kwargs:
        reproject = False
        # todo: consolidate rasterize reprojection to just use crs
        if crs is not None:
            if crs != grid.crs:
                df['geometry'] = project(df.geometry.values, crs, grid.crs)
        if proj4 is not None:
            if proj4 != grid.proj_str:
                reproject = True
        elif epsg is not None and grid.epsg is not None:
            if epsg != grid.epsg:
                reproject = True
                from fiona.crs import from_epsg, to_string
                proj4 = to_string(from_epsg(epsg))
        if reproject:
            df['geometry'] = project(df.geometry.values, proj4, grid.proj_str)

    # subset to include_ids
    if id_column is not None and include_ids is not None:
        df = df.loc[df[id_column].isin(include_ids)].copy()

    # create list of GeoJSON features, with unique value for each feature
    if id_column is None:
        numbers = range(1, len(df) + 1)
    # if IDs are strings, get a number for each one
    # pd.DataFrame.unique() generally preserves order
    elif df[id_column].dtype == object:
        unique_values = df[id_column].unique()
        values = dict(zip(unique_values, range(1, len(unique_values) + 1)))
        numbers = [values[n] for n in df[id_column]]
    else:
        numbers = df[id_column].tolist()

    geoms = list(zip(df.geometry, numbers))
    result = features.rasterize(geoms,
                                out_shape=(grid.nrow, grid.ncol),
                                transform=trans,
                                **kwargs)
    assert result.sum(axis=(0, 1)) != 0, "Nothing was intersected!"
    return result.astype(dtype)
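
# Hypothetical usage of the crs-aware rasterize above, with a DataFrame of
# lake features in geographic coordinates (lakes_df, modelgrid, the id values
# and the EPSG code are placeholders; extra keyword arguments such as
# all_touched pass through to rasterio.features.rasterize):
lakarr = rasterize(lakes_df, modelgrid,
                   id_column='hydroid',
                   include_ids=[120018163, 120018164],
                   crs=4269,           # features are in NAD83 geographic coords
                   dtype=np.int32,
                   all_touched=False)  # rasterio option, passed via **kwargs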
Example #30
def setup_structured_grid(xoff=None,
                          yoff=None,
                          xul=None,
                          yul=None,
                          nrow=None,
                          ncol=None,
                          nlay=None,
                          dxy=None,
                          delr=None,
                          delc=None,
                          top=None,
                          botm=None,
                          rotation=0.,
                          parent_model=None,
                          snap_to_NHG=False,
                          features=None,
                          features_shapefile=None,
                          id_column=None,
                          include_ids=None,
                          buffer=1000,
                          crs=None,
                          epsg=None,
                          model_length_units=None,
                          grid_file='grid.json',
                          bbox_shapefile=None,
                          **kwargs):
    """"""
    print('setting up model grid...')
    t0 = time.time()

    # conversions for model/parent model units to meters
    # set regular flag for handling delc/delr
    to_meters_inset = convert_length_units(model_length_units, 'meters')
    regular = True
    if dxy is not None:
        delr_m = np.round(dxy * to_meters_inset,
                          4)  # dxy is specified in model units
        delc_m = delr_m
    if delr is not None:
        delr_m = np.round(delr * to_meters_inset,
                          4)  # delr is specified in model units
        if not np.isscalar(delr_m):
            if len(set(delr_m)) == 1:
                delr_m = delr_m[0]
            else:
                regular = False
    if delc is not None:
        delc_m = np.round(delc * to_meters_inset,
                          4)  # delc is specified in model units
        if not np.isscalar(delc_m):
            if len(set(delc_m)) == 1:
                delc_m = delc_m[0]
            else:
                regular = False
    if parent_model is not None:
        to_meters_parent = convert_length_units(
            get_model_length_units(parent_model), 'meters')
        # parent model grid spacing in meters
        parent_delr_m = np.round(
            parent_model.dis.delr.array[0] * to_meters_parent, 4)
        if parent_delr_m % delr_m != 0:
            raise ValueError(
                'inset delr spacing of {} must be a factor of parent spacing of {}'
                .format(delr_m, parent_delr_m))
        parent_delc_m = np.round(
            parent_model.dis.delc.array[0] * to_meters_parent, 4)
        if parent_delc_m % delc_m != 0:
            raise ValueError(
                'inset delc spacing of {} must be a factor of parent spacing of {}'
                .format(delc_m, parent_delc_m))

    if epsg is not None:
        crs = pyproj.crs.CRS.from_epsg(epsg)
    elif crs is not None:
        from gisutils import get_authority_crs
        crs = get_authority_crs(crs)
    elif parent_model is not None:
        crs = parent_model.modelgrid.crs

    # option 1: make grid from xoff, yoff and specified dimensions
    if xoff is not None and yoff is not None:
        assert nrow is not None and ncol is not None, \
            "Need to specify nrow and ncol if specifying xoffset and yoffset."
        if regular:
            height_m = np.round(delc_m * nrow, 4)
            width_m = np.round(delr_m * ncol, 4)
        else:
            height_m = np.sum(delc_m)
            width_m = np.sum(delr_m)

        # optionally align the grid with the USGS National Hydrogeologic Grid (NHG);
        # grids snapping to the NHG must have spacings that are a factor of 1 km
        if snap_to_NHG:
            assert regular and np.allclose(1000 % delc_m, 0, atol=1e-4)
            x, y = get_point_on_national_hydrogeologic_grid(xoff,
                                                            yoff,
                                                            offset='edge',
                                                            op=np.floor)
            xoff = x
            yoff = y
            rotation = 0.

        # need to specify xul, yul in case snapping to parent
        # todo: allow snapping to parent grid on xoff, yoff
        if rotation != 0:
            raise NotImplementedError('Rotated grids not supported.')
        xul = xoff
        yul = yoff + height_m

    # option 2: make grid using buffered feature bounding box
    else:
        if features is None and features_shapefile is not None:
            # Make sure shapefile and bbox filter are in dest (model) CRS
            # TODO: CRS wrangling could be added to shp2df as a feature
            reproject_filter = False
            try:
                from gisutils import get_shapefile_crs
                features_crs = get_shapefile_crs(features_shapefile)
                if features_crs != crs:
                    reproject_filter = True
            except ImportError:
                features_crs = get_proj_str(features_shapefile)
                reproject_filter = True
            filter = None
            if parent_model is not None:
                if reproject_filter:
                    filter = project(parent_model.modelgrid.bbox,
                                     parent_model.modelgrid.crs,
                                     features_crs).bounds
                else:
                    filter = parent_model.modelgrid.bbox.bounds
            shp2df_kwargs = {'dest_crs': crs}
            shp2df_kwargs = get_input_arguments(shp2df_kwargs, shp2df)
            df = shp2df(features_shapefile, filter=filter, **shp2df_kwargs)

            # optionally subset shapefile data to specified features
            if id_column is not None and include_ids is not None:
                df = df.loc[df[id_column].isin(include_ids)]
            # use all features by default
            features = df.geometry.tolist()

        # convert multiple features to a MultiPolygon
        # (applies whether features were supplied directly or read from a shapefile)
        if isinstance(features, list):
            if len(features) > 1:
                features = MultiPolygon(features)
            else:
                features = features[0]

        # size the grid based on the bbox of the features
        x1, y1, x2, y2 = features.bounds
        L = buffer  # distance from the area of interest to the model boundary
        xul = x1 - L
        yul = y2 + L
        height_m = np.round(yul - (y1 - L), 4)  # initial height from buffer distance
        width_m = np.round((x2 + L) - xul, 4)
        rotation = 0.  # rotation not supported with this option

    # align model with parent grid if there is a parent model
    # (and not snapping to the national hydrogeologic grid)
    if parent_model is not None and not snap_to_NHG:

        # get location of coinciding cell in parent model for upper left
        pi, pj = parent_model.modelgrid.intersect(xul, yul)
        verts = np.array(parent_model.modelgrid.get_cell_vertices(pi, pj))
        xul, yul = verts[:, 0].min(), verts[:, 1].max()

        # adjust the dimensions to align remaining corners
        def roundup(number, increment):
            return int(np.ceil(number / increment) * increment)

        height = roundup(height_m, parent_delc_m)
        width = roundup(width_m, parent_delr_m)

        # update nrow, ncol after snapping to parent grid
        if regular:
            nrow = int(height / delc_m)  # height and width are in meters
            ncol = int(width / delr_m)

    # set the grid configuration dictionary
    # spacing is in meters (consistent with projected CRS)
    # (modelgrid object will be updated automatically from this dictionary)
    grid_cfg = {
        'nrow': int(nrow),
        'ncol': int(ncol),
        'nlay': nlay,
        'delr': delr_m,
        'delc': delc_m,
        'xoff': xoff,
        'yoff': yoff,
        'xul': xul,
        'yul': yul,
        'rotation': rotation,
        'lenuni': 2
    }
    if regular:
        grid_cfg['delr'] = np.ones(grid_cfg['ncol'],
                                   dtype=float) * grid_cfg['delr']
        grid_cfg['delc'] = np.ones(grid_cfg['nrow'],
                                   dtype=float) * grid_cfg['delc']
    grid_cfg['delr'] = grid_cfg['delr'].tolist()  # for serializing to json
    grid_cfg['delc'] = grid_cfg['delc'].tolist()

    # renames for flopy modelgrid
    renames = {'rotation': 'angrot'}
    for k, v in renames.items():
        if k in grid_cfg:
            grid_cfg[v] = grid_cfg.pop(k)

    # add an epsg code, or a wkt string if no epsg code is available
    if epsg is not None:
        grid_cfg['epsg'] = epsg
    elif crs is not None:
        if 'epsg' in crs.srs.lower():
            grid_cfg['epsg'] = int(crs.srs.split(':')[1])
        else:
            grid_cfg['wkt'] = crs.srs
    else:
        warnings.warn('No coordinate system reference provided for model grid! '
                      'Model input data may not be mapped correctly.')

    # set up the model grid instance
    grid_cfg['top'] = top
    grid_cfg['botm'] = botm
    grid_cfg.update(kwargs)  # update with any kwargs from function call
    kwargs = get_input_arguments(grid_cfg, MFsetupGrid)
    modelgrid = MFsetupGrid(**kwargs)
    modelgrid.cfg = grid_cfg

    # write grid info to json, and a shapefile of the bbox;
    # omit the top and botm arrays from the json representation of the grid
    # (which describes just the horizontal discretization)
    del grid_cfg['top']
    del grid_cfg['botm']

    fileio.dump(grid_file, grid_cfg)
    if bbox_shapefile is not None:
        write_bbox_shapefile(modelgrid, bbox_shapefile)
    print("finished in {:.2f}s\n".format(time.time() - t0))
    return modelgrid
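
# Hypothetical call to setup_structured_grid using option 1 (an origin
# offset plus grid dimensions); all values are placeholders:
modelgrid = setup_structured_grid(xoff=682688., yoff=5139052.,
                                  nrow=100, ncol=120, nlay=3,
                                  dxy=250.,  # uniform 250 m cell spacing
                                  rotation=0.,
                                  epsg=5070,
                                  model_length_units='meters',
                                  grid_file='grid.json',
                                  bbox_shapefile='grid_bbox.shp')
print(modelgrid.nrow, modelgrid.ncol, modelgrid.extent)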