Пример #1
0
def test_original_routing_attribute(nhdplus_dataframe, one_to_many, datapath):
    """Check that ``Lines._original_routing`` only holds scalar to-ids.

    When ``one_to_many`` is truthy, the first three flowlines are given a
    second (fake) downstream connection before the Lines instance is built,
    so that the input routing column is no longer to-one.
    """
    df = nhdplus_dataframe
    if one_to_many:
        # give the first three flowlines a fake distributary that points
        # to the first COMID in the table
        extra_toid = df.COMID.values[0]
        first_three = df.index.values[:3]
        diverted = [[toids[0], extra_toid]
                    for toids in df.loc[first_three, 'tocomid']]
        # remaining flowlines (from the fourth index label on) are unchanged
        untouched = df.loc[df.index.values[3]:, 'tocomid'].tolist()
        df['tocomid'] = diverted + untouched
        assert not is_to_one(df.tocomid)

    shapefile = (
        '{}/tylerforks/NHDPlus/NHDSnapshot/Hydrography/NHDFlowline.shp'.format(
            datapath))
    prjfile = get_prj_file(NHDFlowlines=[shapefile])

    # arbolate sums: km -> m
    df['asum2'] = df.ArbolateSu * 1000

    # comid end elevations: cm -> m (only for columns that are present)
    for source_col, target_col in (('MAXELEVSMO', 'elevup'),
                                   ('MINELEVSMO', 'elevdn')):
        if source_col in df.columns:
            df[target_col] = df[source_col] / 100.

    from_dataframe_kwargs = dict(id_column='COMID',
                                 routing_column='tocomid',
                                 name_column='GNIS_NAME',
                                 attr_length_units='meters',
                                 attr_height_units='meters',
                                 prjfile=prjfile)
    lines = sfrmaker.Lines.from_dataframe(df, **from_dataframe_kwargs)

    # the to-ids stored in the original routing dictionary must be scalars
    assert is_to_one(lines._original_routing)
Пример #2
0
 def routing(self):
     """Routing connections as a dictionary of ``{id: to_id}``.

     The cached dictionary is returned unless it has not been built yet
     or ``self._routing_changed()`` reports a change, in which case it is
     rebuilt from the ``id`` and ``toid`` columns of ``self.df``.
     """
     # cached result is still valid; nothing to rebuild
     if self._routing is not None and not self._routing_changed():
         return self._routing

     downstream = self.df.toid.values
     if len(downstream) <= 1:
         # a single line is treated as routing to an outlet (0)
         graph = {self.df.id.values[0]: 0}
     else:
         # True if all toids are scalar or length 1 lists
         # (many-to-one or one-to-one routing; no diversions)
         single_toid = is_to_one(downstream)
         if not single_toid:
             # wrap any scalars in lists and check again
             downstream = [[item] if np.isscalar(item) else item
                           for item in downstream]
             single_toid = is_to_one(downstream)
         # squeeze out any length 1 dimensions
         downstream = np.squeeze(list(downstream))
         graph = make_graph(self.df.id.values,
                            downstream,
                            one_to_many=not single_toid)
         if not single_toid:
             # reduce one-to-many connections to a single toid
             graph = pick_toids(graph, self.elevup)

     self._routing = graph
     return self._routing
Пример #3
0
def test_load_nhdplus_hr(neversink_lines_from_nhdplus_hr):
    """Check that the NHDPlus HR fixture is a Lines instance whose
    original routing is to-one."""
    hr_lines = neversink_lines_from_nhdplus_hr
    is_lines_instance = isinstance(hr_lines, sfrmaker.lines.Lines)
    assert is_lines_instance
    assert is_to_one(hr_lines._original_routing)
Пример #4
0
    def to_sfr(self,
               grid=None,
               active_area=None,
               isfr=None,
               model=None,
               model_length_units='undefined',
               model_time_units='days',
               minimum_reach_length=None,
               width_from_asum_a_param=0.1193,
               width_from_asum_b_param=0.5032,
               minimum_reach_width=1.,
               consolidate_conductance=False,
               one_reach_per_cell=False,
               add_outlets=None,
               package_name=None,
               **kwargs):
        """Create a streamflow routing dataset from the information
        in sfrmaker.lines class instance and a supplied sfrmaker.grid class instance.

        Parameters
        ----------
        grid : sfrmaker.grid or flopy.discretization.StructuredGrid
            Numerical model grid instance. Required unless an attached model
            has a valid modelgrid attribute.
        active_area : shapely Polygon, list of shapely Polygons, or shapefile path; optional
            Shapely Polygons must be in same CRS as input flowlines; shapefile
            features will be reprojected if their crs is different.
        isfr : ndarray, optional
            Numpy integer array of the same size as the model grid, designating area that will
            be populated with SFR reaches (0=no SFR; 1=SFR). An isfr array of shape
            nrow x ncol will be broadcast to all layers. Only required if a model is not
            supplied, or if SFR is only desired in a subset of active model cells.
            By default, None, in which case the model ibound or idomain array will be used.
        model : flopy.modflow.Modflow or flopy.mf6.ModflowGwf, optional
            Flopy model instance
        model_length_units : str; e.g. {'ft', 'feet', 'meters', etc.}, optional
            Length units of the model. While SFRmaker will try to read these
            from a supplied grid (first) and then a supplied model (second),
            it is good practice to specify them explicitly here.
        model_time_units : str; e.g. {'d', 'days'}, optional
            Time units for model. By default, days.
        minimum_reach_length : float, optional
            Minimum reach length to retain. Default is to compute
            an effective mean model cell length by taking the square root
            of the average cell area, and then set minimum_reach_length
            to 5% of effective mean cell length.
        width_from_asum_a_param : float, optional
            :math:`a` parameter used for estimating channel width from arbolate sum.
            Only needed if input flowlines are lacking width information.
            See :func:`~sfrmaker.utils.width_from_arbolate`. By default, 0.1193.
        width_from_asum_b_param : float, optional
            :math:`b` parameter used for estimating channel width from arbolate sum.
            Only needed if input flowlines are lacking width information.
            See :func:`~sfrmaker.utils.width_from_arbolate`. By default, 0.5032.
        minimum_reach_width : float, optional
            Minimum reach width to specify (in model units), if computing widths from
            arbolate sum values. (default = 1)
        consolidate_conductance : bool
            If True, total reach conductance in each cell is computed, and
            assigned to the most downstream reach via the hydraulic conductivity
            parameter.
        one_reach_per_cell : bool
            If True, streambed conductance in each reach is consolidated
            (consolidate_conductance = True), and additional reaches besides
            the most downstream reach are dropped.
        add_outlets : sequence of ints
            Option to add breaks in routing at specified line ids. For example
            if controlled flows out of a reservoir are specified as inflows
            to the SFR network, an outlet can be added above the dam to
            prevent double-counting of flow. A single int or str id is
            also accepted. By default, None
        package_name : str
            Base name for writing sfr output. By default, the model name
            if a model is supplied, otherwise 'model'.
        kwargs : keyword arguments to :class:`SFRData`

        Returns
        -------
        sfrdata : sfrmaker.SFRData instance

        Raises
        ------
        TypeError
            If no usable grid could be determined from ``grid`` or ``model``.
        AssertionError
            If the routing is still one-to-many, if a line is associated
            with more than one segment when interpolating supplied widths,
            or if the renumbered segment routing is circular.
        NotImplementedError
            If widths have to be estimated but the flowlines have no
            'asum2' column, or if no nonzero widths are available for
            the segment data.

        """
        print("\nSFRmaker version {}".format(sfrmaker.__version__))
        print("\nCreating sfr dataset...")
        totim = time.time()

        if flopy and active_area is None and isfr is None and model is not None:
            if model.version == 'mf6':
                isfr = np.sum(model.dis.idomain.array == 1, axis=0) > 0
            else:
                isfr = np.sum(model.bas6.ibound.array == 1, axis=0) > 0
        if flopy and isinstance(grid, flopy.discretization.StructuredGrid):
            print('\nCreating grid class instance from flopy Grid instance...')
            ta = time.time()
            grid = StructuredGrid.from_modelgrid(grid,
                                                 active_area=active_area,
                                                 isfr=isfr)
            print("grid class created in {:.2f}s\n".format(time.time() - ta))
        elif flopy and model is not None:
            grid = StructuredGrid.from_modelgrid(model.modelgrid,
                                                 active_area=active_area,
                                                 isfr=isfr)
        elif not isinstance(grid, sfrmaker.grid.Grid):
            raise TypeError('Unrecognized input for grid: {}'.format(grid))

        # print grid information to screen
        print(grid)

        # print model information to screen
        print(model)

        model_length_units = get_length_units(model_length_units, grid, model)
        mult = convert_length_units(self.attr_length_units, model_length_units)
        mult_h = convert_length_units(self.attr_height_units,
                                      model_length_units)
        gis_mult = convert_length_units(self.geometry_length_units,
                                        model_length_units)

        # to_crs the flowlines if they aren't in same CRS as grid
        if self.crs != grid.crs:
            self.to_crs(grid.crs)
        # cull the flowlines to the active part of the model grid
        if grid.active_area is not None:
            self.cull(grid.active_area, inplace=True, simplify=True, tol=2000)
        elif grid._bounds is not None:  # cull to grid bounding box if already computed
            self.cull(box(*grid._bounds), inplace=True)
        if package_name is None:
            if model is not None:
                package_name = model.name
            else:
                package_name = 'model'

        # convert routing connections (toid column) from lists (one-to-many)
        # to ints (one-to-one or many-to-one)
        routing = self.routing.copy()

        # one to many routing is not supported
        to_one = is_to_one(routing.values())
        assert to_one, "routing is still one-to-many"
        valid_ids = routing.keys()
        # df.toid column is basis for routing attributes
        # all paths terminating in invalid toids (outside of the model)
        # will be none; set invalid toids = 0
        # TODO: write a test for pick_toids if some IDs route to more than one connection
        # check the to-ids (dictionary values); iterating over items()
        # would only yield (key, value) tuples, which are never lists
        assert not np.any([isinstance(r, list) for r in routing.values()
                           ]), "one to many routing not supported"
        self.df.toid = [
            routing[i] if routing[i] in valid_ids else 0
            for i in self.df.id.tolist()
        ]

        # intersect lines with model grid to get preliminary reaches
        rd = self.intersect(grid)

        # length of intersected line fragments (in model units)
        rd['rchlen'] = np.array([g.length for g in rd.geometry]) * gis_mult

        # estimate widths if they aren't supplied
        if self.df.width1.sum() == 0:
            print("Computing widths...")

            # Computing arbolate sums from the LineString lengths is not
            # enabled (the length unit conversions need to be checked first),
            # so arbolate sums must be supplied in an 'asum2' column.
            if 'asum2' not in self.df.columns:
                raise NotImplementedError(
                    'Check length unit conversions before using this option.')
            asums = dict(
                zip(
                    self.df.id,
                    self.df.asum2 * convert_length_units(
                        self.attr_length_units, 'meters')))

            # populate starting asums (asum1)
            routing_r = {v: k for k, v in self.routing.items() if v != 0}
            self.df['asum1'] = [
                asums.get(routing_r.get(lid, 0), 0)
                for lid in self.df.id.values
            ]
            asum1s = dict(zip(self.df.id, self.df.asum1))

            # compute arbolate sum at reach midpoints (in meters)
            lengths = rd[['line_id', 'ireach', 'geometry']].copy()
            lengths['rchlen'] = np.array(
                [g.length for g in lengths.geometry]) * convert_length_units(
                    self.geometry_length_units, 'meters')
            groups = lengths.groupby(
                'line_id')  # fragments grouped by parent line

            reach_cumsums = []
            ordered_ids = rd.line_id.loc[rd.line_id.diff() != 0].values
            for lid in ordered_ids:
                grp = groups.get_group(lid).sort_values(by='ireach')
                # distance from the start of the line to each reach midpoint
                dist = np.cumsum(grp.rchlen.values) - 0.5 * grp.rchlen.values
                reach_cumsums.append(dist)
            reach_cumsums = np.concatenate(reach_cumsums)
            segment_asums = [asum1s[lid] for lid in lengths.line_id]
            reach_asums = segment_asums + reach_cumsums
            # maintain positive asums; lengths in NHD often aren't exactly equal to feature lengths
            # reach_asums[reach_asums < 0.] = 0
            rd['asum'] = reach_asums
            width = width_from_arbolate_sum(reach_asums,
                                            a=width_from_asum_a_param,
                                            b=width_from_asum_b_param,
                                            minimum_width=minimum_reach_width,
                                            input_units='meters',
                                            output_units=model_length_units)
            rd['width'] = width
            rd.loc[rd.width < minimum_reach_width,
                   'width'] = minimum_reach_width

            # assign width1 and width2 back to segment data
            self.df['width1'] = width_from_arbolate_sum(
                self.df.asum1.values,
                a=width_from_asum_a_param,
                b=width_from_asum_b_param,
                minimum_width=minimum_reach_width,
                input_units=self.attr_length_units,
                output_units=model_length_units)
            self.df['width2'] = width_from_arbolate_sum(
                self.df.asum2.values,
                a=width_from_asum_a_param,
                b=width_from_asum_b_param,
                minimum_width=minimum_reach_width,
                input_units=self.attr_length_units,
                output_units=model_length_units)

        # interpolate linestring end widths to intersected reaches
        else:
            # verify that each linestring has only 1 segment associated with it
            # (interpolation might be wrong for multiple segments otherwise)
            assert rd.groupby('line_id').iseg.nunique().max() == 1
            # sort the linestring and reach data so that they are aligned
            self.df.sort_values(by='id', inplace=True)
            rd.sort_values(by=['line_id', 'ireach'], inplace=True)
            rd['width'] = interpolate_to_reaches(
                reach_data=rd,
                segment_data=self.df,
                segvar1='width1',
                segvar2='width2',
                reach_data_group_col='line_id',
                segment_data_group_col='id') * mult

        # discard very small reaches; redo numbering
        # set minimum reach length based on cell size
        thresh = 0.05  # fraction of cell length (based on square root of area)
        if minimum_reach_length is None:
            cellgeoms = grid.df.loc[rd.node.values, 'geometry']
            mean_area = np.mean([g.area for g in cellgeoms])
            minimum_reach_length = np.sqrt(mean_area) * thresh * gis_mult

        inds = rd.rchlen > minimum_reach_length
        print('\nDropping {} reaches with length < {:.2f} {}...'.format(
            np.sum(~inds), minimum_reach_length, model_length_units))
        rd = rd.loc[inds].copy()
        rd['strhc1'] = 1.  # default value of streambed Kv for now
        # handle co-located reaches
        if consolidate_conductance or one_reach_per_cell:
            rd = consolidate_reach_conductances(
                rd, keep_only_dominant=one_reach_per_cell)

        # patch the routing
        # 1) reduce one to many routing to one-to-one routing (pick_toids() above)
        # 2) create new graph with just one-to-one segments
        # 3) list new paths;  2) and 3) should be automatic following 1)
        # 4) code below will update new graph to only include remaining segments
        # 5) create sfrdata instance; numbering will be messed up
        # 6) run methods on sfrdata instance to fix numbering and route reaches with unique numbers

        print('\nRepairing routing connections...')
        remaining_ids = rd.line_id.unique()
        # set for constant-time membership tests in the loop below
        remaining_id_set = set(remaining_ids)
        # routing and paths properties should update automatically
        # when id and toid columns are changed in self.df
        # but only rd (reach_data) has been changed
        new_routing = {}
        paths = self.paths.copy()
        # for each segment
        for k in remaining_ids:
            # iterate through successive downstream segments
            for s in paths[k][1:]:
                # assign the first segment that still exists as the outseg
                if s in remaining_id_set:
                    new_routing[k] = s
                    break
            # if no segments are left downstream, assign outlet
            if k not in new_routing:
                new_routing[k] = 0

        # add any outlets to the stream network
        # for now handle int or str ids
        if add_outlets is not None:
            # allow a single id to be passed instead of a sequence
            if isinstance(add_outlets, (str, int, np.integer)):
                add_outlets = [add_outlets]
            for outlet_id in add_outlets:
                # match the type of the outlet id and the outlet toid
                # to the type of the line ids (str or int);
                # builtin object is used because the np.object alias
                # no longer exists in current NumPy versions
                if rd.line_id.dtype == object:
                    outlet_id = str(outlet_id)
                    outlet_toid = '0'
                else:
                    outlet_id = int(outlet_id)
                    outlet_toid = 0
                valid_outlet_ids = get_previous_ids_in_subset(
                    rd.line_id, self.routing, outlet_id)
                loc = rd.line_id.isin(valid_outlet_ids)
                rd.loc[loc, 'toid'] = outlet_toid
                for valid_outlet_id in valid_outlet_ids:
                    new_routing[valid_outlet_id] = outlet_toid

        # map remaining_ids to segment numbers
        segment = dict(zip(rd.line_id, rd.iseg))
        line_id = {s: lid for lid, s in segment.items()}

        # get the segment associated with each line id
        nseg = [segment[rid] for rid in remaining_ids]
        # get the segment associated with new connection for each line id
        outseg = [segment.get(new_routing[line_id[s]], 0) for s in nseg]

        # renumber the segments to be consecutive,
        # starting at 1 and only increasing downstream
        r = renumber_segments(nseg, outseg)
        # map new segment numbers to line_ids
        line_id = {r[s]: lid for s, lid in line_id.items()}

        # update reach_data
        rd['iseg'] = [r[s] for s in rd.iseg]

        print('\nSetting up segment data...')
        sd = pd.DataFrame()
        sd['nseg'] = [r[s] for s in nseg]
        sd['outseg'] = [r[s] for s in outseg]
        sd.sort_values(by='nseg', inplace=True)

        # verify that no segments route to themselves
        assert not routing_is_circular(sd.nseg, sd.outseg)

        # upstream elevations come from the elevup attribute;
        # downstream elevations from the elevdn column
        elevup = self.elevup
        elevdn = dict(zip(self.df.id, self.df.elevdn))
        sd['elevup'] = [elevup[line_id[s]] for s in sd.nseg]
        sd['elevdn'] = [elevdn[line_id[s]] for s in sd.nseg]
        # convert elevation units
        sd['elevup'] *= mult_h
        sd['elevdn'] *= mult_h

        # apply widths if they were included
        if self.df[['width1', 'width2']].sum().sum() > 0:
            width1 = dict(zip(self.df.id, self.df.width1))
            width2 = dict(zip(self.df.id, self.df.width2))
            sd['width1'] = [width1[line_id[s]] for s in sd.nseg]
            sd['width2'] = [width2[line_id[s]] for s in sd.nseg]
            sd['width1'] *= mult
            sd['width2'] *= mult  # convert length units from source data to model
        elif self.df.width2.sum() == 0:
            raise NotImplementedError(
                'Need to supply width1 and width2 or use arbolate sum.')

        # create sfrdata instance
        # this class has methods for fix segment and reach numbering,
        # assigning elevations and other properties by reach,
        # smoothing elevations, writing sfr package files
        # and other output
        rd = rd[[c for c in SFRData.rdcols if c in rd.columns]].copy()
        sfrd = SFRData(reach_data=rd,
                       segment_data=sd,
                       grid=grid,
                       model=model,
                       model_length_units=model_length_units,
                       model_time_units=model_time_units,
                       package_name=package_name,
                       **kwargs)
        print("\nTime to create sfr dataset: {:.2f}s\n".format(time.time() -
                                                               totim))
        return sfrd