class NWIS:
    """Access NWIS web service."""

    def __init__(self):
        self.session = RetrySession()
        self.url = ServiceURL().restful.nwis

    @staticmethod
    def query_byid(ids: Union[str, List[str]]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        if not isinstance(ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        ids = [str(i) for i in ids] if isinstance(ids, list) else [str(ids)]
        query = {"sites": ",".join(ids)}

        return query

    @staticmethod
    def query_bybox(bbox: Tuple[float, float, float, float]) -> Dict[str, str]:
        """Generate the geometry keys and values of an ArcGISRESTful query."""
        geoutils.check_bbox(bbox)
        query = {"bBox": ",".join(f"{b:.06f}" for b in bbox)}

        return query

    def get_info(self, query: Dict[str, str], expanded: bool = False) -> pd.DataFrame:
        """Get NWIS stations by a list of IDs or within a bounding box.

        Only stations that record(ed) daily streamflow data are returned.
        The following columns are included in the dataframe when ``expanded``
        is set to ``False``:

        ==================  ===================================
        Name                Description
        ==================  ===================================
        site_no             Site identification number
        station_nm          Site name
        site_tp_cd          Site type
        dec_lat_va          Decimal latitude
        dec_long_va         Decimal longitude
        coord_acy_cd        Latitude-longitude accuracy
        dec_coord_datum_cd  Decimal latitude-longitude datum
        alt_va              Altitude of gage/land surface
        alt_acy_va          Altitude accuracy
        alt_datum_cd        Altitude datum
        huc_cd              Hydrologic unit code
        parm_cd             Parameter code
        stat_cd             Statistical code
        ts_id               Internal timeseries ID
        loc_web_ds          Additional measurement description
        medium_grp_cd       Medium group code
        parm_grp_cd         Parameter group code
        srs_id              SRS ID
        access_cd           Access code
        begin_date          Begin date
        end_date            End date
        count_nu            Record count
        hcdn_2009           Whether the station is in HCDN-2009
        ==================  ===================================

        Parameters
        ----------
        query : dict
            A dictionary containing a query by IDs or BBOX. Use the
            ``query_byid`` or ``query_bybox`` class methods to generate
            the queries.
        expanded : bool, optional
            Whether to get expanded site information, for example drainage area.
        Returns
        -------
        pandas.DataFrame
            NWIS stations
        """
        if not isinstance(query, dict):
            raise InvalidInputType("query", "dict")

        output_type = [{"outputDataTypeCd": "dv"}]

        if expanded:
            output_type.append({"siteOutput": "expanded"})

        site_list = []
        for t in output_type:
            payload = {
                **query,
                **t,
                "format": "rdb",
                "parameterCd": "00060",
                "siteStatus": "all",
                "hasDataTypeCd": "dv",
            }
            resp = self.session.post(f"{self.url}/site", payload).text.split("\n")

            # Parse the tab-delimited RDB response, skipping comment lines.
            r_list = [txt.split("\t") for txt in resp if "#" not in txt]
            r_dict = [dict(zip(r_list[0], st)) for st in r_list[2:]]

            site_list.append(pd.DataFrame.from_dict(r_dict).dropna())

        if expanded:
            sites = pd.merge(
                *site_list, on="site_no", how="outer", suffixes=("", "_overlap")
            ).filter(regex="^(?!.*_overlap)")
        else:
            sites = site_list[0]

        # Drop stations without altitude data.
        sites = sites.drop(sites[sites.alt_va == ""].index)

        try:
            sites = sites[sites.parm_cd == "00060"]
            sites["begin_date"] = pd.to_datetime(sites["begin_date"])
            sites["end_date"] = pd.to_datetime(sites["end_date"])
        except AttributeError:
            pass

        float_cols = ["dec_lat_va", "dec_long_va", "alt_va", "alt_acy_va"]
        if expanded:
            float_cols += ["drain_area_va", "contrib_drain_area_va"]

        sites[float_cols] = sites[float_cols].apply(lambda x: pd.to_numeric(x, errors="coerce"))

        # Keep only standard 8-digit station IDs.
        sites = sites[sites.site_no.apply(len) == 8]

        # Flag stations that belong to the HCDN-2009 subset using GAGES-II attributes.
        gii = WaterData("gagesii", DEF_CRS)
        hcdn = gii.byid("staid", sites.site_no.tolist())
        hcdn_dict = hcdn[["staid", "hcdn_2009"]].set_index("staid").hcdn_2009.to_dict()
        sites["hcdn_2009"] = sites.site_no.apply(
            lambda x: len(hcdn_dict[x]) > 0 if x in hcdn_dict else False
        )

        return sites

    def get_streamflow(
        self, station_ids: Union[List[str], str], dates: Tuple[str, str], mmd: bool = False
    ) -> pd.DataFrame:
        """Get daily streamflow observations from USGS.

        Parameters
        ----------
        station_ids : str, list
            The gage ID(s) of the USGS station(s).
        dates : tuple
            Start and end dates as a tuple, ``(start, end)``.
        mmd : bool
            Convert cms to mm/day based on the contributing drainage area
            of the stations.
        Returns
        -------
        pandas.DataFrame
            Streamflow data observations in cubic meters per second (cms)
        """
        if not isinstance(station_ids, (str, list)):
            raise InvalidInputType("ids", "str or list")

        station_ids = station_ids if isinstance(station_ids, list) else [station_ids]

        if not isinstance(dates, tuple) or len(dates) != 2:
            raise InvalidInputType("dates", "tuple", "(start, end)")

        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])

        # Ensure Daily Mean (stat code 00003) data are available for the requested period.
        siteinfo = self.get_info(self.query_byid(station_ids))
        check_dates = siteinfo.loc[
            (
                (siteinfo.stat_cd == "00003")
                & (start < siteinfo.begin_date)
                & (end > siteinfo.end_date)
            ),
            "site_no",
        ].tolist()
        nas = [s for s in station_ids if s in check_dates]
        if len(nas) > 0:
            raise InvalidInputRange(
                "Daily Mean data unavailable for the specified time "
                + "period for the following stations:\n"
                + ", ".join(nas)
            )

        payload = {
            "format": "json",
            "sites": ",".join(station_ids),
            "startDT": start.strftime("%Y-%m-%d"),
            "endDT": end.strftime("%Y-%m-%d"),
            "parameterCd": "00060",
            "statCd": "00003",
            "siteStatus": "all",
        }
        resp = self.session.post(f"{self.url}/dv", payload)

        time_series = resp.json()["value"]["timeSeries"]
        r_ts = {
            t["sourceInfo"]["siteCode"][0]["value"]: t["values"][0]["value"] for t in time_series
        }

        def to_df(col, dic):
            discharge = pd.DataFrame.from_records(dic, exclude=["qualifiers"], index=["dateTime"])
            discharge.index = pd.to_datetime(discharge.index)
            discharge.columns = [col]
            return discharge

        qobs = pd.concat([to_df(f"USGS-{s}", t) for s, t in r_ts.items()], axis=1)

        # Convert cfs to cms
        qobs = qobs.astype("float64") * 0.028316846592

        if mmd:
            # Convert cms to mm/day using each station's contributing basin area,
            # computed in the equal-area Eckert IV projection.
            nldi = NLDI()
            basins_dict = {
                f"USGS-{s}": nldi.getfeature_byid("nwissite", f"USGS-{s}", basin=True).geometry
                for s in station_ids
            }
            basins = gpd.GeoDataFrame.from_dict(basins_dict, orient="index")
            basins.columns = ["geometry"]
            basins = basins.set_crs(DEF_CRS)
            eck4 = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
            area = basins.to_crs(eck4).area
            ms2mmd = 1000.0 * 24.0 * 3600.0
            qobs = qobs.apply(lambda x: x / area.loc[x.name] * ms2mmd)

        return qobs
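
# The ``mmd`` branch of ``NWIS.get_streamflow`` above divides discharge in cms
# (m^3/s) by the basin area in m^2 and scales by 1000 mm/m * 86400 s/day to get
# mm/day. Below is a minimal, self-contained sketch of that arithmetic; the helper
# name and the numbers in the example are hypothetical and only illustrative.


def _cms_to_mmd_sketch(discharge_cms: float, basin_area_m2: float) -> float:
    """Convert a single discharge value from cms to mm/day over a given basin area."""
    ms2mmd = 1000.0 * 24.0 * 3600.0  # mm per m times seconds per day
    return discharge_cms / basin_area_m2 * ms2mmd


# Example: 10 cms over a 1000 km^2 (1.0e9 m^2) basin is about 0.864 mm/day,
# i.e. _cms_to_mmd_sketch(10.0, 1.0e9) ~ 0.864.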
def post_connection_error():
    """Post to an unreachable URL to exercise RetrySession's retry logic."""
    url = "https://somefailedurl.com"
    s = RetrySession(retries=2)
    s.post(url)
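
# A minimal usage sketch of the NWIS class defined above, assuming this module's
# dependencies (RetrySession, ServiceURL, WaterData, NLDI, pandas, geopandas) are
# available. The station ID and date range are only illustrative examples.
if __name__ == "__main__":
    nwis = NWIS()

    # Query by station ID; query_bybox works the same way with a bounding box tuple.
    query = nwis.query_byid("01031500")
    sites = nwis.get_info(query, expanded=True)
    print(sites[["site_no", "station_nm", "hcdn_2009"]])

    # Daily streamflow in cms; pass mmd=True for mm/day instead.
    qobs = nwis.get_streamflow("01031500", ("2000-01-01", "2000-12-31"))
    print(qobs.head())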