# Exploratory script: inspect which USGS parameter codes appear in the
# NWIS sample inventory and look up the sites reporting one odd code.
from hydroDL.data import usgs, gageII
import pandas as pd
import numpy as np
import time
import os
import matplotlib.pyplot as plt

# read site inventory
workDir = r'C:\Users\geofk\work\waterQuality'
modelDir = os.path.join(workDir, 'modelUsgs2')
fileInvC = os.path.join(workDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(workDir, 'inventory_NWIS_streamflow')

# look up sample for interested sample sites
siteC = usgs.readUsgsText(fileInvC)
codeLst = (
    ['00915', '00925', '00930', '00935', '00955', '00940', '00945'] +
    ['00418', '00419', '39086', '39087'] +
    ['00301', '00300', '00618', '00681', '00653'] +
    ['00010', '00530', '00094'] +
    ['00403', '00408']
)

# unique parameter codes, once cast to int and once as stored
codeSet1 = sorted(set(siteC['parm_cd'].astype(int)))
codeSet = sorted(set(siteC['parm_cd']))

# rows whose parameter code renders as the string '70'
parmStr = siteC['parm_cd'].astype(str)
aa = siteC.loc[parmStr == '70']
# bare expression: only displays something in an interactive session
aa['site_no'].astype(str)
"""
c. in gageII database
"""
# NOTE(review): only the last item of the original docstring list survives in
# this file; the opening lines (items a. and b.) should be restored from the
# original script.
from hydroDL.data import usgs, gageII
from hydroDL import kPath
import pandas as pd
import numpy as np
import time
import os

# site inventory
usgsDir = os.path.join(kPath.dirData, 'USGS')
invDir = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileInvC = os.path.join(invDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(invDir, 'inventory_NWIS_streamflow')
siteC = usgs.readUsgsText(fileInvC)
siteQ = usgs.readUsgsText(fileInvQ)
tabGageII = gageII.readTab('bas_classif')

# summarize count for all site
fileCountC = os.path.join(invDir, 'count_NWIS_sample_all')
if os.path.exists(fileCountC):
    # reuse the cached count table; keep site numbers as strings so that
    # leading zeros are preserved
    tabC = pd.read_csv(fileCountC, dtype={'site_no': str})
else:
    codeLst = np.sort(siteC['parm_cd'].unique()).tolist()
    # NOTE(review): despite its name this is the number of parameter codes
    nSite = len(codeLst)
    dictTab = dict()
    t0 = time.time()
    for k, code in enumerate(codeLst):
        # screen out site with only one sample
        site = siteC.loc[(siteC['parm_cd'] == code) &
                         (siteC['count_nu'] > 1)]
# Single-site setup: hyper-parameters, result folder, and daily-aggregated
# water-quality samples plus forcing / streamflow inputs.
# NOTE(review): varWqLst, workDir and siteNo are not defined in this
# fragment; they are expected to be set earlier in the script.
varFcLst = ['ppt', 'tmean']
nTest = 365
rho = 365
batchSize = 50
nEpoch = 100
hiddenSize = 64
nx = len(varFcLst) + 1
ny = len(varWqLst)

# setup result folder
saveFolder = os.path.join(workDir, 'singleSite', siteNo)
# makedirs also creates a missing 'singleSite' parent and tolerates an
# already existing folder
os.makedirs(saveFolder, exist_ok=True)

# load sample data
dfSample = usgs.readUsgsText(
    os.path.join(workDir, 'data', 'sample', siteNo), dataType='sample')
tS = dfSample['datetime'].values
# truncate sample timestamps to whole days once, outside the loop
tDay = tS.astype('datetime64[D]')
# continuous daily axis from the first to the last sample day (inclusive)
tY = np.arange(tDay[0],
               tDay[-1] + np.timedelta64(1, 'D'),
               np.timedelta64(1, 'D'))
dataY = np.full([tY.size, len(varWqLst)], np.nan)
valLst = [dfSample[var].values for var in varWqLst]
for i, d in enumerate(tY):
    ind = np.where(tDay == d)[0]
    if ind.size == 0:
        # no sample that day: the row stays NaN (same value nanmean would
        # give for an empty slice, without the RuntimeWarning)
        continue
    for j, val in enumerate(valLst):
        # average multiple samples taken on the same day, ignoring NaN
        dataY[i, j] = np.nanmean(val[ind])

# load forcing and streamflow data
dfDaily = usgs.readUsgsText(
    os.path.join(workDir, 'data', 'dailyTS', siteNo), dataType='dailyTS')
dfForcing = pd.read_csv(os.path.join(workDir, 'data', 'forcing', siteNo))
tX = dfDaily['datetime'].values
# Exploratory script: tabulate the 'instruments_cd' strings of streamflow
# sites whose 'data_types_cd' has something other than 'N' at positions 0
# and 2.
from hydroDL.data import usgs
import pandas as pd

# read site inventory
fileName = r'C:\Users\geofk\work\waterQuality\tsDaily\all-streamflow-site.txt'
siteAll = usgs.readUsgsText(fileName)

# Collect row POSITIONS (not index labels) so that the .iloc selection below
# is correct whatever index readUsgsText returns.
indLst = [
    pos for pos, strT in enumerate(siteAll['data_types_cd'])
    if strT[0] != 'N' and strT[2] != 'N'
]
siteWq = siteAll.iloc[indLst, :]

tabM = siteWq['instruments_cd'].value_counts()
# lift pandas' display limits just for this print
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    print(tabM)

# not right...
# bare expression: only displays something in an interactive session
siteAll.query('instruments_cd == "NNNYNNNNNNNYNNNNYNNNNNNNNNNNNN"')
# Download daily streamflow (parameter 00060) and sample data for every site
# listed in the gageII sample-count table, skipping files that already exist.
# NOTE(review): os, time, pd, kPath, usgs and waterQuality are not imported
# in this fragment; they are expected to be imported earlier in the script.
workDir = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileInvC = os.path.join(workDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(workDir, 'inventory_NWIS_streamflow')

# look up sample for interested sample sites
# see exploreSample.py for this file
fileCountC = os.path.join(workDir, 'count_NWIS_sample_gageII')
tabC = pd.read_csv(fileCountC, dtype={'site_no': str}).set_index('site_no')
codeLst = waterQuality.codeLst
tabSite = tabC[codeLst]

# download C/Q data - this will download all elements
siteNoLst = tabSite.index.tolist()
errLst = list()
tabState = pd.read_csv(os.path.join(workDir, 'fips_state_code.csv'))
siteQ = usgs.readUsgsText(fileInvQ)
t0 = time.time()
for k, siteNo in enumerate(siteNoLst):
    try:
        # map the site's numeric state code to the short state name taken
        # from the FIPS table
        stateCd = siteQ['state_cd'].loc[siteQ['site_no'] == siteNo].values[0]
        state = tabState['short'].loc[
            tabState['code'] == int(stateCd)].values[0]

        saveFile = os.path.join(kPath.dirData, 'USGS', 'streamflow', siteNo)
        if not os.path.exists(saveFile):
            usgs.downloadDaily(siteNo, ['00060'], state, saveFile)

        saveFile = os.path.join(kPath.dirData, 'USGS', 'sample', siteNo)
        if not os.path.exists(saveFile):
            usgs.downloadSample(siteNo, state, saveFile)
    except Exception:
        # best effort: remember the failing site and carry on; Exception
        # (rather than a bare except) lets Ctrl-C abort the long loop
        errLst.append(siteNo)
ns = len(siteNoLst)
import time
import os
import matplotlib.pyplot as plt

# NOTE(review): kPath, pd and usgs are not imported in this fragment; they
# are expected to be imported earlier in the script.

# read site inventory
workDir = os.path.join(kPath.dirData, 'USGS', 'inventory')
modelDir = os.path.join(workDir, 'modelUsgs2')
fileInvC = os.path.join(workDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(workDir, 'inventory_NWIS_streamflow')

# look up sample for interested sample sites
fileCountC = os.path.join(workDir, 'count_NWIS_sample')
if os.path.exists(fileCountC):
    # reuse the cached count table; keep site numbers as strings so that
    # leading zeros are preserved
    tabC = pd.read_csv(fileCountC, dtype={'site_no': str})
else:
    siteC = usgs.readUsgsText(fileInvC)
    codeLst = (
        ['00915', '00925', '00930', '00935', '00955', '00940', '00945'] +
        ['00418', '00419', '39086', '39087'] +
        ['00301', '00300', '00618', '00681', '00653'] +
        ['00010', '00530', '00094'] +
        ['00403', '00408']
    )
    # one {site_no: sample count} mapping per parameter code
    dictTab = dict()
    for code in codeLst:
        # keep only sites with more than one sample of this code
        site = siteC.loc[(siteC['parm_cd'] == code) &
                         (siteC['count_nu'] > 1)]
        temp = dict(
            zip(site['site_no'].tolist(),
                site['count_nu'].astype(int).tolist()))
        dictTab[code] = temp
    # rows = sites, columns = codes; then expose the site number as a
    # regular 'site_no' column like the cached CSV has
    tabC = pd.DataFrame.from_dict(dictTab)
    tabC = tabC.rename_axis('site_no').reset_index()
# upgrade code to read flags and save CSV
from hydroDL.data import usgs
from hydroDL import kPath
from hydroDL.app import waterQuality
import os
import pandas as pd
pd.set_option('display.max_rows', 100)

siteNo = '07060710'
codeLst = usgs.codeLst
# pd.datetime was removed in pandas 2.0; pd.Timestamp compares the same way
startDate = pd.Timestamp(1979, 1, 1)

fileC = os.path.join(kPath.dirData, 'USGS', 'sample', siteNo)
dfC = usgs.readUsgsText(fileC, dataType='sample')
if startDate is not None:
    dfC = dfC[dfC['date'] >= startDate]
dfC = dfC.set_index('date')

# codes of interest that are present for this site; sorted so the column
# order does not depend on set iteration order
codeSel = sorted(set(codeLst) & set(dfC.columns.tolist()))
# matching '<code>_cd' columns, in the same order as codeSel
codeSel_cd = [code + '_cd' for code in codeSel]
dfC = dfC[codeSel + codeSel_cd].dropna(how='all')

dfC1 = dfC[codeSel]
# explicit copy: the table is modified in place below
dfC2 = dfC[codeSel_cd].copy()
# mark cells that have a value but no entry in the '_cd' column with 'x';
# the two masks line up position by position
dfC2[dfC1.notna().values & dfC2.isna().values] = 'x'
dfC2 = dfC2.fillna('')

# split index values into those occurring once and those occurring
# multiple times
bDup = dfC.index.duplicated(keep=False)
indUni = dfC.index[~bDup]
indDup = dfC.index[bDup].unique()
indAll = dfC.index.unique()
dfO1 = pd.DataFrame(index=indAll, columns=codeSel)
# Look up one USGS site's coordinates and the date range of its daily
# record, as a prelude to Earth Engine queries.
import ee
ee.Initialize()
import os
import time
import numpy as np
import pandas as pd
import datetime as dt
from hydroDL.data import usgs, gee

workDir = r'C:\Users\geofk\work\waterQuality'
# siteNo = '04086120'
siteNo = '053416972'

# find out site
dfSite = usgs.readUsgsText(os.path.join(workDir, 'inventory_NWIS_streamflow'))
rowSite = dfSite.loc[dfSite['site_no'] == siteNo]
if rowSite.empty:
    # fail with a clear message instead of an IndexError on .values[0]
    raise ValueError(
        'site {} not found in inventory_NWIS_streamflow'.format(siteNo))
lat = rowSite['dec_lat_va'].values[0]
lon = rowSite['dec_long_va'].values[0]

# not right somehow
# sd = dt.datetime.strptime(rowSite['qw_begin_date'].values[0], '%Y-%m-%d')
# ed = dt.datetime.strptime(rowSite['qw_end_date'].values[0], '%Y-%m-%d')

# take the period from the first and last rows of the daily file instead
dfDaily = usgs.readUsgsText(
    os.path.join(workDir, 'data', 'dailyTS', siteNo), dataType='dailyTS')
sd = pd.to_datetime(dfDaily['datetime'].values[0])
ed = pd.to_datetime(dfDaily['datetime'].values[-1])

# (lon, lat) = (-92.4363056, 45.12002778)
# (lon, lat) = (-88.4008333, 43.62444444)
# (lon, lat) = (-88.4063056, 43.12002778)