Пример #1
0
from hydroDL.data import usgs, gageII
import pandas as pd
import numpy as np
import time
import os
import matplotlib.pyplot as plt

# Paths to the NWIS site inventories (machine-specific working directory).
workDir = r'C:\Users\geofk\work\waterQuality'
modelDir = os.path.join(workDir, 'modelUsgs2')
fileInvC, fileInvQ = [
    os.path.join(workDir, invName)
    for invName in ('inventory_NWIS_sample', 'inventory_NWIS_streamflow')
]

# Load the water-quality sample inventory.
siteC = usgs.readUsgsText(fileInvC)

# Parameter codes of interest, one group per line.
codeLst = [
    '00915', '00925', '00930', '00935', '00955', '00940', '00945',
    '00418', '00419', '39086', '39087',
    '00301', '00300', '00618', '00681', '00653',
    '00010', '00530', '00094',
    '00403', '00408',
]

# Distinct parameter codes found in the inventory: first cast to int, then
# exactly as stored in the table.
codeSet1 = sorted(set(siteC['parm_cd'].astype(int)))
codeSet = sorted(set(siteC['parm_cd']))

# Scratch inspection: the codes as strings, then the inventory rows whose
# code reads '70' and the site numbers of those rows (results are only
# visible when run interactively).
aa = siteC['parm_cd'].astype(str)

aa = siteC[siteC['parm_cd'].astype(str) == '70']
aa['site_no'].astype(str)
Пример #2
0
    c. in gageII database
"""

from hydroDL.data import usgs, gageII
from hydroDL import kPath
import pandas as pd
import numpy as np
import time
import os

# Inventory tables live under <dirData>/USGS/inventory.
usgsDir = os.path.join(kPath.dirData, 'USGS')
invDir = os.path.join(usgsDir, 'inventory')
fileInvC = os.path.join(invDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(invDir, 'inventory_NWIS_streamflow')

# Read both NWIS inventories (sample first, then streamflow) and the gageII
# 'bas_classif' table.
siteC, siteQ = usgs.readUsgsText(fileInvC), usgs.readUsgsText(fileInvQ)
tabGageII = gageII.readTab('bas_classif')

# Summarize sample counts for all sites. When the count file already exists
# it is loaded as-is; otherwise the table is rebuilt from the sample
# inventory (siteC), one parameter code at a time.
fileCountC = os.path.join(invDir, 'count_NWIS_sample_all')
if os.path.exists(fileCountC):
    # site_no is forced to str (presumably to keep leading zeros in the IDs)
    tabC = pd.read_csv(fileCountC, dtype={'site_no': str})
else:
    # every distinct parameter code present in the inventory, sorted
    codeLst = np.sort(siteC['parm_cd'].unique()).tolist()
    # NOTE(review): despite its name, nSite is the number of parameter codes
    nSite = len(codeLst)
    dictTab = dict()
    # start time; presumably used for progress reporting further down
    t0 = time.time()
    for k, code in enumerate(codeLst):
        # screen out site with only one sample: keep the inventory rows for
        # this code whose count_nu is greater than 1
        site = siteC.loc[(siteC['parm_cd'] == code) & (siteC['count_nu'] > 1)]
Пример #3
0
# forcing variables and model / training settings
varFcLst = ['ppt', 'tmean']
nTest = 365
rho = 365
batchSize = 50
nEpoch = 100
hiddenSize = 64
# one input more than the forcing variables (presumably streamflow -- TODO
# confirm against the code that assembles the inputs)
nx = len(varFcLst) + 1
ny = len(varWqLst)

# setup result folder
# makedirs also creates a missing 'singleSite' parent folder and tolerates the
# folder already existing, which the exists()/mkdir() pair did not.
saveFolder = os.path.join(workDir, 'singleSite', siteNo)
os.makedirs(saveFolder, exist_ok=True)

# load sample data
dfSample = usgs.readUsgsText(os.path.join(workDir, 'data', 'sample', siteNo),
                             dataType='sample')
tS = dfSample['datetime'].values
# daily time axis from the first to the last sample, both ends included
# (assumes the samples are in chronological order -- TODO confirm)
tY = np.arange(tS[0].astype('datetime64[D]'),
               tS[-1].astype('datetime64[D]') + np.timedelta64(1, 'D'),
               np.timedelta64(1, 'D'))
# daily mean of every water-quality variable; days without samples stay NaN
dataY = np.full([tY.size, len(varWqLst)], np.nan)
# sample timestamps truncated to days; loop-invariant, so computed once
tSDay = tS.astype('datetime64[D]')
for i, d in enumerate(tY):
    ind = np.where(tSDay == d)[0]
    if ind.size == 0:
        # no sample on this day: keep the NaN fill instead of calling nanmean
        # on an empty slice (same result, without the RuntimeWarning)
        continue
    for j, var in enumerate(varWqLst):
        dataY[i, j] = np.nanmean(dfSample[var].values[ind])

# load forcing and streamflow data
dfDaily = usgs.readUsgsText(os.path.join(workDir, 'data', 'dailyTS', siteNo),
                            dataType='dailyTS')
dfForcing = pd.read_csv(os.path.join(workDir, 'data', 'forcing', siteNo))
tX = dfDaily['datetime'].values
Пример #4
0
from hydroDL.data import usgs
import pandas as pd

# read site inventory
fileName = r'C:\Users\geofk\work\waterQuality\tsDaily\all-streamflow-site.txt'
siteAll = usgs.readUsgsText(fileName)

# Keep the sites whose data_types_cd string has something other than 'N' at
# both position 0 and position 2.
bWq = [strT[0] != 'N' and strT[2] != 'N' for strT in siteAll['data_types_cd']]
# index labels of the selected rows
indLst = [ind for ind, bSel in zip(siteAll.index, bWq) if bSel]
# Select with a boolean mask through .loc: the previous code fed index
# *labels* from iterrows() to the positional .iloc, which only picks the right
# rows when the index happens to be a default 0..n-1 range.
siteWq = siteAll.loc[bWq, :]

# frequency table of instruments_cd among the selected sites, printed in full
tabM = siteWq['instruments_cd'].value_counts()
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(tabM)

# not right... (original author's note; the query result is not stored, so it
# is only visible when run interactively)
siteAll.query('instruments_cd == "NNNYNNNNNNNYNNNNYNNNNNNNNNNNNN"')
Пример #5
0
# inventory folder and the two NWIS inventory files
workDir = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileInvC = os.path.join(workDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(workDir, 'inventory_NWIS_streamflow')

# look up sample for interested sample sites
# see exploreSample.py for this file
fileCountC = os.path.join(workDir, 'count_NWIS_sample_gageII')
tabC = pd.read_csv(fileCountC, dtype={'site_no': str}).set_index('site_no')
codeLst = waterQuality.codeLst
tabSite = tabC[codeLst]

# download C/Q data - this will download all elements
siteNoLst = tabSite.index.tolist()
# sites whose lookup or download raised; filled in by the loop below
errLst = list()
tabState = pd.read_csv(os.path.join(workDir, 'fips_state_code.csv'))
# same file as before, now referenced through fileInvQ instead of re-joining
siteQ = usgs.readUsgsText(fileInvQ)
t0 = time.time()
for k, siteNo in enumerate(siteNoLst):
    try:
        # numeric state code of the site -> short state name
        stateCd = siteQ['state_cd'].loc[siteQ['site_no'] == siteNo].values[0]
        state = tabState['short'].loc[tabState['code'] == int(
            stateCd)].values[0]
        # skip files that were already downloaded
        saveFile = os.path.join(kPath.dirData, 'USGS', 'streamflow', siteNo)
        if not os.path.exists(saveFile):
            usgs.downloadDaily(siteNo, ['00060'], state, saveFile)
        saveFile = os.path.join(kPath.dirData, 'USGS', 'sample', siteNo)
        if not os.path.exists(saveFile):
            usgs.downloadSample(siteNo, state, saveFile)
    except Exception:
        # Best effort: remember the failing site and move on. Catching
        # Exception rather than using a bare except lets KeyboardInterrupt and
        # SystemExit stop the run.
        errLst.append(siteNo)
    ns = len(siteNoLst)
Пример #6
0
import time
import os
import matplotlib.pyplot as plt

# Inventory folder and the files derived from it.
workDir = os.path.join(kPath.dirData, 'USGS', 'inventory')
modelDir = os.path.join(workDir, 'modelUsgs2')
fileInvC = os.path.join(workDir, 'inventory_NWIS_sample')
fileInvQ = os.path.join(workDir, 'inventory_NWIS_streamflow')

# Per-site sample counts for the codes of interest: load the saved table when
# it exists, otherwise build it from the sample inventory.
fileCountC = os.path.join(workDir, 'count_NWIS_sample')
if os.path.exists(fileCountC):
    tabC = pd.read_csv(fileCountC, dtype={'site_no': str})
else:
    siteC = usgs.readUsgsText(fileInvC)
    # parameter codes of interest, one group per line
    codeLst = [
        '00915', '00925', '00930', '00935', '00955', '00940', '00945',
        '00418', '00419', '39086', '39087',
        '00301', '00300', '00618', '00681', '00653',
        '00010', '00530', '00094',
        '00403', '00408',
    ]
    dictTab = {}
    for code in codeLst:
        # inventory rows for this code that have more than one sample
        bCode = siteC['parm_cd'] == code
        bMulti = siteC['count_nu'] > 1
        site = siteC.loc[bCode & bMulti]
        # site_no -> integer sample count
        keyLst = site['site_no'].tolist()
        cntLst = site['count_nu'].astype(int).tolist()
        temp = dict(zip(keyLst, cntLst))
        dictTab[code] = temp
    # one column per code, with site_no turned from the index into a column
    tabC = pd.DataFrame.from_dict(dictTab).rename_axis('site_no').reset_index()
Пример #7
0
# upgrade code to read flags and save CSV
from hydroDL.data import usgs
from hydroDL import kPath
from hydroDL.app import waterQuality
import os
import pandas as pd

pd.set_option('display.max_rows', 100)

siteNo = '07060710'
codeLst = usgs.codeLst
# pd.datetime was deprecated in pandas 1.0 and removed in 2.0; pd.Timestamp is
# the supported replacement and compares against the 'date' column the same way.
startDate = pd.Timestamp(year=1979, month=1, day=1)

# read the sample table, keep rows on/after startDate and index it by date
fileC = os.path.join(kPath.dirData, 'USGS', 'sample', siteNo)
dfC = usgs.readUsgsText(fileC, dataType='sample')
if startDate is not None:
    dfC = dfC[dfC['date'] >= startDate]
dfC = dfC.set_index('date')

# Codes from codeLst that are present as columns. They are kept in codeLst
# order (duplicates dropped) so the column order is the same on every run; the
# previous set intersection gave an arbitrary order.
colSet = set(dfC.columns.tolist())
codeSel = [code for code in dict.fromkeys(codeLst) if code in colSet]
# matching '<code>_cd' columns
codeSel_cd = [code + '_cd' for code in codeSel]
dfC = dfC[codeSel + codeSel_cd].dropna(how='all')
dfC1 = dfC[codeSel]
# explicit copy: dfC2 is modified below, and assigning into a column subset of
# dfC would otherwise raise SettingWithCopyWarning
dfC2 = dfC[codeSel_cd].copy()
# mark with 'x' every cell whose value exists but whose '_cd' entry is missing,
# then blank out the remaining missing '_cd' entries
dfC2[dfC1.notna().values & dfC2.isna().values] = 'x'
dfC2 = dfC2.fillna('')
# split the dates into those that occur once and those that occur repeatedly
bDup = dfC.index.duplicated(keep=False)
indUni = dfC.index[~bDup]
indDup = dfC.index[bDup].unique()
indAll = dfC.index.unique()
# empty output table: one row per distinct date, one column per selected code
dfO1 = pd.DataFrame(index=indAll, columns=codeSel)
Пример #8
0
import ee

# Initialize the `ee` client before the remaining imports run (presumably the
# Google Earth Engine API used by hydroDL.data.gee -- TODO confirm).
ee.Initialize()
import os
import time
import numpy as np
import pandas as pd
import datetime as dt
from hydroDL.data import usgs, gee

# Machine-specific working directory and the site under study.
workDir = r'C:\Users\geofk\work\waterQuality'
# siteNo = '04086120'
siteNo = '053416972'

# Look the site up in the streamflow inventory and take its coordinates.
fileInv = os.path.join(workDir, 'inventory_NWIS_streamflow')
dfSite = usgs.readUsgsText(fileInv)
rowSite = dfSite[dfSite['site_no'] == siteNo]
lat = rowSite['dec_lat_va'].values[0]
lon = rowSite['dec_long_va'].values[0]

# The date range is taken from the first and last record of the daily time
# series. Reading it from the inventory was tried and noted as
# "not right somehow":
# sd = dt.datetime.strptime(rowSite['qw_begin_date'].values[0], '%Y-%m-%d')
# ed = dt.datetime.strptime(rowSite['qw_end_date'].values[0], '%Y-%m-%d')
fileDaily = os.path.join(workDir, 'data', 'dailyTS', siteNo)
dfDaily = usgs.readUsgsText(fileDaily, dataType='dailyTS')
tDaily = dfDaily['datetime'].values
sd = pd.to_datetime(tDaily[0])
ed = pd.to_datetime(tDaily[-1])

# Alternative coordinates kept for manual testing:
# (lon, lat) = (-92.4363056, 45.12002778)
# (lon, lat) = (-88.4008333, 43.62444444)
# (lon, lat) = (-88.4063056, 43.12002778)