# Per-site count tables, each indexed by siteNo (kept as str).
# NOTE(review): the B2000 / A2000 suffixes presumably mean before / after
# 2000 -- confirm against the script that writes these files.
df0, df1, df2 = [
    pd.read_csv(os.path.join(dirInv, fileName),
                dtype={'siteNo': str}).set_index('siteNo')
    for fileName in ('codeCount.csv',
                     'codeCount_B2000.csv',
                     'codeCount_A2000.csv')
]

code = '00955'
# silica: sites whose count exceeds 100 in both the training and the testing
# table (the case is named Silica64)
enoughB2000 = df1[code] > 100
enoughA2000 = df2[code] > 100
siteNoLst = df0[enoughB2000 & enoughA2000].index.tolist()

# Create the case when it does not exist yet, then always load it by name.
# NOTE(review): indentation was lost in this file; the `if` is assumed to
# guard only the creation call.
if not waterQuality.exist('Silica64'):
    wqData = waterQuality.DataModelWQ.new('Silica64', siteNoLst)
wqData = waterQuality.DataModelWQ('Silica64')

# Year-range indices over the whole case (saving them is disabled).
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
# wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
# wqData.saveSubset('Y0010', indYr2)

# subset only have silica: rows whose value in the `code` column is not NaN
ic = wqData.varC.index(code)
hasCode = ~np.isnan(wqData.c[:, ic])
indC = np.where(hasCode)[0]
wqData.saveSubset(code, indC)

# Same year ranges, restricted to the rows selected above (saving disabled).
indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
# wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
# wqData.saveSubset('{}-Y0010'.format(code), indYr2)
import torch
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hydroDL.model import rnn, crit, trainTS
import time

# NOTE(review): waterQuality and gageII are used below but are not imported
# in the visible part of this file -- confirm they are imported elsewhere.

# Script settings.
siteNo = '401733105392404'
codeLst = ['00915', '00940', '00955']
# codeLst = ['00915', '00955']
nh = 256
batchSize = [365, 50]

# Create a single-site case named after the site when it does not exist yet,
# then always load it by name.
# NOTE(review): indentation was lost in this file; the `if` is assumed to
# guard only the creation call.
if not waterQuality.exist(siteNo):
    wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo)

# Variable groups handed to transIn: wqData.varF and wqData.varG as inputs,
# the first entry of wqData.varQ plus the selected codes as targets.
varX, varXC = wqData.varF, wqData.varG
varY, varYC = [wqData.varQ[0]], codeLst
varTup = (varX, varXC, varY, varYC)

dataTup, statTup = wqData.transIn(varTup=varTup)
# NOTE(review): the meaning of the [1, 1, 0, 0] flags is defined by
# trainTS.dealNaN -- presumably one flag per entry of dataTup; confirm.
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
nx, nxc, ny, nyc, nt, ns = sizeLst

# Attribute table for this site (varXC columns), passed through updateCode.
tabG = gageII.updateCode(gageII.readData(varLst=varXC, siteNoLst=[siteNo]))
# Site-level varX series read with nFill=5.
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dtype={'siteNo': str}).set_index('siteNo') df2 = pd.read_csv(os.path.join(dirInv, 'codeCount_A2000.csv'), dtype={'siteNo': str}).set_index('siteNo') # pick some sites codeLst = ['00300', '00915'] tempLst = list() for code in codeLst: temp = df0[(df1[code] > 100) & (df2[code] > 100)].index.tolist() # temp = df0[df0[code] > 200].index.tolist() tempLst.append(temp) siteNoLst = tempLst[0] for k in range(1, len(tempLst)): siteNoLst = list(set(siteNoLst).intersection(tempLst[k])) if not waterQuality.exist('CaO49'): wqData = waterQuality.DataModelWQ.new('CaO49', siteNoLst) indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0] wqData.saveSubset('Y8090', indYr1) indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0] wqData.saveSubset('Y0010', indYr2) # subset only have Ca and O ic = [wqData.varC.index(code) for code in codeLst] indC = np.where(~np.isnan(wqData.c[:, ic]))[0] wqData.saveSubset('CaO', indC) indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0] wqData.saveSubset('CaO-Y8090', indYr1) indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0] wqData.saveSubset('CaO-Y0010', indYr2)
from hydroDL.app import waterQuality
from hydroDL.data import gageII
from hydroDL.master import basins
import pandas as pd
import numpy as np
import os
import time

# NOTE(review): kPath is used below but is not imported in the visible part
# of this file -- confirm it is imported elsewhere.

# all gages
dirInventory = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInventory, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()

# HBN sites, restricted to those that also appear in the full gage list.
dfHBN = pd.read_csv(os.path.join(dirInventory, 'HBN.csv'),
                    dtype={'siteNo': str}).set_index('siteNo')
# Test membership against a set built once; scanning the full list for every
# HBN site would cost len(dfHBN) * len(siteNoLstAll) comparisons.
siteNoSetAll = set(siteNoLstAll)
siteNoHBN = [siteNo for siteNo in dfHBN.index.tolist()
             if siteNo in siteNoSetAll]

# wrap up data: load the case when it exists, otherwise create it with rho=30
caseName = 'HBN-30d'
if waterQuality.exist(caseName):
    wqData = waterQuality.DataModelWQ(caseName)
else:
    wqData = waterQuality.DataModelWQ.new(caseName, siteNoHBN, rho=30)

# NOTE(review): indentation was lost in this file; the subset split below is
# assumed to run whether the case was loaded or newly created.
ind1 = wqData.indByRatio(0.8)
ind2 = wqData.indByRatio(0.2, first=False)
wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# NOTE(review): the first argument is 'HBN' while caseName above is
# 'HBN-30d' -- confirm which case is meant to be trained.
basins.trainModelTS('HBN', 'first80', batchSize=[None, 500],
                    saveName='HBN_opt1', optQ=1)
a, b = [200, None]
# # select referenced basins
# Sites whose 'CLASS' value equals 1 after gageII.updateCode.
tabSel = gageII.updateCode(
    gageII.readData(varLst=['CLASS'], siteNoLst=siteNoLstAll))
isRef = tabSel['CLASS'] == 1
siteNoRef = tabSel[isRef].index.tolist()

# shapefiles
usgsDir = os.path.join(kPath.dirData, 'USGS')
outShapeFile = os.path.join(usgsDir, 'basins', 'HBN.shp')
# gageII.extractBasins(siteNoHBN, outShapeFile)
usgsDir = os.path.join(kPath.dirData, 'USGS')
outShapeFile = os.path.join(usgsDir, 'basins', 'basinRef.shp')
gageII.extractBasins(siteNoRef, outShapeFile)

# wrap up data: create each case that does not exist yet
# (case name, site list, extra keyword arguments for DataModelWQ.new)
for _caseName, _siteLst, _newKw in (
        ('HBN', siteNoHBN, {}),
        ('HBN-30d', siteNoHBN, {'rho': 30}),
        ('HBN-5s', siteNoHBN[:5], {}),
        ('HBN-5s-30d', siteNoHBN[:5], {'rho': 30})):
    if not waterQuality.exist(_caseName):
        wqData = waterQuality.DataModelWQ.new(_caseName, _siteLst, **_newKw)

# wrap up data: make sure wqData refers to the 'HBN' case
if waterQuality.exist('HBN'):
    wqData = waterQuality.DataModelWQ('HBN')
else:
    wqData = waterQuality.DataModelWQ.new('HBN', siteNoHBN)

# NOTE(review): this chunk ends here; the body of this `if` may continue
# beyond the visible part of the file.
if 'first80-rm2' not in wqData.subset.keys():
    ind = wqData.subset['first80']