# Test two LSTM models on G200N — one trained with streamflow (Q) as an
# input (QFPRT2C) and one without (FPRT2C) — then undo the per-site
# normalization of the predicted concentrations.
import json
import os
import importlib

import numpy as np

# fix: dbBasin, usgs and np were used below but never imported
from hydroDL.data import dbBasin, usgs
from hydroDL.master import basinFull
from hydroDL.app.waterQuality import WRTDS

dataName = 'G200N'
DF = dbBasin.DataFrameBasin(dataName)
trainSet = 'rmR20'
testSet = 'pkR20'
# trainSet = 'B10'
# testSet = 'A10'

# model trained with streamflow as an input variable
label = 'QFPRT2C'
outName = '{}-{}-{}'.format(dataName, label, trainSet)
yP1, ycP1 = basinFull.testModel(outName, DF=DF, testSet=testSet, ep=500)

# model trained without streamflow
label = 'FPRT2C'
outName = '{}-{}-{}'.format(dataName, label, trainSet)
yP2, ycP2 = basinFull.testModel(outName, DF=DF, testSet=testSet, ep=500)

# deal with mean and std: predictions are per-site normalized; restore
# physical units using the per-site mean (code-M) and std (code-S)
# columns stored in the basin attribute table DF.g
codeLst = usgs.newC
# np.empty over np.ndarray: same uninitialized array, clearer intent;
# every [:, :, k] slab is fully overwritten in the loop below
yOut1 = np.empty(yP1.shape)
yOut2 = np.empty(yP2.shape)
for k, code in enumerate(codeLst):
    m = DF.g[:, DF.varG.index(code + '-M')]
    s = DF.g[:, DF.varG.index(code + '-S')]
    yOut1[:, :, k] = yP1[:, :, k] * s + m
    yOut2[:, :, k] = yP2[:, :, k] * s + m
# WRTDS
# Batch-test trained models for a list of datasets; failures are logged
# and skipped so one bad run does not abort the whole sweep.
from hydroDL.data import usgs, gageII, gridMET, ntn, GLASS, dbBasin
from hydroDL.master import slurm, basinFull

if __name__ == '__main__':
    dataNameLst = ['G200Norm', 'G400Norm']
    for dataName in dataNameLst:
        outName = dataName
        DF = dbBasin.DataFrameBasin(dataName)
        testSet = 'all'
        try:
            # reTest=True forces a fresh forward run instead of cache
            yP, ycP = basinFull.testModel(
                outName, DF=DF, testSet=testSet, ep=200, reTest=True)
            print('tested {}'.format(outName), flush=True)
        except Exception as e:
            # fix: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit and hid the failure reason;
            # also fixed the "skiped" typo in the log message
            print('skipped {}: {}'.format(outName, e), flush=True)
# Test the G400Norm LSTM, denormalize its predictions, and load
# pre-computed WRTDS predictions for comparison.
import json
import os
import importlib

import matplotlib.pyplot as plt
import numpy as np

from hydroDL import kPath, utils
# fix: dbBasin, usgs and np were used below but never imported
from hydroDL.data import dbBasin, usgs
from hydroDL.master import basinFull
from hydroDL.app.waterQuality import WRTDS
from hydroDL.post import axplot, figplot

dataName = 'G400Norm'
outName = dataName
trainSet = 'rmRT20'
testSet = 'pkRT20'
DF = dbBasin.DataFrameBasin(outName)
yP, ycP = basinFull.testModel(outName, DF=DF, testSet=testSet, ep=500)

# deal with mean and std: undo per-site normalization using the
# mean (code-M) and std (code-S) columns of the attribute table DF.g
codeLst = usgs.newC
# np.empty over np.ndarray: every slab is overwritten in the loop
yOut = np.empty(yP.shape)
for k, code in enumerate(codeLst):
    m = DF.g[:, DF.varG.index(code + '-M')]
    s = DF.g[:, DF.varG.index(code + '-S')]
    data = yP[:, :, k]
    yOut[:, :, k] = data * s + m

# WRTDS predictions were computed offline; load the saved array
# yW = WRTDS.testWRTDS(dataName, trainSet, testSet, codeLst)
dirRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-dbBasin')
fileName = '{}-{}-{}'.format(dataName, trainSet, 'all')
yW = np.load(os.path.join(dirRoot, fileName) + '.npy')
# Evaluate the Q90ref streamflow model overall and per ecoregion case.
import importlib
import os
import time

import numpy as np
import pandas as pd

from hydroDL import kPath, utils
# fix: dbBasin and basinFull were used below but never imported
from hydroDL.data import usgs, gageII, gridMET, ntn, transform, dbBasin
from hydroDL.master import basinFull

# ecoregion subset codes evaluated one at a time below
caseLst = ['0801', '0802', '0803', '0804', '0805', '0902', '0903', '0904']

dataName = 'Q90ref'
dm = dbBasin.DataModelFull(dataName)
outName = '{}-B10'.format(dataName)
yP, ycP = basinFull.testModel(
    outName, DM=dm, batchSize=20, testSet='all')
yO, ycO = basinFull.getObs(outName, 'all', DM=dm)

# overall skill on the post-2010 testing period; indT is the index of
# 2010-01-01 in the model's time axis
indT = np.where(dm.t == np.datetime64('2010-01-01'))[0][0]
nash0 = utils.stat.calNash(yP[indT:, :, 0], yO[indT:, :, 0])
rmse0 = utils.stat.calRmse(yP[indT:, :, 0], yO[indT:, :, 0])
corr0 = utils.stat.calCorr(yP[indT:, :, 0], yO[indT:, :, 0])
bias0 = utils.stat.calBias(yP[indT:, :, 0], yO[indT:, :, 0])

# per-ecoregion statistics are accumulated here (loop continues beyond
# this chunk)
nashLst = list()
rmseLst = list()
corrLst = list()
biasLst = list()
for case in caseLst:
    testSet = 'EcoB' + case
# Interactive dev chunk: reload project modules, re-test the sbY30N5
# model over the full record, and compute pre-/post-2010 skill scores.
# NOTE(review): relies on names imported earlier in the session/file
# (importlib, dbBasin, basinFull, np, utils) — confirm when assembling.
importlib.reload(dbBasin.dataModel)
importlib.reload(dbBasin)
importlib.reload(basinFull)
dataName = 'sbY30N5'
dm = dbBasin.DataModelFull(dataName)
outName = '{}-B10'.format(dataName)
master = basinFull.loadMaster(outName)
varY = master['varY']
testSet = 'all'
sd = '1979-01-01'
ed = '2020-01-01'
# reTest=True forces a fresh forward run instead of loading cached output
yP, ycP = basinFull.testModel(
    outName, DM=dm, batchSize=20, testSet=testSet, reTest=True)
yO, ycO = basinFull.getObs(outName, testSet, DM=dm)
# index of 2010-01-01 in the time axis: before = training period,
# after = testing period
indT = np.where(dm.t == np.datetime64('2010-01-01'))[0][0]
importlib.reload(utils.stat)
ic = 0  # first (only inspected) target variable
nash1 = utils.stat.calNash(yP[:indT, :, ic], yO[:indT, :, ic])
nash2 = utils.stat.calNash(yP[indT:, :, ic], yO[indT:, :, ic])
rmse1 = utils.stat.calRmse(yP[:indT, :, ic], yO[:indT, :, ic])
rmse2 = utils.stat.calRmse(yP[indT:, :, ic], yO[indT:, :, ic])
corr1 = utils.stat.calCorr(yP[:indT, :, ic], yO[:indT, :, ic])
corr2 = utils.stat.calCorr(yP[indT:, :, ic], yO[indT:, :, ic])
# plot box
# NOTE(review): this chunk opens mid-call — the keyword arguments below
# are the tail of a basinFull.wrapMaster(outName=..., ...) invocation
# whose opening line is outside this view; it also ends on a `for`
# header whose body lies beyond the chunk.
    dataName=dataName, trainSet=trainSet,
    varX=varX, varY=varY, varXC=varXC, varYC=varYC,
    nEpoch=100, batchSize=[rho, 200], nIterEp=20,
    mtdX=mtdX, mtdY=mtdY, mtdXC=mtdXC, mtdYC=mtdYC)
basinFull.trainModel(outName)
# test over the whole record; reTest=True bypasses any cached prediction
yP, ycP = basinFull.testModel(outName, DF=DF, testSet='all', reTest=True)
yO = DF.extractT(codeSel)
indT1, indT2, indS, mask = DF.readSubset(testSet)
# broadcast the subset mask along the variable axis so it matches yP
mask = np.repeat(mask, len(codeSel), axis=2)
# human-readable "code shortName" labels for plotting
labelLst = list()
for ic, code in enumerate(codeSel):
    shortName = usgs.codePdf.loc[code]['shortName']
    temp = '{} {}'.format(code, shortName)
    labelLst.append(temp)
# deal with mean and std (denormalization loop continues past this view)
yOut = np.ndarray(yP.shape)
for k, code in enumerate(codeSel):
rho = 365 corrMat = np.ndarray([ns, nc, 2]) for k, label in enumerate(labelLst): outName = '{}-{}-t{}-B10'.format(dataName, label, rho) dm = dbBasin.DataModelFull(dataName) master = basinFull.loadMaster(outName) varY = master['varY'] testSet = 'all' sd = '1982-01-01' ed = '2018-12-31' ns = len(dm.siteNoLst) nc = len(varY) yP, ycP = basinFull.testModel(outName, DM=dm, batchSize=20, testSet=testSet, ep=100) yO, ycO = basinFull.getObs(outName, testSet, DM=dm) for ic in range(nc): indT = np.where(dm.t == np.datetime64('2010-01-01'))[0][0] corr1 = utils.stat.calCorr(yP[:indT, :, ic], yO[:indT, :, ic]) corr2 = utils.stat.calCorr(yP[indT:, :, ic], yO[indT:, :, ic]) corrMat[:, ic, k] = corr2 dataBox = list() for ic in range(nc): temp = [corrMat[:, ic, 0], corrMat[:, ic, 1]] dataBox.append(temp) labLst1 = [
freq=freq) yrIn = np.arange(1985, 2020, 5).tolist() tt = dbBasin.func.pickByYear(DF.t, yrIn, pick=False) DF.createSubset('B10', ed='2009-12-31') DF.createSubset('B10', sd='2010-01-01') codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955'] label = 'FPR2QC' varX = dbBasin.label2var(label.split('2')[0]) varY = codeSel varXC = gageII.varLst varYC = None sd = '1982-01-01' ed = '2009-12-31' rho = 365 outName = '{}-{}-t{}-B10'.format(dataName, label, rho) dictP = basinFull.wrapMaster(outName=outName, dataName=dataName, trainSet='B10', varX=varX, varY=varY, varXC=varXC, varYC=varYC, nEpoch=100, batchSize=[rho, 200], nIterEp=20) basinFull.trainModel(outName) yP, ycP = basinFull.testModel(outName, DF=DF, testSet='A10')
# Sweep saved checkpoints (every 100 epochs) and record per-code
# testing-set correlation on the selected sites.
# NOTE(review): relies on names defined earlier in the file (dbBasin,
# usgs, basinFull, utils, kPath, np, json, os, dataName, trainSet,
# testSet, outName) — confirm when assembling.
DF = dbBasin.DataFrameBasin(dataName)
codeLst = usgs.newC
siteNoLst = DF.siteNoLst
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varY=codeLst)
d2 = dbBasin.DataModelBasin(DF, subset=testSet, varY=codeLst)

# selected sites per code, from the inventory's site-selection dict
dictSiteName = 'dict{}.json'.format(dataName[:4])
dirSel = os.path.join(kPath.dirData, 'USGS', 'inventory', 'siteSel')
with open(os.path.join(dirSel, dictSiteName)) as f:
    dictSite = json.load(f)

epLst = list(range(100, 800, 100))
corrMat = np.full([len(siteNoLst), len(codeLst), len(epLst)], np.nan)
for iEp, ep in enumerate(epLst):
    yP, ycP = basinFull.testModel(outName, DF=DF, testSet='all', ep=ep)
    # deal with mean and std: undo per-site normalization with the
    # mean (code-M) and std (code-S) attribute columns
    # fix: dropped a redundant `codeLst = usgs.newC` re-assignment here;
    # np.empty over np.ndarray (every slab is overwritten below)
    yOut = np.empty(yP.shape)
    for indC, code in enumerate(codeLst):
        m = DF.g[:, DF.varG.index(code + '-M')]
        s = DF.g[:, DF.varG.index(code + '-S')]
        yOut[:, :, indC] = yP[:, :, indC] * s + m
    for indC, code in enumerate(codeLst):
        # restrict to the selected sites for this code that exist in DF
        indS = [
            siteNoLst.index(siteNo)
            for siteNo in dictSite[code] if siteNo in siteNoLst
        ]
        corr = utils.stat.calCorr(yOut[:, indS, indC], d2.Y[:, indS, indC])
        corrMat[indS, indC, iEp] = corr