from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins
import pandas as pd
import numpy as np
import os
import time

# Register training cases for the 'Silica64Mess' dataset.
# Two configurations are wrapped for every training subset:
#   opt1 - wrapMaster called without overriding varX / varY
#   opt2 - varY=None and varX = usgs.varQ + gridMET.varLst
# The resulting case names are collected in caseLst, in opt1/opt2 order
# per subset.
caseLst = list()
dataName = 'Silica64Mess'
subsetLst = ['Y8090', 'Y0010']
codeLst = ['00955']
for subset in subsetLst:
    # opt1
    saveName = '{}-{}-opt1'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName,
        trainName=subset,
        batchSize=[None, 200],
        outName=saveName)
    caseLst.append(caseName)

    # opt2
    saveName = '{}-{}-opt2'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName,
        trainName=subset,
        batchSize=[None, 200],
        varY=None,
        varX=usgs.varQ + gridMET.varLst,
        outName=saveName)
    caseLst.append(caseName)

    # Disabled configurations kept for reference.
    # saveName = '{}-{}-opt3'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varY=None, outName=saveName)
    # caseLst.append(caseName)
    # saveName = '{}-{}-opt4'.format(dataName, subset)
] # ntn variables dataName = 'sbWT' caseLst = list() wqData = waterQuality.DataModelWQ(dataName) codeLst = varNtnUsgsLst label = 'ntnSq' for code in codeLst: varX = ['00060'] + gridMET.varLst + \ [varNtnLst[varNtnUsgsLst.index(code)], 'distNTN'] varY = [code] varYC = None subsetLst = ['{}-Y{}'.format(code, x) for x in [1, 2]] # wrap up # for subset in subsetLst: subset = subsetLst[0] saveName = '{}-{}-{}-{}'.format(dataName, code, label, subset) caseName = basins.wrapMaster(dataName=dataName, trainName=subset, batchSize=[None, 100], outName=saveName, varX=varX, varY=varY, varYC=varYC) caseLst.append(caseName) cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}' for caseName in caseLst: slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)
# Earlier way of creating / reloading the 'Silica64Seq' data model,
# kept for reference.
# wqData = waterQuality.DataModelWQ('Silica64')
# siteNoLst = wqData.siteNoLst
# if not waterQuality.exist('Silica64Seq'):
#     wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)
# importlib.reload(waterQuality2)
# wqData = waterQuality2.DataModelWQ('Silica64Seq')

# Site list is taken from the existing 'Silica64' data model.
temp = waterQuality.DataModelWQ('Silica64')
siteNoLst = temp.siteNoLst
# wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)
wqData = waterQuality2.DataModelWQ('Silica64Seq')

# Subset: keep only the samples whose silica value is not NaN.
code = '00955'
ic = wqData.varQ.index(code)
# Non-NaN test is done on the last entry along the first axis of wqData.q.
indC = np.where(~np.isnan(wqData.q[-1, :, ic]))[0]
wqData.saveSubset(code, indC)

# Split the silica samples into two year ranges and save each as a subset.
# NOTE(review): indYr is applied to info.iloc[indC]; if it returns positions
# relative to that sliced frame, the saved indices are not positions in the
# full info table - confirm against waterQuality.indYr / saveSubset.
indYr1 = waterQuality.indYr(
    wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(
    wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
wqData.saveSubset('{}-Y0010'.format(code), indYr2)

# Wrap one training case on the 1979-2000 subset and submit it as a GPU job.
saveName = 'Silica64Seq-Y8090'
caseName = basins.wrapMaster(
    dataName='Silica64Seq',
    trainName='00955-Y8090',
    batchSize=[None, 200],
    varY=['00060', '00955'],
    varYC=None,
    outName=saveName)

cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=6)
# Save odd-year / even-year subsets of the current data model.
# (wqData and dataName are defined earlier in the script.)
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
wqData.saveSubset('Yodd', indYrO)
wqData.saveSubset('Yeven', indYrE)

codeLst = ['00945', '00935']
# subsetLst = ['Yodd', 'Yeven']
subsetLst = ['Yodd']

# Constant inputs passed to wrapMaster as varXC.
varXC = [
    'DRAIN_SQKM', 'SNOW_PCT_PRECIP', 'STREAMS_KM_SQ_KM', 'PCT_1ST_ORDER',
    'BFI_AVE', 'CONTACT', 'FORESTNLCD06', 'HLR_BAS_DOM_100M',
    'ELEV_MEAN_M_BASIN', 'PERMAVE', 'WTDEPAVE', 'ROCKDEPAVE', 'SLOPE_PCT',
]
# varX2: base time-series inputs; varX1: the same list followed by four
# extra variables, used for the '-ntn' cases.
varX2 = ['00060', 'pr', 'sph', 'srad', 'tmmn', 'tmmx', 'pet', 'etr']
varX1 = varX2 + ['ph', 'Conduc', 'K', 'Cl']
nEp = 200

# For each subset wrap two cases: first with varX1 ('-ntn' suffix),
# then with varX2 (no suffix).
caseLst = list()
for subset in subsetLst:
    saveName = '{}-{}-ntn'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName, trainName=subset, batchSize=[None, 50],
        nEpoch=nEp, outName=saveName,
        varXC=varXC, varX=varX1, varYC=codeLst)
    caseLst.append(caseName)

    saveName = '{}-{}'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName, trainName=subset, batchSize=[None, 50],
        nEpoch=nEp, outName=saveName,
        varXC=varXC, varX=varX2, varYC=codeLst)
    caseLst.append(caseName)

# Train every wrapped case in this process, one after another.
for caseName in caseLst:
    basins.trainModelTS(caseName)
from hydroDL.app import waterQuality
from hydroDL.master import slurm
from hydroDL.data import gageII, usgs, gridMET

# Earlier ratio-based split, kept for reference.
# wqData = waterQuality.DataModelWQ('basinAll')
# ind1 = wqData.indByRatio(0.8)
# ind2 = wqData.indByRatio(0.8, first=False)
# wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# Divide 'basinAll' into the Y8090 and Y0010 subsets by year range.
wqData = waterQuality.DataModelWQ('basinAll')
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
wqData.saveSubset('Y0010', indYr2)

# Wrap one 'opt1' case per subset.
# NOTE(review): `basins` is used below but is not imported in the visible
# part of this script - confirm it is imported elsewhere.
caseLst = list()
subsetLst = ['Y8090', 'Y0010']
for subset in subsetLst:
    saveName = 'basinAll-{}-opt1'.format(subset)
    caseName = basins.wrapMaster(
        dataName='basinAll',
        trainName=subset,
        saveEpoch=50,
        batchSize=[None, 2000],
        outName=saveName)
    caseLst.append(caseName)

# Submit each case as a GPU job running cmdTrain.py on that case name.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=48, nM=64)
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, gageII
import numpy as np
from hydroDL.master import slurm
import importlib
from hydroDL.model import rnn, crit, trainTS

# Wrap and train a single short case on the 'test' dataset using the
# 'SigmaLoss' criterion.
dataName = 'test'

# Variable groups used to assemble the model inputs / targets.
varQ = ['00060']
varP = ntn.varLst
varT = ['sinT', 'cosT']   # defined but not passed to wrapMaster below
varF = gridMET.varLst
varX = varQ + varF + varP
varXC = gageII.varLst     # defined but not passed to wrapMaster below
varY = varQ
varYC = usgs.newC

saveName = 'test'
trainSet = 'comb-A10'
outName = basins.wrapMaster(
    dataName=dataName,
    trainName=trainSet,
    batchSize=[None, 500],
    outName=saveName,
    varX=varX,
    varY=varY,
    varYC=varYC,
    crit='SigmaLoss',
    nEpoch=10,
    saveEpoch=10)

# Train in this process using the name returned by wrapMaster.
basins.trainModelTS(outName)
# Load the data model named by dataName (set earlier in the script) and
# save odd-year / even-year subsets.
wqData = waterQuality.DataModelWQ(dataName)
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
wqData.saveSubset('Yodd', indYrO)
wqData.saveSubset('Yeven', indYrE)

codeLst = usgs.varC
subsetLst = ['Yodd', 'Yeven']
# Two input sets: gridMET only, and gridMET + NTN variables + 'distNTN'.
varX1 = gridMET.varLst
varX2 = gridMET.varLst + ntn.varLst + ['distNTN']
nEp = 500

# For each subset wrap the gridMET-only case first, then the '-ntn' case.
caseLst = list()
for subset in subsetLst:
    saveName = '{}-{}'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName, trainName=subset, batchSize=[None, 200],
        nEpoch=nEp, outName=saveName, varX=varX1)
    caseLst.append(caseName)

    saveName = '{}-{}-ntn'.format(dataName, subset)
    caseName = basins.wrapMaster(
        dataName=dataName, trainName=subset, batchSize=[None, 200],
        nEpoch=nEp, outName=saveName, varX=varX2)
    caseLst.append(caseName)

# Submit each case as a GPU job running cmdTrain.py on that case name.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)
elif label == 'P_C': varX = varP varY = None elif label == 'Q_C': varX = varQ varY = None elif label == 'QT_C': varX = varQ + varT varY = None elif label == 'QTFP_C': varX = varQ + varT + varF + varP varY = None trainSet = '{}-B10'.format(code) saveName = '{}-{}-{}-{}-hs{}'.format(dataName, code, label, trainSet, hs) caseName = basins.wrapMaster(dataName=dataName, trainName=trainSet, batchSize=[None, 500], outName=saveName, varX=varX, varY=varY, varYC=varYC, hiddenSize=hs) caseLst.append(caseName) cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}' for caseName in caseLst: slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24) # basins.trainModelTS(caseName)
import pandas as pd
import numpy as np
import os
import time

# Wrap 'opt1' cases for the 'Silica64' dataset over a sweep of hidden sizes,
# for each training subset. Case names are collected in caseLst.
caseLst = list()
dataName = 'Silica64'
subsetLst = ['00955-Y8090', '00955-Y0010']
codeLst = ['00955']
for subset in subsetLst:
    for hiddenSize in [256, 128, 64, 32]:
        saveName = '{}-{}-h{}-opt1'.format(dataName, subset, hiddenSize)
        caseName = basins.wrapMaster(
            dataName=dataName,
            trainName=subset,
            hiddenSize=hiddenSize,
            batchSize=[None, 200],
            outName=saveName)
        caseLst.append(caseName)

        # Disabled configurations kept for reference.
        # saveName = '{}-{}-opt2'.format(dataName, subset)
        # caseName = basins.wrapMaster(dataName=dataName, trainName=subset, hiddenSize=hiddenSize,
        #                              batchSize=[None, 200], varY=None,
        #                              varX=usgs.varQ+gridMET.varLst, outName=saveName)
        # caseLst.append(caseName)
        # saveName = '{}-{}-opt3'.format(dataName, subset)
        # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
        #                              batchSize=[None, 200], varY=None, outName=saveName)
        # caseLst.append(caseName)
        # saveName = '{}-{}-opt4'.format(dataName, subset)
        # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
        #                              batchSize=[None, 200], varYC=None, outName=saveName)
from hydroDL.master import slurm
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins

# Wrap a single 'AgeLSTM' case on the 'Silica64' dataset (subset
# '00955-Y8090', criterion 'RmseLoss2D') and train it in this process.
dataName = 'Silica64'
subset = '00955-Y8090'
saveName = '{}-{}-AgeLSTM'.format(dataName, subset)
caseName = basins.wrapMaster(
    dataName=dataName,
    trainName=subset,
    hiddenSize=512,
    batchSize=[None, 200],
    outName=saveName,
    modelName='AgeLSTM',
    crit='RmseLoss2D')
basins.trainModelTS(caseName)
# 'eco0902-F50', 'nutr06-F50', 'nutr08-F50'] # for trainName in trainLst: # caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[ # None, 1000], outName='basinRef-{}-opt1'.format(trainName)) # slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4) # for trainName in trainLst: # caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[ # None, 1000], outName='basinRef-{}-opt2'.format(trainName), # varX=usgs.varQ+gridMET.varLst, varY=None) # slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4) trainLst = ['pQ-F50', 'pQ-rmY10', 'pQ-rmY80'] for train in trainLst: caseName = basins.wrapMaster(dataName='basinRef', trainName=train, batchSize=[None, 1000], outName='basinRef-rq-{}'.format(train), varX=usgs.varQ + ['runoff'] + gridMET.varLst, varY=None) slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24) caseName = basins.wrapMaster(dataName='basinRef', trainName=train, batchSize=[None, 1000], outName='basinRef-r-{}'.format(train), varX=usgs.varQ + ['runoff'] + gridMET.varLst, varY=None) slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24) caseName = basins.wrapMaster(dataName='basinRef', trainName=train, batchSize=[None, 1000],
2000])[0] wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y8090']), indYr1) indYr2 = waterQuality.indYr(wqData.info.iloc[ind], yrLst=[2000, 2020])[0] wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y0010']), indYr2) if 'training' in doLst: dataName = 'HBN5' codeLst = ['00618', '00955'] trainset = '00618-00955-all-Y8090' testset = '00618-00955-all-Y0010' out = 'HBN5-00618-00955-all-Y8090' wqData = waterQuality.DataModelWQ(dataName) masterName = basins.wrapMaster(dataName='HBN5', trainName=trainset, batchSize=[None, 100], outName=out, varYC=codeLst, nEpoch=100) basins.trainModelTS(masterName) # sequence testing dataName = 'HBN' outName = 'HBN-00618-00955-all-Y8090-opt2' trainset = '00618-00955-all-Y8090' testset = '00618-00955-all-Y0010' wqData = waterQuality.DataModelWQ(dataName) # point testing yP, ycP = basins.testModel(outName, testset, wqData=wqData)
varY = None elif label == 'P_C': varX = varP varY = None elif label == 'Q_C': varX = varQ varY = None elif label == 'QT_C': varX = varQ + varT varY = None elif label == 'QTFP_C': varX = varQ + varT + varF + varP varY = None trainSet = '{}-B10'.format(code) saveName = '{}-{}-{}-{}-neck'.format(dataName, code, label, trainSet) caseName = basins.wrapMaster(dataName=dataName, trainName=trainSet, batchSize=[None, 500], outName=saveName, varX=varX, varY=varY, varYC=varYC, modelName='LstmModel') caseLst.append(caseName) cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}' for caseName in caseLst: slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24) # basins.trainModelTS(caseName)