Пример #1
0
from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins

import pandas as pd
import numpy as np
import os
import time

# Experiment setup: one dataset, two training subsets.
dataName = 'Silica64Mess'
subsetLst = ['Y8090', 'Y0010']
codeLst = ['00955']  # not referenced in the visible part of this script
caseLst = []
for subset in subsetLst:
    # Two configurations are wrapped per subset, in this order:
    #   opt1 - no extra keyword arguments
    #   opt2 - varY=None and varX = usgs.varQ + gridMET.varLst
    optLst = (
        ('{}-{}-opt1', {}),
        ('{}-{}-opt2', {'varY': None, 'varX': usgs.varQ + gridMET.varLst}),
    )
    for nameFmt, optArg in optLst:
        saveName = nameFmt.format(dataName, subset)
        # outName stays last so the keyword order matches the explicit calls
        caseName = basins.wrapMaster(dataName=dataName,
                                     trainName=subset,
                                     batchSize=[None, 200],
                                     **optArg,
                                     outName=saveName)
        caseLst.append(caseName)
    # saveName = '{}-{}-opt3'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varY=None, outName=saveName)
    # caseLst.append(caseName)
    # saveName = '{}-{}-opt4'.format(dataName, subset)
Пример #2
0
]

# ntn variables
dataName = 'sbWT'
label = 'ntnSq'
caseLst = []
wqData = waterQuality.DataModelWQ(dataName)
codeLst = varNtnUsgsLst
for code in codeLst:
    # NTN variable stored at the same list position as this USGS code
    ntnVar = varNtnLst[varNtnUsgsLst.index(code)]
    varX = ['00060'] + gridMET.varLst + [ntnVar, 'distNTN']
    varY = [code]
    varYC = None
    subsetLst = ['{}-Y{}'.format(code, k) for k in (1, 2)]
    # wrap up: only the first subset is used; the loop over subsets is disabled
    # for subset in subsetLst:
    subset = subsetLst[0]
    saveName = '{}-{}-{}-{}'.format(dataName, code, label, subset)
    caseName = basins.wrapMaster(
        dataName=dataName, trainName=subset, batchSize=[None, 100],
        outName=saveName, varX=varX, varY=varY, varYC=varYC)
    caseLst.append(caseName)

# Submit one GPU job per wrapped case; the case name fills the -M slot.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    jobCmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, jobCmd, nH=24)
Пример #3
0
# wqData = waterQuality.DataModelWQ('Silica64')
# siteNoLst = wqData.siteNoLst
# if not waterQuality.exist('Silica64Seq'):
#     wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)
# importlib.reload(waterQuality2)
# wqData = waterQuality2.DataModelWQ('Silica64Seq')
# Read the site list from the existing 'Silica64' data model. In the visible
# code siteNoLst is only referenced by the commented-out .new(...) call.
temp = waterQuality.DataModelWQ('Silica64')
siteNoLst = temp.siteNoLst
# wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)

wqData = waterQuality2.DataModelWQ('Silica64Seq')

# subset that only has silica: keep entries whose q[-1, :, ic] is not NaN,
# where ic is the position of the silica code in wqData.varQ
code = '00955'
ic = wqData.varQ.index(code)
notNan = ~np.isnan(wqData.q[-1, :, ic])
indC = np.where(notNan)[0]
wqData.saveSubset(code, indC)

# Split those entries with waterQuality.indYr and save each part as a subset.
indYr1 = waterQuality.indYr(
    wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(
    wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
wqData.saveSubset('{}-Y0010'.format(code), indYr2)

# Wrap a single training case on the '00955-Y8090' subset.
saveName = 'Silica64Seq-Y8090'
caseName = basins.wrapMaster(dataName='Silica64Seq',
                             trainName='00955-Y8090',
                             batchSize=[None, 200],
                             varY=['00060', '00955'],
                             varYC=None,
                             outName=saveName)


# Submit the case as one GPU job; the case name fills the -M slot.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
jobCmd = cmdP.format(caseName)
slurm.submitJobGPU(caseName, jobCmd, nH=6)
Пример #4
0
# Save the two index sets returned by waterQuality.indYrOddEven as the
# 'Yodd' and 'Yeven' subsets (wqData and dataName are defined outside this
# excerpt).
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
for subsetName, subsetInd in (('Yodd', indYrO), ('Yeven', indYrE)):
    wqData.saveSubset(subsetName, subsetInd)
codeLst = ['00945', '00935']
# subsetLst = ['Yodd', 'Yeven']
subsetLst = ['Yodd']

# Inputs passed as varXC
varXC = [
    'DRAIN_SQKM', 'SNOW_PCT_PRECIP', 'STREAMS_KM_SQ_KM', 'PCT_1ST_ORDER',
    'BFI_AVE', 'CONTACT', 'FORESTNLCD06', 'HLR_BAS_DOM_100M',
    'ELEV_MEAN_M_BASIN', 'PERMAVE', 'WTDEPAVE', 'ROCKDEPAVE', 'SLOPE_PCT',
]
# varX1 is varX2 plus the four extra entries 'ph', 'Conduc', 'K', 'Cl'
varX1 = [
    '00060', 'pr', 'sph', 'srad', 'tmmn', 'tmmx',
    'pet', 'etr', 'ph', 'Conduc', 'K', 'Cl',
]
varX2 = [
    '00060', 'pr', 'sph', 'srad', 'tmmn', 'tmmx',
    'pet', 'etr',
]
nEp = 200
caseLst = []
for subset in subsetLst:
    # first the '-ntn' case (varX1), then the plain case (varX2)
    for nameFmt, varXOpt in (('{}-{}-ntn', varX1), ('{}-{}', varX2)):
        saveName = nameFmt.format(dataName, subset)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, batchSize=[None, 50],
            nEpoch=nEp, outName=saveName, varXC=varXC, varX=varXOpt,
            varYC=codeLst)
        caseLst.append(caseName)

# Train every wrapped case in this process, one after another.
for caseName in caseLst:
    basins.trainModelTS(caseName)
Пример #5
0
from hydroDL.app import waterQuality
from hydroDL.master import slurm
from hydroDL.data import gageII, usgs, gridMET

# wqData = waterQuality.DataModelWQ('basinAll')
# ind1 = wqData.indByRatio(0.8)
# ind2 = wqData.indByRatio(0.8, first=False)
# wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# divide into 8090 and 0010
wqData = waterQuality.DataModelWQ('basinAll')
indYr1 = waterQuality.indYr(
    wqData.info, yrLst=[1979, 2000])[0]
wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(
    wqData.info, yrLst=[2000, 2020])[0]
wqData.saveSubset('Y0010', indYr2)

# Wrap one 'opt1' case per subset.
# NOTE(review): `basins` is not among the imports visible at the top of this
# excerpt - confirm it is imported elsewhere in the file.
subsetLst = ['Y8090', 'Y0010']
caseLst = []
for subset in subsetLst:
    saveName = 'basinAll-{}-opt1'.format(subset)
    caseName = basins.wrapMaster(
        dataName='basinAll', trainName=subset, saveEpoch=50,
        batchSize=[None, 2000], outName=saveName)
    caseLst.append(caseName)

# Submit one GPU job per wrapped case; the case name fills the -M slot.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    jobCmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, jobCmd, nH=48, nM=64)
Пример #6
0
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, gageII
import numpy as np
from hydroDL.master import slurm
import importlib
from hydroDL.model import rnn, crit, trainTS

dataName = 'test'

# Building blocks for the variable lists.
varQ = ['00060']
varF = gridMET.varLst
varP = ntn.varLst
varT = ['sinT', 'cosT']  # defined but not passed to wrapMaster below

# Variable lists for the case. varXC is defined but not passed to
# wrapMaster below.
varX = varQ + varF + varP
varXC = gageII.varLst
varY = varQ
varYC = usgs.newC

# Wrap a short run (nEpoch=10, saveEpoch=10) with the 'SigmaLoss' criterion
# and train it in this process.
saveName = 'test'
trainSet = 'comb-A10'
outName = basins.wrapMaster(dataName=dataName,
                            trainName=trainSet,
                            batchSize=[None, 500],
                            outName=saveName,
                            varX=varX,
                            varY=varY,
                            varYC=varYC,
                            crit='SigmaLoss',
                            nEpoch=10,
                            saveEpoch=10)

basins.trainModelTS(outName)
Пример #7
0
# Save the two index sets returned by waterQuality.indYrOddEven as the
# 'Yodd' and 'Yeven' subsets (dataName is defined outside this excerpt).
wqData = waterQuality.DataModelWQ(dataName)
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
for subsetName, subsetInd in (('Yodd', indYrO), ('Yeven', indYrE)):
    wqData.saveSubset(subsetName, subsetInd)

codeLst = usgs.varC
subsetLst = ['Yodd', 'Yeven']
# varX2 extends varX1 with ntn.varLst and 'distNTN'
varX1 = gridMET.varLst
varX2 = gridMET.varLst + ntn.varLst + ['distNTN']
nEp = 500
caseLst = []
for subset in subsetLst:
    # first the plain case (varX1), then the '-ntn' case (varX2)
    for nameFmt, varXOpt in (('{}-{}', varX1), ('{}-{}-ntn', varX2)):
        saveName = nameFmt.format(dataName, subset)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, batchSize=[None, 200],
            nEpoch=nEp, outName=saveName, varX=varXOpt)
        caseLst.append(caseName)

# Submit one GPU job per wrapped case; the case name fills the -M slot.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    jobCmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, jobCmd, nH=24)
Пример #8
0
            elif label == 'P_C':
                varX = varP
                varY = None
            elif label == 'Q_C':
                varX = varQ
                varY = None
            elif label == 'QT_C':
                varX = varQ + varT
                varY = None
            elif label == 'QTFP_C':
                varX = varQ + varT + varF + varP
                varY = None
            trainSet = '{}-B10'.format(code)
            saveName = '{}-{}-{}-{}-hs{}'.format(dataName, code, label,
                                                 trainSet, hs)
            caseName = basins.wrapMaster(dataName=dataName,
                                         trainName=trainSet,
                                         batchSize=[None, 500],
                                         outName=saveName,
                                         varX=varX,
                                         varY=varY,
                                         varYC=varYC,
                                         hiddenSize=hs)
            caseLst.append(caseName)

# Submit one GPU job per entry of caseLst; the case name fills the -M slot
# of the command template.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    jobCmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, jobCmd, nH=24)

# basins.trainModelTS(caseName)
Пример #9
0
import pandas as pd
import numpy as np
import os
import time

dataName = 'Silica64'
subsetLst = ['00955-Y8090', '00955-Y0010']
codeLst = ['00955']  # not referenced in the visible part of this script
caseLst = []
# Hidden-size sweep: every subset is wrapped once per hidden size, and the
# size is embedded in the output name as 'h<size>'.
for subset in subsetLst:
    for hiddenSize in (256, 128, 64, 32):
        saveName = '{}-{}-h{}-opt1'.format(dataName, subset, hiddenSize)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, hiddenSize=hiddenSize,
            batchSize=[None, 200], outName=saveName)
        caseLst.append(caseName)
        # saveName = '{}-{}-opt2'.format(dataName, subset)
        # caseName = basins.wrapMaster(dataName=dataName, trainName=subset, hiddenSize=hiddenSize,
        #                              batchSize=[None, 200], varY=None,
        #                              varX=usgs.varQ+gridMET.varLst, outName=saveName)
        # caseLst.append(caseName)
    # saveName = '{}-{}-opt3'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varY=None, outName=saveName)
    # caseLst.append(caseName)
    # saveName = '{}-{}-opt4'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varYC=None, outName=saveName)
Пример #10
0
from hydroDL.master import slurm
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins

# Wrap a single 'AgeLSTM' case on the '00955-Y8090' subset of 'Silica64'
# and train it in this process.
dataName = 'Silica64'
subset = '00955-Y8090'
saveName = '{}-{}-AgeLSTM'.format(dataName, subset)
caseName = basins.wrapMaster(
    dataName=dataName, trainName=subset, hiddenSize=512,
    batchSize=[None, 200], outName=saveName,
    modelName='AgeLSTM', crit='RmseLoss2D')
basins.trainModelTS(caseName)
Пример #11
0
#             'eco0902-F50', 'nutr06-F50', 'nutr08-F50']
# for trainName in trainLst:
#     caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[
#                                  None, 1000], outName='basinRef-{}-opt1'.format(trainName))
#     slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4)
# for trainName in trainLst:
#     caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[
#                                  None, 1000], outName='basinRef-{}-opt2'.format(trainName),
#                                  varX=usgs.varQ+gridMET.varLst, varY=None)
#     slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4)

# Wrap and submit cases on 'basinRef' for each training subset in trainLst.
# cmdP is defined outside this excerpt.
trainLst = ['pQ-F50', 'pQ-rmY10', 'pQ-rmY80']
for train in trainLst:
    # 'rq' case: varX = usgs.varQ + ['runoff'] + gridMET.varLst, varY=None
    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
                                 outName='basinRef-rq-{}'.format(train),
                                 varX=usgs.varQ + ['runoff'] + gridMET.varLst,
                                 varY=None)
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)

    # 'r' case.
    # NOTE(review): varX here is identical to the 'rq' case above, so the two
    # cases differ only in outName - confirm whether usgs.varQ was meant to
    # be left out of this one.
    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
                                 outName='basinRef-r-{}'.format(train),
                                 varX=usgs.varQ + ['runoff'] + gridMET.varLst,
                                 varY=None)
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)

    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
Пример #12
0
                                                                  2000])[0]
        wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y8090']), indYr1)
        indYr2 = waterQuality.indYr(wqData.info.iloc[ind], yrLst=[2000,
                                                                  2020])[0]
        wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y0010']), indYr2)

if 'training' in doLst:
    # Training stage: wrap a case for dataset 'HBN5' on the Y8090 subset of
    # the two codes and train it in this process.
    dataName = 'HBN5'
    codeLst = ['00618', '00955']
    trainset = '00618-00955-all-Y8090'
    testset = '00618-00955-all-Y0010'  # not used inside this branch
    out = 'HBN5-00618-00955-all-Y8090'
    wqData = waterQuality.DataModelWQ(dataName)
    masterName = basins.wrapMaster(
        dataName=dataName, trainName=trainset, batchSize=[None, 100],
        outName=out, varYC=codeLst, nEpoch=100)
    basins.trainModelTS(masterName)

# sequence testing
# NOTE(review): this section loads dataset 'HBN' and the model named
# 'HBN-00618-00955-all-Y8090-opt2', whereas the training branch earlier in
# the script wraps dataset 'HBN5' with outName 'HBN5-00618-00955-all-Y8090' -
# confirm the tested model is the intended one.
trainset = '00618-00955-all-Y8090'
testset = '00618-00955-all-Y0010'
dataName = 'HBN'
outName = 'HBN-00618-00955-all-Y8090-opt2'

wqData = waterQuality.DataModelWQ(dataName)

# point testing: run the saved model on the test subset
predLst = basins.testModel(outName, testset, wqData=wqData)
yP, ycP = predLst
Пример #13
0
            varY = None
        elif label == 'P_C':
            varX = varP
            varY = None
        elif label == 'Q_C':
            varX = varQ
            varY = None
        elif label == 'QT_C':
            varX = varQ + varT
            varY = None
        elif label == 'QTFP_C':
            varX = varQ + varT + varF + varP
            varY = None
        trainSet = '{}-B10'.format(code)
        saveName = '{}-{}-{}-{}-neck'.format(dataName, code, label, trainSet)
        caseName = basins.wrapMaster(dataName=dataName,
                                     trainName=trainSet,
                                     batchSize=[None, 500],
                                     outName=saveName,
                                     varX=varX,
                                     varY=varY,
                                     varYC=varYC,
                                     modelName='LstmModel')
        caseLst.append(caseName)

# Submit one GPU job per entry of caseLst; the case name fills the -M slot
# of the command template.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    jobCmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, jobCmd, nH=24)

# basins.trainModelTS(caseName)