Example #1
def getObs(outName, testSet, DM=None):
    # load the master dict describing the trained output
    master = loadMaster(outName)
    # observations are extracted over the full record for now
    sd = '1979-01-01'
    ed = '2020-01-01'
    if DM is None:
        DM = dbBasin.DataModelFull(master['dataName'])
    varTup = (master['varX'], master['varXC'], master['varY'], master['varYC'])
    dataTup = DM.extractData(varTup, testSet, sd, ed)
    # return the time-series and constant target observations
    yT, ycT = dataTup[2:]
    return yT, ycT
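
A minimal call might look like the sketch below. It assumes getObs is exposed from hydroDL.master.basinFull alongside the other helpers shown here; the output name and subset are placeholders.

from hydroDL.master import basinFull

# 'bsWN5-B10' and 'all' are placeholder arguments; any trained output name
# and any subset known to the data model can be used instead
yT, ycT = basinFull.getObs('bsWN5-B10', 'all')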
Example #2
def testModel(outName,
              DM=None,
              testSet='all',
              ep=None,
              reTest=False,
              batchSize=20):
    # load master
    master = loadMaster(outName)
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testSet, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        npz = np.load(testFile, allow_pickle=True)
        yP = npz['yP']
        ycP = npz['ycP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if DM is None:
            DM = dbBasin.DataModelFull(master['dataName'])
        varTup = (master['varX'], master['varXC'], master['varY'],
                  master['varYC'])
        # test for full sequence for now
        sd = '1979-01-01'
        ed = '2020-01-01'
        dataTup = DM.extractData(varTup, testSet, sd, ed)
        dataTup = DM.transIn(dataTup, varTup, statTup=statTup)
        sizeLst = trainBasin.getSize(dataTup)
        if master['optNaN'] == [2, 2, 0, 0]:
            master['optNaN'] = [0, 0, 0, 0]
        dataTup = trainBasin.dealNaN(dataTup, master['optNaN'])
        x = dataTup[0]
        xc = dataTup[1]
        ny = sizeLst[2]
        # test model - point by point
        yOut, ycOut = trainBasin.testModel(model,
                                           x,
                                           xc,
                                           ny,
                                           batchSize=batchSize)
        yP = DM.transOut(yOut, statTup[2], master['varY'])
        ycP = DM.transOut(ycOut, statTup[3], master['varYC'])
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
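
A typical evaluation then pairs testModel with getObs from Example #1 and scores the predictions against the observations. This is only a sketch, assuming both functions live in hydroDL.master.basinFull; the output name and the metric are placeholders.

import numpy as np
from hydroDL.master import basinFull

outName = 'bsWN5-B10'  # placeholder trained output name
yP, ycP = basinFull.testModel(outName, testSet='all')
yT, ycT = basinFull.getObs(outName, 'all')
# e.g. RMSE of the first time-series target, ignoring missing observations
rmse = np.sqrt(np.nanmean((yP[..., 0] - yT[..., 0]) ** 2))
print('RMSE of first target:', rmse)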
Example #3
from hydroDL.data import dbBasin, usgs
from hydroDL.master import basinFull

# candidate data sets; pick one to train on
dataNameLst = ['bsWN5', 'bsDN5', 'brWN5', 'brDN5']

dataName = 'bsWN5'

dm = dbBasin.DataModelFull(dataName)

# time-series inputs: forcings plus runoff; time-series targets: usgs.newC
varX = dm.varF + ['runoff']
varY = usgs.newC
# constant inputs: basin attributes; no constant targets
varXC = dm.varG
varYC = None
sd = '1982-01-01'
ed = '2009-12-31'

# wrap the training options into a master dict saved under outName
outName = '{}-B10'.format(dataName)
dictP = basinFull.wrapMaster(outName=outName,
                             dataName=dataName,
                             varX=varX,
                             varY=varY,
                             varXC=varXC,
                             varYC=varYC,
                             sd=sd,
                             ed=ed,
                             nEpoch=100,
                             batchSize=[365, 100])

# train using the options saved under outName
basinFull.trainModel(outName)
Example #4
def trainModel(outName):
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    DM = dbBasin.DataModelFull(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup = DM.extractData(varTup, dictP['subset'], dictP['sd'], dictP['ed'])
    if dictP['borrowStat'] is None:
        dataTup, statTup = DM.transIn(dataTup, varTup)
    else:
        statTup = loadStat(dictP['borrowStat'])
        dataTup = DM.transIn(dataTup, varTup, statTup=statTup)
    dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
    # define loss
    lossFun = getattr(crit, dictP['crit'])()
    if dictP['crit'] == 'SigmaLoss':
        ny = ny * 2
        nyc = nyc * 2
    # define model
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(nx=nx + nxc,
                              ny=ny + nyc,
                              hiddenSize=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizer function not specified')

    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainBasin.trainModel(
            dataTup,
            model,
            lossFun,
            optim,
            batchSize=dictP['batchSize'],
            nEp=sEp,
            cEp=k,
            logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
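
After training, the loss values written to loss.csv can be inspected to check convergence. A minimal sketch, assuming nameFolder is exposed from hydroDL.master.basinFull; the output name is a placeholder.

import os
import pandas as pd
from hydroDL.master import basinFull

outFolder = basinFull.nameFolder('bsWN5-B10')  # placeholder output name
loss = pd.read_csv(os.path.join(outFolder, 'loss.csv'), header=None)[0]
print(loss.tail())  # most recent recorded training losses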