def getObs(outName, testSet, DM=None, sd='1979-01-01', ed='2020-01-01'):
    """Extract the observed targets (yT, ycT) for a trained model's subset.

    Parameters
    ----------
    outName : str
        Name of the trained-model output; used to load its master dict.
    testSet : str
        Subset name passed through to ``DM.extractData`` (e.g. 'all').
    DM : dbBasin.DataModelFull, optional
        Pre-loaded data model. When None, it is loaded from
        ``master['dataName']``.
    sd, ed : str, optional
        Start / end dates of the extraction window. Defaults keep the
        previously hard-coded full-record window.

    Returns
    -------
    tuple
        ``(yT, ycT)`` — the time-series and constant observation arrays
        (the last two entries of the tuple returned by ``extractData``).
    """
    master = loadMaster(outName)
    if DM is None:
        DM = dbBasin.DataModelFull(master['dataName'])
    varTup = (master['varX'], master['varXC'], master['varY'], master['varYC'])
    dataTup = DM.extractData(varTup, testSet, sd, ed)
    # dataTup is (x, xc, y, yc); observations are the last two entries
    yT, ycT = dataTup[2:]
    return yT, ycT
def testModel(outName, DM=None, testSet='all', ep=None, reTest=False,
              batchSize=20, sd='1979-01-01', ed='2020-01-01'):
    """Run a trained model over a subset, caching predictions to disk.

    If a cached ``testP-{testSet}-Ep{ep}.npz`` exists in the output folder
    and ``reTest`` is False, the saved predictions are reloaded instead of
    re-running the model.

    Parameters
    ----------
    outName : str
        Trained-model output name; locates the master dict, stats and weights.
    DM : dbBasin.DataModelFull, optional
        Pre-loaded data model; loaded from ``master['dataName']`` when None.
    testSet : str
        Subset name passed to ``DM.extractData``.
    ep : int, optional
        Epoch checkpoint to load; defaults to ``master['nEpoch']``.
    reTest : bool
        Force re-running the model even when a cached result exists.
    batchSize : int
        Number of sites per forward pass in ``trainBasin.testModel``.
    sd, ed : str, optional
        Extraction window; defaults keep the previously hard-coded dates.

    Returns
    -------
    tuple
        ``(yP, ycP)`` — de-normalized time-series and constant predictions.
    """
    # load master
    master = loadMaster(outName)
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testSet, ep)
    testFile = os.path.join(outFolder, testFileName)
    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        npz = np.load(testFile, allow_pickle=True)
        yP = npz['yP']
        ycP = npz['ycP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if DM is None:
            DM = dbBasin.DataModelFull(master['dataName'])
        varTup = (master['varX'], master['varXC'],
                  master['varY'], master['varYC'])
        dataTup = DM.extractData(varTup, testSet, sd, ed)
        dataTup = DM.transIn(dataTup, varTup, statTup=statTup)
        sizeLst = trainBasin.getSize(dataTup)
        # At test time, NaN targets must be kept (option 0) so predictions
        # align with the full record. Work on a copy instead of mutating the
        # loaded master dict in place.
        optNaN = list(master['optNaN'])
        if optNaN == [2, 2, 0, 0]:
            optNaN = [0, 0, 0, 0]
        dataTup = trainBasin.dealNaN(dataTup, optNaN)
        x = dataTup[0]
        xc = dataTup[1]
        ny = sizeLst[2]
        # test model - point by point
        yOut, ycOut = trainBasin.testModel(
            model, x, xc, ny, batchSize=batchSize)
        # de-normalize back to physical units
        yP = DM.transOut(yOut, statTup[2], master['varY'])
        ycP = DM.transOut(ycOut, statTup[3], master['varYC'])
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
"""Train a basin water-quality model on one dataset (script entry)."""
from hydroDL.data import dbBasin
# NOTE(review): `usgs` was referenced below (usgs.newC) but never imported,
# which raises NameError at runtime; assuming it lives in hydroDL.data —
# confirm the module path.
from hydroDL.data import usgs
from hydroDL.master import basinFull

# candidate datasets; only `dataName` below is trained by this script
dataNameLst = ['bsWN5', 'bsDN5', 'brWN5', 'brDN5']
dataName = 'bsWN5'
dm = dbBasin.DataModelFull(dataName)

# inputs: forcing variables plus observed runoff;
# targets: water-quality constituents; constant inputs: basin attributes
varX = dm.varF + ['runoff']
varY = usgs.newC
varXC = dm.varG
varYC = None
sd = '1982-01-01'
ed = '2009-12-31'

outName = '{}-B10'.format(dataName)
dictP = basinFull.wrapMaster(
    outName=outName, dataName=dataName, varX=varX, varY=varY,
    varXC=varXC, varYC=varYC, sd=sd, ed=ed,
    nEpoch=100, batchSize=[365, 100])
basinFull.trainModel(outName)
def trainModel(outName):
    """Train the model described by the master dict saved under ``outName``.

    Loads the data model, normalizes inputs (optionally borrowing stats from
    another run), builds the LSTM model and loss from the master dict, then
    trains in chunks of ``saveEpoch`` epochs, checkpointing after each chunk
    and writing the per-epoch loss history to ``loss.csv``.

    Parameters
    ----------
    outName : str
        Output name whose master dict (created by ``wrapMaster``) drives
        every training option.

    Raises
    ------
    RuntimeError
        If ``modelName`` or ``optim`` in the master dict is unrecognized.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)
    # load data
    DM = dbBasin.DataModelFull(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup = DM.extractData(
        varTup, dictP['subset'], dictP['sd'], dictP['ed'])
    if dictP['borrowStat'] is None:
        dataTup, statTup = DM.transIn(dataTup, varTup)
    else:
        # reuse normalization statistics from a previous run
        statTup = loadStat(dictP['borrowStat'])
        dataTup = DM.transIn(dataTup, varTup, statTup=statTup)
    dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)
    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
    # define loss
    lossFun = getattr(crit, dictP['crit'])()
    if dictP['crit'] == 'SigmaLoss':
        # SigmaLoss predicts a (mean, sigma) pair per target,
        # so the output width is doubled
        ny = ny * 2
        nyc = nyc * 2
    # define model
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(
            nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(
            nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        # fixed typo in the original message ('optimizor function')
        raise RuntimeError('optimizer not specified')
    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start each run with a fresh log
    if os.path.exists(logFile):
        os.remove(logFile)
    # train sEp epochs at a time, checkpointing after each chunk
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainBasin.trainModel(
            dataTup, model, lossFun, optim,
            batchSize=dictP['batchSize'], nEp=sEp, cEp=k, logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst.extend(lossEp)
    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)