"""Train a closed-loop (DA) SMAP LSTM on 3 years of CONUSv4f1, then test on CONUS."""
from hydroDL import pathSMAP, master, utils
from hydroDL.master import default
from hydroDL.post import plot, stat
import os
import matplotlib.pyplot as plt
import numpy as np
import torch

# train: three water-years of CONUSv4f1 with 1-day-lagged obs fed back (daObs=1)
optData = default.update(
    default.optDataSMAP,
    rootDB=pathSMAP['DB_L3_NA'],
    subset='CONUSv4f1',
    tRange=[20150402, 20180401],
    daObs=1)
optModel = default.optLstmClose
optLoss = default.optLossRMSE
optTrain = default.update(default.optTrainSMAP, nEpoch=500)
out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUS_DA_3yr')
masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)
master.runTrain(masterDict, cudaID=2, screen='DA')

# test: evaluate the trained model over the full CONUS subset, same period
subset = 'CONUS'
tRange = [20150402, 20180401]
# out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_DA2015')
out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUS_DA_3yr')
df, yf, obs = master.test(out, tRange=tRange, subset=subset, batchSize=100)

# /home/kxf227/work/GitHUB/hydroDL-dev/app/closeLoop/fullCONUS.py
) # initialize three camels module-scope variables in camels.py: dirDB, gageDict, statDict rootOut = os.path.join( os.path.sep, "data", "rnnStreamflow" ) # Root directory to save training results: /data/rnnStreamflow rootOut = "./output/streamflow/" # Root directory to save training results: /data/rnnStreamflow # Define all the configurations into dictionary variables # three purposes using these dictionaries. 1. saved as configuration logging file. 2. for future testing. 3. can also # be used to directly train the model when interfaceOpt == 0 # define dataset # default module stores default configurations, using update to change the config optData = default.optDataCamels optData = default.update(optData, varT=camels.forcingLst, varC=camels.attrLstSel, tRange=Ttrain) # Update the training period if (interfaceOpt == 1) and (2 not in Action): # load training data explicitly for the interpretable interface. Notice: if you want to apply our codes to your own # dataset, here is the place you can replace data. # read data from original CAMELS dataset # df: CAMELS dataframe; x: forcings[nb,nt,nx]; y: streamflow obs[nb,nt,ny]; c:attributes[nb,nc] # nb: number of basins, nt: number of time steps (in Ttrain), nx: number of time-dependent forcing variables # ny: number of target variables, nc: number of constant attributes df = camels.DataframeCamels(subset=optData["subset"], tRange=optData["tRange"]) x = df.getDataTs(varLst=optData["varT"], doNorm=False, rmNan=False) y = df.getDataObs(doNorm=False, rmNan=False, basinnorm=False) # transform discharge from ft3/s to mm/day and then divided by mean precip to be dimensionless. # output = discharge/(area*mean_precip)
# optData = default.update( # default.optDataSMAP, # rootDB=pathSMAP['DB_L3_NA'], # subset='CONUSv2f1', # tRange=tLst[k]) # optModel = default.optLstm # optLoss = default.optLossRMSE # optTrain = default.update(default.optTrainSMAP, nEpoch=300) # out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_LSTM'+yrLst[k]) # masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) # master.runTrain(masterDict, cudaID=k % 3, screen='LSTM' + yrLst[k]) # k=0 optData = default.update(default.optDataSMAP, rootDB=pathSMAP['DB_L3_NA'], subset='CONUSv2f1', tRange=tLst[k]) optModel = default.update(default.optLstmClose, name='hydroDL.model.rnn.AnnModel') optLoss = default.optLossRMSE optTrain = default.update(default.optTrainSMAP, nEpoch=300) out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_NN' + yrLst[k]) masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) master.runTrain(masterDict, cudaID=k % 3, screen='NN' + yrLst[k]) # master.train(masterDict) optData = default.update(default.optDataSMAP, rootDB=pathSMAP['DB_L3_NA'], subset='CONUSv2f1', tRange=tLst[k], daObs=1)
if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--rid', dest='regionId', type=int) args = parser.parse_args() regionId = args.regionId # k = [7, 8, 13] regionId = 7 for k in range(len(subsetLst)): kc = regionId - 1 if k != kc: outName = 'ecoRegion{:02d}{:02d}_v2f1'.format(regionId, k + 1) + '_Forcing' varLst = dbCsv.varForcing optData = default.update(default.optDataSMAP, rootDB=pathSMAP['DB_L3_NA'], tRange=[20150401, 20160401], varT=varLst) optData = default.forceUpdate(optData, subset=[subsetLst[kc], subsetLst[k]]) optModel = default.optLstm optLoss = default.optLossRMSE optTrain = default.optTrainSMAP out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegion', outName) masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain) train(masterDict) ''' source /home/kxf227/anaconda3/bin/activate conda activate pytorch CUDA_VISIBLE_DEVICES=0 python /home/kxf227/work/GitHUB/hydroDL-dev/app/region/trainEcoComb.py --rid 7 CUDA_VISIBLE_DEVICES=1 python /home/kxf227/work/GitHUB/hydroDL-dev/app/region/trainEcoComb.py --rid 8
cid = 0 # Hyperparameters EPOCH = 200 BATCH_SIZE = 100 RHO = 365 HIDDENSIZE = 256 # Ttrain=[19900101, 19950101] # define directory to save results exp_name = 'parameter_optim' exp_disp = 'change_basinnorm' # train default model optData = default.optDataCamels optModel = default.update(default.optLstm, hiddenSize=HIDDENSIZE) optLoss = default.optLoss optTrain = default.update(default.optTrainCamels, miniBatch=[BATCH_SIZE, RHO], nEpoch=EPOCH) save_path = exp_name + '/' + exp_disp + \ '/epochs{}_batch{}_rho{}_hiddensize{}'.format(optTrain['nEpoch'],optTrain['miniBatch'][0], optTrain['miniBatch'][1],optModel['hiddenSize']) out = os.path.join(pathCamels['Out'], save_path, 'All-90-95') masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) # master.runTrain(masterDict, cudaID=cid % 3, screen='test') cid = cid + 1 # # train DA model # nDayLst = [25, 27, 29, 31] # for nDay in nDayLst:
import torch dLst = [1, 2, 3, 5, 15, 30] doLst = list() # doLst.append('train') # doLst.append('test') doLst.append('post') saveDir = os.path.join(pathSMAP['dirResult'], 'DA') # training if 'train' in doLst: cid = 0 for nd in dLst: optData = default.update( default.optDataSMAP, rootDB=pathSMAP['DB_L3_NA'], subset='CONUSv2f1', tRange=[20150501, 20160501], daObs=nd) optModel = default.optLstmClose optLoss = default.optLossRMSE optTrain = default.update(default.optTrainSMAP, nEpoch=300) out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_d' + str(nd)) masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) master.runTrain(masterDict, cudaID=cid % 3, screen='d' + str(nd)) # master.train(masterDict) cid = cid + 1 # vanila LSTM optData = default.update( default.optDataSMAP,
"""Train a CONUSv4f1 SMAP LSTM, falling back to the CPU model without CUDA."""
from hydroDL import pathSMAP
from hydroDL.master import default, wrapMaster, train
import os
import torch

# All paths are resolved relative to this script's directory.
cDir = os.path.dirname(os.path.abspath(__file__))

# define training options
rootDB = os.path.join(cDir, 'data')
optData = default.update(
    default.optDataSMAP,
    rootDB=rootDB,
    subset='CONUSv4f1',
    tRange=[20150401, 20160401])
# GPU present -> stock LSTM; otherwise swap in the pure-CPU implementation.
optModel = (default.optLstm if torch.cuda.is_available()
            else default.update(default.optLstm,
                                name='hydroDL.model.rnn.CpuLstmModel'))
optLoss = default.optLossRMSE
optTrain = default.update(default.optTrainSMAP, nEpoch=100)
out = os.path.join(cDir, 'output', 'CONUSv4f1')
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)

# train
train(masterDict)
"""Quick train/test cycle for an LSTM with the sigma (uncertainty) loss."""
import os

from hydroDL.master import default
from hydroDL.master.master import wrapMaster, train, test

# Paths are relative to this script so the example is self-contained.
cDir = os.path.dirname(os.path.abspath(__file__))

# define training options: local database, CONUSv4f1 subset, one training year
dataRoot = os.path.join(cDir, "data")
optData = default.update(
    default.optDataSMAP,
    rootDB=dataRoot,
    subset="CONUSv4f1",
    tRange=[20150401, 20160401],
)
optModel = default.optLstm
optLoss = default.optLossSigma  # sigma loss: model outputs mean and spread
optTrain = default.update(default.optTrainSMAP, nEpoch=5, saveEpoch=5)
out = os.path.join(cDir, "output", "CONUSv4f1_sigma")
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)

# train
train(masterDict)

# test on the following year using the epoch-5 checkpoint
pred = test(out, tRange=[20160401, 20170401], subset="CONUSv4f1", epoch=5)
from hydroDL import pathCamels, master, utils from hydroDL.master import default from hydroDL.post import plot, stat import matplotlib.pyplot as plt from hydroDL.data import camels import numpy as np import os import scipy.stats as stats import pandas as pd gageinfo = camels.gageDict gagelat = gageinfo['lat'] gagelon = gageinfo['lon'] plotattri = ['slope_mean', 'soil_depth_statsgo', 'aridity', 'frac_snow', 'p_seasonality','baseflow_index'] optData = default.update(default.optDataCamels, tRange=[19900101, 20000101]) df = camels.DataframeCamels( subset=optData['subset'], tRange=optData['tRange']) forcing = df.getDataTs( varLst=optData['varT'], doNorm=False, rmNan=False) obs = df.getDataObs( doNorm=False, rmNan=False) attributes = df.getDataConst( varLst=plotattri, doNorm=False, rmNan=False) def auto_corr(x, lag): x1 = x[0:-lag] x2 = x[lag:]
from hydroDL import pathCamels, master, utils
from hydroDL.master import default
from hydroDL.post import plot, stat
import matplotlib.pyplot as plt
from hydroDL.data import camels
import numpy as np
import os
from sklearn.linear_model import LinearRegression
import json

# Baseline: per-basin linear autoregression using nDay-lagged observations
# (daObs) as predictors, compared against the LSTM elsewhere in this project.
# Hyperparameters
nDay = 1
savepath = pathCamels['Out'] + '/comparison/Autoreg'

# train default model — load training data with NaNs removed from x and y
optData = default.update(default.optDataCamels, daObs=nDay, rmNan=[True, True])
df, x, y, c = master.master.loadData(optData)
# Test period: 1995-01-01 .. 2000-01-01.
tRange = [19950101, 20000101]
opttestData = default.update(default.optDataCamels, daObs=nDay,
                             tRange=[19950101, 20000101])
dftest, xt, yt, c = master.master.loadData(opttestData)
# NOTE(review): the indexing below assumes x/y/xt/yt are [nGage, nTime, nVar]
# arrays — confirm against loadData's return contract.
ngage = x.shape[0]
daylen = xt.shape[1]
# Pre-allocate predictions with NaN; presumably filled per basin below.
Pred = np.full(yt.shape, np.nan)
# Fit one linear model per basin, then predict over the test period.
for ii in range(ngage):
    xdata = x[ii, :, :]
    ydata = y[ii, :, :]
    regmodel = LinearRegression().fit(xdata, ydata)
    xtest = xt[ii, :, :]
    ypred = regmodel.predict(xtest)
    # NOTE(review): loop body continues past this chunk — ypred has not yet
    # been written into Pred at this point.
'no_dam_forcing_60%_days118sites.feather') # forcing_data = [] #pd.read_feather(forcing_path) attr_path = os.path.join(os.path.sep, rootDatabase, 'Forcing', 'attr_new', 'no_dam_attr_temp60%_days118sites.feather') attr_data = [] #pd.read_feather(attr_path) camels.initcamels( forcing_data, attr_data, TempTarget, rootDatabase ) # initialize three camels module-scope variables in camels.py: dirDB, gageDict, statDict # Define all the configurations into dictionary variables # three purposes using these dictionaries. 1. saved as configuration logging file. 2. for future testing. 3. can also # be used to directly train the model when interfaceOpt == 0 # define dataset optData = default.optDataCamels optData = default.update(optData, tRange=Ttrain, target='StreamTemp', doNorm=[True, True]) # Update the training period # define model and update parameters if torch.cuda.is_available(): optModel = default.optLstm else: optModel = default.update(default.optLstm, name='hydroDL.model.rnn.CpuLstmModel') optModel = default.update(default.optLstm, hiddenSize=HIDDENSIZE) # define loss function optLoss = default.optLossRMSE # define training options optTrain = default.update(default.optTrainCamels, miniBatch=[BATCH_SIZE, RHO], nEpoch=EPOCH, saveEpoch=saveEPOCH,
cid = 0 # Hyperparameters EPOCH = 200 BATCH_SIZE = 100 RHO = 365 HIDDENSIZE = 256 # Ttrain=[19900101, 19950101] # define directory to save results exp_name = 'longtermDA' exp_disp = 'testmultiobs_rerun' # train default model optData = default.optDataCamels optModel = default.update(default.optLstm, hiddenSize=HIDDENSIZE) optLoss = default.optLoss optTrain = default.update(default.optTrainCamels, miniBatch=[BATCH_SIZE, RHO], nEpoch=EPOCH) save_path = exp_name + '/' + exp_disp + \ '/epochs{}_batch{}_rho{}_hiddensize{}'.format(optTrain['nEpoch'],optTrain['miniBatch'][0], optTrain['miniBatch'][1],optModel['hiddenSize']) out = os.path.join(pathCamels['Out'], save_path, 'All-90-95') masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) # master.runTrain(masterDict, cudaID=cid % 3, screen='test') cid = cid + 1 # train DA model nDayLst = [3, 7, 10] for nDay in nDayLst:
"""Multi-target LSTM example: jointly predict SMAP_AM and NOAH 0-10cm soil
moisture with the sigma loss, then load test predictions for plotting."""
from hydroDL import pathSMAP
from hydroDL.master import default, wrapMaster, train, runTrain, test
from hydroDL.post import plot, stat
import numpy as np
import os

# Output paths are resolved relative to this script.
cDir = os.path.dirname(os.path.abspath(__file__))
# FIX: removed the debug override `cDir = r'/home/kxf227/work/GitHUB/pyRnnSMAP/example/'`
# that the original flagged "# for coding. delete." — it clobbered the portable
# path above and broke the script on any other machine.

# define training options: two targets trained jointly on CONUSv4f1, one year
optData = default.update(
    default.optDataSMAP,
    rootDB=pathSMAP['DB_L3_NA'],
    target=['SMAP_AM', 'SOILM_0-10_NOAH'],
    subset='CONUSv4f1',
    tRange=[20150401, 20160401])
optModel = default.optLstm
optLoss = default.optLossSigma  # predicts mean + uncertainty per target
optTrain = default.update(default.optTrainSMAP, nEpoch=100)
out = os.path.join(cDir, 'output', 'CONUSv4f1_multi')
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)

# train (disabled; enable one of the two lines below to retrain)
# train(masterDict)
# runTrain(masterDict, cudaID=2, screen='LSTM-multi')

# test: returns dataframe, predictions, targets, and predicted sigma
df, yp, yt, sigma = test(out, tRange=[20160401, 20170401], subset='CONUSv4f1')

# plot ts MAP
dataGrid = list()
# cid = 0 # starting GPU id # gnum = 6 # how many GPUs you have # Region withheld as testing target. Take region 1 as an example. # Change this to 1,2,..,7 to run models for all 7 PUR regions in CONUS. testRegion = 1 iexp = testRegion - 1 # index TestLS = regionID[iexp] # basin ID list for testing, should be withheld for training TrainLS = list(set(gageid.tolist()) - set(TestLS)) # basin ID for training gageDic = {"TrainID": TrainLS, "TestID": TestLS} # prepare the training dataset optData = default.optDataCamels optData = default.update( optData, tRange=Ttrain, subset=TrainLS, lckernel=None, fdcopt=False ) climateList = camels.attrLstSel + [ "p_mean", "pet_mean", "p_seasonality", "frac_snow", "aridity", "high_prec_freq", "high_prec_dur", "low_prec_freq", "low_prec_dur", ] # climateList = ['slope_mean', 'area_gages2', 'frac_forest', 'soil_porosity', 'max_water_content'] # climateList = [] optData = default.update(optData, varT=camels.forcingLst, varC=climateList)
"""Minimal sigma-loss example: train 5 epochs on CONUSv4f1, then test."""
from hydroDL import pathSMAP, master
import os
from hydroDL.master import default

# Paths are relative to this script's directory.
cDir = os.path.dirname(os.path.abspath(__file__))
# FIX: removed the leftover developer override
# `cDir = r'/home/kxf227/work/GitHUB/pyRnnSMAP/example/'` — it silently
# replaced the portable path above, so the example only ran on one machine.

# define training options
optData = default.update(
    default.optDataSMAP,
    rootDB=os.path.join(cDir, 'data'),
    subset='CONUSv4f1',
    tRange=[20150401, 20160401],
)
optModel = default.optLstm
optLoss = default.optLossSigma  # heteroscedastic loss: mean + sigma outputs
optTrain = default.update(master.default.optTrainSMAP, nEpoch=5, saveEpoch=5)
out = os.path.join(cDir, 'output', 'CONUSv4f1_sigma')
masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)

# train
master.train(masterDict)

# test on the following year
pred = master.test(out, tRange=[20160401, 20170401], subset='CONUSv4f1')
from hydroDL import pathSMAP, master, utils
from hydroDL.master import default
from hydroDL.post import plot, stat
import os
import matplotlib.pyplot as plt
import numpy as np
import torch

# training: precipitation-only (APCP_FORA forcing) closed-loop experiments
# over two training windows, each tagged for the output directory name.
tLst = [[20150501, 20151001], [20150402, 20160401]]
tagLst = ['2015RK', '2015']
for k in range(len(tLst)):
    # DA run: 1-day-lagged observations appended as input (daObs=1).
    optData = default.update(default.optDataSMAP,
                             varT=['APCP_FORA'],
                             rootDB=pathSMAP['DB_L3_NA'],
                             subset='CONUSv2f1',
                             tRange=tLst[k],
                             daObs=1)
    optModel = default.optLstmClose
    optLoss = default.optLossRMSE
    optTrain = default.update(default.optTrainSMAP, nEpoch=500)
    out = os.path.join(pathSMAP['Out_L3_NA'], 'DA',
                       'CONUSv2f1_DA_Prcp_' + tagLst[k])
    masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)
    # GPU chosen by (k+1) % 3; `screen` names the detached training session.
    master.runTrain(masterDict, cudaID=(k + 1) % 3, screen='DA' + tagLst[k])
    # Companion non-DA run (same forcing, no daObs).
    # NOTE(review): the loop body continues past this chunk — this optData is
    # presumably wrapped and launched in the unseen continuation.
    optData = default.update(default.optDataSMAP,
                             varT=['APCP_FORA'],
                             rootDB=pathSMAP['DB_L3_NA'],
                             subset='CONUSv2f1',
                             tRange=tLst[k])
cid = 0 # Hyperparameters EPOCH = 200 BATCH_SIZE = 100 RHO = 365 HIDDENSIZE = 256 # Ttrain=[19900101, 19950101] # define directory to save results exp_name = 'longtermDA' exp_disp = 'testprecip' # train default model optData = default.optDataCamels optModel = default.update(default.optLstm, hiddenSize=HIDDENSIZE) optLoss = default.optLoss optTrain = default.update(default.optTrainCamels, miniBatch=[BATCH_SIZE, RHO], nEpoch=EPOCH) save_path = exp_name + '/' + exp_disp + \ '/epochs{}_batch{}_rho{}_hiddensize{}'.format(optTrain['nEpoch'],optTrain['miniBatch'][0], optTrain['miniBatch'][1],optModel['hiddenSize']) out = os.path.join(pathCamels['Out'], save_path, 'All-90-95') masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain) # master.runTrain(masterDict, cudaID=cid % 3, screen='test') cid = cid + 1 # train DA model nDayLst = [3, 10, 30, 100, 365] for nDay in nDayLst:
"""Portable CONUSv4f1 training example with automatic CPU fallback."""
# from hydroDL import pathSMAP
from hydroDL.master import default
from hydroDL.master.master import wrapMaster, train
import os
import torch

# Resolve data/ and output/ next to this script.
cDir = os.path.dirname(os.path.abspath(__file__))

# define training options
optData = default.update(
    default.optDataSMAP,
    rootDB=os.path.join(cDir, "data"),
    subset="CONUSv4f1",
    tRange=[20150401, 20160401],
)
# Use the CUDA LSTM when available; otherwise the pure-CPU implementation.
optModel = (
    default.optLstm
    if torch.cuda.is_available()
    else default.update(default.optLstm, name="hydroDL.model.rnn.CpuLstmModel")
)
optLoss = default.optLossRMSE
optTrain = default.update(default.optTrainSMAP, nEpoch=100)
out = os.path.join(cDir, "output", "CONUSv4f1")
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)

# train
train(masterDict)