Example #1
0
                                     subset='CONUSv4f1',
                                     tRange=ty2)
# Inputs: time-series forcings plus constant attributes, normalized,
# with NaN rows removed.
x = df.getData(varT=dbCsv.varForcing,
               varC=dbCsv.varConst,
               doNorm=True,
               rmNan=True)
# Target: SMAP AM soil moisture; keep NaNs (missing observations).
y = df.getData(varT='SMAP_AM', doNorm=True, rmNan=False)
nx = x.shape[-1]  # number of input features
ny = 1  # single target variable

# Conditional CNN-LSTM with ct=365 conditioning time steps.
model = rnn.CnnCondLstm(nx=nx, ny=ny, ct=365, hiddenSize=64, cnnSize=32, opt=3)
lossFun = crit.RmseLoss()
model = train.trainModel(model,
                         x,
                         y,
                         lossFun,
                         xc=c,  # NOTE(review): `c` is not defined in this excerpt -- presumably constant attributes; confirm
                         nEpoch=nEpoch,
                         miniBatch=[100, 30])

# Run the CNN-conditioned test path; plain testModel kept for reference.
yOut = train.testModelCnnCond(model, x, y)
# yOut = train.testModel(model, x)
# Transform predictions and observations with fromRaw=False
# (presumably de-normalizes back to physical units -- confirm transNorm).
yP = dbCsv.transNorm(yOut[:, :, 0],
                     rootDB=rootDB,
                     fieldName='SMAP_AM',
                     fromRaw=False)
# Observations skip the first model.ct steps, presumably to align with
# the conditioned output window -- confirm against testModelCnnCond.
yT = dbCsv.transNorm(y[:, model.ct:, 0],
                     rootDB=rootDB,
                     fieldName='SMAP_AM',
                     fromRaw=False)
statDict = post.statError(yP, yT)
Example #2
0
File: test.py  Project: sadeghst/geolearn
    # load data
    df = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                         subset='CONUSv4f1',
                                         tRange=ty1)
    x = df.getDataTs(dbCsv.varForcing, doNorm=True, rmNan=True)
    c = df.getDataConst(dbCsv.varConst, doNorm=True, rmNan=True)
    y = df.getDataTs('SMAP_AM', doNorm=True, rmNan=False)
    nx = x.shape[-1] + c.shape[-1]
    ny = 1

    model = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=64)
    lossFun = crit.RmseLoss()
    model = train.trainModel(model,
                             x,
                             y,
                             c,
                             lossFun,
                             nEpoch=nEpoch,
                             miniBatch=[100, 30])
    modelName = 'test-LSTM'
    train.saveModel(outFolder, model, nEpoch, modelName=modelName)

    for k in dLst:
        sd = utils.time.t2dt(ty1[0]) - dt.timedelta(days=k)
        ed = utils.time.t2dt(ty1[1]) - dt.timedelta(days=k)
        df2 = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                              subset='CONUSv4f1',
                                              tRange=[sd, ed])
        obs = df2.getDataTs('SMAP_AM', doNorm=True, rmNan=False)

        model = rnn.LstmCloseModel(nx=nx, ny=ny, hiddenSize=64)
Example #3
0
    dfz2 = camels.DataframeCamels(subset='all', tRange=[20041225, 20091225])
    z2 = dfz2.getDataObs(doNorm=True, rmNan=True)
    # z2 = interp.interpNan1d(z2, mode='pre')
    xz2 = np.concatenate([x1, z2], axis=2)

    ny = 1
    nx = x1.shape[-1] + c1.shape[-1]
    lossFun = crit.RmseLoss()

    # model1 = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=64)
    # model1 = train.trainModel(
    #     model1, x1, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    # train.saveModel(outFolder, model1, nEpoch, modelName='LSTM')

    model2 = rnn.CudnnLstmModel(nx=nx + 1, ny=ny, hiddenSize=64)
    model2 = train.trainModel(
        model2, xz1, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    train.saveModel(outFolder, model2, nEpoch, modelName='DA-1')

    model3 = rnn.CudnnLstmModel(nx=nx + 1, ny=ny, hiddenSize=64)
    model3 = train.trainModel(
        model3, xz2, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    train.saveModel(outFolder, model3, nEpoch, modelName='DA-7')

if 'test' in doLst:
    df2 = camels.DataframeCamels(subset='all', tRange=[20050101, 20150101])
    x2 = df2.getDataTS(varLst=camels.forcingLst, doNorm=True, rmNan=True)
    c2 = df2.getDataConst(varLst=camels.attrLstSel, doNorm=True, rmNan=True)
    yt2 = df2.getDataObs(doNorm=False, rmNan=False).squeeze()

    dfz1 = camels.DataframeCamels(subset='all', tRange=[20041231, 20141231])
    z1 = dfz1.getDataObs(doNorm=True, rmNan=True)
Example #4
0
File: master.py  Project: mhpi/hydroDL
def train(mDict):
    """Train a model described by a master configuration dictionary.

    Parameters
    ----------
    mDict : dict or str
        Master dictionary with keys "out", "data", "model", "loss" and
        "train", or a path to a master file readable by ``readMasterFile``.

    Side effects: seeds all RNGs for reproducibility, writes the (possibly
    updated) master dictionary to the output folder, and saves checkpoints
    while training. Returns None; the trained model is not returned.
    """
    # Bug fix: the original check was ``mDict is str``, which compares the
    # object against the ``str`` type itself and is therefore always False
    # for a path string -- master files were never loaded from disk.
    if isinstance(mDict, str):
        mDict = readMasterFile(mDict)
    out = mDict["out"]
    optData = mDict["data"]
    optModel = mDict["model"]
    optLoss = mDict["loss"]
    optTrain = mDict["train"]

    # fix the random seed; if none was given, generate one and store it in
    # optTrain so the run can be reproduced from the logged master file
    if optTrain["seed"] is None:
        # generate random seed
        randomseed = int(np.random.uniform(low=0, high=1e6))
        optTrain["seed"] = randomseed
        print("random seed updated!")
    else:
        randomseed = optTrain["seed"]

    random.seed(randomseed)
    torch.manual_seed(randomseed)
    np.random.seed(randomseed)
    torch.cuda.manual_seed(randomseed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # data
    df, x, y, c = loadData(optData)
    # x: ngage*nday*nvar
    # y: ngage*nday*nvar
    # c: ngage*nvar
    # temporal test, fill obs nan using LSTM forecast
    # temp = x[:,:,-1, None]
    # y[np.isnan(y)] = temp[np.isnan(y)]

    # Derive the input width nx; x may be a tuple (main input, extra obs).
    if c is None:
        if type(x) is tuple:
            nx = x[0].shape[-1]
        else:
            nx = x.shape[-1]
    else:
        if type(x) is tuple:
            nx = x[0].shape[-1] + c.shape[-1]
        else:
            nx = x.shape[-1] + c.shape[-1]
    ny = y.shape[-1]

    # loss
    # NOTE(review): eval() on a configured class name executes arbitrary
    # code -- master files must come from a trusted source only.
    if eval(optLoss["name"]) is hydroDL.model.crit.SigmaLoss:
        lossFun = crit.SigmaLoss(prior=optLoss["prior"])
        optModel["ny"] = ny * 2  # one mean + one sigma channel per target
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLoss:
        lossFun = crit.RmseLoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELoss:
        lossFun = crit.NSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELosstest:
        lossFun = crit.NSELosstest()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.MSELoss:
        lossFun = crit.MSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLossCNN:
        lossFun = crit.RmseLossCNN()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.ModifyTrend1:
        lossFun = crit.ModifyTrend1()
        optModel["ny"] = ny
    # NOTE(review): if optLoss["name"] matches none of the above, lossFun
    # stays unbound and the trainModel call below raises NameError.

    # model
    if optModel["nx"] != nx:
        print("updated nx by input data")
        optModel["nx"] = nx
    if eval(optModel["name"]) is hydroDL.model.rnn.CudnnLstmModel:
        # Tuple input: concatenate the obs series onto x along the feature
        # axis and recompute nx accordingly.
        if type(x) is tuple:
            x = np.concatenate([x[0], x[1]], axis=2)
            if c is None:
                nx = x.shape[-1]
            else:
                nx = x.shape[-1] + c.shape[-1]
            optModel["nx"] = nx
            print("Concatenate input and obs, update nx by obs")
        model = rnn.CudnnLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CpuLstmModel:
        model = rnn.CpuLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.LstmCloseModel:
        model = rnn.LstmCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnModel:
        # NOTE(review): the AnnModel branch instantiates AnnCloseModel --
        # looks intentional-or-copy-paste; confirm.
        model = rnn.AnnCloseModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnCloseModel:
        model = rnn.AnnCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.cnn.LstmCnn1d:
        # convNKSP holds the conv hyperparameters: [nkernel, kernelSize,
        # stride, padding].
        convpara = optModel["convNKSP"]
        model = hydroDL.model.cnn.LstmCnn1d(
            nx=optModel["nx"],
            ny=optModel["ny"],
            rho=optModel["rho"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            padding=convpara[3],
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMmodel:
        daobsOption = optData["daObs"]
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    nobs=optModel["nobs"] - 3,
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # Too few obs for the CNN kernel: fall back to a plain
                # CudnnLstmModel with obs concatenated onto the input.
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                # record the substituted model class for logging
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMInmodel:
        # daobsOption = optData['daObs']
        daobsOption = list(range(24))  # temporary test: fixed 24 obs slots
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMInmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    # nobs=optModel['nobs']-3,
                    nobs=24,  # temporary test
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # Fallback identical to the CNN1dLSTMmodel branch above.
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCmodel:
        # LCrange = optData['lckernel']
        # tLCLst = utils.time.tRange2Array(LCrange)
        if len(x[1].shape) == 2:
            # for LC-FDC
            optModel["nobs"] = x[1].shape[-1]
        elif len(x[1].shape) == 3:
            # for LC-SMAP--get time step
            optModel["nobs"] = x[1].shape[1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCInmodel:
        LCrange = optData["lckernel"]
        tLCLst = utils.time.tRange2Array(LCrange)
        optModel["nobs"] = x[1].shape[-1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCInmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CudnnInvLstmModel:
        # optModel['ninv'] = x[1].shape[-1]
        optModel["ninv"] = x[1].shape[-1] + c.shape[-1]  # Test the inv using attributes
        model = rnn.CudnnInvLstmModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            ninv=optModel["ninv"],
            nfea=optModel["nfea"],
            hiddeninv=optModel["hiddeninv"],
        )
        print("LSTMInv model is used!")
    # train
    # Never checkpoint less often than the total epoch count.
    if optTrain["saveEpoch"] > optTrain["nEpoch"]:
        optTrain["saveEpoch"] = optTrain["nEpoch"]

    # train model
    # Persist the (possibly updated) master dict before training for logging.
    writeMasterFile(mDict)
    model = trainModel(
        model,
        x,
        y,
        c,
        lossFun,
        nEpoch=optTrain["nEpoch"],
        miniBatch=optTrain["miniBatch"],
        saveEpoch=optTrain["saveEpoch"],
        saveFolder=out,
    )
Example #5
0
        lossFun = RmseLoss()
        # the loaded loss should be consistent with the 'name' in optLoss Dict above for logging purpose
        # update and write the dictionary variable to out folder for logging and future testing
        masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)
        writeMasterFile(masterDict)
        # log statistics
        statFile = os.path.join(out, "statDict.json")
        with open(statFile, "w") as fp:
            json.dump(statDict, fp, indent=4)
        # train model
        model = trainModel(
            model,
            xTrain,
            yTrain,
            attrs,
            lossFun,
            nEpoch=EPOCH,
            miniBatch=[BATCH_SIZE, RHO],
            saveEpoch=saveEPOCH,
            saveFolder=out,
        )
    elif interfaceOpt == 0:  # directly train the model using dictionary variable
        master.train(masterDict)

# Train DI model
if 1 in Action:
    nDayLst = [1, 3]
    for nDay in nDayLst:
        # nDay: previous Nth day observation to integrate
        # update parameter "daObs" for data dictionary variable
        optData = default.update(default.optDataCamels, daObs=nDay)
Example #6
0
# select model: GPU or CPU
if torch.cuda.is_available():
    LSTM = LSTM  # no-op: keep the CUDA-capable class already bound to LSTM
else:
    LSTM = LSTM_CPU  # fall back to the CPU implementation
# Input width = time-series vars + constant vars; one output per target.
model = LSTM(nx=len(var_time_series) + len(var_constant),
             ny=len(target),
             hiddenSize=HIDDEN_SIZE)

# training the model
last_model = trainModel(
    model,
    x_train,
    y_train,
    c_train,
    loss_fn,
    nEpoch=EPOCH,
    miniBatch=[BATCH_SIZE, RHO],  # presumably [batch size, sequence length] -- confirm
    saveEpoch=1,  # checkpoint after every epoch
    saveFolder=output_s,
)

# validation the result
# load validation datasets
val_date_list = ["2016-04-01", "2017-03-31"]  # validation period
# load your data. same as training data
val_csv = LoadCSV(csv_path_s, val_date_list, all_date_list)
x_val = val_csv.load_time_series(var_time_series)
c_val = val_csv.load_constant(var_constant, convert_time_series=False)
y_val = val_csv.load_time_series(target, remove_nan=False)  # keep NaNs in targets
        lossFun = crit.RmseLoss()
        # the loaded loss should be consistent with the 'name' in optLoss Dict above for logging purpose
        # update and write the dictionary variable to out folder for logging and future testing
        masterDict = master.wrapMaster(out, optData, optModel, optLoss,
                                       optTrain)
        master.writeMasterFile(masterDict)
        # train model

        out1 = out

        ############
        model = train.trainModel(model,
                                 x,
                                 y,
                                 c,
                                 lossFun,
                                 nEpoch=EPOCH,
                                 miniBatch=[BATCH_SIZE, RHO],
                                 saveEpoch=saveEPOCH,
                                 saveFolder=out)
    elif interfaceOpt == 0:  # directly train the model using dictionary variable
        master.train(masterDict)

# Test models
if 2 in Action:
    TestEPOCH = 2000  # it was 200  # choose the model to test after trained "TestEPOCH" epoches
    # generate a folder name list containing all the tested model output folders
    caseLst = [
        'All-2010-2016'
    ]  #, '494-B247-H100','460-B230-H100' ,'327-B163-H100','258-B129-H100' ,'169-B169-H100', '29-B29-H100']
Example #8
0
# Output folder for the close-loop experiment and three yearly windows
# (YYYYMMDD integer ranges).
outFolder = os.path.join(hydroDL.pathSMAP['outTest'], 'closeLoop')
ty1 = [20150401, 20160401]  # training window
ty2 = [20160401, 20170401]
ty3 = [20170401, 20180401]

# Action flags; only 'train' is enabled in this excerpt.
doLst = list()
doLst.append('train')
# doLst.append('test')
# doLst.append('post')

# Inputs over the training window: forcings + constant attributes,
# normalized, NaNs removed.
df = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                     subset='CONUSv4f1',
                                     tRange=ty1)
x = df.getData(varT=dbCsv.varForcing,
               varC=dbCsv.varConst,
               doNorm=True,
               rmNan=True)
# Target: SMAP AM soil moisture; keep NaNs (missing observations).
y = df.getData(varT='SMAP_AM', doNorm=True, rmNan=False)
nx = x.shape[-1]
ny = 1

# Close-loop LSTM: nx + 1 inputs, presumably the extra channel carries the
# integrated past observation -- confirm against LstmCloseModel.
model3 = rnn.LstmCloseModel(nx=nx + 1, ny=ny, hiddenSize=64, opt=1)
lossFun = crit.RmseLoss()
model3 = train.trainModel(model3,
                          x,
                          y,
                          lossFun,
                          nEpoch=nEpoch,
                          miniBatch=[100, 30])
modelName = 'LSTM-DA'
train.saveModel(outFolder, model3, nEpoch, modelName=modelName)
Example #9
0
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
        # Wrap up all the training configurations to one dictionary in order to save into "out" folder
        masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)
        master.writeMasterFile(masterDict)
        # log statistics
        statFile = os.path.join(out, "statDict.json")
        with open(statFile, "w") as fp:
            json.dump(statDict, fp, indent=4)
        # Train the model
        trainedModel = train.trainModel(
            model,
            xTrain,
            yTrain,
            attrs,
            lossFun,
            nEpoch=EPOCH,
            miniBatch=[BATCH_SIZE, RHO],
            saveEpoch=saveEPOCH,
            saveFolder=out,
        )

    if interfaceOpt == 0:
        # Only need to pass the wrapped configuration dict 'masterDict' for training
        # nx, ny will be automatically updated later
        masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)
        master.train(masterDict)

        ## Not used here.
        ## A potential way to run batch jobs simultaneously in background through multiple GPUs and Linux screens.
        ## To use this, must manually set the "pathCamels['DB']" in hydroDL/__init__.py as your own root path of CAMELS data.