Exemplo n.º 1
0
def trainModelTS(outName):
    """Train a time-series water-quality model described by master file *outName*.

    Loads the master dict, transforms the training subset into model tensors,
    builds the model/loss/optimizer, then trains in chunks of ``saveEpoch``
    epochs, checkpointing after each chunk and finally writing the accumulated
    per-epoch losses to ``loss.csv`` in the output folder.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    wqData = waterQuality.DataModelWQ(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    else:
        # BUG FIX: without this branch an unknown model name left `model`
        # unbound and later raised NameError; fail fast instead.
        raise RuntimeError('Model not specified')
    lossFun = crit.RmseLoss()
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    optim = torch.optim.Adadelta(model.parameters())
    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start each run with a fresh log file
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        # train sEp epochs at a time, then checkpoint the model/optimizer
        model, optim, lossEp = trainTS.trainModel(dataTup,
                                                  model,
                                                  lossFun,
                                                  optim,
                                                  batchSize=dictP['batchSize'],
                                                  nEp=sEp,
                                                  cEp=k,
                                                  logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
Exemplo n.º 2
0
# doLst.append('test')
# doLst.append('post')

if 'train' in doLst:
    # load data
    df = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                         subset='CONUSv4f1',
                                         tRange=ty1)
    x = df.getDataTs(dbCsv.varForcing, doNorm=True, rmNan=True)
    c = df.getDataConst(dbCsv.varConst, doNorm=True, rmNan=True)
    y = df.getDataTs('SMAP_AM', doNorm=True, rmNan=False)
    nx = x.shape[-1] + c.shape[-1]
    ny = 1

    model = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=64)
    lossFun = crit.RmseLoss()
    model = train.trainModel(model,
                             x,
                             y,
                             c,
                             lossFun,
                             nEpoch=nEpoch,
                             miniBatch=[100, 30])
    modelName = 'test-LSTM'
    train.saveModel(outFolder, model, nEpoch, modelName=modelName)

    for k in dLst:
        sd = utils.time.t2dt(ty1[0]) - dt.timedelta(days=k)
        ed = utils.time.t2dt(ty1[1]) - dt.timedelta(days=k)
        df2 = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                              subset='CONUSv4f1',
Exemplo n.º 3
0
def trainModelTS(outName):
    """Train the time-series model configured by the master file *outName*.

    Reads the master dict, prepares the training data, instantiates the
    configured loss, model and optimizer, then trains in ``saveEpoch``-sized
    chunks, checkpointing after each chunk and writing per-epoch losses to
    ``loss.csv``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    rmFlag = dictP.get('rmFlag', False)
    wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
    varTup = tuple(dictP[key] for key in ('varX', 'varXC', 'varY', 'varYC'))
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    nx, nxc, ny, nyc, nt, ns = trainTS.getSize(dataTup)

    # define loss
    critName = dictP['crit']
    if critName == 'RmseLoss':
        lossFun = crit.RmseLoss()
    elif critName == 'RmseLoss2D':
        lossFun = crit.RmseLoss2D()
    elif critName == 'SigmaLoss':
        lossFun = crit.SigmaLoss()
        # SigmaLoss needs twice the output width
        ny, nyc = ny * 2, nyc * 2
    else:
        raise RuntimeError('loss function not specified')

    # define model
    modelName = dictP['modelName']
    if modelName == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(
            nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
    elif modelName == 'LstmModel':
        model = rnn.LstmModel(
            nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
    elif modelName == 'AgeLSTM':
        model = rnn.AgeLSTM2(
            nx=nx + nxc, ny=ny, nyc=nyc, rho=365, nh=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    # move to GPU when one is available
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizor function not specified')

    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # begin each run with a clean log
    if os.path.exists(logFile):
        os.remove(logFile)
    lossLst = []
    for cEp in range(0, nEp, sEp):
        # run sEp epochs, then checkpoint model and optimizer state
        model, optim, lossEp = trainTS.trainModel(
            dataTup, model, lossFun, optim,
            batchSize=dictP['batchSize'], nEp=sEp, cEp=cEp, logFile=logFile)
        # save model
        saveModel(outName, cEp + sEp, model, optim=optim)
        lossLst.extend(lossEp)

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
Exemplo n.º 4
0
    xLst.append(tempX.values)
# Stack per-variable lists into float32 arrays, then swap axes 1 and 2
# (assumes the stacked layout needs site/variable axes exchanged — TODO confirm)
x = np.stack(xLst, axis=-1).swapaxes(1, 2).astype(np.float32)
y = np.stack(yLst, axis=-1).swapaxes(1, 2).astype(np.float32)
# replace NaN inputs with a -1 sentinel value
x[np.where(np.isnan(x))] = -1
# training subset: all time steps up to and including year 2016
ind1 = dfC.index.year <= 2016
xx = x[:, ind1, :]
yy = y[:, ind1, :]
# training
nbatch = 20  # mini-batch size (samples drawn per iteration)
nEp = 100  # total number of training epochs
ns = xx.shape[1]  # number of training samples (axis 1)
nx = xx.shape[-1]  # number of input variables
ny = yy.shape[-1]  # number of target variables
model = rnn.LstmModel(nx=nx, ny=ny, hiddenSize=256).cuda()
optim = torch.optim.Adadelta(model.parameters())
lossFun = crit.RmseLoss().cuda()
# iterations per epoch chosen so P(a given sample is never drawn) <= 1%:
# (1 - nbatch/ns)^nIterEp <= 0.01
nIterEp = int(np.ceil(np.log(0.01) / np.log(1 - nbatch / ns)))
lossLst = list()
for iEp in range(1, nEp + 1):
    lossEp = 0
    t0 = time.time()
    for iIter in range(nIterEp):
        iR = np.random.randint(0, ns, nbatch)
        xTemp = xx[:, iR, :]
        yTemp = yy[:, iR, :]
        xT = torch.from_numpy(xTemp).float().cuda()
        yT = torch.from_numpy(yTemp).float().cuda()
        if iEp == 1 and iIter == 0:
            try:
                yP = model(xT)
            except:
Exemplo n.º 5
0
def train(mDict):
    """Train a model described by a master dict (or a path to a master file).

    Parameters
    ----------
    mDict : dict or str
        Master configuration dict; if a string, it is treated as the path of
        a master file and loaded with ``readMasterFile``.

    Seeds all RNGs for reproducibility, loads the data, resolves the loss and
    model classes named in the configuration, writes the master file back, and
    runs the training loop (checkpointing every ``saveEpoch`` epochs).
    """
    # BUG FIX: the original `mDict is str` compared the argument against the
    # `str` class itself (always False), so a path was never loaded.
    if isinstance(mDict, str):
        mDict = readMasterFile(mDict)
    out = mDict["out"]
    optData = mDict["data"]
    optModel = mDict["model"]
    optLoss = mDict["loss"]
    optTrain = mDict["train"]

    # fix the random seed
    if optTrain["seed"] is None:
        # generate random seed
        randomseed = int(np.random.uniform(low=0, high=1e6))
        optTrain["seed"] = randomseed
        print("random seed updated!")
    else:
        randomseed = optTrain["seed"]

    random.seed(randomseed)
    torch.manual_seed(randomseed)
    np.random.seed(randomseed)
    torch.cuda.manual_seed(randomseed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # data
    df, x, y, c = loadData(optData)
    # x: ngage*nday*nvar
    # y: ngage*nday*nvar
    # c: ngage*nvar
    # temporal test, fill obs nan using LSTM forecast
    # temp = x[:,:,-1, None]
    # y[np.isnan(y)] = temp[np.isnan(y)]

    # input width: time-series variables plus (optional) constant attributes
    if c is None:
        if type(x) is tuple:
            nx = x[0].shape[-1]
        else:
            nx = x.shape[-1]
    else:
        if type(x) is tuple:
            nx = x[0].shape[-1] + c.shape[-1]
        else:
            nx = x.shape[-1] + c.shape[-1]
    ny = y.shape[-1]

    # loss
    # NOTE(review): eval() on a config-supplied class name executes arbitrary
    # code -- only safe with trusted master files.
    if eval(optLoss["name"]) is hydroDL.model.crit.SigmaLoss:
        lossFun = crit.SigmaLoss(prior=optLoss["prior"])
        # SigmaLoss predicts both a value and a sigma, doubling the outputs
        optModel["ny"] = ny * 2
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLoss:
        lossFun = crit.RmseLoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELoss:
        lossFun = crit.NSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELosstest:
        lossFun = crit.NSELosstest()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.MSELoss:
        lossFun = crit.MSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLossCNN:
        lossFun = crit.RmseLossCNN()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.ModifyTrend1:
        lossFun = crit.ModifyTrend1()
        optModel["ny"] = ny
    else:
        # BUG FIX: the original chain had no else, leaving `lossFun` unbound
        # (NameError later) for an unrecognized loss name.
        raise RuntimeError("unknown loss function: " + optLoss["name"])

    # model
    if optModel["nx"] != nx:
        print("updated nx by input data")
        optModel["nx"] = nx
    if eval(optModel["name"]) is hydroDL.model.rnn.CudnnLstmModel:
        if type(x) is tuple:
            x = np.concatenate([x[0], x[1]], axis=2)
            if c is None:
                nx = x.shape[-1]
            else:
                nx = x.shape[-1] + c.shape[-1]
            optModel["nx"] = nx
            print("Concatenate input and obs, update nx by obs")
        model = rnn.CudnnLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CpuLstmModel:
        model = rnn.CpuLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.LstmCloseModel:
        model = rnn.LstmCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnModel:
        model = rnn.AnnCloseModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnCloseModel:
        model = rnn.AnnCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.cnn.LstmCnn1d:
        convpara = optModel["convNKSP"]
        model = hydroDL.model.cnn.LstmCnn1d(
            nx=optModel["nx"],
            ny=optModel["ny"],
            rho=optModel["rho"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            padding=convpara[3],
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMmodel:
        daobsOption = optData["daObs"]
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    nobs=optModel["nobs"] - 3,
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # too few observations: fall back to a plain CudnnLstmModel
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMInmodel:
        # daobsOption = optData['daObs']
        daobsOption = list(range(24))
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMInmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    # nobs=optModel['nobs']-3,
                    nobs=24,  # temporary test
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # too few observations: fall back to a plain CudnnLstmModel
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCmodel:
        # LCrange = optData['lckernel']
        # tLCLst = utils.time.tRange2Array(LCrange)
        if len(x[1].shape) == 2:
            # for LC-FDC
            optModel["nobs"] = x[1].shape[-1]
        elif len(x[1].shape) == 3:
            # for LC-SMAP--get time step
            optModel["nobs"] = x[1].shape[1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCInmodel:
        LCrange = optData["lckernel"]
        tLCLst = utils.time.tRange2Array(LCrange)
        optModel["nobs"] = x[1].shape[-1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCInmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CudnnInvLstmModel:
        # optModel['ninv'] = x[1].shape[-1]
        optModel["ninv"] = x[1].shape[-1] + c.shape[-1]  # Test the inv using attributes
        model = rnn.CudnnInvLstmModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            ninv=optModel["ninv"],
            nfea=optModel["nfea"],
            hiddeninv=optModel["hiddeninv"],
        )
        print("LSTMInv model is used!")
    else:
        # BUG FIX: the original chain had no else, leaving `model` unbound
        # (NameError later) for an unrecognized model name.
        raise RuntimeError("unknown model: " + optModel["name"])
    # train
    if optTrain["saveEpoch"] > optTrain["nEpoch"]:
        optTrain["saveEpoch"] = optTrain["nEpoch"]

    # train model
    writeMasterFile(mDict)
    model = trainModel(
        model,
        x,
        y,
        c,
        lossFun,
        nEpoch=optTrain["nEpoch"],
        miniBatch=optTrain["miniBatch"],
        saveEpoch=optTrain["saveEpoch"],
        saveFolder=out,
    )
Exemplo n.º 6
0
    def forward(self, x):
        """Run the two-hidden-layer MLP: ReLU after each hidden layer, linear output."""
        hidden = F.relu(self.hidden1(x))
        hidden = F.relu(self.hidden2(hidden))
        return self.predict(hidden)


# define the network: input width taken from the data, a single output unit
net = Net(n_feature=x.shape[2], n_hidden=Hid_lay, n_output=1)
net.to(device, dtype=torch.float32)
print(net)

# optimizer and regression loss; get_length=True makes the loss also
# return a sample count alongside the loss value
optimizer = torch.optim.Adadelta(net.parameters())
loss_func = crit.RmseLoss(get_length=True)

# per-metric training history
logger = {
    'rmse_train': [],
    'rmse_test': [],
    'train_loss': [],
}

# define testing
def test(epoch):
    testmse = 0.0
    Nsample = 0
    for step, (batch_x, batch_y) in enumerate(test_loader):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        out = net(batch_x)
        loss, Nday = loss_func(out, batch_y)
        tempmse = (loss.item()**2) * Nday