def funcPoint(iP, axP):
    """Fill the six detail axes for the site selected at index ``iP``.

    ``axP`` is unpacked into one time-series axis, three histogram axes and
    two periodogram axes.  Site list, model names, WRTDS directory, codes,
    correlation matrix and ``calPower`` are read from module-level names.
    """
    [axTS, axH1, axH2, axH3, axP1, axP2] = axP
    siteNo = siteNoLstCode[iP]
    outName1 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    outName2 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QT_C', trainSet)
    dfL1 = basins.loadSeq(outName1, siteNo)
    dfL2 = basins.loadSeq(outName2, siteNo)
    dfW = pd.read_csv(os.path.join(dirWrtds, 'output', siteNo),
                      index_col=None).set_index('date')
    dfO = waterQuality.readSiteTS(siteNo,
                                  codeLst + ['00060'],
                                  freq=wqData.freq)
    dfOD = waterQuality.readSiteTS(siteNo, codeLst + ['00060'], freq='D')
    t = dfO.index
    # ts
    tBar = np.datetime64('2010-01-01')
    sd = np.datetime64('1980-01-01')
    legLst = ['LSTM QTFP', 'LSTM QT', 'WRTDS', 'Obs']
    axplot.plotTS(axTS, t, [dfL1[code], dfL2[code], dfW[code], dfO[code]],
                  tBar=tBar, sd=sd, styLst='---*', cLst='mrbk',
                  legLst=legLst)
    corrL = corrMat[indS[iP], iCode, 0]
    corrW = corrMat[indS[iP], iCode, 1]
    axplot.titleInner(axTS, 'siteNo {} {:.2f} {:.2f}'.format(
        siteNo, corrL, corrW))
    axTS.legend()
    # hist
    axH1.hist(dfOD[code].values, density=True, bins=50)
    axplot.titleInner(axH1, 'histogram {}'.format(shortName))
    axH2.hist(dfOD['00060'].values, density=True, bins=50)
    axplot.titleInner(axH2, 'histogram {}'.format('Q'))
    axH3.hist(np.log(dfOD['00060'].values + 1), density=True, bins=50)
    axplot.titleInner(axH3, 'histogram {}'.format('log Q'))
    # periodgram
    freqQ, powerQ, pQ = calPower('00060', dfOD)
    freqC, powerC, pC = calPower(code, dfOD)
    # The streamflow panel must show the streamflow power spectrum; the
    # original plotted powerC here and never used powerQ.
    axP1.plot(1 / freqQ, powerQ, '-*b', label='Periodograms')
    axP1.plot(1 / freqQ, pQ, '-*r', label='baluev probability')
    axplot.titleInner(axP1, 'streamflow')
    axP1.legend()
    axP2.plot(1 / freqC, powerC, '-*b', label='Periodograms')
    axP2.plot(1 / freqC, pC, '-*r', label='baluev probability')
    axplot.titleInner(axP2, shortName)
    axP2.legend()
def funcPoint(iP, axP):
    """Plot every model in ``labelLst`` against observations for one site.

    Observations are split by odd/even calendar year (legend: 'obs train' /
    'obs test'); the title lists each model's correlation on the even-year
    observations.
    """
    site = siteNoLst[iP]
    colors = 'cb'
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(obs.index).year
    obsOdd = obs[years % 2 == 1]
    obsEven = obs[years % 2 == 0]
    # table aligned on the non-missing even-year observations
    paired = pd.DataFrame(index=obsEven.dropna().index)
    paired['obs'] = obsEven
    for iLab, lab in enumerate(labelLst):
        seqName = '{}-{}-{}-{}'.format(dataName, code, lab, trainSet)
        pred = basins.loadSeq(seqName, site)[code]
        paired[lab] = pred
        axplot.plotTS(axP, pred.index, pred.values,
                      styLst='-', cLst=colors[iLab])
    axplot.plotTS(axP, obsOdd.index, obsOdd.values, styLst='*', cLst='m')
    axplot.plotTS(axP, obsEven.index, obsEven.values, styLst='*', cLst='r')
    axP.legend(labelLst + ['obs train', 'obs test'])
    title = 'site {}'.format(site)
    for iLab, lab in enumerate(labelLst):
        col = paired[lab]
        axplot.plotTS(axP, col.index, col.values,
                      styLst='*', cLst=colors[iLab])
        _, corr = utils.stat.calErr(col.values, paired['obs'].values)
        title += ' corr{}={:.3f}'.format(iLab, corr)
    axP.set_title(title)
def funcPoint(iP, axP):
    """Plot '00060' on ``axP[0]`` and ``code`` on ``axP[1]`` for one site."""
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['00060', code], freq=freq)
    dates = obs.index.values
    panels = [(axP[0], '00060', '-*'), (axP[1], code, '*')]
    for ax, col, sty in panels:
        axplot.plotTS(ax, dates, obs[col].values, styLst=sty, cLst='bgr')
    axP[0].set_title(site)
def loadModel(siteNoLst, outNameLSTM, codeLst):
    """Load LSTM sequences, WRTDS output and weekly observations per site.

    Returns a 3-tuple of dicts ``(LSTM, WRTDS, observations)``, each keyed
    by site number.
    """
    nSite = len(siteNoLst)

    # LSTM
    seqLSTM = {}
    for iSite, site in enumerate(siteNoLst):
        print('\t LSTM site {}/{}'.format(iSite, nSite), end='\r')
        seqLSTM[site] = basins.loadSeq(outNameLSTM, site)

    # WRTDS
    seqWRTDS = {}
    folder = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
    for iSite, site in enumerate(siteNoLst):
        print('\t WRTDS site {}/{}'.format(iSite, nSite), end='\r')
        tab = pd.read_csv(os.path.join(folder, site), index_col=None)
        seqWRTDS[site] = tab.set_index('date')

    # Observation
    seqObs = {}
    for iSite, site in enumerate(siteNoLst):
        print('\t USGS site {}/{}'.format(iSite, nSite), end='\r')
        seqObs[site] = waterQuality.readSiteTS(
            site, varLst=['00060'] + codeLst, freq='W', rmFlag=True)

    return (seqLSTM, seqWRTDS, seqObs)
def funcPoint(iP, axP):
    """Compare LSTM and WRTDS with observations for one site.

    ``axP[0]`` covers years 1980-2016, ``axP[1]`` the years after 2016; each
    title reports the WRTDS and LSTM correlation for its period.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    dates = obs.index
    years = dates.year.values
    masks = [(years <= 2016) & (years >= 1980), years > 2016]
    obsParts = [obs[m].values for m in masks]
    dateParts = [dates[m] for m in masks]
    # LSTM
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet)
    lstm = basins.loadSeq(seqName, site)[code]
    # WRTDS
    wrtds = pd.read_csv(os.path.join(dirWrtds, 'B16', site),
                        index_col=None).set_index('date')[code]
    wrtds.index = pd.to_datetime(wrtds.index)
    lstmParts = [lstm[m].values for m in masks]
    wrtdsParts = [wrtds[m].values for m in masks]
    for k in (0, 1):
        axplot.plotTS(axP[k], dateParts[k],
                      [lstmParts[k], wrtdsParts[k], obsParts[k]],
                      styLst='--*', cLst='bgr')
    # print corr
    for k in (0, 1):
        _, corrWRTDS = utils.stat.calErr(wrtdsParts[k], obsParts[k])
        _, corrLSTM = utils.stat.calErr(lstmParts[k], obsParts[k])
        axP[k].set_title('site {} WRTDS {:.2f} LSTM {:.2f}'.format(
            site, corrWRTDS, corrLSTM))
def funcPoint(iP, axP):
    """Plot, per code in ``codeLst``, the product of that column and '00060'."""
    site = siteNoLst[iP]
    print(iP, site)
    obs = waterQuality.readSiteTS(site, codeLst + ['00060'], freq='D')
    dates = obs.index
    for iVar, var in enumerate(codeLst):
        product = obs[var] * obs['00060']
        axplot.plotTS(axP[iVar], dates, product, styLst='*', cLst='k')
def funcPoint(iP, axP):
    """Plot 'runoff', 'pr' and ``code`` on three axes for one site.

    The title of ``axP[0]`` shows the site number and
    ``nanmean(runoff) / nanmean(pr) * 365 / 100``.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['runoff', 'pr', code], freq=freq)
    dates = obs.index.values
    runoff = obs['runoff'].values
    prcp = obs['pr'].values
    axplot.plotTS(axP[0], dates, runoff, styLst='-*', cLst='bgr')
    axplot.plotTS(axP[1], dates, prcp, styLst='-*', cLst='bgr')
    axplot.plotTS(axP[2], dates, obs[code].values, styLst='*', cLst='bgr')
    ratio = np.nanmean(obs['runoff'].values) / \
        np.nanmean(obs['pr'].values) * 365 / 100
    axP[0].set_title('{} {:.3f}'.format(site, ratio))
def funcPoint(iP, axP):
    """Plot '00060' on ``axP[0]`` and predicted vs. observed ``code`` on ``axP[1]``.

    Predictions are read from the csv ``dirRoot1/output/<siteNo>``.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['00060', code], freq=freq)
    pred = pd.read_csv(os.path.join(dirRoot1, 'output', site),
                       index_col='date')
    dates = obs.index.values
    axQ, axC = axP[0], axP[1]
    axplot.plotTS(axQ, dates, obs['00060'].values, styLst='-*', cLst='bgr')
    axplot.plotTS(axC, dates, pred[code].values, styLst='-', cLst='r')
    axplot.plotTS(axC, dates, obs[code].values, styLst='*', cLst='b')
    axQ.set_title(site)
def funcPoint(iP, axP):
    """Plot the 'QTFP_C' LSTM sequence against weekly observations of ``code``.

    The title shows the site number and ``matMap[iP]`` formatted as corr.
    """
    site = siteNoLstCode[iP]
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(seqName, site)
    obs = waterQuality.readSiteTS(site, [code], freq='W')
    # ts
    axplot.plotTS(axP, obs.index, [lstm[code], obs[code]],
                  tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='-*', cLst='rk',
                  legLst=['LSTM', 'Obs'])
    axP.set_title('site {} corr={:.3f}'.format(site, matMap[iP]))
    axP.legend()
def funcPoint(iP, axP):
    """Plot one model's prediction with odd/even-year observations.

    The title reports the correlation between prediction and the non-missing
    even-year observations (legend entry 'obs test').
    """
    site = siteNoLst[iP]
    pred = basins.loadSeq(outName, site)[code]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    # year masks come from the prediction index and are applied to obs
    years = pd.DatetimeIndex(pred.index).year
    obsOdd = obs[years % 2 == 1]
    obsEven = obs[years % 2 == 0]
    layers = [(pred, '-', 'b'), (obsOdd, '*', 'm'), (obsEven, '*', 'r')]
    for series, sty, color in layers:
        axplot.plotTS(axP, series.index, series.values,
                      styLst=sty, cLst=color)
    axP.legend(['pred', 'obs train', 'obs test'])
    paired = pd.DataFrame(index=obsEven.dropna().index)
    paired['obs'] = obsEven
    paired['pred'] = pred
    _, corr = utils.stat.calErr(paired['pred'].values, paired['obs'].values)
    axP.set_title('site {} corr = {:.3f}'.format(site, corr))
def funcPoint(iP, axP):
    """Plot two csv predictions against weekly observations for one site.

    Predictions are read from ``dirRoot1/output/<siteNo>`` and
    ``dirRoot2/output/<siteNo>``.  Only the rows kept by ``utils.rmNan`` are
    plotted; the correlations in the title are computed from the unfiltered
    arrays, as in the original code.
    """
    siteNo = siteNoLst[iP]
    dfO = waterQuality.readSiteTS(siteNo, [code], freq='W')[code]
    t = dfO.index
    file1 = os.path.join(dirRoot1, 'output', siteNo)
    file2 = os.path.join(dirRoot2, 'output', siteNo)
    dfP1 = pd.read_csv(file1, index_col='date')[code]
    dfP2 = pd.read_csv(file2, index_col='date')[code]
    v = [dfP1.values, dfP2.values, dfO.values]
    # a fresh list is passed so that `v` keeps the unfiltered arrays
    [v1, v2, o], iv = utils.rmNan([dfP1.values, dfP2.values, dfO.values])
    tt = t[iv]
    axplot.plotTS(axP, tt.values, [v1, v2, o], cLst='rbk')
    # print corr
    _, corr1 = utils.stat.calErr(v[0], v[-1])
    _, corr2 = utils.stat.calErr(v[1], v[-1])
    axP.set_title('site {} WRTDS {:.2f} only T {:.2f}'.format(
        siteNo, corr1, corr2))
def funcPoint(iP, axP):
    """Plot all ``labelLst`` models against observations, split at 2016.

    ``axP[0]`` shows years up to 2016, ``axP[1]`` the years after.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    dates = obs.index
    years = pd.DatetimeIndex(dates).year
    early = years <= 2016
    late = years > 2016
    obsEarly = obs[early].values
    obsLate = obs[late].values
    predEarly = []
    predLate = []
    for lab in labelLst:
        seqName = '{}-{}-{}-{}'.format(dataName, 'comb', lab, trainSet)
        pred = basins.loadSeq(seqName, site)[code]
        predEarly.append(pred[early].values)
        predLate.append(pred[late].values)
    axplot.plotTS(axP[0], dates[early], predEarly + [obsEarly],
                  styLst='--*', cLst='bgr')
    axplot.plotTS(axP[1], dates[late], predLate + [obsLate],
                  styLst='--*', cLst='bgr')
    axP[0].set_title(site)
def funcPoint(iP, axP):
    """Plot prediction vs. observation for one site, split at 2016.

    Each title shows the correlation computed here next to the matching
    entry of ``corrMat[iP, :]``.
    """
    site = siteNoLst[iP]
    pred = basins.loadSeq(outName, site)[code]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(pred.index).year
    masks = (years <= 2016, years > 2016)
    obsParts = [obs[m] for m in masks]
    predParts = [pred[m] for m in masks]
    for k in (0, 1):
        axplot.plotTS(axP[k], predParts[k].index,
                      [predParts[k].values, obsParts[k].values],
                      styLst='-*', cLst='br')
    for k in (0, 1):
        _, corr = utils.stat.calErr(predParts[k].values, obsParts[k].values)
        axP[k].set_title('site {} {:.2f} {:.2f}'.format(
            site, corr, corrMat[iP, k]))
def funcPoint(iP, axP):
    """Draw, per code, time series, a log-flow scatter and its colorbar.

    ``axP`` is indexed as ``axP[k, j]``: column 0 holds observed and WRTDS
    series of the code, column 1 the '00060' series, column 2 the scatter of
    the code against log('00060') colored by month, column 3 the colorbar.
    The colorbar is created through the module-level figure ``figP``.
    """
    siteNo = siteNoLst[iP]
    print(iP, siteNo)
    dfO = waterQuality.readSiteTS(siteNo, codeLst+['00060'], freq='D')
    dfW = pd.read_csv(os.path.join(dirWrtds, 'output', siteNo),
                      index_col=None).set_index('date')
    t = dfO.index
    for k, code in enumerate(codeLst):
        ax = axP[k, 0]
        ax2 = axP[k, 1]
        axplot.plotTS(ax2, t, dfO['00060'], styLst='-', cLst='b', alpha=0.3)
        axplot.plotTS(ax, t, dfO[code], styLst='*', cLst='k')
        axplot.plotTS(ax, t, dfW[code], styLst='-', cLst='r', alpha=0.3)
    for k, code in enumerate(codeLst):
        dfTemp = dfO[[code, '00060']].dropna(how='any')
        ax = axP[k, 2]
        x = np.log(dfTemp['00060'].values)
        y = dfTemp[code].values
        c = dfTemp.index.month.values
        # connecting line first, then the scatter that feeds the colorbar
        ax.plot(x, y, 'k-', alpha=0.3)
        cs = ax.scatter(x, y, c=c)
        figP.colorbar(cs, ax=ax, cax=axP[k, 3])
def funcPoint(iP, axP):
    """Plot LSTM ('QTFP_C'), WRTDS and observed ``code`` for one site.

    The title carries the two correlations stored in
    ``corrMat[indS[iP], iCode, 0:2]``.
    """
    site = siteNoLstCode[iP]
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(seqName, site)
    wrtds = pd.read_csv(os.path.join(dirWrtds, 'output', site),
                        index_col=None).set_index('date')
    obs = waterQuality.readSiteTS(site, codeLst+['00060'], freq=wqData.freq)
    # ts
    axplot.plotTS(axP, obs.index, [lstm[code], wrtds[code], obs[code]],
                  tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='--*', cLst='rbk',
                  legLst=['LSTM', 'WRTDS', 'Obs'])
    row = indS[iP]
    corrLSTM = corrMat[row, iCode, 0]
    corrWRTDS = corrMat[row, iCode, 1]
    axP.set_title('{} site {}; LSTM corr={:.2f} WRTDS corr={:.2f}'.format(
        shortName, site, corrLSTM, corrWRTDS))
    axP.legend()
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run the sequence test of model ``outName`` for each site and cache it.

    One csv per site is written to
    ``<model folder>/seq-<sd>-<ed>-ep<ep>/<siteNo>``; when the model was
    trained with ``crit == 'SigmaLoss'`` a second file ``<siteNo>_sigma`` is
    written next to it.

    Parameters
    ----------
    outName : model name passed to loadMaster / loadStat / loadModel.
    siteNoLst : list of site numbers; a single value is wrapped in a list.
    wqData : data model; built from ``master['dataName']`` when None and a
        prediction is needed.
    ep : epoch to load; defaults to ``master['nEpoch']``.
    returnOut : if True, read the saved csv files back and return them.
    retest : if True, predict every site again; otherwise only sites with no
        file in the save folder are predicted.
    sd, ed : start / end date, forwarded to ``waterQuality.readSiteTS`` and
        used in the save-folder name.

    Returns
    -------
    dict keyed by site number (plus ``siteNo + '_sigma'`` for sigma models)
    when ``returnOut`` is True, otherwise None.
    """
    # run sequence test for all sites, default to be from first date to last date
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    if not os.path.exists(saveDir):
        os.mkdir(saveDir)
    siteSaveSet = set(os.listdir(saveDir))
    if retest is True:
        sitePredLst = siteNoLst
    else:
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaveSet
        ]
    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)
        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            freq = wqData.freq
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            xA = np.expand_dims(dfX.values, axis=1)
            # builtin float: the np.float alias no longer exists in NumPy
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)
            # transfer out
            nt = len(dfX)
            ny = len(varY) if varY is not None else 0
            nyc = len(varYC) if varYC is not None else 0
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # even output channels are read as values, odd ones as sigma
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC,
                    varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
            # save output
            t = dfX.index.values.astype('datetime64[D]')
            colY = [] if varY is None else varY
            colYC = [] if varYC is None else varYC
            # NOTE(review): columns is a nested list, kept as in the original
            # so the csv layout does not change.
            dfOut = pd.DataFrame(data=yP, columns=[colY + colYC], index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP,
                                      columns=[colY + colYC],
                                      index=t)
                dfOutS.index.name = 'date'
                # reset the sigma frame itself; the original reset dfOut
                # here and therefore saved predictions as sigma
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)
    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dictOut[siteNo] = pd.read_csv(os.path.join(saveDir, siteNo))
            if doSigma:
                dictOut[siteNo + '_sigma'] = pd.read_csv(
                    os.path.join(saveDir, siteNo + '_sigma'))
        return dictOut
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

# Script excerpt: prepares weekly data and WRTDS-style regressors for one
# site.  gridMET, ntn, usgs and waterQuality are not imported in the visible
# part -- presumably imported elsewhere in the file; verify.
siteNo = '08195000'
code = '00955'
freq = 'W'
sn = 1  # offset added before taking the log of '00060'

# load data
varF = gridMET.varLst+ntn.varLst
varC = usgs.varC
varQ = usgs.varQ
varLst = varF+varC+varQ
df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')

# training / testing
# positions of rows up to 2016 (ind1) and after 2016 (ind2)
yr = df.index.year.values
ind1 = np.where(yr <= 2016)[0]
ind2 = np.where(yr > 2016)[0]
dfYP = pd.DataFrame(index=df.index, columns=['WRTDS', 'LSTM'])

# WRTDS
# regressors: log('00060' + sn) and sin/cos of the decimal year
dfX = pd.DataFrame({'date': df.index}).set_index('date')
dfX = dfX.join(np.log(df['00060']+sn)).rename(
    columns={'00060': 'logQ'})
t = yr+dfX.index.dayofyear.values/365
dfX['sinT'] = np.sin(2*np.pi*t)
dfX['cosT'] = np.cos(2*np.pi*t)
x = dfX.iloc[ind1].values
def func(siteNo, fitAll=True):
    """Fit a locally weighted regression per code for one site and save it.

    For every code of ``codeLst`` that lists ``siteNo`` in ``dictSite``, a
    weighted least-squares model of log(code + sn) on
    ``['yr', 'logQ', 'sinT', 'cosT']`` is fitted separately for each target
    date, using tricube weights on time, log-flow and season distances.
    Training rows are the non-missing rows before 2010.  Predictions are
    written to ``dirOut/<siteNo>`` as csv.

    Parameters
    ----------
    siteNo : site number; also the name of the output file.
    fitAll : if True predict for every date with complete regressors,
        otherwise only for non-missing rows from 2010 on.

    Returns
    -------
    ``()`` when the output file already exists (nothing is computed),
    otherwise None.
    """
    # prep data
    print(siteNo)
    saveName = os.path.join(dirOut, siteNo)
    if os.path.exists(saveName):
        return ()
    t0 = time.time()
    varQ = '00060'
    varLst = codeLst + [varQ]
    df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')
    dfYP = pd.DataFrame(index=df.index, columns=codeLst)
    dfX = pd.DataFrame({'date': df.index}).set_index('date')
    dfX = dfX.join(np.log(df[varQ] + sn)).rename(columns={varQ: 'logQ'})
    yr = dfX.index.year.values
    t = yr + dfX.index.dayofyear.values / 365
    dfX['sinT'] = np.sin(2 * np.pi * t)
    dfX['cosT'] = np.cos(2 * np.pi * t)
    dfX['yr'] = yr
    dfX['t'] = t
    xVarLst = ['yr', 'logQ', 'sinT', 'cosT']
    # train / test
    fitCodeLst = [code for code in codeLst if siteNo in dictSite[code]]
    # the split does not depend on the code, so compute it once
    ind1 = np.where(yr < 2010)[0]
    ind2 = np.where(yr >= 2010)[0]
    h = np.array([7, 2, 0.5])  # window [Y Q S] from EGRET
    for code in fitCodeLst:
        dfXY = dfX.join(np.log(df[code] + sn))
        df1 = dfXY.iloc[ind1].dropna()
        if fitAll:
            df2 = dfXY[xVarLst + ['t']].dropna()
        else:
            df2 = dfXY.iloc[ind2].dropna()  # only fit for observations now
        n = len(df1)
        if n == 0:
            # NOTE(review): this stops fitting ALL remaining codes as soon as
            # one code has no training rows; `continue` may be what was
            # intended.  Kept as in the original.
            break
        # calculate weight
        for tp in df2.index.tolist():
            row = df2.loc[tp]
            dY = np.abs((row['t'] - df1['t']).values)
            dQ = np.abs((row['logQ'] - df1['logQ']).values)
            dS = np.min(np.stack(
                [abs(np.ceil(dY) - dY),
                 abs(dY - np.floor(dY))]), axis=0)
            d = np.stack([dY, dQ, dS])
            if n > 100:
                # widen all windows by 10% until each of the three distances
                # has more than 100 training rows inside its window
                hh = np.repeat(h[:, None], n, axis=1)
                while not np.min(np.sum((hh - d) > 0, axis=1)) > 100:
                    hh = hh * 1.1
            else:
                htemp = np.max(d, axis=1) * 1.1
                hh = np.repeat(htemp[:, None], n, axis=1)
            w = (1 - (d / hh)**3)**3
            w[w < 0] = 0
            wAll = w[0] * w[1] * w[2]
            ind = np.where(wAll > 0)[0]
            ww = wAll[ind]
            # fit WLS
            Y = df1.iloc[ind][code].values
            X = df1.iloc[ind][xVarLst].values
            model = sm.WLS(Y, X, weights=ww).fit()
            xp = row[xVarLst].values
            yp = model.predict(xp)[0]
            # single .loc call: chained `.loc[tp][code] = ...` may assign to
            # a temporary copy and leave dfYP unchanged
            dfYP.loc[tp, code] = np.exp(yp) - sn
        t1 = time.time()
        print(siteNoLst.index(siteNo), siteNo, code, t1 - t0)
    dfYP.to_csv(saveName)
    return
p = yP[-1, :, master['varY'].index(code)] o = wqData.c[-1, ind, ic] elif len(wqData.c.shape) == 2: p = ycP[:, master['varYC'].index(code)] o = wqData.c[ind, ic] for siteNo in dictSite[code]: iS = siteNoLst.index(siteNo) indS = info[info['siteNo'] == siteNo].index.values rmse, corr = utils.stat.calErr(p[indS], o[indS]) corrMat[iS, iCode, iT] = corr rmseMat[iS, iCode, iT] = rmse # seq test for iS, siteNo in enumerate(siteNoLst): dfP = basins.loadSeq(outName, siteNo) dfO = waterQuality.readSiteTS(siteNo, codeLst, freq=wqData.freq) yr = pd.DatetimeIndex(dfP.index).year for iC, code in enumerate(codeLst): if siteNo in dictSite[code]: o1 = dfO[code].values[(yr <= 2016) & (yr >= 1980)] p1 = dfP[code].values[(yr <= 2016) & (yr >= 1980)] o2 = dfO[code].values[yr > 2016] p2 = dfP[code].values[yr > 2016] rmse1, corr1 = utils.stat.calErr(p1, o1) rmse2, corr2 = utils.stat.calErr(p2, o2) corrMat[iS, iC, 2] = corr1 corrMat[iS, iC, 3] = corr2 rmseMat[iS, iC, 2] = rmse1 rmseMat[iS, iC, 3] = rmse2 # plot box
dictLSTMLst.append(dictLSTM) # WRTDS dictWRTDS = dict() dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output') for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') saveFile = os.path.join(dirWRTDS, siteNo) df = pd.read_csv(saveFile, index_col=None).set_index('date') # df = utils.time.datePdf(df) dictWRTDS[siteNo] = df # Observation dictObs = dict() for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') df = waterQuality.readSiteTS( siteNo, varLst=['00060']+codeLst, freq='W') dictObs[siteNo] = df # calculate correlation tt = np.datetime64('2010-01-01') t0 = np.datetime64('1980-01-01') indT1 = np.where((df.index.values < tt) & (df.index.values >= t0))[0] indT2 = np.where(df.index.values >= tt)[0] dictLSTM = dictLSTMLst[0] corrMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan) rmseMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan) for ic, code in enumerate(codeLst): for siteNo in dictSite[code]: indS = siteNoLst.index(siteNo) v1 = dictLSTM[siteNo][code].iloc[indT2].values v2 = dictWRTDS[siteNo][code].iloc[indT2].values
import pandas as pd
import time
import matplotlib.pyplot as plt
from hydroDL.new.model import flowPath

# Script excerpt: loads one site, plots runoff / precipitation / `code`, and
# selects the training rows.  np, gridMET, ntn, usgs, waterQuality and axplot
# are not imported in the visible part -- presumably imported above; verify.
siteNo = '07060710'
code = '00955'
freq = 'D'
sn = 1

# load data
varF = gridMET.varLst+ntn.varLst
varC = usgs.varC
varQ = usgs.varQ
varLst = varF+varC+varQ
# NOTE(review): `freq` defined above is not passed to readSiteTS here
df = waterQuality.readSiteTS(siteNo, varLst=varLst)

# plot data
fig, axes = plt.subplots(3, 1, figsize=(16, 6))
axplot.plotTS(axes[0], df.index, df['runoff'].values, styLst='-*', cLst='bgr')
axplot.plotTS(axes[1], df.index, df['pr'].values, styLst='-*', cLst='bgr')
axplot.plotTS(axes[2], df.index, df[code].values, styLst='*', cLst='bgr')
fig.show()

# training / testing
# training rows: yrTrain[0] <= year < yrTrain[1]
yrTrain = [2000, 2005]
yr = df.index.year.values
indTrain = np.where((yr >= yrTrain[0]) & (yr < yrTrain[1]))[0]

# data
# varX = varF
# outName = 'sbWT-00945-ntnS-00945-Y1' outName = 'sbWT-00945-plain-00945-Y1' dataName = 'sbWT' wqData = waterQuality.DataModelWQ(dataName) code = '00945' siteNoLst = dictSite[code] ep = None retest = True basins.testModelSeq(outName, siteNoLst, wqData=wqData) rmseMat = np.ndarray([len(siteNoLst), 2]) corrMat = np.ndarray([len(siteNoLst), 2]) for k, siteNo in enumerate(siteNoLst): dfP = basins.loadSeq(outName, siteNo) dfO = waterQuality.readSiteTS(siteNo, dfP.columns.tolist(), freq=wqData.freq) codeLst = dfP.columns.tolist() codeLst = ['00945'] sd = np.datetime64('1980-01-01') ed = np.datetime64('2020-12-31') dfP = dfP[dfP.index >= sd] dfO = dfO[dfO.index >= sd] yr = pd.DatetimeIndex(dfP.index).year dfP1 = dfP[yr % 2 == 1] dfO1 = dfO[yr % 2 == 1] dfP2 = dfP[yr % 2 == 0] dfO2 = dfO[yr % 2 == 0] rmse1, corr1 = utils.stat.calErr(dfP1[code].values, dfO1[code].values) rmse2, corr2 = utils.stat.calErr(dfP2[code].values, dfO2[code].values) rmseMat[k, :] = [rmse1, rmse2]
# Script excerpt: per-site, per-code RMSE / correlation of saved WRTDS output
# against weekly observations, split at 2010 (suffix 1: before 2010,
# suffix 2: 2010 and later).  `df`, `siteNoLst` and `trainSet` come from
# earlier in the script.
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
dirOut = os.path.join(dirWRTDS, 'output')
dirPar = os.path.join(dirWRTDS, 'params')
dfCorr1 = df.copy()
dfCorr2 = df.copy()
dfRmse1 = df.copy()
dfRmse2 = df.copy()
t0 = time.time()
for kk, siteNo in enumerate(siteNoLst):
    print('{}/{} {:.2f}'.format(kk, len(siteNoLst), time.time() - t0))
    saveFile = os.path.join(dirOut, siteNo)
    dfP = pd.read_csv(saveFile, index_col=None).set_index('date')
    dfP.index = pd.to_datetime(dfP.index)
    dfC = waterQuality.readSiteTS(siteNo, varLst=usgs.newC, freq='W')
    yr = dfC.index.year.values
    # the split depends only on the dates, not on the code
    ind1 = np.where(yr < 2010)[0]
    ind2 = np.where(yr >= 2010)[0]
    for code in usgs.newC:
        rmse1, corr1 = utils.stat.calErr(dfP.iloc[ind1][code].values,
                                         dfC.iloc[ind1][code].values)
        rmse2, corr2 = utils.stat.calErr(dfP.iloc[ind2][code].values,
                                         dfC.iloc[ind2][code].values)
        # single .loc call: chained `.loc[siteNo][code] = ...` may assign to
        # a temporary copy and leave the table unchanged
        dfCorr1.loc[siteNo, code] = corr1
        dfRmse1.loc[siteNo, code] = rmse1
        dfCorr2.loc[siteNo, code] = corr2
        dfRmse2.loc[siteNo, code] = rmse2
dfCorr1.to_csv(os.path.join(dirWRTDS, '{}-{}-corr'.format(trainSet, trainSet)))
dfRmse1.to_csv(os.path.join(dirWRTDS, '{}-{}-rmse'.format(trainSet, trainSet)))
dictLSTM[siteNo] = df dictLSTMLst.append(dictLSTM) # WRTDS dictWRTDS = dict() dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output') for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') saveFile = os.path.join(dirWRTDS, siteNo) df = pd.read_csv(saveFile, index_col=None).set_index('date') # df = utils.time.datePdf(df) dictWRTDS[siteNo] = df # Observation dictObs = dict() for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') df = waterQuality.readSiteTS(siteNo, varLst=codeLst, freq='W') dictObs[siteNo] = df # calculate correlation tt = np.datetime64('2010-01-01') ind1 = np.where(df.index.values < tt)[0] ind2 = np.where(df.index.values >= tt)[0] dictLSTM = dictLSTMLst[1] dictLSTM2 = dictLSTMLst[0] corrMat = np.full([len(siteNoLst), len(codeLst), 4], np.nan) rmseMat = np.full([len(siteNoLst), len(codeLst), 4], np.nan) for ic, code in enumerate(codeLst): for siteNo in dictSite[code]: indS = siteNoLst.index(siteNo) v1 = dictLSTM[siteNo][code].iloc[ind2].values v2 = dictWRTDS[siteNo][code].iloc[ind2].values
# Script excerpt: fits, per site, a linear regression of log('00060' + sn) on
# sin/cos of the decimal year.  `dirRoot`, `siteNoLst`, `t0` and `sn` come
# from earlier in the script.
dirOut = os.path.join(dirRoot, 'output')
dirPar = os.path.join(dirRoot, 'params')
for folder in [dirRoot, dirOut, dirPar]:
    if not os.path.exists(folder):
        os.mkdir(folder)
colLst = ['count', 'pSinT', 'pCosT', 'b']
dfPar = pd.DataFrame(index=siteNoLst, columns=colLst)
for kk, siteNo in enumerate(siteNoLst):
    print('{}/{} {:.2f}'.format(
        kk, len(siteNoLst), time.time()-t0))
    saveName = os.path.join(dirOut, siteNo)
    # if os.path.exists(saveName):
    #     continue
    df = waterQuality.readSiteTS(siteNo, varLst=['00060'], freq='D')
    dfX = pd.DataFrame({'date': df.index}).set_index('date')
    yr = dfX.index.year.values
    t = yr+dfX.index.dayofyear.values/365
    dfX['sinT'] = np.sin(2*np.pi*t)
    dfX['cosT'] = np.cos(2*np.pi*t)
    x = dfX.values
    y = np.log(df['00060'].values+sn)
    # fit only on the rows kept by utils.rmNan
    [xx, yy], iv = utils.rmNan([x, y])
    if len(xx) > 0:
        lrModel = LinearRegression()
        lrModel = lrModel.fit(xx, yy)
        # predict on every date, including rows excluded from the fit
        yp = lrModel.predict(dfX.values)
        # yp = np.exp(yp)-sn
        # NOTE(review): the back-transform subtracts a literal 1 while the
        # forward transform adds `sn`; identical only when sn == 1.
        dfYP = pd.DataFrame(index=df.index, columns=[
                            '00060'], data=np.exp(yp)-1)
# Script excerpt: loads three sets of saved WRTDS output plus daily
# observations and fills a matrix of squared correlations.  `dirL`, `dictL`,
# `siteNoLst`, `codeLst` and `dictSite` come from earlier in the script.
dictS = dict()
dirS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-DS', 'All', 'output')
dictQ = dict()
dirQ = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-DQ', 'All', 'output')
# read one csv per site from each directory into the matching dict
for dirTemp, dictTemp in zip([dirL, dirS, dirQ], [dictL, dictS, dictQ]):
    for k, siteNo in enumerate(siteNoLst):
        print('\t WRTDS site {}/{}'.format(k, len(siteNoLst)), end='\r')
        saveFile = os.path.join(dirTemp, siteNo)
        df = pd.read_csv(saveFile, index_col=None).set_index('date')
        dictTemp[siteNo] = df
dictObs = dict()
for k, siteNo in enumerate(siteNoLst):
    print('\t USGS site {}/{}'.format(k, len(siteNoLst)), end='\r')
    df = waterQuality.readSiteTS(siteNo,
                                 varLst=['00060'] + codeLst,
                                 freq='D',
                                 rmFlag=True)
    dictObs[siteNo] = df

# calculate rsq
rMat = np.full([len(siteNoLst), len(codeLst), 2], np.nan)
for ic, code in enumerate(codeLst):
    for siteNo in dictSite[code]:
        indS = siteNoLst.index(siteNo)
        v1 = dictL[siteNo][code].values
        v2 = dictS[siteNo][code].values
        v0 = dictObs[siteNo][code].values
        # compare only the rows kept by utils.rmNan across all three arrays
        (vv0, vv1, vv2), indV = utils.rmNan([v0, v1, v2])
        rmse1, corr1 = utils.stat.calErr(vv1, vv0)
        rmse2, corr2 = utils.stat.calErr(vv2, vv0)
        rMat[indS, ic, 0] = corr1**2
# Script excerpt: shows the previous figure, then plots a synthetic and an
# observed precipitation series.  `fig` in the first statement and `kLst`
# near the end come from earlier in the script.
fig.show()

# prcp
# synthetic daily series: sum of two cosines, clipped at zero
t = np.arange('2000-01-01', '2005-01-01', dtype='datetime64[D]')
# days since 1990-01-01 as float; builtin float is used because the np.float
# alias no longer exists in NumPy
x = (t - np.datetime64('1990-01-01')).astype(float)
p = 10 * np.cos(x*2*np.pi/365) +\
    10 * np.cos((x+120)*np.pi/365*4)
p[p < 0] = 0
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
ax.plot(t, p)
fig.show()

#
# prcp - real world
siteNo = '401733105392404'
code = '00955'
dfO = waterQuality.readSiteTS(siteNo, ['runoff', 'pr', code])
t = dfO.index.values
p = dfO['pr'].values
q = dfO['runoff'].values
fig, axes = plt.subplots(3, 1, figsize=(12, 6))
axes[0].plot(t, p)
axes[1].plot(t, dfO['runoff'].values)
axes[2].plot(t, dfO[code].values, '*')
fig.show()
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.plot(dfO['runoff'].values, dfO[code].values, '*')
fig.show()

# calculate concentration curve
nf = len(kLst)
rho = 365
import pandas as pd
from sklearn.linear_model import LinearRegression

# Script excerpt: builds min-max normalized regressors for one site.
# np, gridMET, ntn and waterQuality are not imported in the visible part --
# presumably imported elsewhere in the file; verify.
siteNo = '01545600'
code = '00955'
dataName = 'nbW'
labelLst = ['QF_C', 'QFP_C']
trainSet = '{}-B16'.format('comb')

# WRTDS
varF = gridMET.varLst
varP = ntn.varLst[2:3]
varQ = '00060'
varLst = ['00060', '00955']+varF+varP
varX = varF+varP
df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')
dfX = pd.DataFrame({'date': df.index}).set_index('date')
sn = 1  # offset added before taking the log of varQ
dfX = dfX.join(np.log(df[varQ]+sn)).rename(
    columns={varQ: 'logQ'})
dfX = dfX.join(df[varP])
yr = dfX.index.year.values
t = yr+dfX.index.dayofyear.values/365
dfX['sinT'] = np.sin(2*np.pi*t)
dfX['cosT'] = np.cos(2*np.pi*t)
# rows before 2010
ind = np.where(yr < 2010)[0]
dfYP = pd.DataFrame(index=df.index, columns=[code])
dfYP.index.name = 'date'
# column-wise min-max scaling, using min/max over all rows
dfXN = (dfX-dfX.min())/(dfX.max()-dfX.min())
# dfXN = dfX
x = dfXN.iloc[ind].values
from hydroDL.app import waterQuality
from hydroDL.data import usgs
import numpy as np
import pandas as pd
from hydroDL.post import axplot, figplot
import matplotlib.pyplot as plt

# Script: plots the samples of two codes for one site and overlays in blue
# the samples whose '<code>_cd' value equals 1.
siteNo = '09163500'
varC = ['00660', '00618']
sd = np.datetime64('1979-01-01')
ed = np.datetime64('2019-12-31')
# NOTE(review): `df` and `ed` are not used in the visible part of the script
df = waterQuality.readSiteTS(siteNo, varLst=['00060'] + varC)
dfC, dfCF = usgs.readSample(siteNo, codeLst=varC, startDate=sd, flag=2)

#
fig, axes = plt.subplots(2, 1)
for k, code in enumerate(varC):
    v = dfC[code].values
    f = dfCF[code + '_cd'].values
    t = dfC.index.values
    indF = np.where(f == 1)[0]
    axplot.plotTS(axes[k], t, v, cLst='r', styLst=['-*'])
    axplot.plotTS(axes[k], t[indF], v[indF], cLst='b', styLst='*')
fig.show()
from hydroDL import kPath, utils
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt

# Script: resamples the samples of one code to weekly means ('W-TUE') and
# plots them, red where the weekly flag is 0 and black where it is not.
code = '00660'
siteNo = '01111500'
# NOTE(review): `df` is not used in the visible part of the script
df = waterQuality.readSiteTS(siteNo, [code], freq='D').dropna()
dfC, dfCF = usgs.readSample(siteNo, codeLst=[code], flag=2)
dfC = dfC.resample('W-TUE').mean()
# weekly flag: 1 if the weekly mean of the zero-filled flag table is
# non-zero, else 0
dfCF = dfCF.fillna(0)
dfCFW = dfCF.resample('W-TUE').mean()
dfCFW = dfCFW.fillna(0)
dfCFW[dfCFW != 0] = 1
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
t = dfC.index
v = dfC[code].values
flag = dfCFW[code+'_cd'].values
ax.plot(t[flag == 0], v[flag == 0], 'r*')
ax.plot(t[flag != 0], v[flag != 0], 'k*')
fig.show()