# Evaluate the two HBN model variants of every decade on their training and
# test subsets, collecting predictions and per-site error matrices.
# NOTE(review): the original line breaks / indentation were lost; nesting is
# reconstructed from data flow (accumulators are reset per decade) -- confirm.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN', 'years')

# compare of opt1-4
yrLst = ['80s', '90s', '00s', '10s']
for yr in yrLst:
    # training subset is '<yr>-rm', test subset is the decade label itself
    outLst = ['HBN-{}-rm-{}'.format(yr, opt) for opt in ('opt1', 'opt2')]
    trainSet = '{}-rm'.format(yr)
    testSet = yr
    # alternative configuration kept from the original script:
    # outLst = ['HBN-opt1', 'HBN-opt2',
    #           'HBN-opt3', 'HBN-opt4']
    # trainSet = 'first80'
    # testSet = 'last20'

    # suffix 1 -> training subset, suffix 2 -> test subset
    pLst1, pLst2, errMatLst1, errMatLst2 = [], [], [], []
    for outName in outLst:
        p1, o1 = basins.testModel(outName, trainSet, wqData=wqData)
        p2, o2 = basins.testModel(outName, testSet, wqData=wqData)
        errMat1 = wqData.errBySite(p1, subset=trainSet)
        errMat2 = wqData.errBySite(p2, subset=testSet)
        pLst1.append(p1)
        pLst2.append(p2)
        errMatLst1.append(errMat1)
        errMatLst2.append(errMat2)

    # walk the variable groups listed in the USGS code table
    codePdf = usgs.codePdf
    groupLst = codePdf.group.unique().tolist()
    for group in groupLst:
        codeLst = codePdf[codePdf.group == group].index.tolist()
        # position of each code inside wqData.varC
        indLst = [wqData.varC.index(code) for code in codeLst]
        # two-line labels: short name on top, code underneath
        labLst1 = [
            codePdf.loc[code]['shortName'] + '\n' + code
            for code in codeLst
        ]
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Test the 'basinRef-Yeven-opt2' model at epoch 300 on its training ('Yeven')
# and test ('Yodd') subsets, then start loading WRTDS-F correlation tables.
# NOTE(review): the original line breaks were lost and the fragment is cut
# off inside the last statement; the tail is reproduced verbatim.
wqData = waterQuality.DataModelWQ('basinRef', rmFlag=True)
outName = 'basinRef-Yeven-opt2'
trainSet = 'Yeven'
testSet = 'Yodd'
siteNoLst = wqData.info['siteNo'].unique().tolist()
master = basins.loadMaster(outName)
ep = 300
# suffix 1 -> training subset, suffix 2 -> test subset
yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData, ep=ep)
yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData, ep=ep)
# per-site errors for the variables listed under master['varYC']
errMatC1 = wqData.errBySiteC(ycP1, varC=master['varYC'], subset=trainSet, rmExt=True)
errMatC2 = wqData.errBySiteC(ycP2, varC=master['varYC'], subset=testSet, rmExt=True)
# reference statistics are read from <dirWQ>/modelStat/WRTDS-F
dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-F')
# file name is '<trainSet>-<trainSet>-corr'; first column becomes the index
dfCorr1 = pd.read_csv(os.path.join(dirWrtds, '{}-{}-corr'.format(trainSet, trainSet)), index_col=0)
# NOTE(review): statement truncated in the visible source -- remaining
# arguments are not shown here.
dfCorr2 = pd.read_csv(os.path.join(dirWrtds,
# Per-site RMSE / correlation of one model per code, evaluated on that code's
# '<code>-Y1' and '<code>-Y2' subsets.
# NOTE(review): the original line breaks / indentation were lost; nesting is
# reconstructed from the indices used (iS, iCode, iT) -- confirm.
siteNoLst = wqData.info.siteNo.unique()
nSite = len(siteNoLst)

# single
# result arrays are [site, code, 4]; only slots 0 and 1 of the last axis are
# written in this fragment
corrMat = np.full([nSite, len(codeLst), 4], np.nan)
rmseMat = np.full([nSite, len(codeLst), 4], np.nan)
for iCode, code in enumerate(codeLst):
    trainSet = '{}-Y1'.format(code)
    testSet = '{}-Y2'.format(code)
    outName = '{}-{}-{}-{}'.format(dataName, code, 'ntnS', trainSet)
    master = basins.loadMaster(outName)
    ic = wqData.varC.index(code)
    for iT, subset in enumerate([trainSet, testSet]):
        yP, ycP = basins.testModel(
            outName, subset, wqData=wqData, ep=ep, reTest=reTest)
        ind = wqData.subset[subset]
        # reset_index so that row labels are positions within this subset
        info = wqData.info.iloc[ind].reset_index()
        o = wqData.c[-1, ind, ic]
        p = yP[-1, :, 1]
        for iS, siteNo in enumerate(siteNoLst):
            # only samples dated 1980-01-01 or later are scored
            sd = np.datetime64('1980-01-01')
            infoS = info[info['siteNo'] == siteNo]
            indS = infoS[infoS['date'] >= sd].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iCode, iT] = corr
            rmseMat[iS, iCode, iT] = rmse

# plot box
# Compare two chloride models ('...-ntn' vs. plain) trained on 'Yodd' and
# tested on 'Yeven'.
# NOTE(review): the original line breaks / indentation were lost and the
# fragment is cut off inside the last statement; the tail is kept verbatim.
dataName = 'chloride'
wqData = waterQuality.DataModelWQ(dataName)
outLst = ['chloride-Yodd-ntn', 'chloride-Yodd']
# outLst = ['sulfateNE-Yodd-ntn-silica', 'sulfateNE-Yodd-silica']
trainSet = 'Yodd'
testSet = 'Yeven'
# outLst = ['sulfateNE-Yeven-ntn', 'sulfateNE-Yeven']
# trainSet = 'Yeven'
# testSet = 'Yodd'
# suffix 1 -> training subset, suffix 2 -> test subset
errMatLst1, errMatLst2, ypLst1, ypLst2 = [list() for x in range(4)]
for outName in outLst:
    master = basins.loadMaster(outName)
    # reTest=True is passed on every call, with epoch fixed at 100
    yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData, ep=100, reTest=True)
    yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData, ep=100, reTest=True)
    ypLst1.append(ycP1)
    ypLst2.append(ycP2)
# wherever the first model's prediction is NaN, blank the second model's
# prediction too, so both are scored on the same entries
ypLst1[1][np.isnan(ypLst1[0])] = np.nan
ypLst2[1][np.isnan(ypLst2[0])] = np.nan
for k in range(2):
    # NOTE(review): statement truncated in the visible source -- remaining
    # arguments are not shown here.
    errMatC1 = wqData.errBySiteC(ypLst1[k], subset=trainSet,
import torch
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Score the multi-variable model 'HBN-first50-opt1' and the single-code models
# 'HBN-first50-<code>' on the 'first50' (train) and 'last50' (test) subsets.
# NOTE(review): the original line breaks / indentation were lost; the layout
# below is reconstructed from syntax -- confirm against the original script.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN')
codeLst = ['00955', '00915', '00405']
outLst = ['HBN-first50-{}'.format(x) for x in codeLst]
trainSet = 'first50'
testSet = 'last50'

# reference model covering all variables
outName = 'HBN-first50-opt1'
p1, o1 = basins.testModel(outName, trainSet)
p2, o2 = basins.testModel(outName, testSet)
errMat1 = wqData.errBySite(p1, subset=trainSet)
errMat2 = wqData.errBySite(p2, subset=testSet)

dataBox1 = []
dataBox2 = []
for code in codeLst:
    # model dedicated to this single code
    outName = 'HBN-first50-{}'.format(code)
    p1, o1 = basins.testModel(outName, trainSet)
    p2, o2 = basins.testModel(outName, testSet)
    varC = [code]
    err1 = wqData.errBySite(p1, subset=trainSet, varC=varC)
    err2 = wqData.errBySite(p2, subset=testSet, varC=varC)
    temp = []
    # column of this code in the all-variable error matrix
    ic = wqData.varC.index(code)
    # test-subset slice [:, ic, 1] of the reference model goes in first
    temp.append(errMat2[:, ic, 1])
# Compare four Silica64 / Silica64Mess models for code '00955' on the 'Y8090'
# (train) and 'Y0010' (test) subsets and assemble data for box plots.
# NOTE(review): the original line breaks / indentation were lost and the
# fragment is cut off inside the last statement; the tail is kept verbatim.
outLst = [
    'Silica64-Y8090-opt1', 'Silica64-Y8090-opt2', 'Silica64Mess-Y8090-opt1',
    'Silica64Mess-Y8090-opt2'
]
code = '00955'
trainset = 'Y8090'
testset = 'Y0010'
errMatLst1 = list()
errMatLst2 = list()
for outName in outLst:
    master = basins.loadMaster(outName)
    dataName = master['dataName']
    # NOTE(review): wqData is not assigned anywhere in the visible code and
    # the per-model load below is commented out -- confirm it is defined
    # earlier and matches every model's dataName.
    # wqData = waterQuality.DataModelWQ(dataName)
    # point test
    yP1, ycP1 = basins.testModel(outName, trainset, wqData=wqData)
    errMatC1 = wqData.errBySiteC(ycP1, subset=trainset, varC=master['varYC'])
    yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
    errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=master['varYC'])
    # keep only the slice belonging to `code`
    ic = master['varYC'].index(code)
    errMatLst1.append(errMatC1[:, ic, :])
    errMatLst2.append(errMatC2[:, ic, :])
# box
for k in range(2):
    # k selects the column of the last axis of each error matrix
    dataBox = list()
    for errMatLst in [errMatLst1, errMatLst2]:
        # one entry per model, grouped by subset (train first, then test)
        temp = [errMat[:, k] for errMat in errMatLst]
        dataBox.append(temp)
    label1 = ['B2000', 'A2000']
    # NOTE(review): list literal truncated in the visible source.
    label2 = [
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Score two basinRef models against two data models (default load and
# rmFlag=True), then allocate a per-site sample counter.
# NOTE(review): the original line breaks / indentation were lost; nesting is
# reconstructed from data flow -- confirm against the original script.
outLst = ['basinRef-Y8090-opt1', 'basinRef-Y8090-rmF-opt1']
trainSet = 'Y8090'
testSet = 'Y0010'
errMatLst1 = []
errMatLst2 = []
wqData1 = waterQuality.DataModelWQ('basinRef')
wqData2 = waterQuality.DataModelWQ('basinRef', rmFlag=True)

for outName in outLst:
    master = basins.loadMaster(outName)
    yP1, ycP1 = basins.testModel(outName, trainSet)
    yP2, ycP2 = basins.testModel(outName, testSet)
    # every model is scored against both data models, so each list ends up
    # ordered (model1, data1), (model1, data2), (model2, data1), (model2, data2)
    for wqData in (wqData1, wqData2):
        errMatC1 = wqData.errBySiteC(
            ycP1, subset=trainSet, varC=master['varYC'])
        errMatC2 = wqData.errBySiteC(
            ycP2, subset=testSet, varC=master['varYC'])
        errMatLst1.append(errMatC1)
        errMatLst2.append(errMatC2)

# figure out number of sample
siteNoLst = wqData1.info['siteNo'].unique().tolist()
# second axis length of the last training prediction
nc = ycP1.shape[1]
# integer counter initialised to zero, shaped [site, nc, 2]
countMat1 = np.full([len(siteNoLst), nc, 2], 0)
# Odd/even-year scoring of predicted vs. observed frames, followed by per-site
# scoring of model `outName` on the '<code>-Y1' / '<code>-Y2' subsets.
# NOTE(review): the original line breaks / indentation were lost. The
# statements down to `corrMat[k, :] = ...` index with `k` and read `dfP1`,
# `yr`, `dfP` and `dfO`, none of which are assigned in the visible code, so
# they look like the end of a loop body whose header is out of view --
# restore their indentation when merging.
dfO1 = dfO[yr % 2 == 1]
dfP2 = dfP[yr % 2 == 0]
dfO2 = dfO[yr % 2 == 0]
rmse1, corr1 = utils.stat.calErr(dfP1[code].values, dfO1[code].values)
rmse2, corr2 = utils.stat.calErr(dfP2[code].values, dfO2[code].values)
rmseMat[k, :] = [rmse1, rmse2]
corrMat[k, :] = [corr1, corr2]

# Pre-fill with NaN. The previous np.ndarray(shape) call returned
# uninitialised memory, and sites without samples in a subset are skipped
# below, so arbitrary values were left in those rows.
rmseMat2 = np.full([len(siteNoLst), 2], np.nan)
corrMat2 = np.full([len(siteNoLst), 2], np.nan)
trainSet = '{}-Y1'.format(code)
testSet = '{}-Y2'.format(code)
master = basins.loadMaster(outName)
ic = wqData.varC.index(code)
for iT, subset in enumerate([trainSet, testSet]):
    yP, ycP = basins.testModel(outName, subset, wqData=wqData)
    ind = wqData.subset[subset]
    # reset_index so that row labels are positions within this subset
    info = wqData.info.iloc[ind].reset_index()
    # observation / prediction arrays are indexed differently per data set
    if dataName == 'sbWT':
        o = wqData.c[-1, ind, ic]
        p = yP[-1, :, 1]
    elif dataName == 'sbW':
        o = wqData.c[ind, ic]
        p = ycP[:, 0]
    for iS, siteNo in enumerate(siteNoLst):
        indS = info[info['siteNo'] == siteNo].index.values
        # sites without samples in this subset keep their NaN entries
        if len(indS) > 0:
            [a, b], indV = utils.rmNan([o[indS], p[indS]])
            corr = np.corrcoef(a, b)[0, 1]
            rmse = np.sqrt(np.nanmean((a - b)**2))
            corrMat2[iS, iT] = corr
# Smoke run: write a master configuration, train for 10 epochs, reload the
# `basins` module and test on the training subset.
# NOTE(review): the original line breaks were lost; statements are split on
# syntax. varQ, varP and dataName come from code outside the visible range.
varT = ['sinT', 'cosT']
varF = gridMET.varLst
varX = varQ + varF + varP
varXC = gageII.varLst
varY = varQ
varYC = usgs.newC

saveName = 'test'
trainSet = 'comb-A10'
outName = basins.wrapMaster(
    dataName=dataName,
    trainName=trainSet,
    batchSize=[None, 500],
    outName=saveName,
    varX=varX,
    varY=varY,
    varYC=varYC,
    crit='SigmaLoss',
    nEpoch=10,
    saveEpoch=10,
)

wqData = waterQuality.DataModelWQ('test')
basins.trainModelTS(outName)

# reload `basins` before testing, as the original script does
importlib.reload(basins)
# with this configuration testModel is unpacked into four values
yp, sp, ycp, scp = basins.testModel(
    outName, trainSet, wqData=wqData, ep=10, reTest=True)
import torch
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Score the HBN Y8090 models (opt1, opt2) on train ('Y8090') and test
# ('Y0010') subsets and prepare labels for per-group box plots.
# NOTE(review): the original line breaks / indentation were lost; the layout
# below is reconstructed from syntax -- confirm against the original script.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN')

# compare of opt1-4
outLst = ['HBN-Y8090-opt1', 'HBN-Y8090-opt2']
trainSet = 'Y8090'
testSet = 'Y0010'
errMatLst = []
for outName in outLst:
    yp1, ycp1 = basins.testModel(outName, trainSet, wqData=wqData)
    yp2, ycp2 = basins.testModel(outName, testSet, wqData=wqData)
    errMat1 = wqData.errBySiteC(ycp1, wqData.varC, subset=trainSet)
    errMat2 = wqData.errBySiteC(ycp2, wqData.varC, subset=testSet)
    # stored as train, test per model -- same order as labLst2 below
    errMatLst.extend([errMat1, errMat2])

# walk the variable groups listed in the USGS code table
codePdf = usgs.codePdf
groupLst = codePdf.group.unique().tolist()
for group in groupLst:
    codeLst = codePdf[codePdf.group == group].index.tolist()
    # position of each code inside wqData.varC
    indLst = [wqData.varC.index(code) for code in codeLst]
    # two-line labels: short name on top, code underneath
    labLst1 = [
        codePdf.loc[code]['shortName'] + '\n' + code
        for code in codeLst
    ]
    labLst2 = ['train opt1', 'test opt1', 'train opt2', 'test opt2']
    dataBox = []
# Run sequence tests for two HBN models over all sites and collect the error
# matrices for variable '00060' on the 'Y0010' subset.
# NOTE(review): the original line breaks / indentation were lost; the layout
# below is reconstructed from syntax -- confirm against the original script.
wqData = waterQuality.DataModelWQ('HBN')

doLst = ['subset']
dataName = 'HBN'

# alternative configuration kept from the original script:
# outLst = ['HBN-00618-00955-all-Y8090-opt2', 'HBN-00618-00955-all-Y8090-opt4']
# testset = '00618-00955-all-Y0010'
outLst = ['HBN-Y8090-opt1', 'HBN-Y8090-opt4']
testset = 'Y0010'

siteNoLst = wqData.info['siteNo'].unique().tolist()
errMatLst = []
for out in outLst:
    # sequence test is run first for every site, then the subset test
    basins.testModelSeq(out, siteNoLst, wqData=wqData)
    yP2, ycP2 = basins.testModel(out, testset, wqData=wqData)
    errMat = wqData.errBySiteQ(yP2, ['00060'], subset=testset)
    errMatLst.append(errMat)

# # calculate error - adhoc
# siteNo = siteNoLst[0]
# tB = np.datetime64('2000-01-01')
# dfPred1, dfObs1 = basins.loadSeq(outLst[0], siteNo)
# a1 = dfPred1['00060']
# dfPred2, dfObs2 = basins.loadSeq(outLst[1], siteNo)
# b = dfPred2['00060']
# obs = dfObs1['00060']

# slice [:, 0, 1] of the first model's error matrix
a = errMatLst[0][:, 0, 1]