# NOTE(review): the first statements belong to a loop whose header lies
# outside this excerpt (``ic``, ``ind1``, ``ind2``, ``xT1`` and ``modelYC``
# are set there); re-indent them under that loop when merging.
matP1[ind1, ic] = modelYC.predict(xT1)
if len(ind2) > 0:
    xT2 = x2[ind2, :]
    # Fix: rows selected by ind2 come from x2, so their predictions go into
    # matP2 (the matrix scored against ``testset`` below); the original
    # stored them in matP1.
    matP2[ind2, ic] = modelYC.predict(xT2)

# Pass both prediction matrices through wqData.transOut, then compute the
# per-site error matrices for the two subsets.
matO1 = wqData.transOut(matP1, statYC, varYC)
matO2 = wqData.transOut(matP2, statYC, varYC)
errMatL1 = wqData.errBySiteC(matO1, varYC, subset=trainset)
errMatL2 = wqData.errBySiteC(matO2, varYC, subset=testset)

# box: one panel per variable, pairing the trainset / testset values stored
# at index 1 of the last axis of the error matrices
dataBox = [[errMatL1[:, k, 1], errMatL2[:, k, 1]] for k in range(nc)]
fig = figplot.boxPlot(dataBox)
fig.show()

# auto regression
x1 = dataTup1[0]
yc1 = dataTup1[3]
x2 = dataTup2[0]
siteNo = siteNoLst[0]
ind1 = infoTrain[infoTrain['siteNo'] == siteNo].index
ind2 = infoTest[infoTest['siteNo'] == siteNo].index
xT1 = x1[:, ind1, :]
ycT1 = yc1[ind1, :]
# Fix: ind2 is derived from infoTest, so it must index x2; the original
# sliced the training array x1 with it.
xT2 = x2[:, ind2, :]
for ic in range(nc):
    # utils.rmNan is given the site's inputs and one target column at a time
    [xx, yy], iv = utils.rmNan([xT1, ycT1[:, ic]])
# Time series at index k of the second axis: model output next to d1.Y and
# d2.Y.
dataPlot = [arr[:, k, :] for arr in (yOut, d1.Y, d2.Y)]
cLst = ['red', 'grey', 'black']
fig, axes = figplot.multiTS(DF.t, dataPlot, labelLst=labelLst, cLst=cLst)
fig.show()

# correlation matrix
statStrLst = ['Bias', 'RMSE', 'NSE', 'Corr']
nSite = len(siteNoLst)
nCode = len(codeSel)
mat1 = np.ndarray([nSite, nCode, 4])
mat2 = np.ndarray([nSite, nCode, 4])
for indS, siteNo in enumerate(siteNoLst):
    for indC, code in enumerate(codeSel):
        stat = utils.stat.calStat(yOut[:, indS, indC], d2.Y[:, indS, indC])
        # store the four statistics in the order given by statStrLst
        for iStat, statKey in enumerate(statStrLst):
            mat1[indS, indC, iStat] = stat[statKey]

# One panel per statistic, one box per code.
dataPlot = list()
for k, statStr in enumerate(statStrLst):
    temp = [mat1[:, ic, k] for ic in range(nCode)]
    dataPlot.append(temp)
fig = figplot.boxPlot(
    dataPlot,
    widths=0.5,
    figsize=(12, 4),
    label1=statStrLst,
    label2=codeSel,
    sharey=False,
)
fig.show()
# plot box -- panel captions are "<shortName>\n<code>"
labLst1 = list()
for code in codeLst:
    labLst1.append(usgs.codePdf.loc[code]['shortName'] + '\n'+code)
# labLst2 = ['WRTDS train', 'WRTDS test', 'LSTM train', 'LSTM test']
labLst2 = ['WRTDS test', 'LSTM test']
# slots of the last corrMat axis shown per panel (full set: [2, 3, 0, 1])
dataBox = list()
for k, code in enumerate(codeLst):
    temp = [corrMat[:, k, i] for i in [3, 1]]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox,
    label1=labLst1,
    widths=0.5,
    cLst='br',
    label2=labLst2,
    figsize=(12, 4),
    yRange=[0, 1],
)
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()

# p-values
testLst = ['p-value']
indLst = [[1, 3]]
codeStrLst = [
    '{} {}'.format(code, usgs.codePdf.loc[code]['shortName'])
    for code in codeLst
]
dfS = pd.DataFrame(index=codeStrLst, columns=testLst)
for test, ind in zip(testLst, indLst):
    for k, code in enumerate(codeLst):
        # pair of corrMat columns compared for this code
        data = [corrMat[:, k, x] for x in ind]
        [a, b], _ = utils.rmNan(data)
        # s, p = scipy.stats.ttest_ind(a, b, equal_var=False)
# NOTE(review): the first statements appear to sit inside a loop over model
# runs whose header (and the computation of p1) is outside this excerpt.
p2, o2 = basins.testModel(outName, testSet, wqData=wqData)
errMat1 = wqData.errBySite(p1, subset=trainSet)
errMat2 = wqData.errBySite(p2, subset=testSet)
pLst1.append(p1)
pLst2.append(p2)
errMatLst1.append(errMat1)
errMatLst2.append(errMat2)

# One figure per variable group: for each code, the train-set boxes of every
# run followed by the test-set boxes of every run.
codePdf = usgs.codePdf
groupLst = codePdf.group.unique().tolist()
for group in groupLst:
    inGroup = codePdf.group == group
    codeLst = codePdf[inGroup].index.tolist()
    indLst = [wqData.varC.index(code) for code in codeLst]
    labLst1 = [codePdf.loc[code]['shortName'] + '\n'+code
               for code in codeLst]
    labLst2 = ['opt1-train', 'opt2-train', 'opt1-test', 'opt2-test']
    dataBox = list()
    for ic in indLst:
        temp = [errMat[:, ic, 1] for errMat in errMatLst1]
        temp.extend(errMat[:, ic, 1] for errMat in errMatLst2)
        dataBox.append(temp)
    title = 'correlation of {} group on {}'.format(group, yr)
    figName = 'box_{}_{}'.format(group, yr)
    fig = figplot.boxPlot(dataBox, label1=labLst1, label2=labLst2)
    fig.suptitle(title)
    fig.show()
    figPath = os.path.join(figFolder, figName)
    fig.savefig(figPath)
statStrLst = ['Bias', 'RMSE', 'NSE', 'Corr']
dataPlot = list()
labelLst = [
    usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst
]
# One figure per statistic; each panel (one per code) holds three boxes built
# from mat1, mat2 and mat3 after utils.rmNan has been applied to the triple.
for k, statStr in enumerate(statStrLst):
    temp = list()
    for ic, code in enumerate(codeLst):
        cleaned, _ = utils.rmNan([mat[:, ic, k] for mat in (mat1, mat2, mat3)])
        a, b, c = cleaned
        temp.append([a, b, c])
    # Bias and RMSE get independent y axes; the other statistics share one.
    sharey = statStr not in ['Bias', 'RMSE']
    fig, axes = figplot.boxPlot(
        temp,
        widths=0.5,
        figsize=(12, 4),
        label2=['LSTM w/ Q', 'LSTM w/o Q', 'WRTDS'],
        label1=labelLst,
        sharey=sharey,
    )
    if statStr == 'Bias':
        # zero reference line on every bias panel
        for ax in axes:
            _ = ax.axhline(0)
    fig.show()

#
# DF2 = dbBasin.DataFrameBasin('G400')
# labelLst = [usgs.codePdf.loc[code]['shortName'] + code for code in codeLst]
# d1 = dbBasin.DataModelBasin(DF2, subset=trainSet, varY=codeLst)
# d2 = dbBasin.DataModelBasin(DF2, subset=testSet, varY=codeLst)
# k = 60
# dataPlot = [yW[:, k, :], d1.Y[:, k, :], d2.Y[:, k, :]]
# NOTE(review): excerpt -- ycP2, yP2, errMatC1, errMatQ1, master, testset,
# plotVar and outName are defined before this point.
errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=master['varYC'])
# errMatQ2 is only bound when master['varY'] is set, yet the '00060' branch
# below reads it unconditionally.
if master['varY'] is not None:
    errMatQ2 = wqData.errBySiteQ(yP2, subset=testset, varQ=master['varY'])

# box
dataBox = list()
# k picks a slot on the last axis of the error matrices -- presumably
# 0 = RMSE and 1 = correlation, judging by label1 below; TODO confirm.
for k in range(2):
    for var in plotVar:
        if var == '00060':
            temp = [errMatQ1[:, 0, k], errMatQ2[:, 0, k]]
        else:
            ic = master['varYC'].index(var)
            temp = [errMatC1[:, ic, k], errMatC2[:, ic, k]]
        dataBox.append(temp)
fig = figplot.boxPlot(dataBox, label1=['RMSE', 'Corr'],
                      label2=['train', 'test'], sharey=False)
fig.show()

# seq test
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)

# time series map
dfCrd = gageII.readData(varLst=['LAT_GAGE', 'LNG_GAGE'], siteNoLst=siteNoLst)
lat = dfCrd['LAT_GAGE'].values
lon = dfCrd['LNG_GAGE'].values
codePdf = usgs.codePdf


# NOTE(review): the body of funcMap lies beyond the end of this excerpt.
def funcMap():
# NOTE(review): the first statements belong to loops whose headers (binding
# code, iCode, iLab, p, ind, ic and info) are outside this excerpt.
o = wqData.c[ind, ic]
for siteNo in dictSite[code[:5]]:
    iS = siteNoLst.index(siteNo)
    siteRows = info['siteNo'] == siteNo
    indS = info[siteRows].index.values
    rmse, corr = utils.stat.calErr(p[indS], o[indS])
    rmseMat[iS, iCode, iLab] = rmse
    corrMat[iS, iCode, iLab] = corr

# plot box -- one panel per code, one box per entry of labelLst
labLst1 = [
    usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst
]
labLst3 = [lab + '-WRTDS' for lab in labLst2]
dataBox = list()
for k, code in enumerate(codeLst):
    temp = [corrMat[:, k, i] for i in range(len(labelLst))]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox,
    label1=labLst1,
    widths=0.5,
    cLst=cLst,
    label2=labLst3,
    figsize=(12, 4),
    yRange=[-1, 1],
)
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
# NOTE(review): the first statements belong to a per-site loop whose header
# (binding i, siteNo and indS1) is outside this excerpt.
indS2 = info2[info2['siteNo'] == siteNo].index.values
for iC in range(nc):
    # number of non-NaN samples of variable iC at this site, per data set
    hasObs1 = ~np.isnan(ycT1[indS1, iC])
    hasObs2 = ~np.isnan(ycT2[indS2, iC])
    countMat[i, iC, 0] = np.count_nonzero(hasObs1)
    countMat[i, iC, 1] = np.count_nonzero(hasObs2)
countMatLst = [countMat1, countMat2, countMat1, countMat2]

# plot box
codePdf = usgs.codePdf
codeLst = ['00660', '00665', '00600', '00605', '00618', '71846', '00950']
# codeLst = codePdf[codePdf.group == group].index.tolist()
indLst = [wqData.varC.index(code) for code in codeLst]
labLst1 = [codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst]
labLst2 = [
    'train all test all',
    'train all test rmFlag',
    'train rmFlag test all',
    'train rmFlag test rmFlag',
]
rho = 20
dataBox = list()
for ic in indLst:
    temp = list()
    # NOTE: this loop rebinds the name ``countMat`` used above.
    for errMat, countMat in zip(errMatLst2, countMatLst):
        # keep rows where both count slots exceed rho
        enough = (countMat[:, ic, 0] > rho) & (countMat[:, ic, 1] > rho)
        ind = np.where(enough)[0]
        temp.append(errMat[ind, ic, 1])
        # temp.append(errMat[:, ic, 1])
    dataBox.append(temp)
title = 'test correlation of referenced sites with >{} samples'.format(rho)
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2, figsize=(12, 6))
fig.suptitle(title)
fig.show()
# plot box labLst1 = [ usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst ] labLst2 = ['LSTM vs WRTDS', 'LSTM vs Obs', 'WRTDS vs Obs'] dataBox = list() for k in range(len(codeLst)): code = codeLst[k] temp = list() for i in [0, 1, 2]: temp.append(corrMat[:, k, i]) dataBox.append(temp) fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5, cLst='grb', label2=labLst2, figsize=(20, 5), yRange=[0, 1]) fig.show() # plot 121 importlib.reload(axplot) codeLst2 = [ '00095', '00400', '00405', '00600', '00605', '00618', '00660', '00665', '00681', '00915', '00925', '00930', '00935', '00940', '00945', '00950', '00955', '70303', '71846', '80154' ] fig, axes = plt.subplots(5, 4) ticks = [-0.5, 0, 0.5, 1] for k, code in enumerate(codeLst2):
# NOTE(review): this is the body of a loop over ``kk`` whose header is
# outside this excerpt; re-indent under it when merging.
name = nameLst[kk]
mat = matLst[kk]
yRange = rangeLst[kk]
label1 = ecoIdLst
# pair entry k of mat[0] with entry k of mat[1], one panel per subset
dataBox = list()
for k in range(len(subsetLst)):
    temp = [mat[0][k], mat[1][k]]
    dataBox.append(temp)
# only the first figure (kk == 0) carries the legend labels
label2 = ['Local', 'CONUS'] if kk == 0 else None
fig = figplot.boxPlot(
    dataBox,
    widths=0.5,
    cLst='rb',
    label1=label1,
    label2=label2,
    figsize=(12, 4),
    yRange=yRange,
)
saveFile = os.path.join(saveFolder, 'q_ecoR_{}'.format(name))
fig.savefig(saveFile)
fig.show()

# # significance test
# testLst = ['Q as target', 'Q as input']
# indLst = [[0, 2], [1, 2]]
# codeStrLst = ['{} {}'.format(
#     code, usgs.codePdf.loc[code]['shortName']) for code in codeLst]
# dfS = pd.DataFrame(index=codeStrLst, columns=testLst)
# for (test, ind) in zip(testLst, indLst):
#     for k, code in enumerate(codeLst):
#         data = [corrMat[:, k, x] for x in ind]
#         [a, b], _ = utils.rmNan(data)
# NOTE(review): the first statements belong to loops whose headers (binding
# siteNo, iS, iLab, iT, p, o and info) are outside this excerpt.
siteRows = info['siteNo'] == siteNo
indS = info[siteRows].index.values
rmse, corr = utils.stat.calErr(p[indS], o[indS])
corrMat[iS, iLab, iT] = corr
# rmseMat[iS, iCode, iT*2] = rmse

# # WRTDS
# dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
# # dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS')
# file1 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'B10N5'))
# dfCorr1 = pd.read_csv(file1, dtype={'siteNo': str}).set_index('siteNo')
# file2 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'A10N5'))
# dfCorr2 = pd.read_csv(file2, dtype={'siteNo': str}).set_index('siteNo')
# for iCode, code in enumerate(codeLst):
#     indS = [siteNoLst.index(siteNo) for siteNo in dictSite[code]]
#     corrMat[indS, iCode, 4] = dfCorr1.iloc[indS][code].values
#     corrMat[indS, iCode, 5] = dfCorr2.iloc[indS][code].values

# plot box
# NOTE(review): the boxes read corrMat[:, i, 1] and never use k, so every
# panel receives the same data -- confirm this is intended.
dataBox = list()
for k, code in enumerate(codeLst):
    # alternative slot order tried earlier: [2, 3, 0, 1]
    temp = [corrMat[:, i, 1] for i in range(len(labelLst))]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox, widths=0.5, figsize=(12, 4), yRange=[0, 1])
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
# NOTE(review): ``x`` is defined before this excerpt.
y = dfR1['corr'].values
# entries of x below -900 are replaced by NaN
x[x < -900] = np.nan

# density plot: distribution of x inside 0.1-wide bins of y
# Fix: np.arange(0, 1, 0.1) ends at 0.9, so samples with y >= 0.9 never
# reached any box. Use 11 edges spanning [0, 1] and close the last bin on
# the right so y == 1 is kept as well.
vLst = np.linspace(0, 1, 11)
nBin = len(vLst) - 1
dataBox = list()
labLst = list()
for k in range(1, len(vLst)):
    v1 = vLst[k - 1]
    v2 = vLst[k]
    if k == nBin:
        inBin = (y >= v1) & (y <= v2)
    else:
        inBin = (y >= v1) & (y < v2)
    ind = np.where(inBin)[0]
    # empty bins are skipped, so labLst stays aligned with dataBox
    if len(ind) > 0:
        dataBox.append(x[ind])
        labLst.append('{:.2f}'.format(v1))
vRange = [np.nanmin(x), np.nanmax(x)]
fig = figplot.boxPlot(dataBox, label1=labLst, figsize=(8, 4), widths=0.3)
plt.subplots_adjust(wspace=0)
fig.show()

# cum plot: running mean of x, taken in the reversed order returned by
# utils.rankData(y)
yr, ind = utils.rankData(y)
yr = yr[::-1]
ind = ind[::-1]
# Fix: x contains NaN (set above) and a plain cumsum makes every entry after
# the first NaN equal to NaN. Accumulate valid values only and divide by the
# number of valid values seen so far (0/0 -> NaN before the first valid one).
xSort = x[ind]
nValid = np.cumsum(~np.isnan(xSort))
with np.errstate(divide='ignore', invalid='ignore'):
    xr = np.nancumsum(xSort) / nValid
fig, ax = plt.subplots(1, 1)
# the first 10 entries are left out of the curve
ax.plot(yr[10:], xr[10:], '-')
fig.show()

# 121
fig, ax = plt.subplots(1, 1)
ax.plot(x, y, '*')
# Evaluate the model named '<dataName>-<subset>-B10-gs' on its own subset and
# collect Nash / RMSE / correlation from time index indT onward (channel 0).
for subset in subsetLst:
    testSet = subset
    outName = '{}-{}-B10-gs'.format(dataName, subset)
    yP, ycP = basinFull.testModel(
        outName, DM=dm, batchSize=20, testSet=testSet, reTest=False)
    yO, ycO = basinFull.getObs(outName, testSet, DM=dm)
    predTail = yP[indT:, :, 0]
    obsTail = yO[indT:, :, 0]
    nash2 = utils.stat.calNash(predTail, obsTail)
    rmse2 = utils.stat.calRmse(predTail, obsTail)
    corr2 = utils.stat.calCorr(predTail, obsTail)
    # same statistics over the part before indT:
    # nash2 = utils.stat.calNash(yP[:indT, :, 0], yO[:indT, :, 0])
    # rmse2 = utils.stat.calRmse(yP[:indT, :, 0], yO[:indT, :, 0])
    # corr2 = utils.stat.calCorr(yP[:indT, :, 0], yO[:indT, :, 0])
    nashLst2.append(nash2)
    rmseLst2.append(rmse2)
    corrLst2.append(corr2)

# plot box -- per subset, entry k of corrLst1 next to entry k of corrLst2
# matLst = [nashLst1, nashLst2]
matLst = [corrLst1, corrLst2]
label1 = subsetLst
label2 = ['CONUS', 'Local']
dataBox = list()
for k in range(len(subsetLst)):
    temp = [matLst[0][k], matLst[1][k]]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox,
    widths=0.5,
    cLst='brgk',
    label1=label1,
    label2=label2,
    figsize=(6, 4),
    yRange=[0, 1],
)
fig.show()
# Global plot styling for the figures below.
matplotlib.rcParams.update({
    'font.size': 18,
    'lines.linewidth': 2,
    'lines.markersize': 12,
})

# # plot box
# labLst1 = [usgs.codePdf.loc[code]['shortName'] +
#            '\n'+code for code in codeLst]
# Fix: each panel holds four boxes (LSTM columns 0/1, then WRTDS columns
# 0/1) but the legend only had the two labels 'train' / 'test', and the
# colour string 'brgb' gave the first and fourth box the same colour. Give
# every box its own label and colour. Columns 0 / 1 are train / test, as in
# the earlier two-box (LSTM only) version of this plot.
label2 = ['LSTM train', 'LSTM test', 'WRTDS train', 'WRTDS test']
label1 = ['correlation', 'RMSE']
ic = 0
# dataBox = [[corrLSTM[:, 0], corrLSTM[:, 1],],
#            [rmseLSTM[:, 0], rmseLSTM[:, 1]]]
dataBox = [
    [corrLSTM[:, 0], corrLSTM[:, 1], corrWRTDS[:, 0], corrWRTDS[:, 1]],
    [rmseLSTM[:, 0], rmseLSTM[:, 1], rmseWRTDS[:, 0], rmseWRTDS[:, 1]],
]
fig = figplot.boxPlot(dataBox, widths=0.5, cLst='brgk', label2=label2,
                      label1=label1, figsize=(8, 5), sharey=False)
fig.show()

# map: column 1 of corrLSTM at the sites listed for ``code`` in dictSite
# NOTE(review): ``code`` is defined before this excerpt.
figM, axM = plt.subplots(1, 1, figsize=(8, 4))
siteNoLstCode = dictSite[code]
indS = [siteNoLst.index(siteNo) for siteNo in siteNoLstCode]
dfCrd = gageII.readData(
    varLst=['LAT_GAGE', 'LNG_GAGE'], siteNoLst=siteNoLstCode)
lat = dfCrd['LAT_GAGE'].values
lon = dfCrd['LNG_GAGE'].values
shortName = usgs.codePdf.loc[code]['shortName']
matMap = corrLSTM[indS, 1]
axplot.mapPoint(axM, lat, lon, matMap, s=24)
# NOTE(review): file1, dirWrtds, corrWRTDS, corrMat, labelLst, labLst2 and
# cLst are defined before this excerpt.
dfCorr1 = pd.read_csv(file1, dtype={'siteNo': str}).set_index('siteNo')
file2 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'A10N5'))
dfCorr2 = pd.read_csv(file2, dtype={'siteNo': str}).set_index('siteNo')
for iCode, code in enumerate(codeLst):
    siteLst = dictSite[code]
    indS = [siteNoLst.index(siteNo) for siteNo in siteLst]
    # Fix: both frames are indexed by siteNo, but the original read them with
    # .iloc[indS], i.e. by position in siteNoLst, which silently picks the
    # wrong rows if the file's row order differs. Select by site number
    # instead; a site missing from the file yields NaN.
    corrWRTDS[indS, iCode, 0] = dfCorr1[code].reindex(siteLst).values
    corrWRTDS[indS, iCode, 1] = dfCorr2[code].reindex(siteLst).values

# plot box -- per code: one box per entry of labelLst (from corrMat),
# followed by slot 1 of corrWRTDS
labLst1 = [
    usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst
]
dataBox = list()
for k, code in enumerate(codeLst):
    temp = [corrMat[:, k, i] for i in range(len(labelLst))]
    temp.append(corrWRTDS[:, k, 1])
    dataBox.append(temp)
fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5, cLst=cLst,
                      label2=labLst2 + ['WRTDS'], figsize=(12, 4),
                      yRange=[0, 1])
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
# One figure per metric (three in total); the legend is drawn separately
# afterwards, so the panels themselves are created with label2=None.
for kk in range(3):
    name, mat, yRange = nameLst[kk], matLst[kk], rangeLst[kk]
    label1 = ecoIdLst
    label2 = ['Local', 'CONUS']
    # pair entry k of mat[0] with entry k of mat[1], one panel per subset
    dataBox = list()
    for k in range(len(subsetLst)):
        temp = [mat[0][k], mat[1][k]]
        dataBox.append(temp)
    fig = figplot.boxPlot(
        dataBox,
        widths=0.5,
        cLst='rb',
        label1=label1,
        label2=None,
        figsize=(12, 4),
        yRange=yRange,
    )
    saveFile = os.path.join(saveFolder, 'q_ref_ecoR_{}'.format(name))
    # saved twice: once under the bare name, once with an explicit .eps suffix
    for suffix in ('', '.eps'):
        fig.savefig(saveFile + suffix)
    fig.show()

# Legend-only figure, built from the data of the last iteration.
fig = figplot.boxPlot(
    dataBox,
    widths=0.5,
    cLst='rb',
    label1=label1,
    label2=['Local', 'CONUS'],
    legOnly=True,
)
saveFile = os.path.join(saveFolder, 'q_ecoR_legend')
fig.savefig(saveFile)
# plot box labLst1 = [ usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst ] labLst2 = ['LSTM w/o Q', 'LSTM w/ Q', 'WRTDS'] dataBox = list() for k in range(len(codeLst)): code = codeLst[k] temp = list() for i in [0, 1, 2]: temp.append(corrMat[:, k, i]) dataBox.append(temp) fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5, cLst='bgr', label2=labLst2, figsize=(20, 5), yRange=[0, 1]) fig.show() # plot box labLst1 = [ usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst ] labLst2 = ['LSTM w/o Q', 'LSTM w/ Q', 'WRTDS'] dataBox = list() for k in range(len(codeLst)): code = codeLst[k] temp = list() for i in [0, 1, 2]:
saveFolder = r'C:\Users\geofk\work\paper\SMAP-regional'
# tempLst = ['080401', '080305', '080304', '090203', '080301', '050301']
tempLst = caseLst
rangeLst = [[0, 1], [0.4, 1], [0.0, 1]]
# One figure per metric; cases are plotted in the order given by tempLst.
for kk in range(3):
    name = nameLst[kk]
    metricMat = matLst[kk]
    mat = [metricMat[caseLst.index(x)] for x in tempLst]
    yRange = rangeLst[kk]
    lab1 = [labLst[caseLst.index(x)] for x in tempLst]
    # only the first figure carries the legend labels
    label2 = ['lev II', 'lev I', 'CONUS'] if kk == 0 else None
    fig = figplot.boxPlot(
        mat,
        widths=0.5,
        cLst='ygbr',
        label1=lab1,
        label2=label2,
        figsize=(12, 4),
        yRange=yRange,
    )
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    saveFile = os.path.join(saveFolder, 'q_sim_{}'.format(name))
    # fig.savefig(saveFile)
    fig.show()

# # another
# tempLst = ['090402', '090403']
# rangeLst = [[0, 1], [0.0, 1], [-0.4, 1]]
# for kk in range(3):
#     name = nameLst[kk]
#     mat = [matLst[kk][caseLst.index(x)] for x in tempLst]
#     yRange = rangeLst[kk]
#     lab1 = [labLst[caseLst.index(x)] for x in tempLst]
# NOTE(review): excerpt -- trainLst and DF are defined before this point.
testLst = ['rmYr5', 'pkR20', 'pkL20', 'pkRT20', 'A10']
# df records, per code and training subset, how many sites pass the count
# filter below.
df = pd.DataFrame(index=DF.varC, columns=trainLst)
aLst = list()
bLst = list()
# Extract DF.c once per (train, test) subset pair.
for trainSet, testSet in zip(trainLst, testLst):
    a = DF.extractSubset(DF.c, trainSet)
    b = DF.extractSubset(DF.c, testSet)
    aLst.append(a)
    bLst.append(b)
dataBox = list()
for code in DF.varC:
    indC = DF.varC.index(code)
    temp = list()
    for trainSet, a, b in zip(trainLst, aLst, bLst):
        # non-NaN masks for this code, counted along axis 0
        x = ~np.isnan(a[:, :, indC])
        y = ~np.isnan(b[:, :, indC])
        n1 = np.sum(x, axis=0)
        n2 = np.sum(y, axis=0)
        # keep columns with more than 160 samples in a and more than 40 in b
        indS = np.where((n1 > 160) & (n2 > 40))[0]
        temp.append(n2[indS] / n1[indS])
        df.at[code, trainSet] = len(indS)
        # NOTE(review): the original nesting of this statement could not be
        # recovered from the collapsed source; it also mixes raw counts with
        # the ratios appended above -- confirm both.
        temp.append(n1[indS])
    dataBox.append(temp)
labLst1 = [
    '{}\n{}'.format(usgs.codePdf.loc[code]['shortName'], code)
    for code in DF.varC
]
fig, ax = figplot.boxPlot(dataBox, label1=labLst1, figsize=(6, 4))
fig.show()
# NOTE(review): excerpt -- outName, testset, wqData, master, plotVar,
# errMatC1 and errMatQ1 are defined before this point.
yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=master['varYC'])
# errMatQ2 is only bound when master['varY'] is set, yet the '00060' branch
# below reads it unconditionally.
if master['varY'] is not None:
    errMatQ2 = wqData.errBySiteQ(yP2, subset=testset, varQ=master['varY'])

# box
dataBox = list()
# k picks a slot on the last axis of the error matrices; for each variable
# the *1 matrix is paired with the *2 (testset) matrix.
for k in range(2):
    for var in plotVar:
        if var == '00060':
            temp = [errMatQ1[:, 0, k], errMatQ2[:, 0, k]]
        else:
            ic = master['varYC'].index(var)
            temp = [errMatC1[:, ic, k], errMatC2[:, ic, k]]
        dataBox.append(temp)
fig = figplot.boxPlot(dataBox, sharey=False)
fig.show()

# seq test
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)

# time series map
dfCrd = gageII.readData(varLst=['LAT_GAGE', 'LNG_GAGE'], siteNoLst=siteNoLst)
lat = dfCrd['LAT_GAGE'].values
lon = dfCrd['LNG_GAGE'].values
codePdf = usgs.codePdf


# NOTE(review): funcMap continues beyond the end of this excerpt.
def funcMap():
    nM = len(plotVar)
# Split sites into five percentile bands of the attribute cVar and compare
# slots 1 and 2 of corrMat's last axis for one code inside each band.
cVar = 'STOR_NID_2009'
cMat = dfG[cVar].values
# cMat = np.log(cMat+1)
# cR = [np.nanpercentile(cMat, 10), np.nanpercentile(cMat, 90)]
cR = [np.nanmin(cMat), np.nanmax(cMat)]
code = '00618'
ic = codeLst.index(code)
pcLst = np.arange(0, 101, 20)
nBox = len(pcLst) - 1
labelLst = list()
dataBox = list()
for k in range(nBox):
    v1 = np.nanpercentile(cMat, pcLst[k])
    v2 = np.nanpercentile(cMat, pcLst[k + 1])
    labelLst.append('{:.2f}\n{:.2f}'.format(v1, v2))
    # the first band is closed on the left so the minimum value is included
    aboveLow = (cMat >= v1) if k == 0 else (cMat > v1)
    ind = np.where(aboveLow & (cMat <= v2))[0]
    temp = [corrMat[ind, ic, 1], corrMat[ind, ic, 2]]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox,
    label1=labelLst,
    widths=0.5,
    label2=['LSTM', 'WRTDS'],
    figsize=(12, 4),
    yRange=[0, 1],
)
fig.show()
# Collect, for every model in outLst, the per-site error columns of one code
# on the train subset (list 1) and the test subset (list 2).
errMatLst1 = list()
errMatLst2 = list()
for outName in outLst:
    master = basins.loadMaster(outName)
    dataName = master['dataName']
    # wqData = waterQuality.DataModelWQ(dataName)
    # point test
    varYC = master['varYC']
    yP1, ycP1 = basins.testModel(outName, trainset, wqData=wqData)
    errMatC1 = wqData.errBySiteC(ycP1, subset=trainset, varC=varYC)
    yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
    errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=varYC)
    ic = varYC.index(code)
    errMatLst1.append(errMatC1[:, ic, :])
    errMatLst2.append(errMatC2[:, ic, :])

# box -- one figure per error column; column 0 is titled RMSE, column 1
# Correlation
for k, figTitle in enumerate(['RMSE', 'Correlation']):
    dataBox = list()
    for errMatLst in [errMatLst1, errMatLst2]:
        temp = [errMat[:, k] for errMat in errMatLst]
        dataBox.append(temp)
    label1 = ['B2000', 'A2000']
    label2 = [
        'all C, Q in',
        'all C, Q out',
        'all C, Q in, messed',
        'all C, Q out, messed',
    ]
    fig = figplot.boxPlot(dataBox, label1=label1, label2=label2, sharey=True)
    fig.suptitle(figTitle)
    fig.show()
# NOTE(review): the two appends close loops whose headers are outside this
# excerpt; re-indent them under those loops when merging.
errLst.append(err)
errLstAll.append(errLst)

# plot box -- one figure per error key; each panel is one entry of
# errLstAll, each box one entry of that panel's error list
cLst = 'ygbr'
keyLst = ['RMSE', 'Corr']
for key in keyLst:
    dataBox = list()
    for errLst in errLstAll:
        temp = [err[key] for err in errLst]
        dataBox.append(temp)
    fig = figplot.boxPlot(
        dataBox, label1=caseLabLst, cLst=cLst, figsize=(12, 4), sharey=True)
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    fig.show()
    saveFile = os.path.join(saveFolder, 'sm_sim_{}'.format(key))
    fig.savefig(saveFile)
    fig.savefig(saveFile + '.eps')

# Legend-only figure, built from the data of the last key.
label2 = ['local', 'local+close', 'local+far', 'local+dissimilar']
fig = figplot.boxPlot(dataBox, label2=label2, cLst=cLst, legOnly=True)
saveFile = os.path.join(saveFolder, 'sm_sim_legend')
fig.savefig(saveFile)
fig.savefig(saveFile + '.eps')
fig.show()
import matplotlib.pyplot as plt
import pandas as pd
import os
import json

# load WRTDS results for one code from the two result folders
dirStat = os.path.join(kPath.dirWQ, 'modelStat')
dirRoot1 = os.path.join(dirStat, 'WRTDS_weekly')
dirRoot2 = os.path.join(dirStat, 'WRTDS_weekly_rmq')
code = '00955'
dfRes1 = pd.read_csv(
    os.path.join(dirRoot1, 'result', code), dtype={'siteNo': str})
dfRes1 = dfRes1.set_index('siteNo')
dfRes2 = pd.read_csv(
    os.path.join(dirRoot2, 'result', code), dtype={'siteNo': str})
dfRes2 = dfRes2.set_index('siteNo')

# site inventory and the weekly count matrix
dirInv = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInv, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
countMatW = np.load(os.path.join(dirInv, 'matCountWeekly.npy'))
codeLst = sorted(usgs.codeLst)
ic = codeLst.index(code)

# counts summed over the last ny entries of axis 1
ny = 3
count = np.sum(countMatW[:, -ny:, ic], axis=1)
nsLst = np.arange(5, 20)*ny

# First box: rows of dfRes1 whose own 'count' column exceeds 10; then one
# box per threshold in nsLst.
# NOTE(review): ``count >= ns`` is applied to dfRes1 by row position, which
# assumes dfRes1 has one row per row of countMatW in the same order.
corrSer = dfRes1['corr']
dataBox = [corrSer[dfRes1['count'] > 10].values]
for j, ns in enumerate(nsLst):
    dataBox.append(corrSer[count >= ns].values)
fig = figplot.boxPlot(dataBox, figsize=(12, 4), yRange=[0, 1])
fig.show()
# NOTE(review): the first part belongs to loops whose headers (binding code,
# iCode, k, yP, ycP, master and wqData) are outside this excerpt.
ind = wqData.subset[testSet]
info = wqData.info.iloc[ind].reset_index()
ic = wqData.varC.index(code)
# wqData.c is either 3-D (its last entry along axis 0 is compared with the
# last entry of yP) or 2-D (compared with ycP).
nDim = len(wqData.c.shape)
if nDim == 3:
    p = yP[-1, :, master['varY'].index(code)]
    o = wqData.c[-1, ind, ic]
elif nDim == 2:
    p = ycP[:, master['varYC'].index(code)]
    o = wqData.c[ind, ic]
for siteNo in dictSite[code]:
    iS = siteNoLst.index(siteNo)
    indS = info[info['siteNo'] == siteNo].index.values
    rmse, corr = utils.stat.calErr(p[indS], o[indS])
    corrMat[iS, iCode, k] = corr
    rmseMat[iS, iCode, k] = rmse

# plot box -- one panel per code, one box per entry of dataLst
labLst1 = [usgs.codePdf.loc[code]['shortName'] +
           '\n'+code for code in codeLst]
dataBox = list()
for k, code in enumerate(codeLst):
    temp = [corrMat[:, k, i] for i in range(len(dataLst))]
    dataBox.append(temp)
# Fix: the legend labels were fused into the single string 'weekly,daily';
# split them so each of the two colours in cLst gets its own label.
fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5, cLst='rb',
                      label2=['weekly', 'daily'], figsize=(12, 4),
                      yRange=[0, 1])
fig.show()
[ '00915', '00925', '00935', '00930', '00940', '00945', '00955', '00410', '00405', '00300', '00950', '00440' ]] strLst = ['physical and nutrient variables', 'inorganics variables'] for k in range(2): codeLst = groupLst[k] indLst = [wqData.varC.index(code) for code in codeLst] labLst1 = [ codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst ] labLst2 = ['train LSTM', 'test LSTM', 'train WRTDS', 'test WRTDS'] dataBox = list() for ic in indLst: temp = list() for errMat in [errMatC1, errMatC2, errMatC3, errMatC4]: ind = np.where((countMat[:, ic, 0] > 20) & (countMat[:, ic, 1] > 20))[0] temp.append(errMat[ind, ic, 1]) dataBox.append(temp) fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.4, label2=labLst2, figsize=(16, 4), yRange=[0, 1]) title = 'correlation of {}'.format(strLst[k]) fig.suptitle(title) fig.show() # fig.savefig(os.path.join(saveDir, 'box_group{}'.format(k)))
# NOTE(review): the first statements belong to loops whose headers (binding
# iEp, indC, s, m and yP) are outside this excerpt.
yOut[:, :, indC] = yP[:, :, indC] * s + m
for indC, code in enumerate(codeLst):
    # positions in siteNoLst of this code's sites; sites absent from
    # siteNoLst are skipped
    indS = [
        siteNoLst.index(siteNo)
        for siteNo in dictSite[code]
        if siteNo in siteNoLst
    ]
    predSel = yOut[:, indS, indC]
    obsSel = d2.Y[:, indS, indC]
    corr = utils.stat.calCorr(predSel, obsSel)
    corrMat[indS, indC, iEp] = corr

# plot -- one panel per code, one box per entry of epLst
labelLst = [
    usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst
]
dataBox = list()
for ic, code in enumerate(codeLst):
    temp = [corrMat[:, ic, iEp] for iEp in range(len(epLst))]
    dataBox.append(temp)
fig, axes = figplot.boxPlot(
    dataBox,
    widths=0.5,
    figsize=(12, 4),
    label1=labelLst,
    sharey=True,
    cLst='rrrrrrrrrr',
)
# for ax in axes:
#     ax.axhline(0)
fig.show()