def metrics(yp, yt, keyLst, var_s, output_path="./"):
    """Compute error statistics for predictions vs. observations and box-plot them.

    Parameters
    ----------
    yp, yt : array-like
        Predicted and target values passed straight to ``statError``.
    keyLst : list of str
        Metric names to extract from the stat dictionary (e.g. 'RMSE').
    var_s : unused
        Accepted for interface compatibility; not referenced in this function.
    output_path : str
        Directory handed to ``plotBoxFigMulti`` for saving the figure.

    Returns
    -------
    dict
        The median dictionary returned by ``plotBoxFigMulti``.
    """
    stat_dicts = [statError(yp, yt)]
    # One row per metric; each row holds the NaN-filtered value arrays from
    # every stat dictionary (currently a single one).
    dataBox = [
        [d[key][~np.isnan(d[key])] for d in stat_dicts]
        for key in keyLst
    ]
    return plotBoxFigMulti(dataBox, label1=keyLst, sharey=False,
                           figsize=(12, 5), path_fig=output_path)
temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) print(np.array([maskObs[ind, :], maskDay[ind, :]])) maskObsDay = maskObs * maskDay unique, counts = np.unique(maskDay, return_counts=True) print(np.asarray((unique, counts)).T) print(counts / ngrid / nt) fLst = [1, 2, 3] statLstF = list() statLstP = list() maskF = (maskDay >= 1) & (maskDay <= 3) statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF)) statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF)) # plot map and time series import importlib importlib.reload(plot) dataGrid = [statP['RMSE'] - statF['RMSE'], statP['Corr'] - statF['Corr']] prcp = df.getDataTs('APCP_FORA').squeeze() dataTs = [obs, yp, yf] crd = df.getGeo() t = df.getT() mapNameLst = ['dRMSE', 'dR'] tsNameLst = ['obs', 'prj', 'fore'] plot.plotTsMap( dataGrid, dataTs,
# # test error train on different year trLst = [[20150402, 20160401], [20160401, 20170401], [20170401, 20180401]] statPLst = list() statFLst = list() for k in range(3): trTrain = trLst[k] taTrain = utils.time.tRange2Array(trTrain) taAll = utils.time.tRange2Array([20150402, 20180401]) indTrain, ind2 = utils.time.intersect(taAll, taTrain) indTest = np.delete(np.arange(len(taAll)), indTrain) tempYp = ypLst[k][:, indTest] tempYf = yfLst[k][:, indTest] tempMask = maskF[:, indTest] tempObs = obs[:, indTest] tempStatP = stat.statError(utils.fillNan(tempYp, tempMask), utils.fillNan(tempObs, tempMask)) tempStatF = stat.statError(utils.fillNan(tempYf, tempMask), utils.fillNan(tempObs, tempMask)) statPLst.append(tempStatP) statFLst.append(tempStatF) # plot map and time series import importlib importlib.reload(plot) dataGrid = [ statPLst[0]['RMSE'] - statFLst[0]['RMSE'], statPLst[1]['RMSE'] - statFLst[1]['RMSE'], statPLst[2]['RMSE'] - statFLst[2]['RMSE'] ] prcp = df.getDataTs('APCP_FORA').squeeze() dataTs = [[obs, ypLst[0], yfLst[0]], [obs, ypLst[1], yfLst[1]],
# Evaluate a baseline LSTM plus several nowcast (latency) models on one test
# year and draw a box plot of their error statistics.
# NOTE(review): `pathSMAP`, `subset`, `outLst`, `dLst`, `saveDir`, `master`,
# `stat`, `plot`, `os` and `np` come from earlier in the file / project and
# are not visible in this chunk. Line structure reconstructed from a
# whitespace-collapsed source - confirm indentation against the original.
tRange = [20160501, 20170501]
predLst = list()
# baseline LSTM output directory
outLSTM = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1')
df, pred, obs = master.test(
    outLSTM, tRange=tRange, subset=subset, batchSize=100)
predLst.append(pred.squeeze())
# one prediction per additional trained model directory
for out in outLst:
    df, pred, obs = master.test(
        out, tRange=tRange, subset=subset, batchSize=100)
    predLst.append(pred.squeeze())
obs = obs.squeeze()
# plot box - latency
# if 'post' in doLst:
# case labels: baseline first, then one per latency (days) in dLst
caseLst = ['Predict'] + ['Nowcast ' + str(nd) + 'd latency' for nd in dLst]
statLst1 = [stat.statError(x, obs) for x in predLst]
keyLst = list(statLst1[0].keys())
# dataBox[iS][k]: NaN-filtered values of metric keyLst[iS] for model k
dataBox = list()
for iS in range(len(keyLst)):
    statStr = keyLst[iS]
    temp = list()
    for k in range(len(statLst1)):
        data = statLst1[k][statStr]
        data = data[~np.isnan(data)]
        temp.append(data)
    dataBox.append(temp)
fig = plot.plotBoxFig(dataBox, keyLst, caseLst, sharey=False)
fig.show()
fig.savefig(os.path.join(saveDir, 'box_latency'))
# figure out how many days observation lead
caseLst.append('All-90-95-DA' + str(nDay)) outLst = [os.path.join(pathCamels['Out'], save_path, x) for x in caseLst] subset = 'All' tRange = [19950101, 20000101] predLst = list() for out in outLst: df, pred, obs = master.test(out, tRange=tRange, subset=subset, basinnorm=True, epoch=200) # pred=np.maximum(pred,0) predLst.append(pred) # plot box statDictLst = [stat.statError(x.squeeze(), obs.squeeze()) for x in predLst] # keyLst = list(statDictLst[0].keys()) keyLst = ['Bias', 'RMSE', 'NSE'] dataBox = list() for iS in range(len(keyLst)): statStr = keyLst[iS] temp = list() for k in range(len(statDictLst)): data = statDictLst[k][statStr] data = data[~np.isnan(data)] temp.append(data) dataBox.append(temp) # plt.style.use('classic') plt.rcParams['font.size'] = 14 plt.rcParams['font.family'] = 'Times New Roman' plt.rcParams["legend.columnspacing"] = 0.1
ypLst = list() modelName = 'LSTM' model = train.loadModel(outFolder, nEpoch, modelName=modelName) yp = train.testModel(model, x, batchSize=100).squeeze() ypLst.append( dbCsv.transNorm(yp, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False)) modelName = 'LSTM-DA' model = train.loadModel(outFolder, nEpoch, modelName=modelName) yp = train.testModel(model, x, z=y, batchSize=100).squeeze() ypLst.append( dbCsv.transNorm(yp, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False)) ## statErr1 = stat.statError(ypLst[0], yt) statErr2 = stat.statError(ypLst[1], yt) dataGrid = [statErr2['RMSE'], statErr2['RMSE'] - statErr1['RMSE']] dataTs = [ypLst[0], ypLst[1], yt] t = df.getT() crd = df.getGeo() mapNameLst = ['DA', 'DA-LSTM'] tsNameLst = ['LSTM', 'DA', 'SMAP'] colorMap = None colorTs = None plot.plotTsMap( dataGrid, dataTs, crd, t,
# Run three trained models (plain LSTM, DA-1, DA-7), denormalize their
# streamflow predictions, and box-plot their error statistics against yt2.
# NOTE(review): `model1`, `x2`, `c2`, `xz1`, `xz2`, `outFolder`, `nEpoch`,
# `yt2`, `train`, `camels`, `stat`, `plot`, `np` are defined outside this
# chunk. Line structure reconstructed from a whitespace-collapsed source.
yp1 = train.testModel(model1, x2, c2)
yp1 = camels.transNorm(yp1, 'usgsFlow', toNorm=False).squeeze()
model2 = train.loadModel(outFolder, nEpoch, modelName='DA-1')
yp2 = train.testModel(model2, xz1, c2)
yp2 = camels.transNorm(yp2, 'usgsFlow', toNorm=False).squeeze()
model3 = train.loadModel(outFolder, nEpoch, modelName='DA-7')
yp3 = train.testModel(model3, xz2, c2)
yp3 = camels.transNorm(yp3, 'usgsFlow', toNorm=False).squeeze()
yLst = [yt2, yp1, yp2, yp3]
# plot box
statDictLst = [
    stat.statError(yp1, yt2),
    stat.statError(yp2, yt2),
    stat.statError(yp3, yt2)
]
keyLst = list(statDictLst[0].keys())
# dataBox[iS][k]: NaN-filtered values of metric keyLst[iS] for model k
dataBox = list()
for iS in range(len(keyLst)):
    statStr = keyLst[iS]
    temp = list()
    for k in range(len(statDictLst)):
        data = statDictLst[k][statStr]
        data = data[~np.isnan(data)]
        temp.append(data)
    dataBox.append(temp)
fig = plot.plotBoxFig(dataBox, keyLst, ['LSTM', 'DA-1', 'DA-7'], sharey=False)
predLst.append(pred) # the prediction list for all the models obsLst.append(obs) np.save(os.path.join(out, 'pred.npy'), pred) np.save(os.path.join(out, 'obs.npy'), obs) f = np.load(os.path.join(out, 'x.npy')) # it has been saved previously in the out directory (forcings) T = (f[:, :, 3] + f[:, :, 4]) / 2 # mean air T for T_residual T_air = np.expand_dims(T, axis=2) pred_res = pred - T_air obs_res = obs - T_air predLst_res.append(pred_res) obsLst_res.append(obs_res) # calculate statistic metrics # statDict = stat.statError(pred.squeeze(), obs.squeeze()) # statDictLst.append([statDict]) # statDictLst1 = [stat.statError(x.squeeze(), obs.squeeze()) for x, y in predLst] statDictLst = [stat.statError(x.squeeze(), y.squeeze()) for (x, y) in zip(predLst, obsLst)] statDictLst_res = [stat.statError_res(x.squeeze(), y.squeeze(), z.squeeze(), w.squeeze()) for (x, y, z, w) in zip(predLst, obsLst, predLst_res, obsLst_res)] ### save this file too # median and STD calculation count = 0 mdstd = np.zeros([len(statDictLst_res[0]),3]) for i in statDictLst_res[0].values(): median = np.nanmedian((i)) # abs(i) STD = np.nanstd((i)) # abs(i) mean = np.nanmean((i)) #abs(i) k = np.array([[median,STD, mean]]) mdstd[count] = k count = count +1 mdstd = pd.DataFrame(mdstd, index=statDictLst_res[0].keys(), columns=['median', 'STD','mean']) if retrained==True:
latC, lonC = dfC.getGeo() # case = '090303' for case in caseLst: testName = subsetPattern.format(case, 3) errLst = list() for k in levLst: if k in [0, 1]: # if k in [-1]: subset = 'ecoReg_{}_L{}_v2f1'.format(case, k) else: subset = subsetPattern.format(case, k) outName = subset + '_Forcing' out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegionCase', outName) df, yp, yt = master.test(out, tRange=tRange, subset=testName) errLst.append(stat.statError(yp[:, :, 0], yt[:, :, 0])) # plot box cLst = 'ygbr' keyLst = ['RMSE', 'Corr'] dataBox = list() for key in keyLst: temp = list() for err in errLst: temp.append(err[key]) dataBox.append(temp) fig = plot.plotBoxFig(dataBox, ' ', figsize=(8, 6), colorLst=cLst, sharey=False)
# Configure and test a multi-target model, then plot per-target RMSE maps and
# time series.
# NOTE(review): `default`, `cDir`, `optData`, `optModel`, `optLoss`,
# `wrapMaster`, `test`, `stat`, `plot`, `os` are defined outside this chunk.
# Line structure reconstructed from a whitespace-collapsed source.
optTrain = default.update(default.optTrainSMAP, nEpoch=100)
out = os.path.join(cDir, 'output', 'CONUSv4f1_multi')
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)
# train
# train(masterDict)
# runTrain(masterDict, cudaID=2, screen='LSTM-multi')
# test
df, yp, yt, sigma = test(out, tRange=[20160401, 20170401], subset='CONUSv4f1')
# plot ts MAP
dataGrid = list()
dataTs = list()
# one RMSE map and one (pred, obs) time-series pair per target (k = 0, 1)
for k in range(2):
    statErr = stat.statError(yp[:, :, k], yt[:, :, k])
    dataGrid.append(statErr['RMSE'])
    dataTs.append([yp[:, :, k], yt[:, :, k]])
t = df.getT()
crd = df.getGeo()
mapNameLst = ['RMSE ', 'RMSE']
tsNameLst = ['LSTM', 'SMAP']
plot.plotTsMap(dataGrid, dataTs, lat=crd[0], lon=crd[1], t=t,
               mapNameLst=mapNameLst, tsNameLst=tsNameLst, multiTS=True,
               isGrid=True)
# Linear-regression baseline: fit one LinearRegression per gage on (x, y),
# predict on xt, denormalize, and collect error statistics for a box plot.
# NOTE(review): `xt`, `x`, `y`, `yt`, `ngage`, `LinearRegression`, `camels`,
# `stat`, `np`, `plt` are defined outside this chunk. Line structure
# reconstructed from a whitespace-collapsed source. `daylen` appears unused
# within this chunk - presumably referenced later; verify.
daylen = xt.shape[1]
Pred = np.full(yt.shape, np.nan)
for ii in range(ngage):
    xdata = x[ii, :, :]
    ydata = y[ii, :, :]
    regmodel = LinearRegression().fit(xdata, ydata)
    xtest = xt[ii, :, :]
    ypred = regmodel.predict(xtest)
    # only the first output channel is filled - assumes single-target y
    Pred[ii, :, 0] = ypred.squeeze()
# undo normalization, then undo basin normalization, for both pred and obs
pred = camels.transNorm(Pred, 'usgsFlow', toNorm=False)
obs = camels.transNorm(yt, 'usgsFlow', toNorm=False)
gageid = 'All'
pred = camels.basinNorm(pred, gageid=gageid, toNorm=False)
obs = camels.basinNorm(obs, gageid=gageid, toNorm=False)
# plot box
statDictLst = [stat.statError(pred.squeeze(), obs.squeeze())]
keyLst = ['Bias', 'RMSE', 'NSE']
# dataBox[iS][k]: NaN-filtered values of metric keyLst[iS] for model k
dataBox = list()
for iS in range(len(keyLst)):
    statStr = keyLst[iS]
    temp = list()
    for k in range(len(statDictLst)):
        data = statDictLst[k][statStr]
        data = data[~np.isnan(data)]
        temp.append(data)
    dataBox.append(temp)
# plt.style.use('classic')
plt.rcParams['font.size'] = 14
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams["legend.columnspacing"] = 0.1
plt.rcParams["legend.handletextpad"] = 0.2
batchSize=100) df2, yf2, obs2 = master.test(out, tRange=[20160401, 20180401], subset=subset, batchSize=100) out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_LSTM2015') df1, yp1, obs1 = master.test(out, tRange=[20150402, 20180401], subset=subset, batchSize=100) df2, yp2, obs2 = master.test(out, tRange=[20160401, 20180401], subset=subset, batchSize=100) statF = stat.statError(yf2.squeeze(), obs2.squeeze()) statP = stat.statError(yp2.squeeze(), obs2.squeeze()) t = df1.getT() lat, lon = df1.getGeo() dataTS = [obs1.squeeze(), yp1.squeeze(), yf1.squeeze()] tBar = np.datetime64('2016-04-01') def funcMap(): gridF, uy, ux = utils.grid.array2grid(statF['RMSE'], lat=lat, lon=lon) gridP, uy, ux = utils.grid.array2grid(statP['RMSE'], lat=lat, lon=lon) figM, axM = plt.subplots(1, 2, figsize=(10, 4)) axplot.mapGrid(axM[0], uy, ux, gridF, vRange=[0, 0.1], cmap=plt.cm.jet) axM[0].set_title('Temporal Test RMSE of LSTM-DI') axplot.mapGrid(axM[1], uy, ux, gridP, vRange=[0, 0.1], cmap=plt.cm.jet) axM[1].set_title('Temporal Test RMSE of LSTM')
maskObs = 1 * ~np.isnan(obs.squeeze()) maskDay = np.zeros(maskObs.shape).astype(int) ngrid, nt = maskObs.shape for j in range(ngrid): temp = 0 for i in range(nt): maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) maskObsDay = maskObs * maskDay maskF = (maskDay >= 1) & (maskDay <= 3) statP = stat.statError(yp, obs) statLst = [ stat.statError(utils.fillNan(x, maskF), utils.fillNan(obs, maskF)) for x in yfLst ] # if 'post' in doLst: caseLst = ['Predict'] + [str(nd) + 'd latency' for nd in dLst] keyLst = list(statLst[0].keys()) dataBox = list() for iS in range(len(keyLst)): key = keyLst[iS] temp = list() temp.append(statP[key]) print(key, np.nanmedian(statP[key])) for k in range(len(statLst)):
temp = 0 for i in range(nt): maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) maskObsDay = maskObs * maskDay unique, counts = np.unique(maskObsDay, return_counts=True) maskF = (maskDay >= 1) & (maskDay <= 3) statPLst = list() statFLst = list() for k in range(3): statP = stat.statError(utils.fillNan(ypLst[k], maskF), utils.fillNan(obs, maskF)) statF = stat.statError(utils.fillNan(yfLst[k], maskF), utils.fillNan(obs, maskF)) statPLst.append(statP) statFLst.append(statF) cropFile = r'/mnt/sdb/Data/Crop/cropRate_CONUSv2f1.csv' cropRate = pd.read_csv(cropFile, dtype=np.float, header=None).values # croprate - 0 corn, 4 soybean, 22 spring wheat, 23 winter wheat dataGrid = [ (statPLst[0]['RMSE'] - statFLst[0]['RMSE']) / statPLst[0]['RMSE'], (statPLst[1]['RMSE'] - statFLst[1]['RMSE']) / statPLst[1]['RMSE'], (statPLst[2]['RMSE'] - statFLst[2]['RMSE']) / statPLst[2]['RMSE'], ] prcp = df.getDataTs('APCP_FORA').squeeze() dataTs = [[obs, ypLst[0], yfLst[0]], [obs, ypLst[1], yfLst[1]],
subset='CONUSv4f1', tRange=[sd, ed]) obs = df.getData(varT='SMAP_AM', doNorm=True, rmNan=False) modelName = 'LSTM-DA-' + str(k) model = train.loadModel(outFolder, nEpoch, modelName=modelName) yP = train.testModel(model, (x, obs), batchSize=100).squeeze() ypLst.append( dbCsv.transNorm(yP, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False)) if 'post' in doLst: statDictLst = list() for k in range(0, len(ypLst)): statDictLst.append(stat.statError(ypLst[k], yT)) keyLst = ['RMSE', 'ubRMSE', 'Bias', 'Corr'] caseLst = ['LSTM'] for k in dLst: caseLst.append('DA-' + str(k)) # plot box dataBox = list() cmap = plt.cm.jet cLst = cmap(np.linspace(0, 1, len(caseLst))) for iS in range(len(keyLst)): statStr = keyLst[iS] temp = list() for k in range(len(statDictLst)): temp.append(statDictLst[k][statStr]) dataBox.append(temp)
maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) maskObsDay = maskObs * maskDay maskF = (maskDay >= 1) & (maskDay <= 3) # figure out train and test time index tR0 = [20150402, 20180401] tA0 = utils.time.tRange2Array(tR0) nt = len(tA0) tTrainLst = list() tTestLst = list() for k in range(len(yrLst)): tR = tRangeLst[k] tA = utils.time.tRange2Array(tR) ind0 = np.array(range(nt)) ind1, ind2 = utils.time.intersect(tA0, tA) tTestLst.append(np.delete(ind0, ind1)) tTrainLst.append(ind1) # calculate stat for k in range(len(yrLst)): yfTemp = utils.fillNan(yfLst[k], maskF) yfTemp = yfTemp[:, tTestLst[k]] statP = stat.statError(yfTemp, utils.fillNan(obs, maskF)) statF = stat.statError(yfTemp, utils.fillNan(obs, maskF))
if iEns == 0: predLst = predtempLst else: for ii in range(len(outLst)): predLst[ii] = np.concatenate([predLst[ii], predtempLst[ii]], axis=2) # predLst: List of all experiments with shape: Ntime*Nbasin*Nensemble # get the ensemble mean from simulations of different seeds ensLst = [] for ii in range(len(outLst)): temp = np.nanmean(predLst[ii], axis=2, keepdims=True) ensLst.append(temp) # plot boxplots for different experiments statDictLst = [stat.statError(x.squeeze(), obsAll.squeeze()) for x in ensLst] keyLst = ["NSE", "KGE"] # which metric to show dataBox = list() for iS in range(len(keyLst)): statStr = keyLst[iS] temp = list() for k in range(len(statDictLst)): data = statDictLst[k][statStr] data = data[~np.isnan(data)] temp.append(data) dataBox.append(temp) plt.rcParams["font.size"] = 14 labelname = ["PUR", "PUR-FDC", "PUR-1/3FDC"] xlabel = ["NSE", "KGE"] fig = plot.plotBoxFig(dataBox, xlabel, labelname, sharey=False, figsize=(6, 5))
for i in range(nt): maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 maskObsDay = maskObs * maskDay fLst = [1, 2, 3] statLst = list() for nf in fLst: maskF = maskDay == nf temp = list() for yf in yfLst: statErr = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF)) temp.append(statErr) statLst.append(temp) # load result from RK dirRK = r'D:\\data\\Koster17\\' fileNameLst = ['rmse_lead_{}.dat'.format(x) for x in [1, 2, 3]] tempLst = list() for k in range(3): # lon lat are identical. Tested temp = np.loadtxt(os.path.join(dirRK, fileNameLst[k])) tempLst.append(temp[:, 2]) RKlon = temp[:, 0] RKlat = temp[:, 1] lat, lon = df.getGeo() errLst = list()
temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) print(np.array([maskObs[ind, :], maskDay[ind, :]])) maskObsDay = maskObs * maskDay unique, counts = np.unique(maskDay, return_counts=True) print(np.asarray((unique, counts)).T) print(counts / ngrid / nt) fLst = [1, 2, 3] statLstF = list() statLstP = list() maskF = (maskDay >= 1) & (maskDay <= 3) statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF)) statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF)) for nf in fLst: xp = np.full([ngrid, nt], np.nan) xf = np.full([ngrid, nt], np.nan) y = np.full([ngrid, nt], np.nan) xf[maskObsDay == nf] = yf[maskObsDay == nf] xp[maskObsDay == nf] = yp[maskObsDay == nf] y[maskObsDay == nf] = obs[maskObsDay == nf] statLstF.append(stat.statError(xf, y)) statLstP.append(stat.statError(xp, y)) # plot box - forecast matplotlib.rcParams.update({'font.size': 11}) matplotlib.rcParams.update({'lines.linewidth': 2}) matplotlib.rcParams.update({'lines.markersize': 12})
# Build a per-pixel "days since last observation" mask, compute forecast /
# projection error statistics on days with 1-3 day latency, and set up a
# per-season evaluation.
# NOTE(review): `maskObs`, `yp`, `yf`, `obs`, `tAllR`, `stat`, `utils`, `np`
# are defined outside this chunk. Line structure reconstructed from a
# whitespace-collapsed source.
maskDay = np.zeros(maskObs.shape).astype(int)
ngrid, nt = maskObs.shape
for j in range(ngrid):
    temp = 0
    for i in range(nt):
        # record count-so-far BEFORE looking at today's observation
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            # observation today: restart the counter at 1
            temp = 1
        else:
            # no observation: keep counting up (0 means never observed yet)
            if temp != 0:
                temp = temp + 1
ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
unique, counts = np.unique(maskObsDay, return_counts=True)
# days whose most recent observation is 1-3 days old
maskF = (maskDay >= 1) & (maskDay <= 3)
statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF))
statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF))
# NOTE(review): duplicate of the assignment above - appears redundant
maskObsDay = maskObs * maskDay
# debug output for one randomly chosen grid cell and the mask histogram
print(np.array([maskObs[ind, :], maskDay[ind, :]]))
print(np.asarray((unique, counts)).T)
print(counts / ngrid / nt)
# see result for different seasons
tRangeLst = [[20160401, 20160701], [20160701, 20161001],
             [20161001, 20170101], [20170101, 20170401],
             [20170401, 20170701], [20170701, 20171001],
             [20171001, 20180101], [20180101, 20180401]]
tAllA = utils.time.tRange2Array(tAllR)
statPLst = list()
statFLst = list()
# %% load data and stat kcLst = [7, 8, 13] tRange = [20160401, 20180401] statLst = list() statRefLst = list() for kc in kcLst: tempLst = list() for k in range(1, 18): testName = subsetLst[kc - 1] if k != kc: outName = 'ecoRegion{:02d}{:02d}_v2f1_Forcing'.format(kc, k) else: outName = 'ecoRegion{:02d}_v2f1_Forcing'.format(kc) out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegion', outName) df, yp, yt = master.test(out, tRange=tRange, subset=testName) temp = stat.statError(yp[:, :, 0], yt[:, :, 0]) tempLst.append(temp) if k == kc: statRefLst.append(temp) statLst.append(tempLst) # %% plot box keyLst = stat.keyLst ecoLst = ['{:02d}'.format(x) for x in range(1, 18)] caseLst = ['{:02d}'.format(x) for x in [7, 8, 13]] for k in range(len(caseLst)): dataBox = list() key = 'RMSE' for ii in range(len(ecoLst)): temp = list()
np.save(os.path.join(out, 'obs.npy'), obs) f = np.load( os.path.join(out, 'x.npy') ) # it has been saved previously in the out directory (forcings) T = (f[:, :, 3] + f[:, :, 4]) / 2 # mean air T for T_residual T_air = np.expand_dims(T, axis=2) pred_res = pred - T_air obs_res = obs - T_air predLst_res.append(pred_res) obsLst_res.append(obs_res) # calculate statistic metrics # statDict = stat.statError(pred.squeeze(), obs.squeeze()) # statDictLst.append([statDict]) # statDictLst1 = [stat.statError(x.squeeze(), obs.squeeze()) for x, y in predLst] statDictLst = [ stat.statError(x.squeeze(), y.squeeze()) for (x, y) in zip(predLst, obsLst) ] statDictLst_res = [ stat.statError_res(x.squeeze(), y.squeeze(), z.squeeze(), w.squeeze()) for (x, y, z, w) in zip(predLst, obsLst, predLst_res, obsLst_res) ] # median and STD calculation count = 0 mdstd = np.zeros([len(statDictLst_res[0]), 3]) for i in statDictLst_res[0].values(): median = np.nanmedian((i)) # abs(i) STD = np.nanstd((i)) # abs(i) mean = np.nanmean((i)) #abs(i) k = np.array([[median, STD, mean]])
cDir = os.path.dirname(os.path.abspath(__file__)) out = os.path.join(cDir, 'output', 'CONUSv4f1') rootDB = os.path.join(cDir, 'data') nEpoch = 100 tRange = [20160401, 20170401] # load data df, yp, yt = master.test( out, tRange=[20160401, 20170401], subset='CONUSv4f1', epoch=100, reTest=True) yp = yp.squeeze() yt = yt.squeeze() # calculate stat statErr = stat.statError(yp, yt) dataGrid = [statErr['RMSE'], statErr['Corr']] dataTs = [yp, yt] t = df.getT() crd = df.getGeo() mapNameLst = ['RMSE', 'Correlation'] tsNameLst = ['LSTM', 'SMAP'] # plot map and time series plot.plotTsMap( dataGrid, dataTs, lat=crd[0], lon=crd[1], t=t, mapNameLst=mapNameLst,
dfC = dbCsv.DataframeCsv(rootDB=rootDB, subset='CONUSv2f1', tRange=tRange) latC, lonC = dfC.getGeo() errLstAll = list() for case in caseLst: testName = subsetPattern.format(case, 3) errLst = list() for k in levLst: if k in [0, 1]: subset = 'ecoReg_{}_L{}_v2f1'.format(case, k) else: subset = subsetPattern.format(case, k) outName = subset + '_Forcing' out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegionCase', outName) df, yp, yt = master.test(out, tRange=tRange, subset=testName) err = stat.statError(yp[:, :, 0], yt[:, :, 0]) errLst.append(err) errLstAll.append(errLst) # plot box cLst = 'ygbr' keyLst = ['RMSE', 'Corr'] for key in keyLst: dataBox = list() for errLst in errLstAll: temp = list() for err in errLst: temp.append(err[key]) dataBox.append(temp) fig = figplot.boxPlot(dataBox, label1=caseLabLst,
model = train.loadModel(outFolder, nEpoch, modelName=modelName) yP = train.testModel(model, (x, obs), c, batchSize=100).squeeze() ypLstmLst.append( dbCsv.transNorm(yP, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False)) if 'post' in doLst: # stat ypLst = [ypLstmLst, ypAnnLst] statDictLst = list() for i in range(0, len(ypLst)): tempLst = list() for j in range(0, len(ypLst[i])): tempLst.append(stat.statError(ypLst[i][j], yT)) statDictLst.append(tempLst) keyLst = list(tempLst[0].keys()) # plot box dataBox = list() caseLst1 = keyLst caseLst2 = ['LSTM', 'LSTM-DA'] for iS in range(len(keyLst)): statStr = keyLst[iS] dataBox = list() for iS in range(len(keyLst)): statStr = keyLst[iS] temp = list() for k in range(len(statDictLst)): temp.append(statDictLst[k][statStr])
dfz2 = camels.DataframeCsv(subset='all', tRange=[20141227, 20091227]) z2 = dfz2.getDataObs(doNorm=True, rmNan=False) df2 = camels.DataframeCsv(subset='all', tRange=[20100101, 20150101]) x2 = df2.getDataTS(varLst=camels.forcingLst, doNorm=True, rmNan=True) c2 = df2.getDataConst(varLst=camels.attrLstSel, doNorm=True, rmNan=True) yt2 = df2.getDataObs(doNorm=False, rmNan=False).squeeze() model = train.loadModel(outFolder, 100, modelName='test') yp1 = train.testModel(model, x1, c1) yp1 = camels.transNorm(yp1, 'usgsFlow', toNorm=False).squeeze() yp2 = train.testModel(model, x2, c2) yp2 = camels.transNorm(yp2, 'usgsFlow', toNorm=False).squeeze() statErr1 = stat.statError(yp1, yt2) statErr2 = stat.statError(yp2, yt2) dataMap = [statErr2['Corr'], statErr1['Corr'] - statErr2['Corr']] dataTs = [yt2, yp2] t = df2.getT() crd = df2.getGeo() mapNameLst = ['Test Corr', 'Train Corr - Test Corr'] tsNameLst = ['USGS', 'LSTM'] colorMap = None colorTs = None import imp imp.reload(plot) plot.plotTsMap(dataMap, dataTs, lat=crd[:, 0],