def readSMAP(varLst, usgsIdLst):
    """Load SMAP-inversion data for the given gages and de-normalize it.

    Per-gage CSVs are read from the fixed SMAPInv directory over the
    hard-coded 20150402-20180401 window. The normalization stored in
    ``statDictOri.json`` is undone (index 3 looks like scale and index 2
    like offset — TODO confirm against calStat's output layout), and
    statistics of the de-normalized data are cached in ``statDictNew.json``.

    Parameters
    ----------
    varLst : list of str
        Variable names to read.
    usgsIdLst : sequence
        Gage identifiers passed through to readcsvGage.

    Returns
    -------
    tuple
        (x, tSMAPLst, smapDict): de-normalized array shaped
        [gage, time, var], the time axis, and the new statistics dict.
    """
    smapWindow = [20150402, 20180401]
    smapTimes = tRange2Array(smapWindow)
    smapDir = "/scratch/feng/extractData/SMAPInv"
    nT = len(smapTimes)
    out = np.empty([len(usgsIdLst), nT, len(varLst)])
    for idx, gage in enumerate(usgsIdLst):
        out[idx, :, :] = readcsvGage(smapDir, gage, varLst, nT)
    # Undo the normalization recorded in the original statistics file.
    with open(os.path.join(smapDir, "statDictOri.json"), "r") as fp:
        oriStat = json.load(fp)
    for iv, var in enumerate(varLst):
        out[:, :, iv] = out[:, :, iv] * oriStat[var][3] + oriStat[var][2]
    # Compute statistics of the de-normalized data once; reuse the cache
    # on subsequent calls.
    newStatFile = os.path.join(smapDir, "statDictNew.json")
    if not os.path.isfile(newStatFile):
        newStat = {var: calStat(out[:, :, iv])
                   for iv, var in enumerate(varLst)}
        with open(newStatFile, "w") as fp:
            json.dump(newStat, fp, indent=4)
    with open(newStatFile, "r") as fp:
        smapDict = json.load(fp)
    return out, smapTimes, smapDict  # out is transformed back
def getCSV(self, *, doNorm=True, rmNan=True, dataRange=None,
           readRange=None,
           csvdataDir="/scratch/feng/extractData/SMAP/csv/SMAPUpdate/",
           csvvarLst=None):
    """Read CSV soil-moisture data for this dataframe's gages.

    Parameters
    ----------
    doNorm : bool
        Normalize each variable with the stats returned by readCSV
        (index 2 = offset, index 3 = scale — TODO confirm calStat layout).
    rmNan : bool
        Replace NaN values with 0 after (optional) normalization.
    dataRange, readRange : list or None
        [start, end] yyyymmdd windows; None selects the historical
        defaults below.
    csvdataDir : str
        Directory holding the per-gage CSVs.
    csvvarLst : list of str or None
        Variables to read; defaults to ["soil_moisture_pm"].

    Returns
    -------
    numpy.ndarray
        Array shaped [gage, time, var] restricted to readRange.
    """
    # Fix: mutable default arguments replaced by None sentinels; the
    # effective defaults are unchanged.
    if dataRange is None:
        dataRange = [20150401, 20201002]
    if readRange is None:
        readRange = [20150402, 20160402]
    if csvvarLst is None:
        csvvarLst = ["soil_moisture_pm"]
    data, tcsvdataLst, csvstatDict = readCSV(
        dataDir=csvdataDir,
        dataRange=dataRange,
        varLst=csvvarLst,
        usgsIdLst=self.usgsId,
    )  # gage, time, var
    # Keep only the requested read window.
    readtLst = tRange2Array(readRange)
    _, _, ind2 = np.intersect1d(readtLst, tcsvdataLst, return_indices=True)
    data = data[:, ind2, :]
    if doNorm:
        for ivar, tempvar in enumerate(csvvarLst):
            data[:, :, ivar] = (data[:, :, ivar] - csvstatDict[tempvar][2]
                                ) / csvstatDict[tempvar][3]
    if rmNan:
        data[np.isnan(data)] = 0
    return data
def readhour(varLst, usgsIdLst):
    """Read hourly NLDAS forcing CSVs for the given gages.

    The daily window 19851001-20051001 is hard-coded; the time dimension
    is 24 samples per day across that range.

    Returns
    -------
    tuple
        (x, thourLst): x shaped [gage, hour, var] and the daily time axis.
    """
    dailyWindow = [19851001, 20051001]
    dayAxis = tRange2Array(dailyWindow)
    nldasDir = "/scratch/feng/extractData/NLDAS/csvLst/NLDAS"
    nHours = len(dayAxis) * 24
    data = np.empty([len(usgsIdLst), nHours, len(varLst)])
    for idx, gage in enumerate(usgsIdLst):
        data[idx, :, :] = readcsvGage(nldasDir, gage, varLst, nHours)
    return data, dayAxis
def readSAC(tRangeLst):
    """Load cached SAC-SMA predictions and subset them to tRangeLst.

    The prediction file covers the fixed 19801001-20150101 window; only
    timesteps present in tRangeLst are kept.

    Returns
    -------
    numpy.ndarray
        Array shaped Ngage*Ntime*Nvar (last axis has length 1).
    """
    sacOutDir = pathCamels["Out"] + "/trend/SAC"
    sacTimes = tRange2Array([19801001, 20150101])
    # load SAC-SMA prediction
    sacPred = np.load(sacOutDir + "/predSAC.npy", allow_pickle=True)
    _, _, keepIdx = np.intersect1d(tRangeLst, sacTimes, return_indices=True)
    dataPred = np.expand_dims(sacPred[:, keepIdx], 2)
    return dataPred  # Ngage*Ntime*Nvar
def readLstm(tRangeLst):
    """Load ensemble LSTM predictions, average members, subset to tRangeLst.

    Predictions cover the fixed 19801001-20150101 window; the ensemble
    axis (axis 0 — presumably members; verify against the writer) is
    reduced with nanmean before subsetting.

    Returns
    -------
    numpy.ndarray
        Array shaped Ngage*Ntime*Nvar.
    """
    lstmWindow = [19801001, 20150101]
    lstmTimes = tRange2Array(lstmWindow)
    runDir = ("EnsemRun/DI_N/PNorm/SAC-LSTM/"
              "epochs300_batch100_rho365_hiddensize256_Tstart19801001_Tend19951001")
    outDir = os.path.join(
        pathCamels["Out"],
        runDir,
        "All-90-95",
        "{}_{}".format(lstmWindow[0], lstmWindow[1]),
    )
    # load Lstm prediction and collapse the ensemble dimension
    lstmPred = np.load(outDir + "/pred.npy", allow_pickle=True)
    lstmPred = np.nanmean(lstmPred, axis=0)
    _, _, keepIdx = np.intersect1d(tRangeLst, lstmTimes, return_indices=True)
    return lstmPred[:, keepIdx, :]  # Ngage*Ntime*Nvar
def getSMAP(self, *, doNorm=True, rmNan=True, SMAPinvrange=None):
    """Read SMAP-inversion forcings/targets for this dataframe's gages.

    Parameters
    ----------
    doNorm : bool
        Normalize each variable with the stats returned by readSMAP
        (index 2 = offset, index 3 = scale — TODO confirm calStat layout).
    rmNan : bool
        Replace NaN values with 0 after (optional) normalization.
    SMAPinvrange : list or None
        [start, end] yyyymmdd window; None selects the historical
        default [20150402, 20160402].

    Returns
    -------
    numpy.ndarray
        Array shaped [gage, time, var] restricted to SMAPinvrange.
    """
    # Fix: mutable default argument replaced by a None sentinel; the
    # effective default is unchanged.
    if SMAPinvrange is None:
        SMAPinvrange = [20150402, 20160402]
    varsmapLst = ["APCP", "TMP", "PEVAP", "SMAP"]
    data, tSMAPLst, smapDict = readSMAP(
        varLst=varsmapLst, usgsIdLst=self.usgsId)  # gage, time, var: 1 precip
    # Keep only the requested inversion window.
    SMAPinvt = tRange2Array(SMAPinvrange)
    _, _, ind2 = np.intersect1d(SMAPinvt, tSMAPLst, return_indices=True)
    data = data[:, ind2, :]
    if doNorm:
        for ivar, tempvar in enumerate(varsmapLst):
            data[:, :, ivar] = (data[:, :, ivar] - smapDict[tempvar][2]
                                ) / smapDict[tempvar][3]
    if rmNan:
        data[np.isnan(data)] = 0
    return data
def __init__(self, *, subset="All", tRange):
    """Build a dataframe over a gage subset and a time range.

    Parameters
    ----------
    subset : "All" or list
        "All" selects every gage in gageDict; a list of USGS ids selects
        just those ids (each must exist in gageDict["id"], otherwise an
        IndexError is raised by the lookup below, as before).
    tRange : list
        [start, end] dates expanded to a time axis via tRange2Array.

    Raises
    ------
    ValueError
        If subset is neither "All" nor a list (ValueError subclasses the
        previously raised Exception, so existing handlers still work).
    """
    self.subset = subset
    if subset == "All":  # change to read subset later
        self.usgsId = gageDict["id"]
        crd = np.zeros([len(self.usgsId), 2])
        crd[:, 0] = gageDict["lat"]
        crd[:, 1] = gageDict["lon"]
        self.crd = crd
    elif isinstance(subset, list):
        self.usgsId = np.array(subset)
        crd = np.zeros([len(self.usgsId), 2])
        # Fix: np.full(..., np.nan).astype(int) relies on an undefined
        # NaN->int cast; allocate an int array directly since every slot
        # is assigned in the loop.
        ind = np.empty(len(self.usgsId), dtype=int)
        for ii in range(len(self.usgsId)):
            # First occurrence of each requested id in the master list.
            ind[ii] = np.where(gageDict["id"] == self.usgsId[ii])[0][0]
        crd[:, 0] = gageDict["lat"][ind]
        crd[:, 1] = gageDict["lon"][ind]
        self.crd = crd
    else:
        raise ValueError("The format of subset is not correct!")
    self.time = tRange2Array(tRange)
def readCSV(dataDir, dataRange, varLst, usgsIdLst):
    """Read per-gage CSV data plus its normalization statistics.

    Values <= -999 (the -9999 missing-data flag) are replaced with NaN.
    Statistics are computed once with calStat and cached as
    ``statDictCSV.json`` inside dataDir; later calls reuse the cache.

    Returns
    -------
    tuple
        (x, tdataRangeLst, statcsvDict): data shaped [gage, time, var],
        the time axis, and the statistics dict loaded from the cache.
    """
    timeAxis = tRange2Array(dataRange)
    nT = len(timeAxis)
    data = np.empty([len(usgsIdLst), nT, len(varLst)])
    for idx, gage in enumerate(usgsIdLst):
        data[idx, :, :] = readcsvGage(dataDir, gage, varLst, nT)
    # make -9999 as np.nan
    data[data <= -999] = np.nan
    # Compute normalization statistics once and cache them to JSON.
    statFile = os.path.join(dataDir, "statDictCSV.json")
    if not os.path.isfile(statFile):
        freshStat = {var: calStat(data[:, :, iv])
                     for iv, var in enumerate(varLst)}
        with open(statFile, "w") as fp:
            json.dump(freshStat, fp, indent=4)
    with open(statFile, "r") as fp:
        statcsvDict = json.load(fp)
    return data, timeAxis, statcsvDict
# Test each trained model, mask observations outside the per-basin testing
# period, and persist predictions/observations.
predLst_res = list()
obsLst_res = list()
statDictLst = []
for i, out in enumerate(outLst):
    # df, pred, obs = master.test(out, TempTarget, forcing_path[i], attr_path[i], tRange=tRange, subset=subset, basinnorm=True, epoch=TestEPOCH, reTest=True)
    df, pred, obs, x = master.test(out, TempTarget, forcing_path, attr_path,
                                   D_N_P_path, tRange=tRange, subset=subset,
                                   basinnorm=False, epoch=TestEPOCH,
                                   reTest=True)
    # change the units ft3/s to m3/s
    # obs = obs * 0.0283168
    # pred = pred * 0.0283168
    # Substitute obs with NaN outside the testing period, according to the
    # D_N_P file (per-basin S_Testing/E_Testing timestamp columns).
    D_N_P = pd.read_excel(D_N_P_path)
    for ii in range(obs.shape[0]):
        tLst1 = D_N_P.iloc[ii]['S_Testing']
        tLst2 = D_N_P.iloc[ii]['E_Testing']
        # NOTE(review): time.tRange2Array assumes "time" is the project
        # time module, not the stdlib "time" — confirm the import.
        tArray1 = time.tRange2Array([
            int(str(tLst1.year) + str(tLst1.month).zfill(2)
                + str(tLst1.day).zfill(2)),
            int(str(tLst2.year) + str(tLst2.month).zfill(2)
                + str(tLst2.day).zfill(2))])
        tArray2 = time.tRange2Array(tRange)
        C, ind1, ind2 = np.intersect1d(tArray1, tArray2,
                                       return_indices=True)
        # BUG FIX: the original used obs[ii, ~ind2, :]. ind2 holds integer
        # positions, so "~ind2" is the bitwise NOT -(i+1) — it indexes from
        # the END of the time axis rather than selecting the complement of
        # the testing period. Build an explicit boolean complement mask.
        outside = np.ones(obs.shape[1], dtype=bool)
        outside[ind2] = False
        obs[ii, outside, :] = np.nan
    #####################################################################
    predLst.append(pred)  # the prediction list for all the models
    obsLst.append(obs)
    np.save(os.path.join(out, 'pred.npy'), pred)
    np.save(os.path.join(out, 'obs.npy'), obs)
    # forcings were saved previously in the out directory
    f = np.load(os.path.join(out, 'x.npy'))
    T = (f[:, :, 3] + f[:, :, 4]) / 2  # mean air T for T_residual
# read camels dataset import os import pandas as pd import numpy as np import datetime as dt from hydroDL import utils, pathCamels from hydroDL.utils.time import tRange2Array from pandas.api.types import is_numeric_dtype, is_string_dtype import time import json from hydroDL.dataset import Dataframe # module variable tRange = [19800101, 20150101] tRangeobs = [19790101, 20150101] # streamflow observations tLst = tRange2Array(tRange) tLstobs = tRange2Array(tRangeobs) nt = len(tLst) ntobs = len(tLstobs) # forcingLst = ['dayl', 'prcp', 'srad', 'swe', 'tmax', 'tmin', 'vp'] forcingLst = ["dayl", "prcp", "srad", "tmax", "tmin", "vp"] attrLstSel = [ "elev_mean", "slope_mean", "area_gages2", "frac_forest", "lai_max", "lai_diff", "dom_land_cover_frac", "dom_land_cover", "root_depth_50",